      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Instrument IR to perform memory checking operations.         ---*/
      4 /*---                                               mc_translate.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of MemCheck, a heavyweight Valgrind tool for
      9    detecting memory errors.
     10 
     11    Copyright (C) 2000-2011 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_tool_basics.h"
     33 #include "pub_tool_hashtable.h"     // For mc_include.h
     34 #include "pub_tool_libcassert.h"
     35 #include "pub_tool_libcprint.h"
     36 #include "pub_tool_tooliface.h"
     37 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
     38 #include "pub_tool_xarray.h"
     39 #include "pub_tool_mallocfree.h"
     40 #include "pub_tool_libcbase.h"
     41 
     42 #include "mc_include.h"
     43 
     44 
     45 /* FIXMEs JRS 2011-June-16.
     46 
     47    Check the interpretation for vector narrowing and widening ops,
      48    particularly the saturating ones.  I suspect they are overly
      49    pessimistic and/or wrong.
     50 */
     51 
     52 /* This file implements the Memcheck instrumentation, and in
     53    particular contains the core of its undefined value detection
     54    machinery.  For a comprehensive background of the terminology,
     55    algorithms and rationale used herein, read:
     56 
     57      Using Valgrind to detect undefined value errors with
     58      bit-precision
     59 
     60      Julian Seward and Nicholas Nethercote
     61 
     62      2005 USENIX Annual Technical Conference (General Track),
     63      Anaheim, CA, USA, April 10-15, 2005.
     64 
     65    ----
     66 
     67    Here is as good a place as any to record exactly when V bits are and
     68    should be checked, why, and what function is responsible.
     69 
     70 
     71    Memcheck complains when an undefined value is used:
     72 
     73    1. In the condition of a conditional branch.  Because it could cause
     74       incorrect control flow, and thus cause incorrect externally-visible
     75       behaviour.  [mc_translate.c:complainIfUndefined]
     76 
     77    2. As an argument to a system call, or as the value that specifies
     78       the system call number.  Because it could cause an incorrect
     79       externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
     80 
     81    3. As the address in a load or store.  Because it could cause an
     82       incorrect value to be used later, which could cause externally-visible
     83       behaviour (eg. via incorrect control flow or an incorrect system call
     84       argument)  [complainIfUndefined]
     85 
     86    4. As the target address of a branch.  Because it could cause incorrect
     87       control flow.  [complainIfUndefined]
     88 
     89    5. As an argument to setenv, unsetenv, or putenv.  Because it could put
     90       an incorrect value into the external environment.
     91       [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
     92 
     93    6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
     94       [complainIfUndefined]
     95 
     96    7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
     97       VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
     98       requested it.  [in memcheck.h]
     99 
    100 
    101    Memcheck also complains, but should not, when an undefined value is used:
    102 
    103    8. As the shift value in certain SIMD shift operations (but not in the
    104       standard integer shift operations).  This inconsistency is due to
     105       historical reasons.  [complainIfUndefined]
    106 
    107 
    108    Memcheck does not complain, but should, when an undefined value is used:
    109 
    110    9. As an input to a client request.  Because the client request may
    111       affect the visible behaviour -- see bug #144362 for an example
    112       involving the malloc replacements in vg_replace_malloc.c and
    113       VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
    114       isn't identified.  That bug report also has some info on how to solve
    115       the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
    116 
    117 
    118    In practice, 1 and 2 account for the vast majority of cases.
    119 */
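         /* Added illustration (not from the original sources): case 1 above is
            what produces the familiar "Conditional jump or move depends on
            uninitialised value(s)" report.  For client code along the lines of

               int x;               // never initialised
               if (x > 42) foo();   // branch condition carries undefined V bits

            the comparison result feeds the guard of an IRStmt_Exit, and
            complainIfUndefined is applied to that guard atom; its V bits are
            nonzero, so the complaint helper is called at run time. */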
    120 
    121 /*------------------------------------------------------------*/
    122 /*--- Forward decls                                        ---*/
    123 /*------------------------------------------------------------*/
    124 
    125 struct _MCEnv;
    126 
    127 static IRType  shadowTypeV ( IRType ty );
    128 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
    129 static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
    130 
    131 static IRExpr *i128_const_zero(void);
    132 
    133 /*------------------------------------------------------------*/
    134 /*--- Memcheck running state, and tmp management.          ---*/
    135 /*------------------------------------------------------------*/
    136 
    137 /* Carries info about a particular tmp.  The tmp's number is not
    138    recorded, as this is implied by (equal to) its index in the tmpMap
    139    in MCEnv.  The tmp's type is also not recorded, as this is present
    140    in MCEnv.sb->tyenv.
    141 
    142    When .kind is Orig, .shadowV and .shadowB may give the identities
    143    of the temps currently holding the associated definedness (shadowV)
    144    and origin (shadowB) values, or these may be IRTemp_INVALID if code
    145    to compute such values has not yet been emitted.
    146 
     147    When .kind is VSh or BSh then the tmp holds a V- or B- value,
    148    and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
    149    illogical for a shadow tmp itself to be shadowed.
    150 */
    151 typedef
    152    enum { Orig=1, VSh=2, BSh=3 }
    153    TempKind;
    154 
    155 typedef
    156    struct {
    157       TempKind kind;
    158       IRTemp   shadowV;
    159       IRTemp   shadowB;
    160    }
    161    TempMapEnt;
    162 
    163 
    164 /* Carries around state during memcheck instrumentation. */
    165 typedef
    166    struct _MCEnv {
    167       /* MODIFIED: the superblock being constructed.  IRStmts are
    168          added. */
    169       IRSB* sb;
    170       Bool  trace;
    171 
    172       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
    173          current kind and possibly shadow temps for each temp in the
    174          IRSB being constructed.  Note that it does not contain the
    175          type of each tmp.  If you want to know the type, look at the
    176          relevant entry in sb->tyenv.  It follows that at all times
    177          during the instrumentation process, the valid indices for
    178          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
    179          total number of Orig, V- and B- temps allocated so far.
     180    the total number of Orig, V- and B- temps allocated so far.
    181          The reason for this strange split (types in one place, all
    182          other info in another) is that we need the types to be
    183          attached to sb so as to make it possible to do
     184          "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
    185          instrumentation process. */
    186       XArray* /* of TempMapEnt */ tmpMap;
    187 
    188       /* MODIFIED: indicates whether "bogus" literals have so far been
    189          found.  Starts off False, and may change to True. */
    190       Bool    bogusLiterals;
    191 
    192       /* READONLY: the guest layout.  This indicates which parts of
    193          the guest state should be regarded as 'always defined'. */
    194       VexGuestLayout* layout;
    195 
    196       /* READONLY: the host word type.  Needed for constructing
    197          arguments of type 'HWord' to be passed to helper functions.
    198          Ity_I32 or Ity_I64 only. */
    199       IRType hWordTy;
    200    }
    201    MCEnv;
    202 
    203 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
    204    demand), as they are encountered.  This is for two reasons.
    205 
    206    (1) (less important reason): Many original tmps are unused due to
     207    initial IR optimisation, and we do not want to waste space in tables
    208    tracking them.
    209 
    210    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
     211    table indexed [0 .. n_temps-1], which gives the current shadow for
     212    each original tmp, or IRTemp_INVALID if none is so far assigned.
    213    It is necessary to support making multiple assignments to a shadow
    214    -- specifically, after testing a shadow for definedness, it needs
    215    to be made defined.  But IR's SSA property disallows this.
    216 
    217    (2) (more important reason): Therefore, when a shadow needs to get
    218    a new value, a new temporary is created, the value is assigned to
    219    that, and the tmpMap is updated to reflect the new binding.
    220 
    221    A corollary is that if the tmpMap maps a given tmp to
    222    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
    223    there's a read-before-write error in the original tmps.  The IR
    224    sanity checker should catch all such anomalies, however.
    225 */
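         /* Added sketch (temp names invented for exposition): suppose original
            tmp t5 is currently shadowed by V-tmp t17, and t17 has just been
            tested by complainIfUndefined.  Rather than assigning a second value
            to t17 (which would break SSA), the instrumenter does roughly

               newShadowTmpV(mce, t5);                    // fresh V-tmp, say t23
               assign('V', mce, findShadowTmpV(mce, t5),  // now yields t23
                      definedOfType(ty));                 // t23 := all-defined

            and tmpMap[t5].shadowV thereafter names t23, so later uses of t5's
            shadow see the new, defined value. */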
    226 
    227 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
    228    both the table in mce->sb and to our auxiliary mapping.  Note that
    229    newTemp may cause mce->tmpMap to resize, hence previous results
    230    from VG_(indexXA)(mce->tmpMap) are invalidated. */
    231 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
    232 {
    233    Word       newIx;
    234    TempMapEnt ent;
    235    IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
    236    ent.kind    = kind;
    237    ent.shadowV = IRTemp_INVALID;
    238    ent.shadowB = IRTemp_INVALID;
    239    newIx = VG_(addToXA)( mce->tmpMap, &ent );
    240    tl_assert(newIx == (Word)tmp);
    241    return tmp;
    242 }
    243 
    244 
    245 /* Find the tmp currently shadowing the given original tmp.  If none
    246    so far exists, allocate one.  */
    247 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
    248 {
    249    TempMapEnt* ent;
    250    /* VG_(indexXA) range-checks 'orig', hence no need to check
    251       here. */
    252    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    253    tl_assert(ent->kind == Orig);
    254    if (ent->shadowV == IRTemp_INVALID) {
    255       IRTemp tmpV
    256         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
    257       /* newTemp may cause mce->tmpMap to resize, hence previous results
    258          from VG_(indexXA) are invalid. */
    259       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    260       tl_assert(ent->kind == Orig);
    261       tl_assert(ent->shadowV == IRTemp_INVALID);
    262       ent->shadowV = tmpV;
    263    }
    264    return ent->shadowV;
    265 }
    266 
    267 /* Allocate a new shadow for the given original tmp.  This means any
    268    previous shadow is abandoned.  This is needed because it is
    269    necessary to give a new value to a shadow once it has been tested
    270    for undefinedness, but unfortunately IR's SSA property disallows
    271    this.  Instead we must abandon the old shadow, allocate a new one
    272    and use that instead.
    273 
    274    This is the same as findShadowTmpV, except we don't bother to see
    275    if a shadow temp already existed -- we simply allocate a new one
    276    regardless. */
    277 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
    278 {
    279    TempMapEnt* ent;
    280    /* VG_(indexXA) range-checks 'orig', hence no need to check
    281       here. */
    282    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    283    tl_assert(ent->kind == Orig);
    284    if (1) {
    285       IRTemp tmpV
    286         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
    287       /* newTemp may cause mce->tmpMap to resize, hence previous results
    288          from VG_(indexXA) are invalid. */
    289       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    290       tl_assert(ent->kind == Orig);
    291       ent->shadowV = tmpV;
    292    }
    293 }
    294 
    295 
    296 /*------------------------------------------------------------*/
    297 /*--- IRAtoms -- a subset of IRExprs                       ---*/
    298 /*------------------------------------------------------------*/
    299 
    300 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
    301    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
    302    input, most of this code deals in atoms.  Usefully, a value atom
    303    always has a V-value which is also an atom: constants are shadowed
    304    by constants, and temps are shadowed by the corresponding shadow
    305    temporary. */
    306 
    307 typedef  IRExpr  IRAtom;
    308 
    309 /* (used for sanity checks only): is this an atom which looks
    310    like it's from original code? */
    311 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
    312 {
    313    if (a1->tag == Iex_Const)
    314       return True;
    315    if (a1->tag == Iex_RdTmp) {
    316       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
    317       return ent->kind == Orig;
    318    }
    319    return False;
    320 }
    321 
    322 /* (used for sanity checks only): is this an atom which looks
    323    like it's from shadow code? */
    324 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
    325 {
    326    if (a1->tag == Iex_Const)
    327       return True;
    328    if (a1->tag == Iex_RdTmp) {
    329       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
    330       return ent->kind == VSh || ent->kind == BSh;
    331    }
    332    return False;
    333 }
    334 
    335 /* (used for sanity checks only): check that both args are atoms and
    336    are identically-kinded. */
    337 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
    338 {
    339    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
    340       return True;
    341    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
    342       return True;
    343    return False;
    344 }
    345 
    346 
    347 /*------------------------------------------------------------*/
    348 /*--- Type management                                      ---*/
    349 /*------------------------------------------------------------*/
    350 
    351 /* Shadow state is always accessed using integer types.  This returns
    352    an integer type with the same size (as per sizeofIRType) as the
    353    given type.  The only valid shadow types are Bit, I8, I16, I32,
    354    I64, I128, V128. */
    355 
    356 static IRType shadowTypeV ( IRType ty )
    357 {
    358    switch (ty) {
    359       case Ity_I1:
    360       case Ity_I8:
    361       case Ity_I16:
    362       case Ity_I32:
    363       case Ity_I64:
    364       case Ity_I128: return ty;
    365       case Ity_F32:  return Ity_I32;
    366       case Ity_F64:  return Ity_I64;
    367       case Ity_F128: return Ity_I128;
    368       case Ity_V128: return Ity_V128;
    369       default: ppIRType(ty);
    370                VG_(tool_panic)("memcheck:shadowTypeV");
    371    }
    372 }
    373 
    374 /* Produce a 'defined' value of the given shadow type.  Should only be
     375    supplied shadow types (I1/I8/I16/I32/I64/I128/V128). */
    376 static IRExpr* definedOfType ( IRType ty ) {
    377    switch (ty) {
    378       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
    379       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
    380       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
    381       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
    382       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
    383       case Ity_I128: return i128_const_zero();
    384       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
    385       default:       VG_(tool_panic)("memcheck:definedOfType");
    386    }
    387 }
    388 
    389 
    390 /*------------------------------------------------------------*/
    391 /*--- Constructing IR fragments                            ---*/
    392 /*------------------------------------------------------------*/
    393 
    394 /* add stmt to a bb */
    395 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
    396    if (mce->trace) {
    397       VG_(printf)("  %c: ", cat);
    398       ppIRStmt(st);
    399       VG_(printf)("\n");
    400    }
    401    addStmtToIRSB(mce->sb, st);
    402 }
    403 
    404 /* assign value to tmp */
    405 static inline
    406 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
    407    stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
    408 }
    409 
    410 /* build various kinds of expressions */
    411 #define triop(_op, _arg1, _arg2, _arg3) \
    412                                  IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
    413 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
    414 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
    415 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
    416 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
    417 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
    418 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
    419 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
    420 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
    421 
    422 /* Bind the given expression to a new temporary, and return the
    423    temporary.  This effectively converts an arbitrary expression into
    424    an atom.
    425 
    426    'ty' is the type of 'e' and hence the type that the new temporary
    427    needs to be.  But passing it in is redundant, since we can deduce
    428    the type merely by inspecting 'e'.  So at least use that fact to
    429    assert that the two types agree. */
    430 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
    431 {
    432    TempKind k;
    433    IRTemp   t;
    434    IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
    435    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
    436    switch (cat) {
    437       case 'V': k = VSh;  break;
    438       case 'B': k = BSh;  break;
    439       case 'C': k = Orig; break;
    440                 /* happens when we are making up new "orig"
    441                    expressions, for IRCAS handling */
    442       default: tl_assert(0);
    443    }
    444    t = newTemp(mce, ty, k);
    445    assign(cat, mce, t, e);
    446    return mkexpr(t);
    447 }
    448 
    449 
    450 /*------------------------------------------------------------*/
    451 /*--- Helper functions for 128-bit ops                     ---*/
    452 /*------------------------------------------------------------*/
    453 static IRExpr *i128_const_zero(void)
    454 {
    455   return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
    456                IRExpr_Const(IRConst_U64(0)));
    457 }
    458 
    459 /* There are no 128-bit loads and/or stores. So we do not need to worry
     460    about that in expr2vbits_Load. */
    461 
    462 /*------------------------------------------------------------*/
    463 /*--- Constructing definedness primitive ops               ---*/
    464 /*------------------------------------------------------------*/
    465 
    466 /* --------- Defined-if-either-defined --------- */
    467 
    468 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    469    tl_assert(isShadowAtom(mce,a1));
    470    tl_assert(isShadowAtom(mce,a2));
    471    return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
    472 }
    473 
    474 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    475    tl_assert(isShadowAtom(mce,a1));
    476    tl_assert(isShadowAtom(mce,a2));
    477    return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
    478 }
    479 
    480 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    481    tl_assert(isShadowAtom(mce,a1));
    482    tl_assert(isShadowAtom(mce,a2));
    483    return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
    484 }
    485 
    486 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    487    tl_assert(isShadowAtom(mce,a1));
    488    tl_assert(isShadowAtom(mce,a2));
    489    return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
    490 }
    491 
    492 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    493    tl_assert(isShadowAtom(mce,a1));
    494    tl_assert(isShadowAtom(mce,a2));
    495    return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
    496 }
    497 
    498 /* --------- Undefined-if-either-undefined --------- */
    499 
    500 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    501    tl_assert(isShadowAtom(mce,a1));
    502    tl_assert(isShadowAtom(mce,a2));
    503    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
    504 }
    505 
    506 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    507    tl_assert(isShadowAtom(mce,a1));
    508    tl_assert(isShadowAtom(mce,a2));
    509    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
    510 }
    511 
    512 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    513    tl_assert(isShadowAtom(mce,a1));
    514    tl_assert(isShadowAtom(mce,a2));
    515    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
    516 }
    517 
    518 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    519    tl_assert(isShadowAtom(mce,a1));
    520    tl_assert(isShadowAtom(mce,a2));
    521    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
    522 }
    523 
    524 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    525    IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
    526    tl_assert(isShadowAtom(mce,a1));
    527    tl_assert(isShadowAtom(mce,a2));
    528    tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
    529    tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
    530    tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
    531    tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
    532    tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
    533    tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
    534 
    535    return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
    536 }
    537 
    538 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    539    tl_assert(isShadowAtom(mce,a1));
    540    tl_assert(isShadowAtom(mce,a2));
    541    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
    542 }
    543 
    544 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
    545    switch (vty) {
    546       case Ity_I8:   return mkUifU8(mce, a1, a2);
    547       case Ity_I16:  return mkUifU16(mce, a1, a2);
    548       case Ity_I32:  return mkUifU32(mce, a1, a2);
    549       case Ity_I64:  return mkUifU64(mce, a1, a2);
    550       case Ity_I128: return mkUifU128(mce, a1, a2);
    551       case Ity_V128: return mkUifUV128(mce, a1, a2);
    552       default:
    553          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
    554          VG_(tool_panic)("memcheck:mkUifU");
    555    }
    556 }
    557 
    558 /* --------- The Left-family of operations. --------- */
    559 
    560 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
    561    tl_assert(isShadowAtom(mce,a1));
    562    return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
    563 }
    564 
    565 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
    566    tl_assert(isShadowAtom(mce,a1));
    567    return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
    568 }
    569 
    570 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
    571    tl_assert(isShadowAtom(mce,a1));
    572    return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
    573 }
    574 
    575 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
    576    tl_assert(isShadowAtom(mce,a1));
    577    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
    578 }
    579 
    580 /* --------- 'Improvement' functions for AND/OR. --------- */
    581 
    582 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
    583    defined (0); all other -> undefined (1).
    584 */
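         /* Worked 8-bit example (added for exposition): take data = 0x0F with
            vbits = 0x00, i.e. this And8 operand is fully defined.  ImproveAND8
            returns data|vbits = 0x0F.  The 0s in bits 7..4 mean those result
            bits are certainly defined: a defined 0 in data forces the And
            output to 0 there, no matter how undefined the other operand is.
            Bits 3..0 offer no such guarantee from this operand alone. */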
    585 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    586 {
    587    tl_assert(isOriginalAtom(mce, data));
    588    tl_assert(isShadowAtom(mce, vbits));
    589    tl_assert(sameKindedAtoms(data, vbits));
    590    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
    591 }
    592 
    593 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    594 {
    595    tl_assert(isOriginalAtom(mce, data));
    596    tl_assert(isShadowAtom(mce, vbits));
    597    tl_assert(sameKindedAtoms(data, vbits));
    598    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
    599 }
    600 
    601 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    602 {
    603    tl_assert(isOriginalAtom(mce, data));
    604    tl_assert(isShadowAtom(mce, vbits));
    605    tl_assert(sameKindedAtoms(data, vbits));
    606    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
    607 }
    608 
    609 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    610 {
    611    tl_assert(isOriginalAtom(mce, data));
    612    tl_assert(isShadowAtom(mce, vbits));
    613    tl_assert(sameKindedAtoms(data, vbits));
    614    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
    615 }
    616 
    617 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    618 {
    619    tl_assert(isOriginalAtom(mce, data));
    620    tl_assert(isShadowAtom(mce, vbits));
    621    tl_assert(sameKindedAtoms(data, vbits));
    622    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
    623 }
    624 
    625 /* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
    626    defined (0); all other -> undefined (1).
    627 */
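         /* Worked 8-bit example (added for exposition): take data = 0xF0 with
            vbits = 0x00.  ImproveOR8 returns ~data|vbits = 0x0F.  The 0s in
            bits 7..4 mean those result bits are certainly defined: a defined 1
            in data forces the Or output to 1 there, regardless of the other
            operand. */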
    628 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    629 {
    630    tl_assert(isOriginalAtom(mce, data));
    631    tl_assert(isShadowAtom(mce, vbits));
    632    tl_assert(sameKindedAtoms(data, vbits));
    633    return assignNew(
    634              'V', mce, Ity_I8,
    635              binop(Iop_Or8,
    636                    assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
    637                    vbits) );
    638 }
    639 
    640 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    641 {
    642    tl_assert(isOriginalAtom(mce, data));
    643    tl_assert(isShadowAtom(mce, vbits));
    644    tl_assert(sameKindedAtoms(data, vbits));
    645    return assignNew(
    646              'V', mce, Ity_I16,
    647              binop(Iop_Or16,
    648                    assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
    649                    vbits) );
    650 }
    651 
    652 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    653 {
    654    tl_assert(isOriginalAtom(mce, data));
    655    tl_assert(isShadowAtom(mce, vbits));
    656    tl_assert(sameKindedAtoms(data, vbits));
    657    return assignNew(
    658              'V', mce, Ity_I32,
    659              binop(Iop_Or32,
    660                    assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
    661                    vbits) );
    662 }
    663 
    664 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    665 {
    666    tl_assert(isOriginalAtom(mce, data));
    667    tl_assert(isShadowAtom(mce, vbits));
    668    tl_assert(sameKindedAtoms(data, vbits));
    669    return assignNew(
    670              'V', mce, Ity_I64,
    671              binop(Iop_Or64,
    672                    assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
    673                    vbits) );
    674 }
    675 
    676 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    677 {
    678    tl_assert(isOriginalAtom(mce, data));
    679    tl_assert(isShadowAtom(mce, vbits));
    680    tl_assert(sameKindedAtoms(data, vbits));
    681    return assignNew(
    682              'V', mce, Ity_V128,
    683              binop(Iop_OrV128,
    684                    assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
    685                    vbits) );
    686 }
    687 
    688 /* --------- Pessimising casts. --------- */
    689 
    690 /* The function returns an expression of type DST_TY. If any of the VBITS
    691    is undefined (value == 1) the resulting expression has all bits set to
    692    1. Otherwise, all bits are 0. */
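         /* Example (added for exposition): PCasting the I32 vbits value
            0x00000400 (a single undefined bit) to Ity_I64 produces
            0xFFFFFFFFFFFFFFFF, while PCasting 0x00000000 produces 0.  The cast
            preserves only "is anything undefined at all?", not which bit. */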
    693 
    694 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
    695 {
    696    IRType  src_ty;
    697    IRAtom* tmp1;
    698    /* Note, dst_ty is a shadow type, not an original type. */
    699    /* First of all, collapse vbits down to a single bit. */
    700    tl_assert(isShadowAtom(mce,vbits));
    701    src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
    702 
    703    /* Fast-track some common cases */
    704    if (src_ty == Ity_I32 && dst_ty == Ity_I32)
    705       return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
    706 
    707    if (src_ty == Ity_I64 && dst_ty == Ity_I64)
    708       return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
    709 
    710    if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
    711       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
    712       return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
    713    }
    714 
    715    /* Else do it the slow way .. */
    716    tmp1   = NULL;
    717    switch (src_ty) {
    718       case Ity_I1:
    719          tmp1 = vbits;
    720          break;
    721       case Ity_I8:
    722          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
    723          break;
    724       case Ity_I16:
    725          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
    726          break;
    727       case Ity_I32:
    728          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
    729          break;
    730       case Ity_I64:
    731          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
    732          break;
    733       case Ity_I128: {
    734          /* Gah.  Chop it in half, OR the halves together, and compare
    735             that with zero. */
    736          IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
    737          IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
    738          IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
    739          tmp1         = assignNew('V', mce, Ity_I1,
    740                                        unop(Iop_CmpNEZ64, tmp4));
    741          break;
    742       }
    743       default:
    744          ppIRType(src_ty);
    745          VG_(tool_panic)("mkPCastTo(1)");
    746    }
    747    tl_assert(tmp1);
    748    /* Now widen up to the dst type. */
    749    switch (dst_ty) {
    750       case Ity_I1:
    751          return tmp1;
    752       case Ity_I8:
    753          return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
    754       case Ity_I16:
    755          return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
    756       case Ity_I32:
    757          return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
    758       case Ity_I64:
    759          return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
    760       case Ity_V128:
    761          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
    762          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
    763          return tmp1;
    764       case Ity_I128:
    765          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
    766          tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
    767          return tmp1;
    768       default:
    769          ppIRType(dst_ty);
    770          VG_(tool_panic)("mkPCastTo(2)");
    771    }
    772 }
    773 
    774 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
    775 /*
    776    Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
     777    PCasting to Ity_I1.  However, sometimes it is necessary to be more
    778    accurate.  The insight is that the result is defined if two
    779    corresponding bits can be found, one from each argument, so that
    780    both bits are defined but are different -- that makes EQ say "No"
    781    and NE say "Yes".  Hence, we compute an improvement term and DifD
    782    it onto the "normal" (UifU) result.
    783 
    784    The result is:
    785 
    786    PCastTo<1> (
    787       -- naive version
    788       PCastTo<sz>( UifU<sz>(vxx, vyy) )
    789 
    790       `DifD<sz>`
    791 
    792       -- improvement term
    793       PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
    794    )
    795 
    796    where
    797      vec contains 0 (defined) bits where the corresponding arg bits
    798      are defined but different, and 1 bits otherwise.
    799 
    800      vec = Or<sz>( vxx,   // 0 iff bit defined
    801                    vyy,   // 0 iff bit defined
    802                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
    803                  )
    804 
    805      If any bit of vec is 0, the result is defined and so the
    806      improvement term should produce 0...0, else it should produce
    807      1...1.
    808 
    809      Hence require for the improvement term:
    810 
    811         if vec == 1...1 then 1...1 else 0...0
    812      ->
    813         PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
    814 
    815    This was extensively re-analysed and checked on 6 July 05.
    816 */
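         /* Concrete illustration (added for exposition): suppose bit 31 of xx
            is a defined 0, bit 31 of yy is a defined 1, and every other bit of
            both operands is undefined.  The naive UifU+PCast term claims the
            comparison result is undefined.  But bit 31 of vec is 0 (both bits
            defined, and they differ), so vec != 1...1, the improvement term is
            0...0, and the DifD forces the final verdict to "defined" --
            correctly, since the two operands cannot possibly be equal. */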
    817 static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
    818                                     IRType  ty,
    819                                     IRAtom* vxx, IRAtom* vyy,
    820                                     IRAtom* xx,  IRAtom* yy )
    821 {
    822    IRAtom *naive, *vec, *improvement_term;
    823    IRAtom *improved, *final_cast, *top;
    824    IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
    825 
    826    tl_assert(isShadowAtom(mce,vxx));
    827    tl_assert(isShadowAtom(mce,vyy));
    828    tl_assert(isOriginalAtom(mce,xx));
    829    tl_assert(isOriginalAtom(mce,yy));
    830    tl_assert(sameKindedAtoms(vxx,xx));
    831    tl_assert(sameKindedAtoms(vyy,yy));
    832 
    833    switch (ty) {
    834       case Ity_I32:
    835          opOR   = Iop_Or32;
    836          opDIFD = Iop_And32;
    837          opUIFU = Iop_Or32;
    838          opNOT  = Iop_Not32;
    839          opXOR  = Iop_Xor32;
    840          opCMP  = Iop_CmpEQ32;
    841          top    = mkU32(0xFFFFFFFF);
    842          break;
    843       case Ity_I64:
    844          opOR   = Iop_Or64;
    845          opDIFD = Iop_And64;
    846          opUIFU = Iop_Or64;
    847          opNOT  = Iop_Not64;
    848          opXOR  = Iop_Xor64;
    849          opCMP  = Iop_CmpEQ64;
    850          top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
    851          break;
    852       default:
    853          VG_(tool_panic)("expensiveCmpEQorNE");
    854    }
    855 
    856    naive
    857       = mkPCastTo(mce,ty,
    858                   assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
    859 
    860    vec
    861       = assignNew(
    862            'V', mce,ty,
    863            binop( opOR,
    864                   assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
    865                   assignNew(
    866                      'V', mce,ty,
    867                      unop( opNOT,
    868                            assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
    869 
    870    improvement_term
    871       = mkPCastTo( mce,ty,
    872                    assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
    873 
    874    improved
    875       = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
    876 
    877    final_cast
    878       = mkPCastTo( mce, Ity_I1, improved );
    879 
    880    return final_cast;
    881 }
    882 
    883 
    884 /* --------- Semi-accurate interpretation of CmpORD. --------- */
    885 
    886 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
    887 
    888       CmpORD32S(x,y) = 1<<3   if  x <s y
    889                      = 1<<2   if  x >s y
    890                      = 1<<1   if  x == y
    891 
    892    and similarly the unsigned variant.  The default interpretation is:
    893 
    894       CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
    895                                   & (7<<1)
    896 
    897    The "& (7<<1)" reflects the fact that all result bits except 3,2,1
    898    are zero and therefore defined (viz, zero).
    899 
    900    Also deal with a special case better:
    901 
    902       CmpORD32S(x,0)
    903 
    904    Here, bit 3 (LT) of the result is a copy of the top bit of x and
     905    will be defined even if the rest of x isn't, in which case we do:
    906 
    907       CmpORD32S#(x,x#,0,{impliedly 0}#)
    908          = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
    909            | (x# >>u 31) << 3      -- LT# = x#[31]
    910 
    911    Analogous handling for CmpORD64{S,U}.
    912 */
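         /* Numeric example (added for exposition): for CmpORD32S(x, 0) where
            only x's sign bit is defined (x# = 0x7FFFFFFF), the special case
            gives (PCast(x#) & (3<<1)) | ((x# >>u 31) << 3) = 6 | 0: bits 2..1
            (GT, EQ) come out undefined, but bit 3 (LT) is x#[31] = 0, i.e.
            defined -- the sign bit alone settles "x <s 0". */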
    913 static Bool isZeroU32 ( IRAtom* e )
    914 {
    915    return
    916       toBool( e->tag == Iex_Const
    917               && e->Iex.Const.con->tag == Ico_U32
    918               && e->Iex.Const.con->Ico.U32 == 0 );
    919 }
    920 
    921 static Bool isZeroU64 ( IRAtom* e )
    922 {
    923    return
    924       toBool( e->tag == Iex_Const
    925               && e->Iex.Const.con->tag == Ico_U64
    926               && e->Iex.Const.con->Ico.U64 == 0 );
    927 }
    928 
    929 static IRAtom* doCmpORD ( MCEnv*  mce,
    930                           IROp    cmp_op,
    931                           IRAtom* xxhash, IRAtom* yyhash,
    932                           IRAtom* xx,     IRAtom* yy )
    933 {
    934    Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
    935    Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
    936    IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
    937    IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
    938    IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
    939    IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
    940    IRType ty     = m64 ? Ity_I64   : Ity_I32;
    941    Int    width  = m64 ? 64        : 32;
    942 
    943    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
    944 
    945    IRAtom* threeLeft1 = NULL;
    946    IRAtom* sevenLeft1 = NULL;
    947 
    948    tl_assert(isShadowAtom(mce,xxhash));
    949    tl_assert(isShadowAtom(mce,yyhash));
    950    tl_assert(isOriginalAtom(mce,xx));
    951    tl_assert(isOriginalAtom(mce,yy));
    952    tl_assert(sameKindedAtoms(xxhash,xx));
    953    tl_assert(sameKindedAtoms(yyhash,yy));
    954    tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
    955              || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
    956 
    957    if (0) {
    958       ppIROp(cmp_op); VG_(printf)(" ");
    959       ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
    960    }
    961 
    962    if (syned && isZero(yy)) {
    963       /* fancy interpretation */
    964       /* if yy is zero, then it must be fully defined (zero#). */
    965       tl_assert(isZero(yyhash));
    966       threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
    967       return
    968          binop(
    969             opOR,
    970             assignNew(
    971                'V', mce,ty,
    972                binop(
    973                   opAND,
    974                   mkPCastTo(mce,ty, xxhash),
    975                   threeLeft1
    976                )),
    977             assignNew(
    978                'V', mce,ty,
    979                binop(
    980                   opSHL,
    981                   assignNew(
    982                      'V', mce,ty,
    983                      binop(opSHR, xxhash, mkU8(width-1))),
    984                   mkU8(3)
    985                ))
     986          );
    987    } else {
    988       /* standard interpretation */
    989       sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
    990       return
    991          binop(
    992             opAND,
    993             mkPCastTo( mce,ty,
    994                        mkUifU(mce,ty, xxhash,yyhash)),
    995             sevenLeft1
    996          );
    997    }
    998 }
    999 
   1000 
   1001 /*------------------------------------------------------------*/
   1002 /*--- Emit a test and complaint if something is undefined. ---*/
   1003 /*------------------------------------------------------------*/
   1004 
   1005 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
   1006 
   1007 
   1008 /* Set the annotations on a dirty helper to indicate that the stack
    1009    pointer and instruction pointer might be read.  This is the
   1010    behaviour of all 'emit-a-complaint' style functions we might
   1011    call. */
   1012 
   1013 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   1014    di->nFxState = 2;
   1015    di->fxState[0].fx     = Ifx_Read;
   1016    di->fxState[0].offset = mce->layout->offset_SP;
   1017    di->fxState[0].size   = mce->layout->sizeof_SP;
   1018    di->fxState[1].fx     = Ifx_Read;
   1019    di->fxState[1].offset = mce->layout->offset_IP;
   1020    di->fxState[1].size   = mce->layout->sizeof_IP;
   1021 }
   1022 
   1023 
   1024 /* Check the supplied **original** atom for undefinedness, and emit a
   1025    complaint if so.  Once that happens, mark it as defined.  This is
   1026    possible because the atom is either a tmp or literal.  If it's a
   1027    tmp, it will be shadowed by a tmp, and so we can set the shadow to
   1028    be defined.  In fact as mentioned above, we will have to allocate a
   1029    new tmp to carry the new 'defined' shadow value, and update the
   1030    original->tmp mapping accordingly; we cannot simply assign a new
   1031    value to an existing shadow tmp as this breaks SSAness -- resulting
   1032    in the post-instrumentation sanity checker spluttering in disapproval.
   1033 */
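         /* Sketch of the emitted IR (added for exposition; temp numbers are
            invented): for an original I32 atom t3 whose current shadow is t12,
            the generated statements look roughly like

               t20 = CmpNEZ32(t12)          -- mkPCastTo(.., Ity_I1, ..)
               DIRTY t20 ::: MC_(helperc_value_check4_fail_*)(..)
               t21 = 0x0:I32                -- fresh shadow via newShadowTmpV

            after which tmpMap records t21 as t3's shadow, so the helper fires
            only when some V bit of t12 is 1, and later uses see a defined
            shadow. */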
   1034 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
   1035 {
   1036    IRAtom*  vatom;
   1037    IRType   ty;
   1038    Int      sz;
   1039    IRDirty* di;
   1040    IRAtom*  cond;
   1041    IRAtom*  origin;
   1042    void*    fn;
   1043    HChar*   nm;
   1044    IRExpr** args;
   1045    Int      nargs;
   1046 
   1047    // Don't do V bit tests if we're not reporting undefined value errors.
   1048    if (MC_(clo_mc_level) == 1)
   1049       return;
   1050 
   1051    /* Since the original expression is atomic, there's no duplicated
   1052       work generated by making multiple V-expressions for it.  So we
   1053       don't really care about the possibility that someone else may
    1054       also create a V-interpretation for it. */
   1055    tl_assert(isOriginalAtom(mce, atom));
   1056    vatom = expr2vbits( mce, atom );
   1057    tl_assert(isShadowAtom(mce, vatom));
   1058    tl_assert(sameKindedAtoms(atom, vatom));
   1059 
   1060    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   1061 
   1062    /* sz is only used for constructing the error message */
   1063    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
   1064 
   1065    cond = mkPCastTo( mce, Ity_I1, vatom );
   1066    /* cond will be 0 if all defined, and 1 if any not defined. */
   1067 
   1068    /* Get the origin info for the value we are about to check.  At
   1069       least, if we are doing origin tracking.  If not, use a dummy
   1070       zero origin. */
   1071    if (MC_(clo_mc_level) == 3) {
   1072       origin = schemeE( mce, atom );
   1073       if (mce->hWordTy == Ity_I64) {
   1074          origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
   1075       }
   1076    } else {
   1077       origin = NULL;
   1078    }
   1079 
   1080    fn    = NULL;
   1081    nm    = NULL;
   1082    args  = NULL;
   1083    nargs = -1;
   1084 
   1085    switch (sz) {
   1086       case 0:
   1087          if (origin) {
   1088             fn    = &MC_(helperc_value_check0_fail_w_o);
   1089             nm    = "MC_(helperc_value_check0_fail_w_o)";
   1090             args  = mkIRExprVec_1(origin);
   1091             nargs = 1;
   1092          } else {
   1093             fn    = &MC_(helperc_value_check0_fail_no_o);
   1094             nm    = "MC_(helperc_value_check0_fail_no_o)";
   1095             args  = mkIRExprVec_0();
   1096             nargs = 0;
   1097          }
   1098          break;
   1099       case 1:
   1100          if (origin) {
   1101             fn    = &MC_(helperc_value_check1_fail_w_o);
   1102             nm    = "MC_(helperc_value_check1_fail_w_o)";
   1103             args  = mkIRExprVec_1(origin);
   1104             nargs = 1;
   1105          } else {
   1106             fn    = &MC_(helperc_value_check1_fail_no_o);
   1107             nm    = "MC_(helperc_value_check1_fail_no_o)";
   1108             args  = mkIRExprVec_0();
   1109             nargs = 0;
   1110          }
   1111          break;
   1112       case 4:
   1113          if (origin) {
   1114             fn    = &MC_(helperc_value_check4_fail_w_o);
   1115             nm    = "MC_(helperc_value_check4_fail_w_o)";
   1116             args  = mkIRExprVec_1(origin);
   1117             nargs = 1;
   1118          } else {
   1119             fn    = &MC_(helperc_value_check4_fail_no_o);
   1120             nm    = "MC_(helperc_value_check4_fail_no_o)";
   1121             args  = mkIRExprVec_0();
   1122             nargs = 0;
   1123          }
   1124          break;
   1125       case 8:
   1126          if (origin) {
   1127             fn    = &MC_(helperc_value_check8_fail_w_o);
   1128             nm    = "MC_(helperc_value_check8_fail_w_o)";
   1129             args  = mkIRExprVec_1(origin);
   1130             nargs = 1;
   1131          } else {
   1132             fn    = &MC_(helperc_value_check8_fail_no_o);
   1133             nm    = "MC_(helperc_value_check8_fail_no_o)";
   1134             args  = mkIRExprVec_0();
   1135             nargs = 0;
   1136          }
   1137          break;
   1138       case 2:
   1139       case 16:
   1140          if (origin) {
   1141             fn    = &MC_(helperc_value_checkN_fail_w_o);
   1142             nm    = "MC_(helperc_value_checkN_fail_w_o)";
   1143             args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
   1144             nargs = 2;
   1145          } else {
   1146             fn    = &MC_(helperc_value_checkN_fail_no_o);
   1147             nm    = "MC_(helperc_value_checkN_fail_no_o)";
   1148             args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
   1149             nargs = 1;
   1150          }
   1151          break;
   1152       default:
   1153          VG_(tool_panic)("unexpected szB");
   1154    }
   1155 
   1156    tl_assert(fn);
   1157    tl_assert(nm);
   1158    tl_assert(args);
   1159    tl_assert(nargs >= 0 && nargs <= 2);
   1160    tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
   1161               || (MC_(clo_mc_level) == 2 && origin == NULL) );
   1162 
   1163    di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
   1164                            VG_(fnptr_to_fnentry)( fn ), args );
   1165    di->guard = cond;
   1166    setHelperAnns( mce, di );
   1167    stmt( 'V', mce, IRStmt_Dirty(di));
   1168 
   1169    /* Set the shadow tmp to be defined.  First, update the
   1170       orig->shadow tmp mapping to reflect the fact that this shadow is
   1171       getting a new value. */
   1172    tl_assert(isIRAtom(vatom));
   1173    /* sameKindedAtoms ... */
   1174    if (vatom->tag == Iex_RdTmp) {
   1175       tl_assert(atom->tag == Iex_RdTmp);
   1176       newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
   1177       assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
   1178                        definedOfType(ty));
   1179    }
   1180 }
   1181 
   1182 
   1183 /*------------------------------------------------------------*/
   1184 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
   1185 /*------------------------------------------------------------*/
   1186 
   1187 /* Examine the always-defined sections declared in layout to see if
    1188    the (offset,size) section is within one.  Note, it is an error to
   1189    partially fall into such a region: (offset,size) should either be
   1190    completely in such a region or completely not-in such a region.
   1191 */
   1192 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
   1193 {
   1194    Int minoffD, maxoffD, i;
   1195    Int minoff = offset;
   1196    Int maxoff = minoff + size - 1;
   1197    tl_assert((minoff & ~0xFFFF) == 0);
   1198    tl_assert((maxoff & ~0xFFFF) == 0);
   1199 
   1200    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
   1201       minoffD = mce->layout->alwaysDefd[i].offset;
   1202       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
   1203       tl_assert((minoffD & ~0xFFFF) == 0);
   1204       tl_assert((maxoffD & ~0xFFFF) == 0);
   1205 
   1206       if (maxoff < minoffD || maxoffD < minoff)
   1207          continue; /* no overlap */
   1208       if (minoff >= minoffD && maxoff <= maxoffD)
   1209          return True; /* completely contained in an always-defd section */
   1210 
   1211       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   1212    }
   1213    return False; /* could not find any containing section */
   1214 }
   1215 
   1216 
   1217 /* Generate into bb suitable actions to shadow this Put.  If the state
   1218    slice is marked 'always defined', do nothing.  Otherwise, write the
   1219    supplied V bits to the shadow state.  We can pass in either an
   1220    original atom or a V-atom, but not both.  In the former case the
   1221    relevant V-bits are then generated from the original.
   1222 */
   1223 static
   1224 void do_shadow_PUT ( MCEnv* mce,  Int offset,
   1225                      IRAtom* atom, IRAtom* vatom )
   1226 {
   1227    IRType ty;
   1228 
   1229    // Don't do shadow PUTs if we're not doing undefined value checking.
   1230    // Their absence lets Vex's optimiser remove all the shadow computation
   1231    // that they depend on, which includes GETs of the shadow registers.
   1232    if (MC_(clo_mc_level) == 1)
   1233       return;
   1234 
   1235    if (atom) {
   1236       tl_assert(!vatom);
   1237       tl_assert(isOriginalAtom(mce, atom));
   1238       vatom = expr2vbits( mce, atom );
   1239    } else {
   1240       tl_assert(vatom);
   1241       tl_assert(isShadowAtom(mce, vatom));
   1242    }
   1243 
   1244    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   1245    tl_assert(ty != Ity_I1);
   1246    tl_assert(ty != Ity_I128);
   1247    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
   1248       /* later: no ... */
   1249       /* emit code to emit a complaint if any of the vbits are 1. */
   1250       /* complainIfUndefined(mce, atom); */
   1251    } else {
   1252       /* Do a plain shadow Put. */
   1253       stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   1254    }
   1255 }
   1256 
   1257 
    1258 /* Generate into bb suitable actions to shadow this PutI (passed in
    1259    in pieces).
   1260 */
   1261 static
   1262 void do_shadow_PUTI ( MCEnv* mce,
   1263                       IRRegArray* descr,
   1264                       IRAtom* ix, Int bias, IRAtom* atom )
   1265 {
   1266    IRAtom* vatom;
   1267    IRType  ty, tyS;
    1268    Int     arrSize;
   1269 
   1270    // Don't do shadow PUTIs if we're not doing undefined value checking.
   1271    // Their absence lets Vex's optimiser remove all the shadow computation
   1272    // that they depend on, which includes GETIs of the shadow registers.
   1273    if (MC_(clo_mc_level) == 1)
   1274       return;
   1275 
   1276    tl_assert(isOriginalAtom(mce,atom));
   1277    vatom = expr2vbits( mce, atom );
   1278    tl_assert(sameKindedAtoms(atom, vatom));
   1279    ty   = descr->elemTy;
   1280    tyS  = shadowTypeV(ty);
   1281    arrSize = descr->nElems * sizeofIRType(ty);
   1282    tl_assert(ty != Ity_I1);
   1283    tl_assert(isOriginalAtom(mce,ix));
   1284    complainIfUndefined(mce,ix);
   1285    if (isAlwaysDefd(mce, descr->base, arrSize)) {
   1286       /* later: no ... */
   1287       /* emit code to emit a complaint if any of the vbits are 1. */
   1288       /* complainIfUndefined(mce, atom); */
   1289    } else {
   1290       /* Do a cloned version of the Put that refers to the shadow
   1291          area. */
   1292       IRRegArray* new_descr
   1293          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
   1294                          tyS, descr->nElems);
   1295       stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
   1296    }
   1297 }
   1298 
   1299 
   1300 /* Return an expression which contains the V bits corresponding to the
   1301    given GET (passed in in pieces).
   1302 */
   1303 static
   1304 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
   1305 {
   1306    IRType tyS = shadowTypeV(ty);
   1307    tl_assert(ty != Ity_I1);
   1308    tl_assert(ty != Ity_I128);
   1309    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
   1310       /* Always defined, return all zeroes of the relevant type */
   1311       return definedOfType(tyS);
   1312    } else {
   1313       /* return a cloned version of the Get that refers to the shadow
   1314          area. */
   1315       /* FIXME: this isn't an atom! */
   1316       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   1317    }
   1318 }
   1319 
   1320 
   1321 /* Return an expression which contains the V bits corresponding to the
   1322    given GETI (passed in in pieces).
   1323 */
   1324 static
   1325 IRExpr* shadow_GETI ( MCEnv* mce,
   1326                       IRRegArray* descr, IRAtom* ix, Int bias )
   1327 {
   1328    IRType ty   = descr->elemTy;
   1329    IRType tyS  = shadowTypeV(ty);
   1330    Int arrSize = descr->nElems * sizeofIRType(ty);
   1331    tl_assert(ty != Ity_I1);
   1332    tl_assert(isOriginalAtom(mce,ix));
   1333    complainIfUndefined(mce,ix);
   1334    if (isAlwaysDefd(mce, descr->base, arrSize)) {
   1335       /* Always defined, return all zeroes of the relevant type */
   1336       return definedOfType(tyS);
   1337    } else {
   1338       /* return a cloned version of the Get that refers to the shadow
   1339          area. */
   1340       IRRegArray* new_descr
   1341          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
   1342                          tyS, descr->nElems);
   1343       return IRExpr_GetI( new_descr, ix, bias );
   1344    }
   1345 }
   1346 
   1347 
   1348 /*------------------------------------------------------------*/
   1349 /*--- Generating approximations for unknown operations,    ---*/
   1350 /*--- using lazy-propagate semantics                       ---*/
   1351 /*------------------------------------------------------------*/
   1352 
   1353 /* Lazy propagation of undefinedness from two values, resulting in the
   1354    specified shadow type.
   1355 */
   1356 static
   1357 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
   1358 {
   1359    IRAtom* at;
   1360    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1361    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1362    tl_assert(isShadowAtom(mce,va1));
   1363    tl_assert(isShadowAtom(mce,va2));
   1364 
   1365    /* The general case is inefficient because PCast is an expensive
   1366       operation.  Here are some special cases which use PCast only
   1367       once rather than twice. */
   1368 
   1369    /* I64 x I64 -> I64 */
   1370    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
   1371       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
   1372       at = mkUifU(mce, Ity_I64, va1, va2);
   1373       at = mkPCastTo(mce, Ity_I64, at);
   1374       return at;
   1375    }
   1376 
   1377    /* I64 x I64 -> I32 */
   1378    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
   1379       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
   1380       at = mkUifU(mce, Ity_I64, va1, va2);
   1381       at = mkPCastTo(mce, Ity_I32, at);
   1382       return at;
   1383    }
   1384 
   1385    if (0) {
   1386       VG_(printf)("mkLazy2 ");
   1387       ppIRType(t1);
   1388       VG_(printf)("_");
   1389       ppIRType(t2);
   1390       VG_(printf)("_");
   1391       ppIRType(finalVty);
   1392       VG_(printf)("\n");
   1393    }
   1394 
   1395    /* General case: force everything via 32-bit intermediaries. */
   1396    at = mkPCastTo(mce, Ity_I32, va1);
   1397    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   1398    at = mkPCastTo(mce, finalVty, at);
   1399    return at;
   1400 }
   1401 
   1402 
   1403 /* 3-arg version of the above. */
   1404 static
   1405 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
   1406                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
   1407 {
   1408    IRAtom* at;
   1409    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1410    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1411    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1412    tl_assert(isShadowAtom(mce,va1));
   1413    tl_assert(isShadowAtom(mce,va2));
   1414    tl_assert(isShadowAtom(mce,va3));
   1415 
   1416    /* The general case is inefficient because PCast is an expensive
   1417       operation.  Here are some special cases which use PCast only
   1418       twice rather than three times. */
   1419 
   1420    /* I32 x I64 x I64 -> I64 */
   1421    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   1422    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1423        && finalVty == Ity_I64) {
   1424       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
   1425       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1426          mode indication which is fully defined, this should get
   1427          folded out later. */
   1428       at = mkPCastTo(mce, Ity_I64, va1);
   1429       /* Now fold in 2nd and 3rd args. */
   1430       at = mkUifU(mce, Ity_I64, at, va2);
   1431       at = mkUifU(mce, Ity_I64, at, va3);
   1432       /* and PCast once again. */
   1433       at = mkPCastTo(mce, Ity_I64, at);
   1434       return at;
   1435    }
   1436 
   1437    /* I32 x I64 x I64 -> I32 */
   1438    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1439        && finalVty == Ity_I32) {
   1440       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
   1441       at = mkPCastTo(mce, Ity_I64, va1);
   1442       at = mkUifU(mce, Ity_I64, at, va2);
   1443       at = mkUifU(mce, Ity_I64, at, va3);
   1444       at = mkPCastTo(mce, Ity_I32, at);
   1445       return at;
   1446    }
   1447 
   1448    /* I32 x I32 x I32 -> I32 */
   1449    /* 32-bit FP idiom, as (eg) happens on ARM */
   1450    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
   1451        && finalVty == Ity_I32) {
   1452       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
   1453       at = va1;
   1454       at = mkUifU(mce, Ity_I32, at, va2);
   1455       at = mkUifU(mce, Ity_I32, at, va3);
   1456       at = mkPCastTo(mce, Ity_I32, at);
   1457       return at;
   1458    }
   1459 
   1460    /* I32 x I128 x I128 -> I128 */
   1461    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   1462    if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
   1463        && finalVty == Ity_I128) {
   1464       if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
   1465       /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
   1466          mode indication which is fully defined, this should get
   1467          folded out later. */
   1468       at = mkPCastTo(mce, Ity_I128, va1);
   1469       /* Now fold in 2nd and 3rd args. */
   1470       at = mkUifU(mce, Ity_I128, at, va2);
   1471       at = mkUifU(mce, Ity_I128, at, va3);
   1472       /* and PCast once again. */
   1473       at = mkPCastTo(mce, Ity_I128, at);
   1474       return at;
   1475    }
   1476    if (1) {
   1477       VG_(printf)("mkLazy3: ");
   1478       ppIRType(t1);
   1479       VG_(printf)(" x ");
   1480       ppIRType(t2);
   1481       VG_(printf)(" x ");
   1482       ppIRType(t3);
   1483       VG_(printf)(" -> ");
   1484       ppIRType(finalVty);
   1485       VG_(printf)("\n");
   1486    }
   1487 
   1488    tl_assert(0);
   1489    /* General case: force everything via 32-bit intermediaries. */
   1490    /*
   1491    at = mkPCastTo(mce, Ity_I32, va1);
   1492    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   1493    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   1494    at = mkPCastTo(mce, finalVty, at);
   1495    return at;
   1496    */
   1497 }
   1498 
   1499 
   1500 /* 4-arg version of the above. */
   1501 static
   1502 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
   1503                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
   1504 {
   1505    IRAtom* at;
   1506    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1507    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1508    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1509    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   1510    tl_assert(isShadowAtom(mce,va1));
   1511    tl_assert(isShadowAtom(mce,va2));
   1512    tl_assert(isShadowAtom(mce,va3));
   1513    tl_assert(isShadowAtom(mce,va4));
   1514 
   1515    /* The general case is inefficient because PCast is an expensive
   1516       operation.  Here are some special cases which use PCast only
   1517       twice rather than four times. */
   1518 
   1519    /* I32 x I64 x I64 x I64 -> I64 */
   1520    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   1521    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
   1522        && finalVty == Ity_I64) {
   1523       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
   1524       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1525          mode indication which is fully defined, this should get
   1526          folded out later. */
   1527       at = mkPCastTo(mce, Ity_I64, va1);
   1528       /* Now fold in 2nd, 3rd, 4th args. */
   1529       at = mkUifU(mce, Ity_I64, at, va2);
   1530       at = mkUifU(mce, Ity_I64, at, va3);
   1531       at = mkUifU(mce, Ity_I64, at, va4);
   1532       /* and PCast once again. */
   1533       at = mkPCastTo(mce, Ity_I64, at);
   1534       return at;
   1535    }
   1536    /* I32 x I32 x I32 x I32 -> I32 */
   1537    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   1538    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
   1539        && finalVty == Ity_I32) {
   1540       if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
   1541       at = va1;
   1542       /* Now fold in 2nd, 3rd, 4th args. */
   1543       at = mkUifU(mce, Ity_I32, at, va2);
   1544       at = mkUifU(mce, Ity_I32, at, va3);
   1545       at = mkUifU(mce, Ity_I32, at, va4);
   1546       at = mkPCastTo(mce, Ity_I32, at);
   1547       return at;
   1548    }
   1549 
   1550    if (1) {
   1551       VG_(printf)("mkLazy4: ");
   1552       ppIRType(t1);
   1553       VG_(printf)(" x ");
   1554       ppIRType(t2);
   1555       VG_(printf)(" x ");
   1556       ppIRType(t3);
   1557       VG_(printf)(" x ");
   1558       ppIRType(t4);
   1559       VG_(printf)(" -> ");
   1560       ppIRType(finalVty);
   1561       VG_(printf)("\n");
   1562    }
   1563 
   1564    tl_assert(0);
   1565 }
   1566 
   1567 
   1568 /* Do the lazy propagation game from a null-terminated vector of
   1569    atoms.  These are presumably the arguments to a helper call, so the
   1570    IRCallee info is also supplied in order that we can know which
   1571    arguments should be ignored (via the .mcx_mask field).
   1572 */
   1573 static
   1574 IRAtom* mkLazyN ( MCEnv* mce,
   1575                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
   1576 {
   1577    Int     i;
   1578    IRAtom* here;
   1579    IRAtom* curr;
   1580    IRType  mergeTy;
   1581    Bool    mergeTy64 = True;
   1582 
   1583    /* Decide on the type of the merge intermediary.  If all relevant
   1584       args are I64, then it's I64.  In all other circumstances, use
   1585       I32. */
   1586    for (i = 0; exprvec[i]; i++) {
   1587       tl_assert(i < 32);
   1588       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1589       if (cee->mcx_mask & (1<<i))
   1590          continue;
   1591       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
   1592          mergeTy64 = False;
   1593    }
   1594 
   1595    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   1596    curr    = definedOfType(mergeTy);
   1597 
   1598    for (i = 0; exprvec[i]; i++) {
   1599       tl_assert(i < 32);
   1600       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1601       /* Only take notice of this arg if the callee's mc-exclusion
   1602          mask does not say it is to be excluded. */
   1603       if (cee->mcx_mask & (1<<i)) {
   1604          /* the arg is to be excluded from definedness checking.  Do
   1605             nothing. */
   1606          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
   1607       } else {
   1608          /* calculate the arg's definedness, and pessimistically merge
   1609             it in. */
   1610          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
   1611          curr = mergeTy64
   1612                    ? mkUifU64(mce, here, curr)
   1613                    : mkUifU32(mce, here, curr);
   1614       }
   1615    }
   1616    return mkPCastTo(mce, finalVtype, curr );
   1617 }
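
        /* Illustrative sketch (hypothetical call, added for exposition
           only).  For a helper call f(argA, argB, argC) whose IRCallee has
           mcx_mask == (1<<1), argB is ignored entirely, and the result's
           V bits are

              PCastTo(finalVtype, PCast(argA#) `UifU` PCast(argC#))

           i.e. the call's result is treated as undefined iff any bit of any
           non-excluded argument is undefined. */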
   1618 
   1619 
   1620 /*------------------------------------------------------------*/
   1621 /*--- Generating expensive sequences for exact carry-chain ---*/
   1622 /*--- propagation in add/sub and related operations.       ---*/
   1623 /*------------------------------------------------------------*/
   1624 
   1625 static
   1626 IRAtom* expensiveAddSub ( MCEnv*  mce,
   1627                           Bool    add,
   1628                           IRType  ty,
   1629                           IRAtom* qaa, IRAtom* qbb,
   1630                           IRAtom* aa,  IRAtom* bb )
   1631 {
   1632    IRAtom *a_min, *b_min, *a_max, *b_max;
   1633    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
   1634 
   1635    tl_assert(isShadowAtom(mce,qaa));
   1636    tl_assert(isShadowAtom(mce,qbb));
   1637    tl_assert(isOriginalAtom(mce,aa));
   1638    tl_assert(isOriginalAtom(mce,bb));
   1639    tl_assert(sameKindedAtoms(qaa,aa));
   1640    tl_assert(sameKindedAtoms(qbb,bb));
   1641 
   1642    switch (ty) {
   1643       case Ity_I32:
   1644          opAND = Iop_And32;
   1645          opOR  = Iop_Or32;
   1646          opXOR = Iop_Xor32;
   1647          opNOT = Iop_Not32;
   1648          opADD = Iop_Add32;
   1649          opSUB = Iop_Sub32;
   1650          break;
   1651       case Ity_I64:
   1652          opAND = Iop_And64;
   1653          opOR  = Iop_Or64;
   1654          opXOR = Iop_Xor64;
   1655          opNOT = Iop_Not64;
   1656          opADD = Iop_Add64;
   1657          opSUB = Iop_Sub64;
   1658          break;
   1659       default:
   1660          VG_(tool_panic)("expensiveAddSub");
   1661    }
   1662 
   1663    // a_min = aa & ~qaa
   1664    a_min = assignNew('V', mce,ty,
   1665                      binop(opAND, aa,
   1666                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
   1667 
   1668    // b_min = bb & ~qbb
   1669    b_min = assignNew('V', mce,ty,
   1670                      binop(opAND, bb,
   1671                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
   1672 
   1673    // a_max = aa | qaa
   1674    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
   1675 
   1676    // b_max = bb | qbb
   1677    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
   1678 
   1679    if (add) {
   1680       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   1681       return
   1682       assignNew('V', mce,ty,
   1683          binop( opOR,
   1684                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1685                 assignNew('V', mce,ty,
   1686                    binop( opXOR,
   1687                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
   1688                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
   1689                    )
   1690                 )
   1691          )
   1692       );
   1693    } else {
   1694       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
   1695       return
   1696       assignNew('V', mce,ty,
   1697          binop( opOR,
   1698                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1699                 assignNew('V', mce,ty,
   1700                    binop( opXOR,
   1701                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
   1702                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
   1703                    )
   1704                 )
   1705          )
   1706       );
   1707    }
   1708 
   1709 }
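
        /* Illustrative worked example (hypothetical values, added for
           exposition only).  Consider the 'add' case with 4-bit quantities
           for readability:

              aa = 0110, qaa = 0001   (bit 0 of aa is undefined)
              bb = 0011, qbb = 0000   (bb is fully defined)

           Then a_min = 0110, a_max = 0111, b_min = b_max = 0011, and

              a_min + b_min = 1001
              a_max + b_max = 1010
              XOR           = 0011

           so the result V bits are (qaa | qbb) | 0011 = 0011.  Bits 0 and 1
           of the sum are reported as undefined, which is exact: the true sum
           is either 1001 or 1010, and those differ only in bits 0 and 1.  A
           coarser approximation would typically smear the undefinedness
           further towards the top bit. */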
   1710 
   1711 
   1712 /*------------------------------------------------------------*/
   1713 /*--- Scalar shifts.                                       ---*/
   1714 /*------------------------------------------------------------*/
   1715 
   1716 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   1717    idea is to shift the definedness bits by the original shift amount.
   1718    This introduces 0s ("defined") in new positions for left shifts and
   1719    unsigned right shifts, and copies the top definedness bit for
   1720    signed right shifts.  So, conveniently, applying the original shift
   1721    operator to the definedness bits for the left arg is exactly the
   1722    right thing to do:
   1723 
   1724       (qaa << bb)
   1725 
   1726    However if the shift amount is undefined then the whole result
   1727    is undefined.  Hence need:
   1728 
   1729       (qaa << bb) `UifU` PCast(qbb)
   1730 
   1731    If the shift amount bb is a literal then qbb will say 'all defined'
   1732    and the UifU and PCast will get folded out by post-instrumentation
   1733    optimisation.
   1734 */
   1735 static IRAtom* scalarShift ( MCEnv*  mce,
   1736                              IRType  ty,
   1737                              IROp    original_op,
   1738                              IRAtom* qaa, IRAtom* qbb,
   1739                              IRAtom* aa,  IRAtom* bb )
   1740 {
   1741    tl_assert(isShadowAtom(mce,qaa));
   1742    tl_assert(isShadowAtom(mce,qbb));
   1743    tl_assert(isOriginalAtom(mce,aa));
   1744    tl_assert(isOriginalAtom(mce,bb));
   1745    tl_assert(sameKindedAtoms(qaa,aa));
   1746    tl_assert(sameKindedAtoms(qbb,bb));
   1747    return
   1748       assignNew(
   1749          'V', mce, ty,
   1750          mkUifU( mce, ty,
   1751                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
   1752                  mkPCastTo(mce, ty, qbb)
   1753          )
   1754    );
   1755 }
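
        /* Illustrative worked example (hypothetical values, added for
           exposition only).  Suppose aa = 0101 with qaa = 0010 (bit 1 of aa
           is undefined), and the shift amount bb = 2 is a defined constant,
           so qbb says 'all defined' and PCast(qbb) contributes nothing.
           Then qaa << 2 = 1000: the undefinedness moves to bit 3, exactly
           where the questionable data bit ends up, while the shifted-in low
           bits are defined.  If instead bb itself were undefined, PCast(qbb)
           would be all ones and the whole result would be flagged as
           undefined. */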
   1756 
   1757 
   1758 /*------------------------------------------------------------*/
   1759 /*--- Helpers for dealing with vector primops.             ---*/
   1760 /*------------------------------------------------------------*/
   1761 
   1762 /* Vector pessimisation -- pessimise within each lane individually. */
   1763 
   1764 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
   1765 {
   1766    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
   1767 }
   1768 
   1769 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
   1770 {
   1771    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
   1772 }
   1773 
   1774 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
   1775 {
   1776    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   1777 }
   1778 
   1779 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
   1780 {
   1781    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
   1782 }
   1783 
   1784 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
   1785 {
   1786    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
   1787 }
   1788 
   1789 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
   1790 {
   1791    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
   1792 }
   1793 
   1794 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
   1795 {
   1796    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
   1797 }
   1798 
   1799 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
   1800 {
   1801    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
   1802 }
   1803 
   1804 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
   1805 {
   1806    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
   1807 }
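
        /* Illustrative example (added for exposition only): applying
           mkPCast16x4 to the I64 shadow value 0x0000000400000000 -- a single
           undefined bit in the second-highest 16-bit lane -- yields
           0x0000FFFF00000000.  Any lane containing at least one undefined
           bit becomes entirely undefined; fully defined lanes stay all
           zeroes.  This is just CmpNEZ16x4 applied to the V bits. */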
   1808 
   1809 
   1810 /* Here's a simple scheme capable of handling ops derived from SSE1
   1811    code while only generating ops that can be efficiently
   1812    implemented in SSE1. */
   1813 
   1814 /* All-lanes versions are straightforward:
   1815 
   1816    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
   1817 
   1818    unary32Fx4(x)      ==> PCast32x4(x#)
   1819 
   1820    Lowest-lane-only versions are more complex:
   1821 
   1822    binary32F0x4(x,y)  ==> SetV128lo32(
   1823                              x#,
   1824                              PCast32(V128to32(UifUV128(x#,y#)))
   1825                           )
   1826 
   1827    This is perhaps not so obvious.  In particular, it's faster to
   1828    do a V128-bit UifU and then take the bottom 32 bits than the more
   1829    obvious scheme of taking the bottom 32 bits of each operand
   1830    and doing a 32-bit UifU.  This is because UifU is fast and
   1831    chopping lanes off vector values is slow.
   1832 
   1833    Finally:
   1834 
   1835    unary32F0x4(x)     ==> SetV128lo32(
   1836                              x#,
   1837                              PCast32(V128to32(x#))
   1838                           )
   1839 
   1840    Where:
   1841 
   1842    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   1843    PCast32x4(v#) = CmpNEZ32x4(v#)
   1844 */
   1845 
   1846 static
   1847 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1848 {
   1849    IRAtom* at;
   1850    tl_assert(isShadowAtom(mce, vatomX));
   1851    tl_assert(isShadowAtom(mce, vatomY));
   1852    at = mkUifUV128(mce, vatomX, vatomY);
   1853    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   1854    return at;
   1855 }
   1856 
   1857 static
   1858 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
   1859 {
   1860    IRAtom* at;
   1861    tl_assert(isShadowAtom(mce, vatomX));
   1862    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   1863    return at;
   1864 }
   1865 
   1866 static
   1867 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1868 {
   1869    IRAtom* at;
   1870    tl_assert(isShadowAtom(mce, vatomX));
   1871    tl_assert(isShadowAtom(mce, vatomY));
   1872    at = mkUifUV128(mce, vatomX, vatomY);
   1873    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   1874    at = mkPCastTo(mce, Ity_I32, at);
   1875    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1876    return at;
   1877 }
   1878 
   1879 static
   1880 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
   1881 {
   1882    IRAtom* at;
   1883    tl_assert(isShadowAtom(mce, vatomX));
   1884    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   1885    at = mkPCastTo(mce, Ity_I32, at);
   1886    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1887    return at;
   1888 }
   1889 
   1890 /* --- ... and ... 64Fx2 versions of the same ... --- */
   1891 
   1892 static
   1893 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1894 {
   1895    IRAtom* at;
   1896    tl_assert(isShadowAtom(mce, vatomX));
   1897    tl_assert(isShadowAtom(mce, vatomY));
   1898    at = mkUifUV128(mce, vatomX, vatomY);
   1899    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   1900    return at;
   1901 }
   1902 
   1903 static
   1904 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1905 {
   1906    IRAtom* at;
   1907    tl_assert(isShadowAtom(mce, vatomX));
   1908    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   1909    return at;
   1910 }
   1911 
   1912 static
   1913 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1914 {
   1915    IRAtom* at;
   1916    tl_assert(isShadowAtom(mce, vatomX));
   1917    tl_assert(isShadowAtom(mce, vatomY));
   1918    at = mkUifUV128(mce, vatomX, vatomY);
   1919    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   1920    at = mkPCastTo(mce, Ity_I64, at);
   1921    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1922    return at;
   1923 }
   1924 
   1925 static
   1926 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
   1927 {
   1928    IRAtom* at;
   1929    tl_assert(isShadowAtom(mce, vatomX));
   1930    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   1931    at = mkPCastTo(mce, Ity_I64, at);
   1932    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1933    return at;
   1934 }
   1935 
   1936 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
   1937 
   1938 static
   1939 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1940 {
   1941    IRAtom* at;
   1942    tl_assert(isShadowAtom(mce, vatomX));
   1943    tl_assert(isShadowAtom(mce, vatomY));
   1944    at = mkUifU64(mce, vatomX, vatomY);
   1945    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   1946    return at;
   1947 }
   1948 
   1949 static
   1950 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1951 {
   1952    IRAtom* at;
   1953    tl_assert(isShadowAtom(mce, vatomX));
   1954    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   1955    return at;
   1956 }
   1957 
   1958 /* --- --- Vector saturated narrowing --- --- */
   1959 
   1960 /* We used to do something very clever here, but on closer inspection
   1961    (2011-Jun-15), and in particular bug #279698, it turns out to be
   1962    wrong.  Part of the problem came from the fact that for a long
   1963    time, the IR primops to do with saturated narrowing were
   1964    underspecified and managed to confuse multiple cases which needed
   1965    to be separate: the op names had a signedness qualifier, but in
   1966    fact the source and destination signednesses needed to be specified
   1967    independently, so the op names really need two independent
   1968    signedness specifiers.
   1969 
   1970    As of 2011-Jun-15 (ish) the underspecification was sorted out
   1971    properly.  The incorrect instrumentation remained, though.  That
   1972    has now (2011-Oct-22) been fixed.
   1973 
   1974    What we now do is simple:
   1975 
   1976    Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
   1977    number of lanes, X is the source lane width and signedness, and Y
   1978    is the destination lane width and signedness.  In all cases the
   1979    destination lane width is half the source lane width, so the names
   1980    have a bit of redundancy, but are at least easy to read.
   1981 
   1982    For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
   1983    to unsigned 16s.
   1984 
   1985    Let Vanilla(OP) be a function that takes OP, one of these
   1986    saturating narrowing ops, and produces the same "shaped" narrowing
   1987    op which is not saturating, but merely dumps the most significant
   1988    bits.  "same shape" means that the lane numbers and widths are the
   1989    same as with OP.
   1990 
   1991    For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
   1992                   = Iop_NarrowBin32to16x8,
   1993    that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
   1994    dumping the top half of each lane.
   1995 
   1996    So, with that in place, the scheme is simple: pessimise each lane
   1997    individually and then apply Vanilla(OP) so as
   1998    to get the result in the right "shape".  If the original OP is
   1999    QNarrowBinXtoYxZ then we produce
   2000 
   2001    Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
   2002 
   2003    or for the case when OP is unary (Iop_QNarrowUn*)
   2004 
   2005    Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
   2006 */
   2007 static
   2008 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
   2009 {
   2010    switch (qnarrowOp) {
   2011       /* Binary: (128, 128) -> 128 */
   2012       case Iop_QNarrowBin16Sto8Ux16:
   2013       case Iop_QNarrowBin16Sto8Sx16:
   2014       case Iop_QNarrowBin16Uto8Ux16:
   2015          return Iop_NarrowBin16to8x16;
   2016       case Iop_QNarrowBin32Sto16Ux8:
   2017       case Iop_QNarrowBin32Sto16Sx8:
   2018       case Iop_QNarrowBin32Uto16Ux8:
   2019          return Iop_NarrowBin32to16x8;
   2020       /* Binary: (64, 64) -> 64 */
   2021       case Iop_QNarrowBin32Sto16Sx4:
   2022          return Iop_NarrowBin32to16x4;
   2023       case Iop_QNarrowBin16Sto8Ux8:
   2024       case Iop_QNarrowBin16Sto8Sx8:
   2025          return Iop_NarrowBin16to8x8;
   2026       /* Unary: 128 -> 64 */
   2027       case Iop_QNarrowUn64Uto32Ux2:
   2028       case Iop_QNarrowUn64Sto32Sx2:
   2029       case Iop_QNarrowUn64Sto32Ux2:
   2030          return Iop_NarrowUn64to32x2;
   2031       case Iop_QNarrowUn32Uto16Ux4:
   2032       case Iop_QNarrowUn32Sto16Sx4:
   2033       case Iop_QNarrowUn32Sto16Ux4:
   2034          return Iop_NarrowUn32to16x4;
   2035       case Iop_QNarrowUn16Uto8Ux8:
   2036       case Iop_QNarrowUn16Sto8Sx8:
   2037       case Iop_QNarrowUn16Sto8Ux8:
   2038          return Iop_NarrowUn16to8x8;
   2039       default:
   2040          ppIROp(qnarrowOp);
   2041          VG_(tool_panic)("vanillaNarrowingOpOfShape");
   2042    }
   2043 }
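
        /* Illustrative example (added for exposition only): for
           Iop_QNarrowBin32Sto16Ux8 the instrumentation built below is

              NarrowBin32to16x8( PCast32x4(vatom1), PCast32x4(vatom2) )

           Each 32-bit source lane is first pessimised to all-0s or all-1s,
           and the non-saturating narrow then just keeps the (identical) low
           half of each lane, giving the correct 16-bit lane shadows. */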
   2044 
   2045 static
   2046 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
   2047                               IRAtom* vatom1, IRAtom* vatom2)
   2048 {
   2049    IRAtom *at1, *at2, *at3;
   2050    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2051    switch (narrow_op) {
   2052       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
   2053       case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
   2054       case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
   2055       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
   2056       case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
   2057       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
   2058       default: VG_(tool_panic)("vectorNarrowBinV128");
   2059    }
   2060    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2061    tl_assert(isShadowAtom(mce,vatom1));
   2062    tl_assert(isShadowAtom(mce,vatom2));
   2063    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   2064    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   2065    at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   2066    return at3;
   2067 }
   2068 
   2069 static
   2070 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
   2071                             IRAtom* vatom1, IRAtom* vatom2)
   2072 {
   2073    IRAtom *at1, *at2, *at3;
   2074    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2075    switch (narrow_op) {
   2076       case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
   2077       case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
   2078       case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
   2079       default: VG_(tool_panic)("vectorNarrowBin64");
   2080    }
   2081    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2082    tl_assert(isShadowAtom(mce,vatom1));
   2083    tl_assert(isShadowAtom(mce,vatom2));
   2084    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   2085    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   2086    at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   2087    return at3;
   2088 }
   2089 
   2090 static
   2091 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
   2092                              IRAtom* vatom1)
   2093 {
   2094    IRAtom *at1, *at2;
   2095    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2096    tl_assert(isShadowAtom(mce,vatom1));
   2097    /* For vanilla narrowing (non-saturating), we can just apply
   2098       the op directly to the V bits. */
   2099    switch (narrow_op) {
   2100       case Iop_NarrowUn16to8x8:
   2101       case Iop_NarrowUn32to16x4:
   2102       case Iop_NarrowUn64to32x2:
   2103          at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
   2104          return at1;
   2105       default:
   2106          break; /* Do Plan B */
   2107    }
   2108    /* Plan B: for ops that involve a saturation operation on the args,
   2109       we must PCast before the vanilla narrow. */
   2110    switch (narrow_op) {
   2111       case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
   2112       case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
   2113       case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
   2114       case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
   2115       case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
   2116       case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
   2117       case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
   2118       case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
   2119       case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
   2120       default: VG_(tool_panic)("vectorNarrowUnV128");
   2121    }
   2122    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2123    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   2124    at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   2125    return at2;
   2126 }
   2127 
   2128 static
   2129 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
   2130                          IRAtom* vatom1)
   2131 {
   2132    IRAtom *at1, *at2;
   2133    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2134    switch (longen_op) {
   2135       case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
   2136       case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
   2137       case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
   2138       case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
   2139       case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
   2140       case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
   2141       default: VG_(tool_panic)("vectorWidenI64");
   2142    }
   2143    tl_assert(isShadowAtom(mce,vatom1));
   2144    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   2145    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   2146    return at2;
   2147 }
   2148 
   2149 
   2150 /* --- --- Vector integer arithmetic --- --- */
   2151 
   2152 /* Simple ... UifU the args and per-lane pessimise the results. */
   2153 
   2154 /* --- V128-bit versions --- */
   2155 
   2156 static
   2157 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2158 {
   2159    IRAtom* at;
   2160    at = mkUifUV128(mce, vatom1, vatom2);
   2161    at = mkPCast8x16(mce, at);
   2162    return at;
   2163 }
   2164 
   2165 static
   2166 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2167 {
   2168    IRAtom* at;
   2169    at = mkUifUV128(mce, vatom1, vatom2);
   2170    at = mkPCast16x8(mce, at);
   2171    return at;
   2172 }
   2173 
   2174 static
   2175 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2176 {
   2177    IRAtom* at;
   2178    at = mkUifUV128(mce, vatom1, vatom2);
   2179    at = mkPCast32x4(mce, at);
   2180    return at;
   2181 }
   2182 
   2183 static
   2184 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2185 {
   2186    IRAtom* at;
   2187    at = mkUifUV128(mce, vatom1, vatom2);
   2188    at = mkPCast64x2(mce, at);
   2189    return at;
   2190 }
   2191 
   2192 /* --- 64-bit versions --- */
   2193 
   2194 static
   2195 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2196 {
   2197    IRAtom* at;
   2198    at = mkUifU64(mce, vatom1, vatom2);
   2199    at = mkPCast8x8(mce, at);
   2200    return at;
   2201 }
   2202 
   2203 static
   2204 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2205 {
   2206    IRAtom* at;
   2207    at = mkUifU64(mce, vatom1, vatom2);
   2208    at = mkPCast16x4(mce, at);
   2209    return at;
   2210 }
   2211 
   2212 static
   2213 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2214 {
   2215    IRAtom* at;
   2216    at = mkUifU64(mce, vatom1, vatom2);
   2217    at = mkPCast32x2(mce, at);
   2218    return at;
   2219 }
   2220 
   2221 static
   2222 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2223 {
   2224    IRAtom* at;
   2225    at = mkUifU64(mce, vatom1, vatom2);
   2226    at = mkPCastTo(mce, Ity_I64, at);
   2227    return at;
   2228 }
   2229 
   2230 /* --- 32-bit versions --- */
   2231 
   2232 static
   2233 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2234 {
   2235    IRAtom* at;
   2236    at = mkUifU32(mce, vatom1, vatom2);
   2237    at = mkPCast8x4(mce, at);
   2238    return at;
   2239 }
   2240 
   2241 static
   2242 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2243 {
   2244    IRAtom* at;
   2245    at = mkUifU32(mce, vatom1, vatom2);
   2246    at = mkPCast16x2(mce, at);
   2247    return at;
   2248 }
   2249 
   2250 
   2251 /*------------------------------------------------------------*/
   2252 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
   2253 /*------------------------------------------------------------*/
   2254 
   2255 static
   2256 IRAtom* expr2vbits_Qop ( MCEnv* mce,
   2257                          IROp op,
   2258                          IRAtom* atom1, IRAtom* atom2,
   2259                          IRAtom* atom3, IRAtom* atom4 )
   2260 {
   2261    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2262    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2263    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2264    IRAtom* vatom4 = expr2vbits( mce, atom4 );
   2265 
   2266    tl_assert(isOriginalAtom(mce,atom1));
   2267    tl_assert(isOriginalAtom(mce,atom2));
   2268    tl_assert(isOriginalAtom(mce,atom3));
   2269    tl_assert(isOriginalAtom(mce,atom4));
   2270    tl_assert(isShadowAtom(mce,vatom1));
   2271    tl_assert(isShadowAtom(mce,vatom2));
   2272    tl_assert(isShadowAtom(mce,vatom3));
   2273    tl_assert(isShadowAtom(mce,vatom4));
   2274    tl_assert(sameKindedAtoms(atom1,vatom1));
   2275    tl_assert(sameKindedAtoms(atom2,vatom2));
   2276    tl_assert(sameKindedAtoms(atom3,vatom3));
   2277    tl_assert(sameKindedAtoms(atom4,vatom4));
   2278    switch (op) {
   2279       case Iop_MAddF64:
   2280       case Iop_MAddF64r32:
   2281       case Iop_MSubF64:
   2282       case Iop_MSubF64r32:
   2283          /* I32(rm) x F64 x F64 x F64 -> F64 */
   2284          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
   2285 
   2286       case Iop_MAddF32:
   2287       case Iop_MSubF32:
   2288          /* I32(rm) x F32 x F32 x F32 -> F32 */
   2289          return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
   2290 
   2291       default:
   2292          ppIROp(op);
   2293          VG_(tool_panic)("memcheck:expr2vbits_Qop");
   2294    }
   2295 }
   2296 
   2297 
   2298 static
   2299 IRAtom* expr2vbits_Triop ( MCEnv* mce,
   2300                            IROp op,
   2301                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
   2302 {
   2303    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2304    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2305    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2306 
   2307    tl_assert(isOriginalAtom(mce,atom1));
   2308    tl_assert(isOriginalAtom(mce,atom2));
   2309    tl_assert(isOriginalAtom(mce,atom3));
   2310    tl_assert(isShadowAtom(mce,vatom1));
   2311    tl_assert(isShadowAtom(mce,vatom2));
   2312    tl_assert(isShadowAtom(mce,vatom3));
   2313    tl_assert(sameKindedAtoms(atom1,vatom1));
   2314    tl_assert(sameKindedAtoms(atom2,vatom2));
   2315    tl_assert(sameKindedAtoms(atom3,vatom3));
   2316    switch (op) {
   2317       case Iop_AddF128:
   2318       case Iop_SubF128:
   2319       case Iop_MulF128:
   2320       case Iop_DivF128:
   2321          /* I32(rm) x F128 x F128 -> F128 */
   2322          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
   2323       case Iop_AddF64:
   2324       case Iop_AddF64r32:
   2325       case Iop_SubF64:
   2326       case Iop_SubF64r32:
   2327       case Iop_MulF64:
   2328       case Iop_MulF64r32:
   2329       case Iop_DivF64:
   2330       case Iop_DivF64r32:
   2331       case Iop_ScaleF64:
   2332       case Iop_Yl2xF64:
   2333       case Iop_Yl2xp1F64:
   2334       case Iop_AtanF64:
   2335       case Iop_PRemF64:
   2336       case Iop_PRem1F64:
   2337          /* I32(rm) x F64 x F64 -> F64 */
   2338          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
   2339       case Iop_PRemC3210F64:
   2340       case Iop_PRem1C3210F64:
   2341          /* I32(rm) x F64 x F64 -> I32 */
   2342          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2343       case Iop_AddF32:
   2344       case Iop_SubF32:
   2345       case Iop_MulF32:
   2346       case Iop_DivF32:
   2347          /* I32(rm) x F32 x F32 -> F32 */
   2348          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2349       case Iop_ExtractV128:
   2350          complainIfUndefined(mce, atom3);
   2351          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
   2352       case Iop_Extract64:
   2353          complainIfUndefined(mce, atom3);
   2354          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
   2355       case Iop_SetElem8x8:
   2356       case Iop_SetElem16x4:
   2357       case Iop_SetElem32x2:
   2358          complainIfUndefined(mce, atom2);
   2359          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
   2360       default:
   2361          ppIROp(op);
   2362          VG_(tool_panic)("memcheck:expr2vbits_Triop");
   2363    }
   2364 }
   2365 
   2366 
   2367 static
   2368 IRAtom* expr2vbits_Binop ( MCEnv* mce,
   2369                            IROp op,
   2370                            IRAtom* atom1, IRAtom* atom2 )
   2371 {
   2372    IRType  and_or_ty;
   2373    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   2374    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   2375    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
   2376 
   2377    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2378    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2379 
   2380    tl_assert(isOriginalAtom(mce,atom1));
   2381    tl_assert(isOriginalAtom(mce,atom2));
   2382    tl_assert(isShadowAtom(mce,vatom1));
   2383    tl_assert(isShadowAtom(mce,vatom2));
   2384    tl_assert(sameKindedAtoms(atom1,vatom1));
   2385    tl_assert(sameKindedAtoms(atom2,vatom2));
   2386    switch (op) {
   2387 
   2388       /* 32-bit SIMD */
   2389 
   2390       case Iop_Add16x2:
   2391       case Iop_HAdd16Ux2:
   2392       case Iop_HAdd16Sx2:
   2393       case Iop_Sub16x2:
   2394       case Iop_HSub16Ux2:
   2395       case Iop_HSub16Sx2:
   2396       case Iop_QAdd16Sx2:
   2397       case Iop_QSub16Sx2:
   2398          return binary16Ix2(mce, vatom1, vatom2);
   2399 
   2400       case Iop_Add8x4:
   2401       case Iop_HAdd8Ux4:
   2402       case Iop_HAdd8Sx4:
   2403       case Iop_Sub8x4:
   2404       case Iop_HSub8Ux4:
   2405       case Iop_HSub8Sx4:
   2406       case Iop_QSub8Ux4:
   2407       case Iop_QAdd8Ux4:
   2408       case Iop_QSub8Sx4:
   2409       case Iop_QAdd8Sx4:
   2410          return binary8Ix4(mce, vatom1, vatom2);
   2411 
   2412       /* 64-bit SIMD */
   2413 
   2414       case Iop_ShrN8x8:
   2415       case Iop_ShrN16x4:
   2416       case Iop_ShrN32x2:
   2417       case Iop_SarN8x8:
   2418       case Iop_SarN16x4:
   2419       case Iop_SarN32x2:
   2420       case Iop_ShlN16x4:
   2421       case Iop_ShlN32x2:
   2422       case Iop_ShlN8x8:
   2423          /* Same scheme as with all other shifts. */
   2424          complainIfUndefined(mce, atom2);
   2425          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   2426 
   2427       case Iop_QNarrowBin32Sto16Sx4:
   2428       case Iop_QNarrowBin16Sto8Sx8:
   2429       case Iop_QNarrowBin16Sto8Ux8:
   2430          return vectorNarrowBin64(mce, op, vatom1, vatom2);
   2431 
   2432       case Iop_Min8Ux8:
   2433       case Iop_Min8Sx8:
   2434       case Iop_Max8Ux8:
   2435       case Iop_Max8Sx8:
   2436       case Iop_Avg8Ux8:
   2437       case Iop_QSub8Sx8:
   2438       case Iop_QSub8Ux8:
   2439       case Iop_Sub8x8:
   2440       case Iop_CmpGT8Sx8:
   2441       case Iop_CmpGT8Ux8:
   2442       case Iop_CmpEQ8x8:
   2443       case Iop_QAdd8Sx8:
   2444       case Iop_QAdd8Ux8:
   2445       case Iop_QSal8x8:
   2446       case Iop_QShl8x8:
   2447       case Iop_Add8x8:
   2448       case Iop_Mul8x8:
   2449       case Iop_PolynomialMul8x8:
   2450          return binary8Ix8(mce, vatom1, vatom2);
   2451 
   2452       case Iop_Min16Sx4:
   2453       case Iop_Min16Ux4:
   2454       case Iop_Max16Sx4:
   2455       case Iop_Max16Ux4:
   2456       case Iop_Avg16Ux4:
   2457       case Iop_QSub16Ux4:
   2458       case Iop_QSub16Sx4:
   2459       case Iop_Sub16x4:
   2460       case Iop_Mul16x4:
   2461       case Iop_MulHi16Sx4:
   2462       case Iop_MulHi16Ux4:
   2463       case Iop_CmpGT16Sx4:
   2464       case Iop_CmpGT16Ux4:
   2465       case Iop_CmpEQ16x4:
   2466       case Iop_QAdd16Sx4:
   2467       case Iop_QAdd16Ux4:
   2468       case Iop_QSal16x4:
   2469       case Iop_QShl16x4:
   2470       case Iop_Add16x4:
   2471       case Iop_QDMulHi16Sx4:
   2472       case Iop_QRDMulHi16Sx4:
   2473          return binary16Ix4(mce, vatom1, vatom2);
   2474 
   2475       case Iop_Sub32x2:
   2476       case Iop_Mul32x2:
   2477       case Iop_Max32Sx2:
   2478       case Iop_Max32Ux2:
   2479       case Iop_Min32Sx2:
   2480       case Iop_Min32Ux2:
   2481       case Iop_CmpGT32Sx2:
   2482       case Iop_CmpGT32Ux2:
   2483       case Iop_CmpEQ32x2:
   2484       case Iop_Add32x2:
   2485       case Iop_QAdd32Ux2:
   2486       case Iop_QAdd32Sx2:
   2487       case Iop_QSub32Ux2:
   2488       case Iop_QSub32Sx2:
   2489       case Iop_QSal32x2:
   2490       case Iop_QShl32x2:
   2491       case Iop_QDMulHi32Sx2:
   2492       case Iop_QRDMulHi32Sx2:
   2493          return binary32Ix2(mce, vatom1, vatom2);
   2494 
   2495       case Iop_QSub64Ux1:
   2496       case Iop_QSub64Sx1:
   2497       case Iop_QAdd64Ux1:
   2498       case Iop_QAdd64Sx1:
   2499       case Iop_QSal64x1:
   2500       case Iop_QShl64x1:
   2501       case Iop_Sal64x1:
   2502          return binary64Ix1(mce, vatom1, vatom2);
   2503 
   2504       case Iop_QShlN8Sx8:
   2505       case Iop_QShlN8x8:
   2506       case Iop_QSalN8x8:
   2507          complainIfUndefined(mce, atom2);
   2508          return mkPCast8x8(mce, vatom1);
   2509 
   2510       case Iop_QShlN16Sx4:
   2511       case Iop_QShlN16x4:
   2512       case Iop_QSalN16x4:
   2513          complainIfUndefined(mce, atom2);
   2514          return mkPCast16x4(mce, vatom1);
   2515 
   2516       case Iop_QShlN32Sx2:
   2517       case Iop_QShlN32x2:
   2518       case Iop_QSalN32x2:
   2519          complainIfUndefined(mce, atom2);
   2520          return mkPCast32x2(mce, vatom1);
   2521 
   2522       case Iop_QShlN64Sx1:
   2523       case Iop_QShlN64x1:
   2524       case Iop_QSalN64x1:
   2525          complainIfUndefined(mce, atom2);
   2526          return mkPCast32x2(mce, vatom1);
   2527 
   2528       case Iop_PwMax32Sx2:
   2529       case Iop_PwMax32Ux2:
   2530       case Iop_PwMin32Sx2:
   2531       case Iop_PwMin32Ux2:
   2532       case Iop_PwMax32Fx2:
   2533       case Iop_PwMin32Fx2:
   2534          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
   2535                      mkPCast32x2(mce, vatom2)));
   2536 
   2537       case Iop_PwMax16Sx4:
   2538       case Iop_PwMax16Ux4:
   2539       case Iop_PwMin16Sx4:
   2540       case Iop_PwMin16Ux4:
   2541          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
   2542                      mkPCast16x4(mce, vatom2)));
   2543 
   2544       case Iop_PwMax8Sx8:
   2545       case Iop_PwMax8Ux8:
   2546       case Iop_PwMin8Sx8:
   2547       case Iop_PwMin8Ux8:
   2548          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
   2549                      mkPCast8x8(mce, vatom2)));
   2550 
   2551       case Iop_PwAdd32x2:
   2552       case Iop_PwAdd32Fx2:
   2553          return mkPCast32x2(mce,
   2554                assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
   2555                      mkPCast32x2(mce, vatom2))));
   2556 
   2557       case Iop_PwAdd16x4:
   2558          return mkPCast16x4(mce,
   2559                assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
   2560                      mkPCast16x4(mce, vatom2))));
   2561 
   2562       case Iop_PwAdd8x8:
   2563          return mkPCast8x8(mce,
   2564                assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
   2565                      mkPCast8x8(mce, vatom2))));
   2566 
   2567       case Iop_Shl8x8:
   2568       case Iop_Shr8x8:
   2569       case Iop_Sar8x8:
   2570       case Iop_Sal8x8:
   2571          return mkUifU64(mce,
   2572                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2573                    mkPCast8x8(mce,vatom2)
   2574                 );
   2575 
   2576       case Iop_Shl16x4:
   2577       case Iop_Shr16x4:
   2578       case Iop_Sar16x4:
   2579       case Iop_Sal16x4:
   2580          return mkUifU64(mce,
   2581                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2582                    mkPCast16x4(mce,vatom2)
   2583                 );
   2584 
   2585       case Iop_Shl32x2:
   2586       case Iop_Shr32x2:
   2587       case Iop_Sar32x2:
   2588       case Iop_Sal32x2:
   2589          return mkUifU64(mce,
   2590                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2591                    mkPCast32x2(mce,vatom2)
   2592                 );
   2593 
   2594       /* 64-bit data-steering */
   2595       case Iop_InterleaveLO32x2:
   2596       case Iop_InterleaveLO16x4:
   2597       case Iop_InterleaveLO8x8:
   2598       case Iop_InterleaveHI32x2:
   2599       case Iop_InterleaveHI16x4:
   2600       case Iop_InterleaveHI8x8:
   2601       case Iop_CatOddLanes8x8:
   2602       case Iop_CatEvenLanes8x8:
   2603       case Iop_CatOddLanes16x4:
   2604       case Iop_CatEvenLanes16x4:
   2605       case Iop_InterleaveOddLanes8x8:
   2606       case Iop_InterleaveEvenLanes8x8:
   2607       case Iop_InterleaveOddLanes16x4:
   2608       case Iop_InterleaveEvenLanes16x4:
   2609          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   2610 
   2611       case Iop_GetElem8x8:
   2612          complainIfUndefined(mce, atom2);
   2613          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   2614       case Iop_GetElem16x4:
   2615          complainIfUndefined(mce, atom2);
   2616          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   2617       case Iop_GetElem32x2:
   2618          complainIfUndefined(mce, atom2);
   2619          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   2620 
   2621       /* Perm8x8: rearrange values in left arg using steering values
   2622         from right arg.  So rearrange the vbits in the same way but
   2623         pessimise wrt steering values. */
   2624       case Iop_Perm8x8:
   2625          return mkUifU64(
   2626                    mce,
   2627                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2628                    mkPCast8x8(mce, vatom2)
   2629                 );
   2630 
   2631       /* V128-bit SIMD */
   2632 
   2633       case Iop_ShrN8x16:
   2634       case Iop_ShrN16x8:
   2635       case Iop_ShrN32x4:
   2636       case Iop_ShrN64x2:
   2637       case Iop_SarN8x16:
   2638       case Iop_SarN16x8:
   2639       case Iop_SarN32x4:
   2640       case Iop_SarN64x2:
   2641       case Iop_ShlN8x16:
   2642       case Iop_ShlN16x8:
   2643       case Iop_ShlN32x4:
   2644       case Iop_ShlN64x2:
   2645          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
   2646             this is wrong now, scalar shifts are done properly lazily.
   2647             Vector shifts should be fixed too. */
   2648          complainIfUndefined(mce, atom2);
   2649          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   2650 
   2651       /* V x V shifts/rotates are done using the standard lazy scheme. */
   2652       case Iop_Shl8x16:
   2653       case Iop_Shr8x16:
   2654       case Iop_Sar8x16:
   2655       case Iop_Sal8x16:
   2656       case Iop_Rol8x16:
   2657          return mkUifUV128(mce,
   2658                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2659                    mkPCast8x16(mce,vatom2)
   2660                 );
   2661 
   2662       case Iop_Shl16x8:
   2663       case Iop_Shr16x8:
   2664       case Iop_Sar16x8:
   2665       case Iop_Sal16x8:
   2666       case Iop_Rol16x8:
   2667          return mkUifUV128(mce,
   2668                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2669                    mkPCast16x8(mce,vatom2)
   2670                 );
   2671 
   2672       case Iop_Shl32x4:
   2673       case Iop_Shr32x4:
   2674       case Iop_Sar32x4:
   2675       case Iop_Sal32x4:
   2676       case Iop_Rol32x4:
   2677          return mkUifUV128(mce,
   2678                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2679                    mkPCast32x4(mce,vatom2)
   2680                 );
   2681 
   2682       case Iop_Shl64x2:
   2683       case Iop_Shr64x2:
   2684       case Iop_Sar64x2:
   2685       case Iop_Sal64x2:
   2686          return mkUifUV128(mce,
   2687                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2688                    mkPCast64x2(mce,vatom2)
   2689                 );
   2690 
   2691       case Iop_F32ToFixed32Ux4_RZ:
   2692       case Iop_F32ToFixed32Sx4_RZ:
   2693       case Iop_Fixed32UToF32x4_RN:
   2694       case Iop_Fixed32SToF32x4_RN:
   2695          complainIfUndefined(mce, atom2);
   2696          return mkPCast32x4(mce, vatom1);
   2697 
   2698       case Iop_F32ToFixed32Ux2_RZ:
   2699       case Iop_F32ToFixed32Sx2_RZ:
   2700       case Iop_Fixed32UToF32x2_RN:
   2701       case Iop_Fixed32SToF32x2_RN:
   2702          complainIfUndefined(mce, atom2);
   2703          return mkPCast32x2(mce, vatom1);
   2704 
   2705       case Iop_QSub8Ux16:
   2706       case Iop_QSub8Sx16:
   2707       case Iop_Sub8x16:
   2708       case Iop_Min8Ux16:
   2709       case Iop_Min8Sx16:
   2710       case Iop_Max8Ux16:
   2711       case Iop_Max8Sx16:
   2712       case Iop_CmpGT8Sx16:
   2713       case Iop_CmpGT8Ux16:
   2714       case Iop_CmpEQ8x16:
   2715       case Iop_Avg8Ux16:
   2716       case Iop_Avg8Sx16:
   2717       case Iop_QAdd8Ux16:
   2718       case Iop_QAdd8Sx16:
   2719       case Iop_QSal8x16:
   2720       case Iop_QShl8x16:
   2721       case Iop_Add8x16:
   2722       case Iop_Mul8x16:
   2723       case Iop_PolynomialMul8x16:
   2724          return binary8Ix16(mce, vatom1, vatom2);
   2725 
   2726       case Iop_QSub16Ux8:
   2727       case Iop_QSub16Sx8:
   2728       case Iop_Sub16x8:
   2729       case Iop_Mul16x8:
   2730       case Iop_MulHi16Sx8:
   2731       case Iop_MulHi16Ux8:
   2732       case Iop_Min16Sx8:
   2733       case Iop_Min16Ux8:
   2734       case Iop_Max16Sx8:
   2735       case Iop_Max16Ux8:
   2736       case Iop_CmpGT16Sx8:
   2737       case Iop_CmpGT16Ux8:
   2738       case Iop_CmpEQ16x8:
   2739       case Iop_Avg16Ux8:
   2740       case Iop_Avg16Sx8:
   2741       case Iop_QAdd16Ux8:
   2742       case Iop_QAdd16Sx8:
   2743       case Iop_QSal16x8:
   2744       case Iop_QShl16x8:
   2745       case Iop_Add16x8:
   2746       case Iop_QDMulHi16Sx8:
   2747       case Iop_QRDMulHi16Sx8:
   2748          return binary16Ix8(mce, vatom1, vatom2);
   2749 
   2750       case Iop_Sub32x4:
   2751       case Iop_CmpGT32Sx4:
   2752       case Iop_CmpGT32Ux4:
   2753       case Iop_CmpEQ32x4:
   2754       case Iop_QAdd32Sx4:
   2755       case Iop_QAdd32Ux4:
   2756       case Iop_QSub32Sx4:
   2757       case Iop_QSub32Ux4:
   2758       case Iop_QSal32x4:
   2759       case Iop_QShl32x4:
   2760       case Iop_Avg32Ux4:
   2761       case Iop_Avg32Sx4:
   2762       case Iop_Add32x4:
   2763       case Iop_Max32Ux4:
   2764       case Iop_Max32Sx4:
   2765       case Iop_Min32Ux4:
   2766       case Iop_Min32Sx4:
   2767       case Iop_Mul32x4:
   2768       case Iop_QDMulHi32Sx4:
   2769       case Iop_QRDMulHi32Sx4:
   2770          return binary32Ix4(mce, vatom1, vatom2);
   2771 
   2772       case Iop_Sub64x2:
   2773       case Iop_Add64x2:
   2774       case Iop_CmpEQ64x2:
   2775       case Iop_CmpGT64Sx2:
   2776       case Iop_QSal64x2:
   2777       case Iop_QShl64x2:
   2778       case Iop_QAdd64Ux2:
   2779       case Iop_QAdd64Sx2:
   2780       case Iop_QSub64Ux2:
   2781       case Iop_QSub64Sx2:
   2782          return binary64Ix2(mce, vatom1, vatom2);
   2783 
   2784       case Iop_QNarrowBin32Sto16Sx8:
   2785       case Iop_QNarrowBin32Uto16Ux8:
   2786       case Iop_QNarrowBin32Sto16Ux8:
   2787       case Iop_QNarrowBin16Sto8Sx16:
   2788       case Iop_QNarrowBin16Uto8Ux16:
   2789       case Iop_QNarrowBin16Sto8Ux16:
   2790          return vectorNarrowBinV128(mce, op, vatom1, vatom2);
   2791 
   2792       case Iop_Sub64Fx2:
   2793       case Iop_Mul64Fx2:
   2794       case Iop_Min64Fx2:
   2795       case Iop_Max64Fx2:
   2796       case Iop_Div64Fx2:
   2797       case Iop_CmpLT64Fx2:
   2798       case Iop_CmpLE64Fx2:
   2799       case Iop_CmpEQ64Fx2:
   2800       case Iop_CmpUN64Fx2:
   2801       case Iop_Add64Fx2:
   2802          return binary64Fx2(mce, vatom1, vatom2);
   2803 
   2804       case Iop_Sub64F0x2:
   2805       case Iop_Mul64F0x2:
   2806       case Iop_Min64F0x2:
   2807       case Iop_Max64F0x2:
   2808       case Iop_Div64F0x2:
   2809       case Iop_CmpLT64F0x2:
   2810       case Iop_CmpLE64F0x2:
   2811       case Iop_CmpEQ64F0x2:
   2812       case Iop_CmpUN64F0x2:
   2813       case Iop_Add64F0x2:
   2814          return binary64F0x2(mce, vatom1, vatom2);
   2815 
   2816       case Iop_Sub32Fx4:
   2817       case Iop_Mul32Fx4:
   2818       case Iop_Min32Fx4:
   2819       case Iop_Max32Fx4:
   2820       case Iop_Div32Fx4:
   2821       case Iop_CmpLT32Fx4:
   2822       case Iop_CmpLE32Fx4:
   2823       case Iop_CmpEQ32Fx4:
   2824       case Iop_CmpUN32Fx4:
   2825       case Iop_CmpGT32Fx4:
   2826       case Iop_CmpGE32Fx4:
   2827       case Iop_Add32Fx4:
   2828       case Iop_Recps32Fx4:
   2829       case Iop_Rsqrts32Fx4:
   2830          return binary32Fx4(mce, vatom1, vatom2);
   2831 
   2832       case Iop_Sub32Fx2:
   2833       case Iop_Mul32Fx2:
   2834       case Iop_Min32Fx2:
   2835       case Iop_Max32Fx2:
   2836       case Iop_CmpEQ32Fx2:
   2837       case Iop_CmpGT32Fx2:
   2838       case Iop_CmpGE32Fx2:
   2839       case Iop_Add32Fx2:
   2840       case Iop_Recps32Fx2:
   2841       case Iop_Rsqrts32Fx2:
   2842          return binary32Fx2(mce, vatom1, vatom2);
   2843 
   2844       case Iop_Sub32F0x4:
   2845       case Iop_Mul32F0x4:
   2846       case Iop_Min32F0x4:
   2847       case Iop_Max32F0x4:
   2848       case Iop_Div32F0x4:
   2849       case Iop_CmpLT32F0x4:
   2850       case Iop_CmpLE32F0x4:
   2851       case Iop_CmpEQ32F0x4:
   2852       case Iop_CmpUN32F0x4:
   2853       case Iop_Add32F0x4:
   2854          return binary32F0x4(mce, vatom1, vatom2);
   2855 
   2856       case Iop_QShlN8Sx16:
   2857       case Iop_QShlN8x16:
   2858       case Iop_QSalN8x16:
   2859          complainIfUndefined(mce, atom2);
   2860          return mkPCast8x16(mce, vatom1);
   2861 
   2862       case Iop_QShlN16Sx8:
   2863       case Iop_QShlN16x8:
   2864       case Iop_QSalN16x8:
   2865          complainIfUndefined(mce, atom2);
   2866          return mkPCast16x8(mce, vatom1);
   2867 
   2868       case Iop_QShlN32Sx4:
   2869       case Iop_QShlN32x4:
   2870       case Iop_QSalN32x4:
   2871          complainIfUndefined(mce, atom2);
   2872          return mkPCast32x4(mce, vatom1);
   2873 
   2874       case Iop_QShlN64Sx2:
   2875       case Iop_QShlN64x2:
   2876       case Iop_QSalN64x2:
   2877          complainIfUndefined(mce, atom2);
    2878          return mkPCast64x2(mce, vatom1);
   2879 
   2880       case Iop_Mull32Sx2:
   2881       case Iop_Mull32Ux2:
   2882       case Iop_QDMulLong32Sx2:
   2883          return vectorWidenI64(mce, Iop_Widen32Sto64x2,
   2884                                     mkUifU64(mce, vatom1, vatom2));
   2885 
   2886       case Iop_Mull16Sx4:
   2887       case Iop_Mull16Ux4:
   2888       case Iop_QDMulLong16Sx4:
   2889          return vectorWidenI64(mce, Iop_Widen16Sto32x4,
   2890                                     mkUifU64(mce, vatom1, vatom2));
   2891 
   2892       case Iop_Mull8Sx8:
   2893       case Iop_Mull8Ux8:
   2894       case Iop_PolynomialMull8x8:
   2895          return vectorWidenI64(mce, Iop_Widen8Sto16x8,
   2896                                     mkUifU64(mce, vatom1, vatom2));
   2897 
   2898       case Iop_PwAdd32x4:
   2899          return mkPCast32x4(mce,
   2900                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
   2901                      mkPCast32x4(mce, vatom2))));
   2902 
   2903       case Iop_PwAdd16x8:
   2904          return mkPCast16x8(mce,
   2905                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
   2906                      mkPCast16x8(mce, vatom2))));
   2907 
   2908       case Iop_PwAdd8x16:
   2909          return mkPCast8x16(mce,
   2910                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
   2911                      mkPCast8x16(mce, vatom2))));
   2912 
   2913       /* V128-bit data-steering */
   2914       case Iop_SetV128lo32:
   2915       case Iop_SetV128lo64:
   2916       case Iop_64HLtoV128:
   2917       case Iop_InterleaveLO64x2:
   2918       case Iop_InterleaveLO32x4:
   2919       case Iop_InterleaveLO16x8:
   2920       case Iop_InterleaveLO8x16:
   2921       case Iop_InterleaveHI64x2:
   2922       case Iop_InterleaveHI32x4:
   2923       case Iop_InterleaveHI16x8:
   2924       case Iop_InterleaveHI8x16:
   2925       case Iop_CatOddLanes8x16:
   2926       case Iop_CatOddLanes16x8:
   2927       case Iop_CatOddLanes32x4:
   2928       case Iop_CatEvenLanes8x16:
   2929       case Iop_CatEvenLanes16x8:
   2930       case Iop_CatEvenLanes32x4:
   2931       case Iop_InterleaveOddLanes8x16:
   2932       case Iop_InterleaveOddLanes16x8:
   2933       case Iop_InterleaveOddLanes32x4:
   2934       case Iop_InterleaveEvenLanes8x16:
   2935       case Iop_InterleaveEvenLanes16x8:
   2936       case Iop_InterleaveEvenLanes32x4:
   2937          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
   2938 
   2939       case Iop_GetElem8x16:
   2940          complainIfUndefined(mce, atom2);
   2941          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   2942       case Iop_GetElem16x8:
   2943          complainIfUndefined(mce, atom2);
   2944          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   2945       case Iop_GetElem32x4:
   2946          complainIfUndefined(mce, atom2);
   2947          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   2948       case Iop_GetElem64x2:
   2949          complainIfUndefined(mce, atom2);
   2950          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   2951 
   2952      /* Perm8x16: rearrange values in left arg using steering values
   2953         from right arg.  So rearrange the vbits in the same way but
   2954         pessimise wrt steering values. */
   2955       case Iop_Perm8x16:
   2956          return mkUifUV128(
   2957                    mce,
   2958                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2959                    mkPCast8x16(mce, vatom2)
   2960                 );
   2961 
    2962       /* These two take the lower (even) 16-bit half of each 32-bit
    2963          lane, sign/zero extend it to 32 bits, and multiply the halves
    2964          together, producing a 32x4 result (implicitly ignoring half
    2965          the operand bits).  So treat it as a bunch of independent
    2966          16x8 operations, but then do 32-bit shifts left then right to
    2967          copy the lower-half results (all 0s or all 1s due to PCasting
    2968          in binary16Ix8) into the upper half of each result lane. */
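               /* For example: after binary16Ix8 each 16-bit lane of the
                  shadow value is 0x0000 (defined) or 0xFFFF (undefined).
                  Within a 32-bit lane [hi:lo], the Shl by 16 gives
                  [lo:0x0000] and the following Sar by 16 gives [lo:lo].
                  So both halves of each result lane carry the V bits of
                  the even (low) source lanes, and the odd-lane V bits are
                  dropped, which matches an operation that ignores those
                  operand bits. */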
   2969       case Iop_MullEven16Ux8:
   2970       case Iop_MullEven16Sx8: {
   2971          IRAtom* at;
   2972          at = binary16Ix8(mce,vatom1,vatom2);
   2973          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
   2974          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
    2975          return at;
   2976       }
   2977 
   2978       /* Same deal as Iop_MullEven16{S,U}x8 */
   2979       case Iop_MullEven8Ux16:
   2980       case Iop_MullEven8Sx16: {
   2981          IRAtom* at;
   2982          at = binary8Ix16(mce,vatom1,vatom2);
   2983          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
   2984          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
   2985 	 return at;
   2986       }
   2987 
    2988       /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
    2989          32x4 -> 16x8 laneage, discarding the upper half of each lane.
    2990          Simply apply the same op to the V bits, since this is really
    2991          no more than a data-steering operation. */
   2992       case Iop_NarrowBin32to16x8:
   2993       case Iop_NarrowBin16to8x16:
   2994          return assignNew('V', mce, Ity_V128,
   2995                                     binop(op, vatom1, vatom2));
   2996 
   2997       case Iop_ShrV128:
   2998       case Iop_ShlV128:
   2999          /* Same scheme as with all other shifts.  Note: 10 Nov 05:
   3000             this is wrong now, scalar shifts are done properly lazily.
   3001             Vector shifts should be fixed too. */
   3002          complainIfUndefined(mce, atom2);
   3003          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   3004 
   3005       /* I128-bit data-steering */
   3006       case Iop_64HLto128:
   3007          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
   3008 
   3009       /* Scalar floating point */
   3010 
   3011       case Iop_F32toI64S:
   3012          /* I32(rm) x F32 -> I64 */
   3013          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3014 
   3015       case Iop_I64StoF32:
   3016          /* I32(rm) x I64 -> F32 */
   3017          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3018 
   3019       case Iop_RoundF64toInt:
   3020       case Iop_RoundF64toF32:
   3021       case Iop_F64toI64S:
   3022       case Iop_F64toI64U:
   3023       case Iop_I64StoF64:
   3024       case Iop_I64UtoF64:
   3025       case Iop_SinF64:
   3026       case Iop_CosF64:
   3027       case Iop_TanF64:
   3028       case Iop_2xm1F64:
   3029       case Iop_SqrtF64:
   3030          /* I32(rm) x I64/F64 -> I64/F64 */
   3031          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3032 
   3033       case Iop_RoundF32toInt:
   3034       case Iop_SqrtF32:
   3035          /* I32(rm) x I32/F32 -> I32/F32 */
   3036          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3037 
   3038       case Iop_SqrtF128:
   3039          /* I32(rm) x F128 -> F128 */
   3040          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3041 
   3042       case Iop_I32StoF32:
   3043       case Iop_F32toI32S:
   3044          /* First arg is I32 (rounding mode), second is F32/I32 (data). */
   3045          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3046 
   3047       case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
   3048       case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
   3049          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3050 
   3051       case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
   3052       case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
   3053          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3054 
   3055       case Iop_F64HLtoF128:
   3056          return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2));
   3057 
   3058       case Iop_F64toI32U:
   3059       case Iop_F64toI32S:
   3060       case Iop_F64toF32:
   3061       case Iop_I64UtoF32:
   3062          /* First arg is I32 (rounding mode), second is F64 (data). */
   3063          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3064 
   3065       case Iop_F64toI16S:
   3066          /* First arg is I32 (rounding mode), second is F64 (data). */
   3067          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
   3068 
   3069       case Iop_CmpF32:
   3070       case Iop_CmpF64:
   3071       case Iop_CmpF128:
   3072          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3073 
   3074       /* non-FP after here */
   3075 
   3076       case Iop_DivModU64to32:
   3077       case Iop_DivModS64to32:
   3078          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3079 
   3080       case Iop_DivModU128to64:
   3081       case Iop_DivModS128to64:
   3082          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3083 
   3084       case Iop_16HLto32:
   3085          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
   3086       case Iop_32HLto64:
   3087          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   3088 
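               /* For the widening multiplies (and DivModS64to64) below, the
                  low half of the shadow result is Left(UifU(v1,v2)), so
                  undefinedness in either operand spreads towards the more
                  significant bits, as a carry or borrow could; the high
                  half is the PCast of the low half, hence all-defined
                  unless anything in the low half is undefined.  Sketch for
                  the 32 x 32 -> 64 case:

                     vLo32  = Left32( vatom1 `UifU` vatom2 )
                     vHi32  = PCastTo(Ity_I32, vLo32)    // 0 or 0xFFFFFFFF
                     result = 32HLto64( vHi32, vLo32 )
               */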
   3089       case Iop_DivModS64to64:
   3090       case Iop_MullS64:
   3091       case Iop_MullU64: {
   3092          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   3093          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
   3094          return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
   3095       }
   3096 
   3097       case Iop_MullS32:
   3098       case Iop_MullU32: {
   3099          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   3100          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
   3101          return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
   3102       }
   3103 
   3104       case Iop_MullS16:
   3105       case Iop_MullU16: {
   3106          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   3107          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
   3108          return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
   3109       }
   3110 
   3111       case Iop_MullS8:
   3112       case Iop_MullU8: {
   3113          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   3114          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
   3115          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
   3116       }
   3117 
    3118       case Iop_Sad8Ux4: /* maybe we could do better?  For now, do mkLazy2. */
   3119       case Iop_DivS32:
   3120       case Iop_DivU32:
   3121       case Iop_DivU32E:
   3122       case Iop_DivS32E:
   3123          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3124 
   3125       case Iop_DivS64:
   3126       case Iop_DivU64:
   3127       case Iop_DivS64E:
   3128       case Iop_DivU64E:
   3129          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3130 
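               /* For Add and Sub there are two schemes.  If this superblock
                  contains suspicious-looking literals (mce->bogusLiterals),
                  use expensiveAddSub, a more precise definedness computation
                  which also consults the original operand values; otherwise
                  fall through to the cheap approximation Left(UifU(v1,v2)). */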
   3131       case Iop_Add32:
   3132          if (mce->bogusLiterals)
   3133             return expensiveAddSub(mce,True,Ity_I32,
   3134                                    vatom1,vatom2, atom1,atom2);
   3135          else
   3136             goto cheap_AddSub32;
   3137       case Iop_Sub32:
   3138          if (mce->bogusLiterals)
   3139             return expensiveAddSub(mce,False,Ity_I32,
   3140                                    vatom1,vatom2, atom1,atom2);
   3141          else
   3142             goto cheap_AddSub32;
   3143 
   3144       cheap_AddSub32:
   3145       case Iop_Mul32:
   3146          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   3147 
   3148       case Iop_CmpORD32S:
   3149       case Iop_CmpORD32U:
   3150       case Iop_CmpORD64S:
   3151       case Iop_CmpORD64U:
   3152          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
   3153 
   3154       case Iop_Add64:
   3155          if (mce->bogusLiterals)
   3156             return expensiveAddSub(mce,True,Ity_I64,
   3157                                    vatom1,vatom2, atom1,atom2);
   3158          else
   3159             goto cheap_AddSub64;
   3160       case Iop_Sub64:
   3161          if (mce->bogusLiterals)
   3162             return expensiveAddSub(mce,False,Ity_I64,
   3163                                    vatom1,vatom2, atom1,atom2);
   3164          else
   3165             goto cheap_AddSub64;
   3166 
   3167       cheap_AddSub64:
   3168       case Iop_Mul64:
   3169          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   3170 
   3171       case Iop_Mul16:
   3172       case Iop_Add16:
   3173       case Iop_Sub16:
   3174          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   3175 
   3176       case Iop_Sub8:
   3177       case Iop_Add8:
   3178          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   3179 
   3180       case Iop_CmpEQ64:
   3181       case Iop_CmpNE64:
   3182          if (mce->bogusLiterals)
   3183             return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
   3184          else
   3185             goto cheap_cmp64;
   3186       cheap_cmp64:
   3187       case Iop_CmpLE64S: case Iop_CmpLE64U:
   3188       case Iop_CmpLT64U: case Iop_CmpLT64S:
   3189          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
   3190 
   3191       case Iop_CmpEQ32:
   3192       case Iop_CmpNE32:
   3193          if (mce->bogusLiterals)
   3194             return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
   3195          else
   3196             goto cheap_cmp32;
   3197       cheap_cmp32:
   3198       case Iop_CmpLE32S: case Iop_CmpLE32U:
   3199       case Iop_CmpLT32U: case Iop_CmpLT32S:
   3200          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
   3201 
   3202       case Iop_CmpEQ16: case Iop_CmpNE16:
   3203          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
   3204 
   3205       case Iop_CmpEQ8: case Iop_CmpNE8:
   3206          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
   3207 
   3208       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   3209       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   3210       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   3211       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   3212          /* Just say these all produce a defined result, regardless
   3213             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
   3214          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
   3215 
   3216       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
   3217          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
   3218 
   3219       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
   3220          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
   3221 
   3222       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
   3223          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
   3224 
   3225       case Iop_Shl8: case Iop_Shr8:
   3226          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
   3227 
   3228       case Iop_AndV128:
   3229          uifu = mkUifUV128; difd = mkDifDV128;
   3230          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
   3231       case Iop_And64:
   3232          uifu = mkUifU64; difd = mkDifD64;
   3233          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
   3234       case Iop_And32:
   3235          uifu = mkUifU32; difd = mkDifD32;
   3236          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
   3237       case Iop_And16:
   3238          uifu = mkUifU16; difd = mkDifD16;
   3239          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
   3240       case Iop_And8:
   3241          uifu = mkUifU8; difd = mkDifD8;
   3242          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
   3243 
   3244       case Iop_OrV128:
   3245          uifu = mkUifUV128; difd = mkDifDV128;
   3246          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
   3247       case Iop_Or64:
   3248          uifu = mkUifU64; difd = mkDifD64;
   3249          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
   3250       case Iop_Or32:
   3251          uifu = mkUifU32; difd = mkDifD32;
   3252          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
   3253       case Iop_Or16:
   3254          uifu = mkUifU16; difd = mkDifD16;
   3255          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
   3256       case Iop_Or8:
   3257          uifu = mkUifU8; difd = mkDifD8;
   3258          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
   3259 
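               /* For And/Or, the naive result uifu(v1,v2) is combined via
                  difd ("defined wins") with per-operand improvement terms:
                  for AND, a bit that is a defined 0 in either operand forces
                  that result bit to a defined 0 whatever the other operand
                  holds; for OR, a defined 1 does likewise.  Schematically:

                     result# = uifu(v1,v2) `difd` improve(a1,v1)
                                           `difd` improve(a2,v2)
               */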
   3260       do_And_Or:
   3261          return
   3262          assignNew(
   3263             'V', mce,
   3264             and_or_ty,
   3265             difd(mce, uifu(mce, vatom1, vatom2),
   3266                       difd(mce, improve(mce, atom1, vatom1),
   3267                                 improve(mce, atom2, vatom2) ) ) );
   3268 
   3269       case Iop_Xor8:
   3270          return mkUifU8(mce, vatom1, vatom2);
   3271       case Iop_Xor16:
   3272          return mkUifU16(mce, vatom1, vatom2);
   3273       case Iop_Xor32:
   3274          return mkUifU32(mce, vatom1, vatom2);
   3275       case Iop_Xor64:
   3276          return mkUifU64(mce, vatom1, vatom2);
   3277       case Iop_XorV128:
   3278          return mkUifUV128(mce, vatom1, vatom2);
   3279 
   3280       default:
   3281          ppIROp(op);
   3282          VG_(tool_panic)("memcheck:expr2vbits_Binop");
   3283    }
   3284 }
   3285 
   3286 
   3287 static
   3288 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
   3289 {
   3290    IRAtom* vatom = expr2vbits( mce, atom );
   3291    tl_assert(isOriginalAtom(mce,atom));
   3292    switch (op) {
   3293 
   3294       case Iop_Sqrt64Fx2:
   3295          return unary64Fx2(mce, vatom);
   3296 
   3297       case Iop_Sqrt64F0x2:
   3298          return unary64F0x2(mce, vatom);
   3299 
   3300       case Iop_Sqrt32Fx4:
   3301       case Iop_RSqrt32Fx4:
   3302       case Iop_Recip32Fx4:
   3303       case Iop_I32UtoFx4:
   3304       case Iop_I32StoFx4:
   3305       case Iop_QFtoI32Ux4_RZ:
   3306       case Iop_QFtoI32Sx4_RZ:
   3307       case Iop_RoundF32x4_RM:
   3308       case Iop_RoundF32x4_RP:
   3309       case Iop_RoundF32x4_RN:
   3310       case Iop_RoundF32x4_RZ:
   3311       case Iop_Recip32x4:
   3312       case Iop_Abs32Fx4:
   3313       case Iop_Neg32Fx4:
   3314       case Iop_Rsqrte32Fx4:
   3315          return unary32Fx4(mce, vatom);
   3316 
   3317       case Iop_I32UtoFx2:
   3318       case Iop_I32StoFx2:
   3319       case Iop_Recip32Fx2:
   3320       case Iop_Recip32x2:
   3321       case Iop_Abs32Fx2:
   3322       case Iop_Neg32Fx2:
   3323       case Iop_Rsqrte32Fx2:
   3324          return unary32Fx2(mce, vatom);
   3325 
   3326       case Iop_Sqrt32F0x4:
   3327       case Iop_RSqrt32F0x4:
   3328       case Iop_Recip32F0x4:
   3329          return unary32F0x4(mce, vatom);
   3330 
   3331       case Iop_32UtoV128:
   3332       case Iop_64UtoV128:
   3333       case Iop_Dup8x16:
   3334       case Iop_Dup16x8:
   3335       case Iop_Dup32x4:
   3336       case Iop_Reverse16_8x16:
   3337       case Iop_Reverse32_8x16:
   3338       case Iop_Reverse32_16x8:
   3339       case Iop_Reverse64_8x16:
   3340       case Iop_Reverse64_16x8:
   3341       case Iop_Reverse64_32x4:
   3342          return assignNew('V', mce, Ity_V128, unop(op, vatom));
   3343 
    3344       case Iop_F128HItoF64:  /* F128 -> high half, as F64 */
    3345          return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
    3346       case Iop_F128LOtoF64:  /* F128 -> low half, as F64 */
    3347          return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
   3348 
   3349       case Iop_NegF128:
   3350       case Iop_AbsF128:
   3351          return mkPCastTo(mce, Ity_I128, vatom);
   3352 
   3353       case Iop_I32StoF128: /* signed I32 -> F128 */
   3354       case Iop_I64StoF128: /* signed I64 -> F128 */
   3355       case Iop_F32toF128:  /* F32 -> F128 */
   3356       case Iop_F64toF128:  /* F64 -> F128 */
   3357          return mkPCastTo(mce, Ity_I128, vatom);
   3358 
   3359       case Iop_F32toF64:
   3360       case Iop_I32StoF64:
   3361       case Iop_I32UtoF64:
   3362       case Iop_NegF64:
   3363       case Iop_AbsF64:
   3364       case Iop_Est5FRSqrt:
   3365       case Iop_RoundF64toF64_NEAREST:
   3366       case Iop_RoundF64toF64_NegINF:
   3367       case Iop_RoundF64toF64_PosINF:
   3368       case Iop_RoundF64toF64_ZERO:
   3369       case Iop_Clz64:
   3370       case Iop_Ctz64:
   3371          return mkPCastTo(mce, Ity_I64, vatom);
   3372 
   3373       case Iop_Clz32:
   3374       case Iop_Ctz32:
   3375       case Iop_TruncF64asF32:
   3376       case Iop_NegF32:
   3377       case Iop_AbsF32:
   3378          return mkPCastTo(mce, Ity_I32, vatom);
   3379 
   3380       case Iop_1Uto64:
   3381       case Iop_1Sto64:
   3382       case Iop_8Uto64:
   3383       case Iop_8Sto64:
   3384       case Iop_16Uto64:
   3385       case Iop_16Sto64:
   3386       case Iop_32Sto64:
   3387       case Iop_32Uto64:
   3388       case Iop_V128to64:
   3389       case Iop_V128HIto64:
   3390       case Iop_128HIto64:
   3391       case Iop_128to64:
   3392       case Iop_Dup8x8:
   3393       case Iop_Dup16x4:
   3394       case Iop_Dup32x2:
   3395       case Iop_Reverse16_8x8:
   3396       case Iop_Reverse32_8x8:
   3397       case Iop_Reverse32_16x4:
   3398       case Iop_Reverse64_8x8:
   3399       case Iop_Reverse64_16x4:
   3400       case Iop_Reverse64_32x2:
   3401          return assignNew('V', mce, Ity_I64, unop(op, vatom));
   3402 
   3403       case Iop_I16StoF32:
   3404       case Iop_64to32:
   3405       case Iop_64HIto32:
   3406       case Iop_1Uto32:
   3407       case Iop_1Sto32:
   3408       case Iop_8Uto32:
   3409       case Iop_16Uto32:
   3410       case Iop_16Sto32:
   3411       case Iop_8Sto32:
   3412       case Iop_V128to32:
   3413          return assignNew('V', mce, Ity_I32, unop(op, vatom));
   3414 
   3415       case Iop_8Sto16:
   3416       case Iop_8Uto16:
   3417       case Iop_32to16:
   3418       case Iop_32HIto16:
   3419       case Iop_64to16:
   3420          return assignNew('V', mce, Ity_I16, unop(op, vatom));
   3421 
   3422       case Iop_1Uto8:
   3423       case Iop_1Sto8:
   3424       case Iop_16to8:
   3425       case Iop_16HIto8:
   3426       case Iop_32to8:
   3427       case Iop_64to8:
   3428          return assignNew('V', mce, Ity_I8, unop(op, vatom));
   3429 
   3430       case Iop_32to1:
   3431          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
   3432 
   3433       case Iop_64to1:
   3434          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
   3435 
   3436       case Iop_ReinterpF64asI64:
   3437       case Iop_ReinterpI64asF64:
   3438       case Iop_ReinterpI32asF32:
   3439       case Iop_ReinterpF32asI32:
   3440       case Iop_NotV128:
   3441       case Iop_Not64:
   3442       case Iop_Not32:
   3443       case Iop_Not16:
   3444       case Iop_Not8:
   3445       case Iop_Not1:
   3446          return vatom;
   3447 
   3448       case Iop_CmpNEZ8x8:
   3449       case Iop_Cnt8x8:
   3450       case Iop_Clz8Sx8:
   3451       case Iop_Cls8Sx8:
   3452       case Iop_Abs8x8:
   3453          return mkPCast8x8(mce, vatom);
   3454 
   3455       case Iop_CmpNEZ8x16:
   3456       case Iop_Cnt8x16:
   3457       case Iop_Clz8Sx16:
   3458       case Iop_Cls8Sx16:
   3459       case Iop_Abs8x16:
   3460          return mkPCast8x16(mce, vatom);
   3461 
   3462       case Iop_CmpNEZ16x4:
   3463       case Iop_Clz16Sx4:
   3464       case Iop_Cls16Sx4:
   3465       case Iop_Abs16x4:
   3466          return mkPCast16x4(mce, vatom);
   3467 
   3468       case Iop_CmpNEZ16x8:
   3469       case Iop_Clz16Sx8:
   3470       case Iop_Cls16Sx8:
   3471       case Iop_Abs16x8:
   3472          return mkPCast16x8(mce, vatom);
   3473 
   3474       case Iop_CmpNEZ32x2:
   3475       case Iop_Clz32Sx2:
   3476       case Iop_Cls32Sx2:
   3477       case Iop_FtoI32Ux2_RZ:
   3478       case Iop_FtoI32Sx2_RZ:
   3479       case Iop_Abs32x2:
   3480          return mkPCast32x2(mce, vatom);
   3481 
   3482       case Iop_CmpNEZ32x4:
   3483       case Iop_Clz32Sx4:
   3484       case Iop_Cls32Sx4:
   3485       case Iop_FtoI32Ux4_RZ:
   3486       case Iop_FtoI32Sx4_RZ:
   3487       case Iop_Abs32x4:
   3488          return mkPCast32x4(mce, vatom);
   3489 
   3490       case Iop_CmpwNEZ64:
   3491          return mkPCastTo(mce, Ity_I64, vatom);
   3492 
   3493       case Iop_CmpNEZ64x2:
   3494          return mkPCast64x2(mce, vatom);
   3495 
   3496       case Iop_NarrowUn16to8x8:
   3497       case Iop_NarrowUn32to16x4:
   3498       case Iop_NarrowUn64to32x2:
   3499       case Iop_QNarrowUn16Sto8Sx8:
   3500       case Iop_QNarrowUn16Sto8Ux8:
   3501       case Iop_QNarrowUn16Uto8Ux8:
   3502       case Iop_QNarrowUn32Sto16Sx4:
   3503       case Iop_QNarrowUn32Sto16Ux4:
   3504       case Iop_QNarrowUn32Uto16Ux4:
   3505       case Iop_QNarrowUn64Sto32Sx2:
   3506       case Iop_QNarrowUn64Sto32Ux2:
   3507       case Iop_QNarrowUn64Uto32Ux2:
   3508          return vectorNarrowUnV128(mce, op, vatom);
   3509 
   3510       case Iop_Widen8Sto16x8:
   3511       case Iop_Widen8Uto16x8:
   3512       case Iop_Widen16Sto32x4:
   3513       case Iop_Widen16Uto32x4:
   3514       case Iop_Widen32Sto64x2:
   3515       case Iop_Widen32Uto64x2:
   3516          return vectorWidenI64(mce, op, vatom);
   3517 
   3518       case Iop_PwAddL32Ux2:
   3519       case Iop_PwAddL32Sx2:
   3520          return mkPCastTo(mce, Ity_I64,
   3521                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
   3522 
   3523       case Iop_PwAddL16Ux4:
   3524       case Iop_PwAddL16Sx4:
   3525          return mkPCast32x2(mce,
   3526                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
   3527 
   3528       case Iop_PwAddL8Ux8:
   3529       case Iop_PwAddL8Sx8:
   3530          return mkPCast16x4(mce,
   3531                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
   3532 
   3533       case Iop_PwAddL32Ux4:
   3534       case Iop_PwAddL32Sx4:
   3535          return mkPCast64x2(mce,
   3536                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
   3537 
   3538       case Iop_PwAddL16Ux8:
   3539       case Iop_PwAddL16Sx8:
   3540          return mkPCast32x4(mce,
   3541                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
   3542 
   3543       case Iop_PwAddL8Ux16:
   3544       case Iop_PwAddL8Sx16:
   3545          return mkPCast16x8(mce,
   3546                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
   3547 
   3548       case Iop_I64UtoF32:
   3549       default:
   3550          ppIROp(op);
   3551          VG_(tool_panic)("memcheck:expr2vbits_Unop");
   3552    }
   3553 }
   3554 
   3555 
   3556 /* Worker function; do not call directly. */
   3557 static
   3558 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
   3559                               IREndness end, IRType ty,
   3560                               IRAtom* addr, UInt bias )
   3561 {
   3562    void*    helper;
   3563    Char*    hname;
   3564    IRDirty* di;
   3565    IRTemp   datavbits;
   3566    IRAtom*  addrAct;
   3567 
   3568    tl_assert(isOriginalAtom(mce,addr));
   3569    tl_assert(end == Iend_LE || end == Iend_BE);
   3570 
   3571    /* First, emit a definedness test for the address.  This also sets
   3572       the address (shadow) to 'defined' following the test. */
   3573    complainIfUndefined( mce, addr );
   3574 
   3575    /* Now cook up a call to the relevant helper function, to read the
   3576       data V bits from shadow memory. */
   3577    ty = shadowTypeV(ty);
   3578 
   3579    if (end == Iend_LE) {
   3580       switch (ty) {
   3581          case Ity_I64: helper = &MC_(helperc_LOADV64le);
   3582                        hname = "MC_(helperc_LOADV64le)";
   3583                        break;
   3584          case Ity_I32: helper = &MC_(helperc_LOADV32le);
   3585                        hname = "MC_(helperc_LOADV32le)";
   3586                        break;
   3587          case Ity_I16: helper = &MC_(helperc_LOADV16le);
   3588                        hname = "MC_(helperc_LOADV16le)";
   3589                        break;
   3590          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3591                        hname = "MC_(helperc_LOADV8)";
   3592                        break;
   3593          default:      ppIRType(ty);
   3594                        VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
   3595       }
   3596    } else {
   3597       switch (ty) {
   3598          case Ity_I64: helper = &MC_(helperc_LOADV64be);
   3599                        hname = "MC_(helperc_LOADV64be)";
   3600                        break;
   3601          case Ity_I32: helper = &MC_(helperc_LOADV32be);
   3602                        hname = "MC_(helperc_LOADV32be)";
   3603                        break;
   3604          case Ity_I16: helper = &MC_(helperc_LOADV16be);
   3605                        hname = "MC_(helperc_LOADV16be)";
   3606                        break;
   3607          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3608                        hname = "MC_(helperc_LOADV8)";
   3609                        break;
   3610          default:      ppIRType(ty);
   3611                        VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
   3612       }
   3613    }
   3614 
   3615    /* Generate the actual address into addrAct. */
   3616    if (bias == 0) {
   3617       addrAct = addr;
   3618    } else {
   3619       IROp    mkAdd;
   3620       IRAtom* eBias;
   3621       IRType  tyAddr  = mce->hWordTy;
   3622       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   3623       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   3624       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   3625       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   3626    }
   3627 
   3628    /* We need to have a place to park the V bits we're just about to
   3629       read. */
   3630    datavbits = newTemp(mce, ty, VSh);
   3631    di = unsafeIRDirty_1_N( datavbits,
   3632                            1/*regparms*/,
   3633                            hname, VG_(fnptr_to_fnentry)( helper ),
   3634                            mkIRExprVec_1( addrAct ));
   3635    setHelperAnns( mce, di );
   3636    stmt( 'V', mce, IRStmt_Dirty(di) );
   3637 
   3638    return mkexpr(datavbits);
   3639 }
   3640 
   3641 
   3642 static
   3643 IRAtom* expr2vbits_Load ( MCEnv* mce,
   3644                           IREndness end, IRType ty,
   3645                           IRAtom* addr, UInt bias )
   3646 {
   3647    IRAtom *v64hi, *v64lo;
   3648    tl_assert(end == Iend_LE || end == Iend_BE);
   3649    switch (shadowTypeV(ty)) {
   3650       case Ity_I8:
   3651       case Ity_I16:
   3652       case Ity_I32:
   3653       case Ity_I64:
   3654          return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
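               /* A V128 shadow load is synthesised from two 64-bit shadow
                  loads, at bias and bias+8; which of the two is the low
                  half depends on the endianness. */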
   3655       case Ity_V128:
   3656          if (end == Iend_LE) {
   3657             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
   3658             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3659          } else {
   3660             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
   3661             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3662          }
   3663          return assignNew( 'V', mce,
   3664                            Ity_V128,
   3665                            binop(Iop_64HLtoV128, v64hi, v64lo));
   3666       default:
   3667          VG_(tool_panic)("expr2vbits_Load");
   3668    }
   3669 }
   3670 
   3671 
   3672 static
   3673 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
   3674                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
   3675 {
   3676    IRAtom *vbitsC, *vbits0, *vbitsX;
   3677    IRType ty;
   3678    /* Given Mux0X(cond,expr0,exprX), generate
   3679          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
   3680       That is, steer the V bits like the originals, but trash the
   3681       result if the steering value is undefined.  This gives
   3682       lazy propagation. */
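            /* Concretely: if cond# is anything other than all-zeroes (cond
               not fully defined), PCast(cond#) is all-ones and the UifU
               marks the whole result undefined; if cond is fully defined,
               the result V bits are just the Mux0X selection of the two
               operand V-bit vectors. */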
   3683    tl_assert(isOriginalAtom(mce, cond));
   3684    tl_assert(isOriginalAtom(mce, expr0));
   3685    tl_assert(isOriginalAtom(mce, exprX));
   3686 
   3687    vbitsC = expr2vbits(mce, cond);
   3688    vbits0 = expr2vbits(mce, expr0);
   3689    vbitsX = expr2vbits(mce, exprX);
   3690    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
   3691 
   3692    return
   3693       mkUifU(mce, ty, assignNew('V', mce, ty,
   3694                                      IRExpr_Mux0X(cond, vbits0, vbitsX)),
   3695                       mkPCastTo(mce, ty, vbitsC) );
   3696 }
   3697 
   3698 /* --------- This is the main expression-handling function. --------- */
   3699 
   3700 static
   3701 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
   3702 {
   3703    switch (e->tag) {
   3704 
   3705       case Iex_Get:
   3706          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
   3707 
   3708       case Iex_GetI:
   3709          return shadow_GETI( mce, e->Iex.GetI.descr,
   3710                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   3711 
   3712       case Iex_RdTmp:
   3713          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
   3714 
   3715       case Iex_Const:
   3716          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
   3717 
   3718       case Iex_Qop:
   3719          return expr2vbits_Qop(
   3720                    mce,
   3721                    e->Iex.Qop.op,
   3722                    e->Iex.Qop.arg1, e->Iex.Qop.arg2,
    3723                    e->Iex.Qop.arg3, e->Iex.Qop.arg4
   3724                 );
   3725 
   3726       case Iex_Triop:
   3727          return expr2vbits_Triop(
   3728                    mce,
   3729                    e->Iex.Triop.op,
   3730                    e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
   3731                 );
   3732 
   3733       case Iex_Binop:
   3734          return expr2vbits_Binop(
   3735                    mce,
   3736                    e->Iex.Binop.op,
   3737                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
   3738                 );
   3739 
   3740       case Iex_Unop:
   3741          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
   3742 
   3743       case Iex_Load:
   3744          return expr2vbits_Load( mce, e->Iex.Load.end,
   3745                                       e->Iex.Load.ty,
   3746                                       e->Iex.Load.addr, 0/*addr bias*/ );
   3747 
   3748       case Iex_CCall:
   3749          return mkLazyN( mce, e->Iex.CCall.args,
   3750                               e->Iex.CCall.retty,
   3751                               e->Iex.CCall.cee );
   3752 
   3753       case Iex_Mux0X:
   3754          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
   3755                                        e->Iex.Mux0X.exprX);
   3756 
   3757       default:
   3758          VG_(printf)("\n");
   3759          ppIRExpr(e);
   3760          VG_(printf)("\n");
   3761          VG_(tool_panic)("memcheck: expr2vbits");
   3762    }
   3763 }
   3764 
   3765 /*------------------------------------------------------------*/
   3766 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
   3767 /*------------------------------------------------------------*/
   3768 
   3769 /* Widen a value to the host word size. */
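         /* The widening is unsigned (8Uto32, 16Uto32, 32Uto64), so the
            padding bits of the widened shadow word are zero, i.e. treated
            as defined.  For example an Ity_I16 V-bit value on a 64-bit
            host becomes 16Uto32 followed by 32Uto64. */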
   3770 
   3771 static
   3772 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
   3773 {
   3774    IRType ty, tyH;
   3775 
   3776    /* vatom is vbits-value and as such can only have a shadow type. */
   3777    tl_assert(isShadowAtom(mce,vatom));
   3778 
   3779    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
   3780    tyH = mce->hWordTy;
   3781 
   3782    if (tyH == Ity_I32) {
   3783       switch (ty) {
   3784          case Ity_I32:
   3785             return vatom;
   3786          case Ity_I16:
   3787             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
   3788          case Ity_I8:
   3789             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
   3790          default:
   3791             goto unhandled;
   3792       }
   3793    } else
   3794    if (tyH == Ity_I64) {
   3795       switch (ty) {
   3796          case Ity_I32:
   3797             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
   3798          case Ity_I16:
   3799             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   3800                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
   3801          case Ity_I8:
   3802             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   3803                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
   3804          default:
   3805             goto unhandled;
   3806       }
   3807    } else {
   3808       goto unhandled;
   3809    }
   3810   unhandled:
   3811    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   3812    VG_(tool_panic)("zwidenToHostWord");
   3813 }
   3814 
   3815 
   3816 /* Generate a shadow store.  addr is always the original address atom.
   3817    You can pass in either originals or V-bits for the data atom, but
   3818    obviously not both.  guard :: Ity_I1 controls whether the store
   3819    really happens; NULL means it unconditionally does.  Note that
   3820    guard itself is not checked for definedness; the caller of this
   3821    function must do that if necessary. */
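         /* As a rough sketch, the shadow IR emitted here for a 32-bit
            little-endian store, with no bias and no guard, is:

               complainIfUndefined( mce, addr );                // check addr#
               MC_(helperc_STOREV32le)( addr, zwiden(vdata) );  // write data#

            V128 data is handled as two 64-bit helper calls, at addr+0 and
            addr+8, with the low and high halves swapped on big-endian
            targets. */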
   3822 
   3823 static
   3824 void do_shadow_Store ( MCEnv* mce,
   3825                        IREndness end,
   3826                        IRAtom* addr, UInt bias,
   3827                        IRAtom* data, IRAtom* vdata,
   3828                        IRAtom* guard )
   3829 {
   3830    IROp     mkAdd;
   3831    IRType   ty, tyAddr;
   3832    void*    helper = NULL;
   3833    Char*    hname = NULL;
   3834    IRConst* c;
   3835 
   3836    tyAddr = mce->hWordTy;
   3837    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   3838    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   3839    tl_assert( end == Iend_LE || end == Iend_BE );
   3840 
   3841    if (data) {
   3842       tl_assert(!vdata);
   3843       tl_assert(isOriginalAtom(mce, data));
   3844       tl_assert(bias == 0);
   3845       vdata = expr2vbits( mce, data );
   3846    } else {
   3847       tl_assert(vdata);
   3848    }
   3849 
   3850    tl_assert(isOriginalAtom(mce,addr));
   3851    tl_assert(isShadowAtom(mce,vdata));
   3852 
   3853    if (guard) {
   3854       tl_assert(isOriginalAtom(mce, guard));
   3855       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   3856    }
   3857 
   3858    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
   3859 
   3860    // If we're not doing undefined value checking, pretend that this value
   3861    // is "all valid".  That lets Vex's optimiser remove some of the V bit
   3862    // shadow computation ops that precede it.
   3863    if (MC_(clo_mc_level) == 1) {
   3864       switch (ty) {
    3865          case Ity_V128: // V128: each of the 16 mask bits covers one byte
    3866                         c = IRConst_V128(V_BITS16_DEFINED); break;
   3867          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
   3868          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
   3869          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
   3870          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
   3871          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   3872       }
   3873       vdata = IRExpr_Const( c );
   3874    }
   3875 
   3876    /* First, emit a definedness test for the address.  This also sets
   3877       the address (shadow) to 'defined' following the test. */
   3878    complainIfUndefined( mce, addr );
   3879 
   3880    /* Now decide which helper function to call to write the data V
   3881       bits into shadow memory. */
   3882    if (end == Iend_LE) {
   3883       switch (ty) {
   3884          case Ity_V128: /* we'll use the helper twice */
   3885          case Ity_I64: helper = &MC_(helperc_STOREV64le);
   3886                        hname = "MC_(helperc_STOREV64le)";
   3887                        break;
   3888          case Ity_I32: helper = &MC_(helperc_STOREV32le);
   3889                        hname = "MC_(helperc_STOREV32le)";
   3890                        break;
   3891          case Ity_I16: helper = &MC_(helperc_STOREV16le);
   3892                        hname = "MC_(helperc_STOREV16le)";
   3893                        break;
   3894          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   3895                        hname = "MC_(helperc_STOREV8)";
   3896                        break;
   3897          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   3898       }
   3899    } else {
   3900       switch (ty) {
   3901          case Ity_V128: /* we'll use the helper twice */
   3902          case Ity_I64: helper = &MC_(helperc_STOREV64be);
   3903                        hname = "MC_(helperc_STOREV64be)";
   3904                        break;
   3905          case Ity_I32: helper = &MC_(helperc_STOREV32be);
   3906                        hname = "MC_(helperc_STOREV32be)";
   3907                        break;
   3908          case Ity_I16: helper = &MC_(helperc_STOREV16be);
   3909                        hname = "MC_(helperc_STOREV16be)";
   3910                        break;
   3911          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   3912                        hname = "MC_(helperc_STOREV8)";
   3913                        break;
   3914          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
   3915       }
   3916    }
   3917 
   3918    if (ty == Ity_V128) {
   3919 
   3920       /* V128-bit case */
   3921       /* See comment in next clause re 64-bit regparms */
   3922       /* also, need to be careful about endianness */
   3923 
   3924       Int     offLo64, offHi64;
   3925       IRDirty *diLo64, *diHi64;
   3926       IRAtom  *addrLo64, *addrHi64;
   3927       IRAtom  *vdataLo64, *vdataHi64;
   3928       IRAtom  *eBiasLo64, *eBiasHi64;
   3929 
   3930       if (end == Iend_LE) {
   3931          offLo64 = 0;
   3932          offHi64 = 8;
   3933       } else {
   3934          offLo64 = 8;
   3935          offHi64 = 0;
   3936       }
   3937 
   3938       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
   3939       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
   3940       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
   3941       diLo64    = unsafeIRDirty_0_N(
   3942                      1/*regparms*/,
   3943                      hname, VG_(fnptr_to_fnentry)( helper ),
   3944                      mkIRExprVec_2( addrLo64, vdataLo64 )
   3945                   );
   3946       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
   3947       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
   3948       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
   3949       diHi64    = unsafeIRDirty_0_N(
   3950                      1/*regparms*/,
   3951                      hname, VG_(fnptr_to_fnentry)( helper ),
   3952                      mkIRExprVec_2( addrHi64, vdataHi64 )
   3953                   );
   3954       if (guard) diLo64->guard = guard;
   3955       if (guard) diHi64->guard = guard;
   3956       setHelperAnns( mce, diLo64 );
   3957       setHelperAnns( mce, diHi64 );
   3958       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
   3959       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
   3960 
   3961    } else {
   3962 
   3963       IRDirty *di;
   3964       IRAtom  *addrAct;
   3965 
   3966       /* 8/16/32/64-bit cases */
   3967       /* Generate the actual address into addrAct. */
   3968       if (bias == 0) {
   3969          addrAct = addr;
   3970       } else {
   3971          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   3972          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
   3973       }
   3974 
   3975       if (ty == Ity_I64) {
   3976          /* We can't do this with regparm 2 on 32-bit platforms, since
   3977             the back ends aren't clever enough to handle 64-bit
   3978             regparm args.  Therefore be different. */
   3979          di = unsafeIRDirty_0_N(
   3980                  1/*regparms*/,
   3981                  hname, VG_(fnptr_to_fnentry)( helper ),
   3982                  mkIRExprVec_2( addrAct, vdata )
   3983               );
   3984       } else {
   3985          di = unsafeIRDirty_0_N(
   3986                  2/*regparms*/,
   3987                  hname, VG_(fnptr_to_fnentry)( helper ),
   3988                  mkIRExprVec_2( addrAct,
   3989                                 zwidenToHostWord( mce, vdata ))
   3990               );
   3991       }
   3992       if (guard) di->guard = guard;
   3993       setHelperAnns( mce, di );
   3994       stmt( 'V', mce, IRStmt_Dirty(di) );
   3995    }
   3996 
   3997 }
   3998 
   3999 
   4000 /* Do lazy pessimistic propagation through a dirty helper call, by
   4001    looking at the annotations on it.  This is the most complex part of
   4002    Memcheck. */
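         /* In outline: the guard is checked; then the V bits of every
            non-masked argument, of every piece of guest state read (in
            chunks of at most 8 bytes), and of every piece of memory read
            (in 32- and 16-bit chunks) are PCast to Ity_I32 and UifU'd into
            a single accumulator 'curr'.  curr is then PCast back out to
            the return temporary, to every piece of guest state written,
            and to every piece of memory written. */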
   4003 
   4004 static IRType szToITy ( Int n )
   4005 {
   4006    switch (n) {
   4007       case 1: return Ity_I8;
   4008       case 2: return Ity_I16;
   4009       case 4: return Ity_I32;
   4010       case 8: return Ity_I64;
   4011       default: VG_(tool_panic)("szToITy(memcheck)");
   4012    }
   4013 }
   4014 
   4015 static
   4016 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
   4017 {
   4018    Int       i, n, toDo, gSz, gOff;
   4019    IRAtom    *src, *here, *curr;
   4020    IRType    tySrc, tyDst;
   4021    IRTemp    dst;
   4022    IREndness end;
   4023 
   4024    /* What's the native endianness?  We need to know this. */
   4025 #  if defined(VG_BIGENDIAN)
   4026    end = Iend_BE;
   4027 #  elif defined(VG_LITTLEENDIAN)
   4028    end = Iend_LE;
   4029 #  else
   4030 #    error "Unknown endianness"
   4031 #  endif
   4032 
   4033    /* First check the guard. */
   4034    complainIfUndefined(mce, d->guard);
   4035 
   4036    /* Now round up all inputs and PCast over them. */
   4037    curr = definedOfType(Ity_I32);
   4038 
   4039    /* Inputs: unmasked args */
   4040    for (i = 0; d->args[i]; i++) {
   4041       if (d->cee->mcx_mask & (1<<i)) {
   4042          /* ignore this arg */
   4043       } else {
   4044          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
   4045          curr = mkUifU32(mce, here, curr);
   4046       }
   4047    }
   4048 
   4049    /* Inputs: guest state that we read. */
   4050    for (i = 0; i < d->nFxState; i++) {
   4051       tl_assert(d->fxState[i].fx != Ifx_None);
   4052       if (d->fxState[i].fx == Ifx_Write)
   4053          continue;
   4054 
   4055       /* Ignore any sections marked as 'always defined'. */
   4056       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
   4057          if (0)
   4058          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   4059                      d->fxState[i].offset, d->fxState[i].size );
   4060          continue;
   4061       }
   4062 
   4063       /* This state element is read or modified.  So we need to
   4064          consider it.  If larger than 8 bytes, deal with it in 8-byte
   4065          chunks. */
   4066       gSz  = d->fxState[i].size;
   4067       gOff = d->fxState[i].offset;
   4068       tl_assert(gSz > 0);
   4069       while (True) {
   4070          if (gSz == 0) break;
   4071          n = gSz <= 8 ? gSz : 8;
   4072          /* update 'curr' with UifU of the state slice
   4073             gOff .. gOff+n-1 */
   4074          tySrc = szToITy( n );
   4075          src   = assignNew( 'V', mce, tySrc,
   4076                                  shadow_GET(mce, gOff, tySrc ) );
   4077          here = mkPCastTo( mce, Ity_I32, src );
   4078          curr = mkUifU32(mce, here, curr);
   4079          gSz -= n;
   4080          gOff += n;
   4081       }
   4082 
   4083    }
   4084 
   4085    /* Inputs: memory.  First set up some info needed regardless of
   4086       whether we're doing reads or writes. */
   4087 
   4088    if (d->mFx != Ifx_None) {
   4089       /* Because we may do multiple shadow loads/stores from the same
   4090          base address, it's best to do a single test of its
   4091          definedness right now.  Post-instrumentation optimisation
   4092          should remove all but this test. */
   4093       IRType tyAddr;
   4094       tl_assert(d->mAddr);
   4095       complainIfUndefined(mce, d->mAddr);
   4096 
   4097       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
   4098       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   4099       tl_assert(tyAddr == mce->hWordTy); /* not really right */
   4100    }
   4101 
   4102    /* Deal with memory inputs (reads or modifies) */
   4103    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   4104       toDo   = d->mSize;
   4105       /* chew off 32-bit chunks.  We don't care about the endianness
   4106          since it's all going to be condensed down to a single bit,
   4107          but nevertheless choose an endianness which is hopefully
   4108          native to the platform. */
   4109       while (toDo >= 4) {
   4110          here = mkPCastTo(
   4111                    mce, Ity_I32,
   4112                    expr2vbits_Load ( mce, end, Ity_I32,
   4113                                      d->mAddr, d->mSize - toDo )
   4114                 );
   4115          curr = mkUifU32(mce, here, curr);
   4116          toDo -= 4;
   4117       }
   4118       /* chew off 16-bit chunks */
   4119       while (toDo >= 2) {
   4120          here = mkPCastTo(
   4121                    mce, Ity_I32,
   4122                    expr2vbits_Load ( mce, end, Ity_I16,
   4123                                      d->mAddr, d->mSize - toDo )
   4124                 );
   4125          curr = mkUifU32(mce, here, curr);
   4126          toDo -= 2;
   4127       }
   4128       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   4129    }
   4130 
   4131    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
   4132       all the inputs to the helper.  Now we need to re-distribute the
   4133       results to all destinations. */
   4134 
   4135    /* Outputs: the destination temporary, if there is one. */
   4136    if (d->tmp != IRTemp_INVALID) {
   4137       dst   = findShadowTmpV(mce, d->tmp);
   4138       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
   4139       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   4140    }
   4141 
   4142    /* Outputs: guest state that we write or modify. */
   4143    for (i = 0; i < d->nFxState; i++) {
   4144       tl_assert(d->fxState[i].fx != Ifx_None);
   4145       if (d->fxState[i].fx == Ifx_Read)
   4146          continue;
   4147       /* Ignore any sections marked as 'always defined'. */
   4148       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
   4149          continue;
   4150       /* This state element is written or modified.  So we need to
   4151          consider it.  If larger than 8 bytes, deal with it in 8-byte
   4152          chunks. */
   4153       gSz  = d->fxState[i].size;
   4154       gOff = d->fxState[i].offset;
   4155       tl_assert(gSz > 0);
   4156       while (True) {
   4157          if (gSz == 0) break;
   4158          n = gSz <= 8 ? gSz : 8;
   4159          /* Write suitably-casted 'curr' to the state slice
   4160             gOff .. gOff+n-1 */
   4161          tyDst = szToITy( n );
   4162          do_shadow_PUT( mce, gOff,
   4163                              NULL, /* original atom */
   4164                              mkPCastTo( mce, tyDst, curr ) );
   4165          gSz -= n;
   4166          gOff += n;
   4167       }
   4168    }
   4169 
   4170    /* Outputs: memory that we write or modify.  Same comments about
   4171       endianness as above apply. */
   4172    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   4173       toDo   = d->mSize;
   4174       /* chew off 32-bit chunks */
   4175       while (toDo >= 4) {
   4176          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   4177                           NULL, /* original data */
   4178                           mkPCastTo( mce, Ity_I32, curr ),
   4179                           NULL/*guard*/ );
   4180          toDo -= 4;
   4181       }
   4182       /* chew off 16-bit chunks */
   4183       while (toDo >= 2) {
   4184          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   4185                           NULL, /* original data */
   4186                           mkPCastTo( mce, Ity_I16, curr ),
   4187                           NULL/*guard*/ );
   4188          toDo -= 2;
   4189       }
   4190       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   4191    }
   4192 
   4193 }
   4194 
   4195 
   4196 /* We have an ABI hint telling us that [base .. base+len-1] is to
   4197    become undefined ("writable").  Generate code to call a helper to
   4198    notify the A/V bit machinery of this fact.
   4199 
   4200    We call
   4201    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
   4202                                                     Addr nia );
   4203 */
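/* Illustrative only (temp names invented): for an AbiHint covering 128
   bytes at address t5, with next-insn address t7, do_AbiHint below
   emits roughly

      DIRTY ::: MC_(helperc_MAKE_STACK_UNINIT)(t5, 0x80, t7)

   and when origin tracking is disabled the nia argument is simply 0x0. */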
   4204 static
   4205 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
   4206 {
   4207    IRDirty* di;
   4208    /* Minor optimisation: if not doing origin tracking, ignore the
   4209       supplied nia and pass zero instead.  This is on the basis that
   4210       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
   4211       almost always generate a shorter instruction to put zero into a
   4212       register than any other value. */
   4213    if (MC_(clo_mc_level) < 3)
   4214       nia = mkIRExpr_HWord(0);
   4215 
   4216    di = unsafeIRDirty_0_N(
   4217            0/*regparms*/,
   4218            "MC_(helperc_MAKE_STACK_UNINIT)",
   4219            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
   4220            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
   4221         );
   4222    stmt( 'V', mce, IRStmt_Dirty(di) );
   4223 }
   4224 
   4225 
   4226 /* ------ Dealing with IRCAS (big and complex) ------ */
   4227 
   4228 /* FWDS */
   4229 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
   4230                              IRAtom* baseaddr, Int offset );
   4231 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
   4232 static void    gen_store_b ( MCEnv* mce, Int szB,
   4233                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
   4234                              IRAtom* guard );
   4235 
   4236 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
   4237 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
   4238 
   4239 
   4240 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
   4241    IRExpr.Consts, else this asserts.  If they are both Consts, it
   4242    doesn't do anything.  So that just leaves the RdTmp case.
   4243 
   4244    In which case: this assigns the shadow value SHADOW to the IR
   4245    shadow temporary associated with ORIG.  That is, ORIG, being an
   4246    original temporary, will have a shadow temporary associated with
   4247    it.  However, in the case envisaged here, there will so far have
   4248    been no IR emitted to actually write a shadow value into that
   4249    temporary.  What this routine does is to (emit IR to) copy the
   4250    value in SHADOW into said temporary, so that after this call,
   4251    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
   4252    value in SHADOW.
   4253 
   4254    Point is to allow callers to compute "by hand" a shadow value for
   4255    ORIG, and force it to be associated with ORIG.
   4256 
    4257    How do we know that the shadow associated with ORIG has not so far
    4258    been assigned to?  Well, we don't per se know that, but suppose it
    4259    had been: then this routine would create a second assignment to it,
   4260    and later the IR sanity checker would barf.  But that never
   4261    happens.  QED.
   4262 */
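/* Illustrative only (temp names invented): if ORIG is the original temp
   t7, whose V shadow temp is t17, and SHADOW is t23, then a call with
   how == 'V' just emits "t17 = t23"; with how == 'B' the same is done
   for t7's origin (B) shadow temp instead. */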
   4263 static void bind_shadow_tmp_to_orig ( UChar how,
   4264                                       MCEnv* mce,
   4265                                       IRAtom* orig, IRAtom* shadow )
   4266 {
   4267    tl_assert(isOriginalAtom(mce, orig));
   4268    tl_assert(isShadowAtom(mce, shadow));
   4269    switch (orig->tag) {
   4270       case Iex_Const:
   4271          tl_assert(shadow->tag == Iex_Const);
   4272          break;
   4273       case Iex_RdTmp:
   4274          tl_assert(shadow->tag == Iex_RdTmp);
   4275          if (how == 'V') {
   4276             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
   4277                    shadow);
   4278          } else {
   4279             tl_assert(how == 'B');
   4280             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
   4281                    shadow);
   4282          }
   4283          break;
   4284       default:
   4285          tl_assert(0);
   4286    }
   4287 }
   4288 
   4289 
   4290 static
   4291 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
   4292 {
   4293    /* Scheme is (both single- and double- cases):
   4294 
   4295       1. fetch data#,dataB (the proposed new value)
   4296 
   4297       2. fetch expd#,expdB (what we expect to see at the address)
   4298 
   4299       3. check definedness of address
   4300 
   4301       4. load old#,oldB from shadow memory; this also checks
    4302          addressability of the address
   4303 
   4304       5. the CAS itself
   4305 
   4306       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
   4307 
   4308       7. if "expected == old" (as computed by (6))
   4309             store data#,dataB to shadow memory
   4310 
   4311       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
   4312       'data' but 7 stores 'data#'.  Hence it is possible for the
   4313       shadow data to be incorrectly checked and/or updated:
   4314 
   4315       * 7 is at least gated correctly, since the 'expected == old'
   4316         condition is derived from outputs of 5.  However, the shadow
   4317         write could happen too late: imagine after 5 we are
   4318         descheduled, a different thread runs, writes a different
   4319         (shadow) value at the address, and then we resume, hence
   4320         overwriting the shadow value written by the other thread.
   4321 
   4322       Because the original memory access is atomic, there's no way to
   4323       make both the original and shadow accesses into a single atomic
   4324       thing, hence this is unavoidable.
   4325 
   4326       At least as Valgrind stands, I don't think it's a problem, since
   4327       we're single threaded *and* we guarantee that there are no
   4328       context switches during the execution of any specific superblock
   4329       -- context switches can only happen at superblock boundaries.
   4330 
   4331       If Valgrind ever becomes MT in the future, then it might be more
   4332       of a problem.  A possible kludge would be to artificially
   4333       associate with the location, a lock, which we must acquire and
   4334       release around the transaction as a whole.  Hmm, that probably
    4335       wouldn't work properly since it only guards us against other
   4336       threads doing CASs on the same location, not against other
   4337       threads doing normal reads and writes.
   4338 
   4339       ------------------------------------------------------------
   4340 
   4341       COMMENT_ON_CasCmpEQ:
   4342 
   4343       Note two things.  Firstly, in the sequence above, we compute
   4344       "expected == old", but we don't check definedness of it.  Why
   4345       not?  Also, the x86 and amd64 front ends use
    4346       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
   4347       determination (expected == old ?) for themselves, and we also
   4348       don't check definedness for those primops; we just say that the
   4349       result is defined.  Why?  Details follow.
   4350 
   4351       x86/amd64 contains various forms of locked insns:
    4352       * a lock prefix before any basic arithmetic insn;
   4353         eg lock xorl %reg1,(%reg2)
   4354       * atomic exchange reg-mem
   4355       * compare-and-swaps
   4356 
   4357       Rather than attempt to represent them all, which would be a
   4358       royal PITA, I used a result from Maurice Herlihy
   4359       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
   4360       demonstrates that compare-and-swap is a primitive more general
   4361       than the other two, and so can be used to represent all of them.
   4362       So the translation scheme for (eg) lock incl (%reg) is as
   4363       follows:
   4364 
   4365         again:
   4366          old = * %reg
   4367          new = old + 1
   4368          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
   4369 
   4370       The "atomically" is the CAS bit.  The scheme is always the same:
   4371       get old value from memory, compute new value, atomically stuff
   4372       new value back in memory iff the old value has not changed (iow,
   4373       no other thread modified it in the meantime).  If it has changed
   4374       then we've been out-raced and we have to start over.
   4375 
   4376       Now that's all very neat, but it has the bad side effect of
   4377       introducing an explicit equality test into the translation.
   4378       Consider the behaviour of said code on a memory location which
   4379       is uninitialised.  We will wind up doing a comparison on
   4380       uninitialised data, and mc duly complains.
   4381 
    4382       What's difficult about this is that, in the common case, the
   4383       location is uncontended, and so we're usually comparing the same
   4384       value (* %reg) with itself.  So we shouldn't complain even if it
   4385       is undefined.  But mc doesn't know that.
   4386 
   4387       My solution is to mark the == in the IR specially, so as to tell
   4388       mc that it almost certainly compares a value with itself, and we
   4389       should just regard the result as always defined.  Rather than
   4390       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
   4391       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
   4392 
   4393       So there's always the question of, can this give a false
   4394       negative?  eg, imagine that initially, * %reg is defined; and we
   4395       read that; but then in the gap between the read and the CAS, a
   4396       different thread writes an undefined (and different) value at
   4397       the location.  Then the CAS in this thread will fail and we will
   4398       go back to "again:", but without knowing that the trip back
   4399       there was based on an undefined comparison.  No matter; at least
   4400       the other thread won the race and the location is correctly
   4401       marked as undefined.  What if it wrote an uninitialised version
   4402       of the same value that was there originally, though?
   4403 
   4404       etc etc.  Seems like there's a small corner case in which we
   4405       might lose the fact that something's defined -- we're out-raced
   4406       in between the "old = * reg" and the "atomically {", _and_ the
   4407       other thread is writing in an undefined version of what's
   4408       already there.  Well, that seems pretty unlikely.
   4409 
   4410       ---
   4411 
   4412       If we ever need to reinstate it .. code which generates a
   4413       definedness test for "expected == old" was removed at r10432 of
   4414       this file.
   4415    */
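   /* A minimal illustrative sketch (temp numbers invented): for a
      32-bit single CAS, written roughly as "t1 = CASle(t0 :: t2 -> t3)",
      do_shadow_CAS_single emits approximately

         t3#   = V bits of t3                     -- step 1
         t2#   = V bits of t2                     -- step 2
         t1#   = shadow load at t0                -- steps 3,4
         t1    = CASle(t0 :: t2 -> t3)            -- step 5
         guard = CasCmpEQ32(t2, t1)               -- step 6
         if (guard) shadow-store t3# at t0        -- step 7
   */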
   4416    if (cas->oldHi == IRTemp_INVALID) {
   4417       do_shadow_CAS_single( mce, cas );
   4418    } else {
   4419       do_shadow_CAS_double( mce, cas );
   4420    }
   4421 }
   4422 
   4423 
   4424 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
   4425 {
   4426    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4427    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4428    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4429    IRAtom *expd_eq_old = NULL;
   4430    IROp   opCasCmpEQ;
   4431    Int    elemSzB;
   4432    IRType elemTy;
   4433    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4434 
   4435    /* single CAS */
   4436    tl_assert(cas->oldHi == IRTemp_INVALID);
   4437    tl_assert(cas->expdHi == NULL);
   4438    tl_assert(cas->dataHi == NULL);
   4439 
   4440    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4441    switch (elemTy) {
   4442       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
   4443       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
   4444       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
   4445       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
   4446       default: tl_assert(0); /* IR defn disallows any other types */
   4447    }
   4448 
   4449    /* 1. fetch data# (the proposed new value) */
   4450    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4451    vdataLo
   4452       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4453    tl_assert(isShadowAtom(mce, vdataLo));
   4454    if (otrak) {
   4455       bdataLo
   4456          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4457       tl_assert(isShadowAtom(mce, bdataLo));
   4458    }
   4459 
   4460    /* 2. fetch expected# (what we expect to see at the address) */
   4461    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4462    vexpdLo
   4463       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   4464    tl_assert(isShadowAtom(mce, vexpdLo));
   4465    if (otrak) {
   4466       bexpdLo
   4467          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   4468       tl_assert(isShadowAtom(mce, bexpdLo));
   4469    }
   4470 
   4471    /* 3. check definedness of address */
   4472    /* 4. fetch old# from shadow memory; this also checks
    4473          addressability of the address */
   4474    voldLo
   4475       = assignNew(
   4476            'V', mce, elemTy,
   4477            expr2vbits_Load(
   4478               mce,
   4479               cas->end, elemTy, cas->addr, 0/*Addr bias*/
   4480         ));
   4481    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   4482    if (otrak) {
   4483       boldLo
   4484          = assignNew('B', mce, Ity_I32,
   4485                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
   4486       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   4487    }
   4488 
   4489    /* 5. the CAS itself */
   4490    stmt( 'C', mce, IRStmt_CAS(cas) );
   4491 
   4492    /* 6. compute "expected == old" */
    4493    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   4494    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   4495       tree, but it's not copied from the input block. */
   4496    expd_eq_old
   4497       = assignNew('C', mce, Ity_I1,
   4498                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
   4499 
   4500    /* 7. if "expected == old"
   4501             store data# to shadow memory */
   4502    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
   4503                     NULL/*data*/, vdataLo/*vdata*/,
   4504                     expd_eq_old/*guard for store*/ );
   4505    if (otrak) {
   4506       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
   4507                    bdataLo/*bdata*/,
   4508                    expd_eq_old/*guard for store*/ );
   4509    }
   4510 }
   4511 
   4512 
   4513 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
   4514 {
   4515    IRAtom *vdataHi = NULL, *bdataHi = NULL;
   4516    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4517    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   4518    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4519    IRAtom *voldHi  = NULL, *boldHi  = NULL;
   4520    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4521    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   4522    IRAtom *expd_eq_old = NULL, *zero = NULL;
   4523    IROp   opCasCmpEQ, opOr, opXor;
   4524    Int    elemSzB, memOffsLo, memOffsHi;
   4525    IRType elemTy;
   4526    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4527 
   4528    /* double CAS */
   4529    tl_assert(cas->oldHi != IRTemp_INVALID);
   4530    tl_assert(cas->expdHi != NULL);
   4531    tl_assert(cas->dataHi != NULL);
   4532 
   4533    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4534    switch (elemTy) {
   4535       case Ity_I8:
   4536          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
   4537          elemSzB = 1; zero = mkU8(0);
   4538          break;
   4539       case Ity_I16:
   4540          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
   4541          elemSzB = 2; zero = mkU16(0);
   4542          break;
   4543       case Ity_I32:
   4544          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
   4545          elemSzB = 4; zero = mkU32(0);
   4546          break;
   4547       case Ity_I64:
   4548          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
   4549          elemSzB = 8; zero = mkU64(0);
   4550          break;
   4551       default:
   4552          tl_assert(0); /* IR defn disallows any other types */
   4553    }
   4554 
   4555    /* 1. fetch data# (the proposed new value) */
   4556    tl_assert(isOriginalAtom(mce, cas->dataHi));
   4557    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4558    vdataHi
   4559       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   4560    vdataLo
   4561       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4562    tl_assert(isShadowAtom(mce, vdataHi));
   4563    tl_assert(isShadowAtom(mce, vdataLo));
   4564    if (otrak) {
   4565       bdataHi
   4566          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
   4567       bdataLo
   4568          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4569       tl_assert(isShadowAtom(mce, bdataHi));
   4570       tl_assert(isShadowAtom(mce, bdataLo));
   4571    }
   4572 
   4573    /* 2. fetch expected# (what we expect to see at the address) */
   4574    tl_assert(isOriginalAtom(mce, cas->expdHi));
   4575    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4576    vexpdHi
   4577       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   4578    vexpdLo
   4579       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   4580    tl_assert(isShadowAtom(mce, vexpdHi));
   4581    tl_assert(isShadowAtom(mce, vexpdLo));
   4582    if (otrak) {
   4583       bexpdHi
   4584          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
   4585       bexpdLo
   4586          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   4587       tl_assert(isShadowAtom(mce, bexpdHi));
   4588       tl_assert(isShadowAtom(mce, bexpdLo));
   4589    }
   4590 
   4591    /* 3. check definedness of address */
   4592    /* 4. fetch old# from shadow memory; this also checks
    4593          addressability of the address */
   4594    if (cas->end == Iend_LE) {
   4595       memOffsLo = 0;
   4596       memOffsHi = elemSzB;
   4597    } else {
   4598       tl_assert(cas->end == Iend_BE);
   4599       memOffsLo = elemSzB;
   4600       memOffsHi = 0;
   4601    }
   4602    voldHi
   4603       = assignNew(
   4604            'V', mce, elemTy,
   4605            expr2vbits_Load(
   4606               mce,
   4607               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
   4608         ));
   4609    voldLo
   4610       = assignNew(
   4611            'V', mce, elemTy,
   4612            expr2vbits_Load(
   4613               mce,
   4614               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
   4615         ));
   4616    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   4617    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   4618    if (otrak) {
   4619       boldHi
   4620          = assignNew('B', mce, Ity_I32,
   4621                      gen_load_b(mce, elemSzB, cas->addr,
   4622                                 memOffsHi/*addr bias*/));
   4623       boldLo
   4624          = assignNew('B', mce, Ity_I32,
   4625                      gen_load_b(mce, elemSzB, cas->addr,
   4626                                 memOffsLo/*addr bias*/));
   4627       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
   4628       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   4629    }
   4630 
   4631    /* 5. the CAS itself */
   4632    stmt( 'C', mce, IRStmt_CAS(cas) );
   4633 
   4634    /* 6. compute "expected == old" */
    4635    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   4636    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   4637       tree, but it's not copied from the input block. */
   4638    /*
   4639       xHi = oldHi ^ expdHi;
   4640       xLo = oldLo ^ expdLo;
   4641       xHL = xHi | xLo;
   4642       expd_eq_old = xHL == 0;
   4643    */
   4644    xHi = assignNew('C', mce, elemTy,
   4645                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   4646    xLo = assignNew('C', mce, elemTy,
   4647                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   4648    xHL = assignNew('C', mce, elemTy,
   4649                    binop(opOr, xHi, xLo));
   4650    expd_eq_old
   4651       = assignNew('C', mce, Ity_I1,
   4652                   binop(opCasCmpEQ, xHL, zero));
   4653 
   4654    /* 7. if "expected == old"
   4655             store data# to shadow memory */
   4656    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
   4657                     NULL/*data*/, vdataHi/*vdata*/,
   4658                     expd_eq_old/*guard for store*/ );
   4659    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
   4660                     NULL/*data*/, vdataLo/*vdata*/,
   4661                     expd_eq_old/*guard for store*/ );
   4662    if (otrak) {
   4663       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
   4664                    bdataHi/*bdata*/,
   4665                    expd_eq_old/*guard for store*/ );
   4666       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
   4667                    bdataLo/*bdata*/,
   4668                    expd_eq_old/*guard for store*/ );
   4669    }
   4670 }
   4671 
   4672 
   4673 /* ------ Dealing with LL/SC (not difficult) ------ */
   4674 
   4675 static void do_shadow_LLSC ( MCEnv*    mce,
   4676                              IREndness stEnd,
   4677                              IRTemp    stResult,
   4678                              IRExpr*   stAddr,
   4679                              IRExpr*   stStoredata )
   4680 {
   4681    /* In short: treat a load-linked like a normal load followed by an
   4682       assignment of the loaded (shadow) data to the result temporary.
   4683       Treat a store-conditional like a normal store, and mark the
   4684       result temporary as defined. */
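   /* Illustrative only: for a load-linked of a 32-bit value at address
      t1 into t2, the extra IR is just "t2# = shadow load at t1"; for a
      store-conditional of t3 at t1 with success flag t4, the extra IR
      is a shadow store of t3# at t1 plus "t4# = fully defined". */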
   4685    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   4686    IRTemp resTmp = findShadowTmpV(mce, stResult);
   4687 
   4688    tl_assert(isIRAtom(stAddr));
   4689    if (stStoredata)
   4690       tl_assert(isIRAtom(stStoredata));
   4691 
   4692    if (stStoredata == NULL) {
   4693       /* Load Linked */
   4694       /* Just treat this as a normal load, followed by an assignment of
   4695          the value to .result. */
   4696       /* Stay sane */
   4697       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
   4698                 || resTy == Ity_I16 || resTy == Ity_I8);
   4699       assign( 'V', mce, resTmp,
   4700                    expr2vbits_Load(
   4701                       mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   4702    } else {
   4703       /* Store Conditional */
   4704       /* Stay sane */
   4705       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
   4706                                    stStoredata);
   4707       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
   4708                 || dataTy == Ity_I16 || dataTy == Ity_I8);
   4709       do_shadow_Store( mce, stEnd,
   4710                             stAddr, 0/* addr bias */,
   4711                             stStoredata,
   4712                             NULL /* shadow data */,
   4713                             NULL/*guard*/ );
   4714       /* This is a store conditional, so it writes to .result a value
   4715          indicating whether or not the store succeeded.  Just claim
   4716          this value is always defined.  In the PowerPC interpretation
   4717          of store-conditional, definedness of the success indication
   4718          depends on whether the address of the store matches the
   4719          reservation address.  But we can't tell that here (and
   4720          anyway, we're not being PowerPC-specific).  At least we are
   4721          guaranteed that the definedness of the store address, and its
    4722          addressability, will be checked as per normal.  So it seems
   4723          pretty safe to just say that the success indication is always
   4724          defined.
   4725 
   4726          In schemeS, for origin tracking, we must correspondingly set
   4727          a no-origin value for the origin shadow of .result.
   4728       */
   4729       tl_assert(resTy == Ity_I1);
   4730       assign( 'V', mce, resTmp, definedOfType(resTy) );
   4731    }
   4732 }
   4733 
   4734 
   4735 /*------------------------------------------------------------*/
   4736 /*--- Memcheck main                                        ---*/
   4737 /*------------------------------------------------------------*/
   4738 
   4739 static void schemeS ( MCEnv* mce, IRStmt* st );
   4740 
   4741 static Bool isBogusAtom ( IRAtom* at )
   4742 {
   4743    ULong n = 0;
   4744    IRConst* con;
   4745    tl_assert(isIRAtom(at));
   4746    if (at->tag == Iex_RdTmp)
   4747       return False;
   4748    tl_assert(at->tag == Iex_Const);
   4749    con = at->Iex.Const.con;
   4750    switch (con->tag) {
   4751       case Ico_U1:   return False;
   4752       case Ico_U8:   n = (ULong)con->Ico.U8; break;
   4753       case Ico_U16:  n = (ULong)con->Ico.U16; break;
   4754       case Ico_U32:  n = (ULong)con->Ico.U32; break;
   4755       case Ico_U64:  n = (ULong)con->Ico.U64; break;
   4756       case Ico_F64:  return False;
   4757       case Ico_F32i: return False;
   4758       case Ico_F64i: return False;
   4759       case Ico_V128: return False;
   4760       default: ppIRExpr(at); tl_assert(0);
   4761    }
   4762    /* VG_(printf)("%llx\n", n); */
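   /* These magic values commonly arise from hand-optimised string
      routines (e.g. the classic "(x - 0x01010101) & ~x & 0x80808080"
      zero-byte test), which deliberately compute on partially-defined
      words; spotting them is what triggers the extra-detailed,
      extra-expensive instrumentation mentioned in MC_(instrument). */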
   4763    return (/*32*/    n == 0xFEFEFEFFULL
   4764            /*32*/ || n == 0x80808080ULL
   4765            /*32*/ || n == 0x7F7F7F7FULL
   4766            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
   4767            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
   4768            /*64*/ || n == 0x0000000000008080ULL
   4769            /*64*/ || n == 0x8080808080808080ULL
   4770            /*64*/ || n == 0x0101010101010101ULL
   4771           );
   4772 }
   4773 
   4774 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
   4775 {
   4776    Int      i;
   4777    IRExpr*  e;
   4778    IRDirty* d;
   4779    IRCAS*   cas;
   4780    switch (st->tag) {
   4781       case Ist_WrTmp:
   4782          e = st->Ist.WrTmp.data;
   4783          switch (e->tag) {
   4784             case Iex_Get:
   4785             case Iex_RdTmp:
   4786                return False;
   4787             case Iex_Const:
   4788                return isBogusAtom(e);
   4789             case Iex_Unop:
   4790                return isBogusAtom(e->Iex.Unop.arg);
   4791             case Iex_GetI:
   4792                return isBogusAtom(e->Iex.GetI.ix);
   4793             case Iex_Binop:
   4794                return isBogusAtom(e->Iex.Binop.arg1)
   4795                       || isBogusAtom(e->Iex.Binop.arg2);
   4796             case Iex_Triop:
   4797                return isBogusAtom(e->Iex.Triop.arg1)
   4798                       || isBogusAtom(e->Iex.Triop.arg2)
   4799                       || isBogusAtom(e->Iex.Triop.arg3);
   4800             case Iex_Qop:
   4801                return isBogusAtom(e->Iex.Qop.arg1)
   4802                       || isBogusAtom(e->Iex.Qop.arg2)
   4803                       || isBogusAtom(e->Iex.Qop.arg3)
   4804                       || isBogusAtom(e->Iex.Qop.arg4);
   4805             case Iex_Mux0X:
   4806                return isBogusAtom(e->Iex.Mux0X.cond)
   4807                       || isBogusAtom(e->Iex.Mux0X.expr0)
   4808                       || isBogusAtom(e->Iex.Mux0X.exprX);
   4809             case Iex_Load:
   4810                return isBogusAtom(e->Iex.Load.addr);
   4811             case Iex_CCall:
   4812                for (i = 0; e->Iex.CCall.args[i]; i++)
   4813                   if (isBogusAtom(e->Iex.CCall.args[i]))
   4814                      return True;
   4815                return False;
   4816             default:
   4817                goto unhandled;
   4818          }
   4819       case Ist_Dirty:
   4820          d = st->Ist.Dirty.details;
   4821          for (i = 0; d->args[i]; i++)
   4822             if (isBogusAtom(d->args[i]))
   4823                return True;
   4824          if (d->guard && isBogusAtom(d->guard))
   4825             return True;
   4826          if (d->mAddr && isBogusAtom(d->mAddr))
   4827             return True;
   4828          return False;
   4829       case Ist_Put:
   4830          return isBogusAtom(st->Ist.Put.data);
   4831       case Ist_PutI:
   4832          return isBogusAtom(st->Ist.PutI.ix)
   4833                 || isBogusAtom(st->Ist.PutI.data);
   4834       case Ist_Store:
   4835          return isBogusAtom(st->Ist.Store.addr)
   4836                 || isBogusAtom(st->Ist.Store.data);
   4837       case Ist_Exit:
   4838          return isBogusAtom(st->Ist.Exit.guard);
   4839       case Ist_AbiHint:
   4840          return isBogusAtom(st->Ist.AbiHint.base)
   4841                 || isBogusAtom(st->Ist.AbiHint.nia);
   4842       case Ist_NoOp:
   4843       case Ist_IMark:
   4844       case Ist_MBE:
   4845          return False;
   4846       case Ist_CAS:
   4847          cas = st->Ist.CAS.details;
   4848          return isBogusAtom(cas->addr)
   4849                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
   4850                 || isBogusAtom(cas->expdLo)
   4851                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
   4852                 || isBogusAtom(cas->dataLo);
   4853       case Ist_LLSC:
   4854          return isBogusAtom(st->Ist.LLSC.addr)
   4855                 || (st->Ist.LLSC.storedata
   4856                        ? isBogusAtom(st->Ist.LLSC.storedata)
   4857                        : False);
   4858       default:
   4859       unhandled:
   4860          ppIRStmt(st);
   4861          VG_(tool_panic)("hasBogusLiterals");
   4862    }
   4863 }
   4864 
   4865 
   4866 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
   4867                         IRSB* sb_in,
   4868                         VexGuestLayout* layout,
   4869                         VexGuestExtents* vge,
   4870                         IRType gWordTy, IRType hWordTy )
   4871 {
   4872    Bool    verboze = 0||False;
   4873    Bool    bogus;
   4874    Int     i, j, first_stmt;
   4875    IRStmt* st;
   4876    MCEnv   mce;
   4877    IRSB*   sb_out;
   4878 
   4879    if (gWordTy != hWordTy) {
   4880       /* We don't currently support this case. */
   4881       VG_(tool_panic)("host/guest word size mismatch");
   4882    }
   4883 
   4884    /* Check we're not completely nuts */
   4885    tl_assert(sizeof(UWord)  == sizeof(void*));
   4886    tl_assert(sizeof(Word)   == sizeof(void*));
   4887    tl_assert(sizeof(Addr)   == sizeof(void*));
   4888    tl_assert(sizeof(ULong)  == 8);
   4889    tl_assert(sizeof(Long)   == 8);
   4890    tl_assert(sizeof(Addr64) == 8);
   4891    tl_assert(sizeof(UInt)   == 4);
   4892    tl_assert(sizeof(Int)    == 4);
   4893 
   4894    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
   4895 
   4896    /* Set up SB */
   4897    sb_out = deepCopyIRSBExceptStmts(sb_in);
   4898 
   4899    /* Set up the running environment.  Both .sb and .tmpMap are
   4900       modified as we go along.  Note that tmps are added to both
   4901       .sb->tyenv and .tmpMap together, so the valid index-set for
   4902       those two arrays should always be identical. */
   4903    VG_(memset)(&mce, 0, sizeof(mce));
   4904    mce.sb             = sb_out;
   4905    mce.trace          = verboze;
   4906    mce.layout         = layout;
   4907    mce.hWordTy        = hWordTy;
   4908    mce.bogusLiterals  = False;
   4909 
   4910    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
   4911                             sizeof(TempMapEnt));
   4912    for (i = 0; i < sb_in->tyenv->types_used; i++) {
   4913       TempMapEnt ent;
   4914       ent.kind    = Orig;
   4915       ent.shadowV = IRTemp_INVALID;
   4916       ent.shadowB = IRTemp_INVALID;
   4917       VG_(addToXA)( mce.tmpMap, &ent );
   4918    }
   4919    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
   4920 
   4921    /* Make a preliminary inspection of the statements, to see if there
   4922       are any dodgy-looking literals.  If there are, we generate
   4923       extra-detailed (hence extra-expensive) instrumentation in
   4924       places.  Scan the whole bb even if dodgyness is found earlier,
    4925       places.  Scan the whole bb even if dodginess is found earlier,
   4926 
   4927    bogus = False;
   4928 
   4929    for (i = 0; i < sb_in->stmts_used; i++) {
   4930 
   4931       st = sb_in->stmts[i];
   4932       tl_assert(st);
   4933       tl_assert(isFlatIRStmt(st));
   4934 
   4935       if (!bogus) {
   4936          bogus = checkForBogusLiterals(st);
   4937          if (0 && bogus) {
   4938             VG_(printf)("bogus: ");
   4939             ppIRStmt(st);
   4940             VG_(printf)("\n");
   4941          }
   4942       }
   4943 
   4944    }
   4945 
   4946    mce.bogusLiterals = bogus;
   4947 
   4948    /* Copy verbatim any IR preamble preceding the first IMark */
   4949 
   4950    tl_assert(mce.sb == sb_out);
   4951    tl_assert(mce.sb != sb_in);
   4952 
   4953    i = 0;
   4954    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
   4955 
   4956       st = sb_in->stmts[i];
   4957       tl_assert(st);
   4958       tl_assert(isFlatIRStmt(st));
   4959 
   4960       stmt( 'C', &mce, sb_in->stmts[i] );
   4961       i++;
   4962    }
   4963 
   4964    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
   4965       cause the IR following the preamble to contain references to IR
   4966       temporaries defined in the preamble.  Because the preamble isn't
   4967       instrumented, these temporaries don't have any shadows.
   4968       Nevertheless uses of them following the preamble will cause
   4969       memcheck to generate references to their shadows.  End effect is
   4970       to cause IR sanity check failures, due to references to
   4971       non-existent shadows.  This is only evident for the complex
   4972       preambles used for function wrapping on TOC-afflicted platforms
   4973       (ppc64-linux).
   4974 
   4975       The following loop therefore scans the preamble looking for
   4976       assignments to temporaries.  For each one found it creates an
   4977       assignment to the corresponding (V) shadow temp, marking it as
   4978       'defined'.  This is the same resulting IR as if the main
   4979       instrumentation loop before had been applied to the statement
   4980       'tmp = CONSTANT'.
   4981 
   4982       Similarly, if origin tracking is enabled, we must generate an
   4983       assignment for the corresponding origin (B) shadow, claiming
   4984       no-origin, as appropriate for a defined value.
   4985    */
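   /* Illustrative only (temp names invented): if the preamble contains
      "t5 = GET:I64(<offset>)" then the loop below adds "t5# = 0x0:I64"
      (all V bits zero, hence fully defined), and with origin tracking
      enabled also a no-origin assignment "t5B = 0x0:I32". */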
   4986    for (j = 0; j < i; j++) {
   4987       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
   4988          /* findShadowTmpV checks its arg is an original tmp;
   4989             no need to assert that here. */
   4990          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
   4991          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
   4992          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
   4993          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
   4994          if (MC_(clo_mc_level) == 3) {
   4995             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
   4996             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
   4997             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
   4998          }
   4999          if (0) {
   5000             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
   5001             ppIRType( ty_v );
   5002             VG_(printf)("\n");
   5003          }
   5004       }
   5005    }
   5006 
   5007    /* Iterate over the remaining stmts to generate instrumentation. */
   5008 
   5009    tl_assert(sb_in->stmts_used > 0);
   5010    tl_assert(i >= 0);
   5011    tl_assert(i < sb_in->stmts_used);
   5012    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
   5013 
   5014    for (/* use current i*/; i < sb_in->stmts_used; i++) {
   5015 
   5016       st = sb_in->stmts[i];
   5017       first_stmt = sb_out->stmts_used;
   5018 
   5019       if (verboze) {
   5020          VG_(printf)("\n");
   5021          ppIRStmt(st);
   5022          VG_(printf)("\n");
   5023       }
   5024 
   5025       if (MC_(clo_mc_level) == 3) {
   5026          /* See comments on case Ist_CAS below. */
   5027          if (st->tag != Ist_CAS)
   5028             schemeS( &mce, st );
   5029       }
   5030 
   5031       /* Generate instrumentation code for each stmt ... */
   5032 
   5033       switch (st->tag) {
   5034 
   5035          case Ist_WrTmp:
   5036             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
   5037                                expr2vbits( &mce, st->Ist.WrTmp.data) );
   5038             break;
   5039 
   5040          case Ist_Put:
   5041             do_shadow_PUT( &mce,
   5042                            st->Ist.Put.offset,
   5043                            st->Ist.Put.data,
   5044                            NULL /* shadow atom */ );
   5045             break;
   5046 
   5047          case Ist_PutI:
   5048             do_shadow_PUTI( &mce,
   5049                             st->Ist.PutI.descr,
   5050                             st->Ist.PutI.ix,
   5051                             st->Ist.PutI.bias,
   5052                             st->Ist.PutI.data );
   5053             break;
   5054 
   5055          case Ist_Store:
   5056             do_shadow_Store( &mce, st->Ist.Store.end,
   5057                                    st->Ist.Store.addr, 0/* addr bias */,
   5058                                    st->Ist.Store.data,
   5059                                    NULL /* shadow data */,
   5060                                    NULL/*guard*/ );
   5061             break;
   5062 
   5063          case Ist_Exit:
   5064             complainIfUndefined( &mce, st->Ist.Exit.guard );
   5065             break;
   5066 
   5067          case Ist_IMark:
   5068             break;
   5069 
   5070          case Ist_NoOp:
   5071          case Ist_MBE:
   5072             break;
   5073 
   5074          case Ist_Dirty:
   5075             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
   5076             break;
   5077 
   5078          case Ist_AbiHint:
   5079             do_AbiHint( &mce, st->Ist.AbiHint.base,
   5080                               st->Ist.AbiHint.len,
   5081                               st->Ist.AbiHint.nia );
   5082             break;
   5083 
   5084          case Ist_CAS:
   5085             do_shadow_CAS( &mce, st->Ist.CAS.details );
   5086             /* Note, do_shadow_CAS copies the CAS itself to the output
   5087                block, because it needs to add instrumentation both
   5088                before and after it.  Hence skip the copy below.  Also
   5089                skip the origin-tracking stuff (call to schemeS) above,
   5090                since that's all tangled up with it too; do_shadow_CAS
   5091                does it all. */
   5092             break;
   5093 
   5094          case Ist_LLSC:
   5095             do_shadow_LLSC( &mce,
   5096                             st->Ist.LLSC.end,
   5097                             st->Ist.LLSC.result,
   5098                             st->Ist.LLSC.addr,
   5099                             st->Ist.LLSC.storedata );
   5100             break;
   5101 
   5102          default:
   5103             VG_(printf)("\n");
   5104             ppIRStmt(st);
   5105             VG_(printf)("\n");
   5106             VG_(tool_panic)("memcheck: unhandled IRStmt");
   5107 
   5108       } /* switch (st->tag) */
   5109 
   5110       if (0 && verboze) {
   5111          for (j = first_stmt; j < sb_out->stmts_used; j++) {
   5112             VG_(printf)("   ");
   5113             ppIRStmt(sb_out->stmts[j]);
   5114             VG_(printf)("\n");
   5115          }
   5116          VG_(printf)("\n");
   5117       }
   5118 
   5119       /* ... and finally copy the stmt itself to the output.  Except,
   5120          skip the copy of IRCASs; see comments on case Ist_CAS
   5121          above. */
   5122       if (st->tag != Ist_CAS)
   5123          stmt('C', &mce, st);
   5124    }
   5125 
   5126    /* Now we need to complain if the jump target is undefined. */
   5127    first_stmt = sb_out->stmts_used;
   5128 
   5129    if (verboze) {
   5130       VG_(printf)("sb_in->next = ");
   5131       ppIRExpr(sb_in->next);
   5132       VG_(printf)("\n\n");
   5133    }
   5134 
   5135    complainIfUndefined( &mce, sb_in->next );
   5136 
   5137    if (0 && verboze) {
   5138       for (j = first_stmt; j < sb_out->stmts_used; j++) {
   5139          VG_(printf)("   ");
   5140          ppIRStmt(sb_out->stmts[j]);
   5141          VG_(printf)("\n");
   5142       }
   5143       VG_(printf)("\n");
   5144    }
   5145 
    5146    /* If this fails, there's been some serious snafu with tmp management;
   5147       that should be investigated. */
   5148    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   5149    VG_(deleteXA)( mce.tmpMap );
   5150 
   5151    tl_assert(mce.sb == sb_out);
   5152    return sb_out;
   5153 }
   5154 
   5155 /*------------------------------------------------------------*/
   5156 /*--- Post-tree-build final tidying                        ---*/
   5157 /*------------------------------------------------------------*/
   5158 
   5159 /* This exploits the observation that Memcheck often produces
   5160    repeated conditional calls of the form
   5161 
   5162    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
   5163 
   5164    with the same guard expression G guarding the same helper call.
   5165    The second and subsequent calls are redundant.  This usually
   5166    results from instrumentation of guest code containing multiple
   5167    memory references at different constant offsets from the same base
   5168    register.  After optimisation of the instrumentation, you get a
   5169    test for the definedness of the base register for each memory
   5170    reference, which is kinda pointless.  MC_(final_tidy) therefore
   5171    looks for such repeated calls and removes all but the first. */
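/* Illustrative only (temp names invented), using the notation above:
   after optimisation the instrumented IR may contain

      Dirty t30 MC_(helperc_value_check8_fail_no_o)()
      ...
      Dirty t30 MC_(helperc_value_check8_fail_no_o)()

   where t30 tests the definedness of a base register used by several
   loads; the second and later copies add nothing and are turned into
   no-ops by MC_(final_tidy) below. */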
   5172 
   5173 /* A struct for recording which (helper, guard) pairs we have already
   5174    seen. */
   5175 typedef
   5176    struct { void* entry; IRExpr* guard; }
   5177    Pair;
   5178 
   5179 /* Return True if e1 and e2 definitely denote the same value (used to
   5180    compare guards).  Return False if unknown; False is the safe
   5181    answer.  Since guest registers and guest memory do not have the
   5182    SSA property we must return False if any Gets or Loads appear in
   5183    the expression. */
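/* Illustrative only: two structurally identical guards such as
   CmpNE64(t5, 0x0:I64) compare equal here, whereas two occurrences of
   GET:I32(<offset>) do not, since the guest register could have been
   updated between them. */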
   5184 
   5185 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
   5186 {
   5187    if (e1->tag != e2->tag)
   5188       return False;
   5189    switch (e1->tag) {
   5190       case Iex_Const:
   5191          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
   5192       case Iex_Binop:
   5193          return e1->Iex.Binop.op == e2->Iex.Binop.op
   5194                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
   5195                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
   5196       case Iex_Unop:
   5197          return e1->Iex.Unop.op == e2->Iex.Unop.op
   5198                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
   5199       case Iex_RdTmp:
   5200          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
   5201       case Iex_Mux0X:
   5202          return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
   5203                 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
   5204                 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
   5205       case Iex_Qop:
   5206       case Iex_Triop:
   5207       case Iex_CCall:
   5208          /* be lazy.  Could define equality for these, but they never
   5209             appear to be used. */
   5210          return False;
   5211       case Iex_Get:
   5212       case Iex_GetI:
   5213       case Iex_Load:
   5214          /* be conservative - these may not give the same value each
   5215             time */
   5216          return False;
   5217       case Iex_Binder:
   5218          /* should never see this */
   5219          /* fallthrough */
   5220       default:
   5221          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
   5222          ppIRExpr(e1);
   5223          VG_(tool_panic)("memcheck:sameIRValue");
   5224          return False;
   5225    }
   5226 }
   5227 
   5228 /* See if 'pairs' already has an entry for (entry, guard).  Return
   5229    True if so.  If not, add an entry. */
   5230 
   5231 static
   5232 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
   5233 {
   5234    Pair  p;
   5235    Pair* pp;
   5236    Int   i, n = VG_(sizeXA)( pairs );
   5237    for (i = 0; i < n; i++) {
   5238       pp = VG_(indexXA)( pairs, i );
   5239       if (pp->entry == entry && sameIRValue(pp->guard, guard))
   5240          return True;
   5241    }
   5242    p.guard = guard;
   5243    p.entry = entry;
   5244    VG_(addToXA)( pairs, &p );
   5245    return False;
   5246 }
   5247 
   5248 static Bool is_helperc_value_checkN_fail ( HChar* name )
   5249 {
   5250    return
   5251       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
   5252       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
   5253       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
   5254       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
   5255       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
   5256       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
   5257       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
   5258       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
   5259 }
   5260 
   5261 IRSB* MC_(final_tidy) ( IRSB* sb_in )
   5262 {
   5263    Int i;
   5264    IRStmt*   st;
   5265    IRDirty*  di;
   5266    IRExpr*   guard;
   5267    IRCallee* cee;
   5268    Bool      alreadyPresent;
   5269    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
   5270                                  VG_(free), sizeof(Pair) );
   5271    /* Scan forwards through the statements.  Each time a call to one
   5272       of the relevant helpers is seen, check if we have made a
   5273       previous call to the same helper using the same guard
   5274       expression, and if so, delete the call. */
   5275    for (i = 0; i < sb_in->stmts_used; i++) {
   5276       st = sb_in->stmts[i];
   5277       tl_assert(st);
   5278       if (st->tag != Ist_Dirty)
   5279          continue;
   5280       di = st->Ist.Dirty.details;
   5281       guard = di->guard;
   5282       if (!guard)
   5283          continue;
   5284       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
   5285       cee = di->cee;
   5286       if (!is_helperc_value_checkN_fail( cee->name ))
   5287          continue;
   5288        /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
   5289           guard 'guard'.  Check if we have already seen a call to this
   5290           function with the same guard.  If so, delete it.  If not,
   5291           add it to the set of calls we do know about. */
   5292       alreadyPresent = check_or_add( pairs, guard, cee->addr );
   5293       if (alreadyPresent) {
   5294          sb_in->stmts[i] = IRStmt_NoOp();
   5295          if (0) VG_(printf)("XX\n");
   5296       }
   5297    }
   5298    VG_(deleteXA)( pairs );
   5299    return sb_in;
   5300 }
   5301 
   5302 
   5303 /*------------------------------------------------------------*/
   5304 /*--- Origin tracking stuff                                ---*/
   5305 /*------------------------------------------------------------*/
   5306 
   5307 /* Almost identical to findShadowTmpV. */
   5308 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
   5309 {
   5310    TempMapEnt* ent;
   5311    /* VG_(indexXA) range-checks 'orig', hence no need to check
   5312       here. */
   5313    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5314    tl_assert(ent->kind == Orig);
   5315    if (ent->shadowB == IRTemp_INVALID) {
   5316       IRTemp tmpB
   5317         = newTemp( mce, Ity_I32, BSh );
   5318       /* newTemp may cause mce->tmpMap to resize, hence previous results
   5319          from VG_(indexXA) are invalid. */
   5320       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5321       tl_assert(ent->kind == Orig);
   5322       tl_assert(ent->shadowB == IRTemp_INVALID);
   5323       ent->shadowB = tmpB;
   5324    }
   5325    return ent->shadowB;
   5326 }
   5327 
   5328 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
   5329 {
   5330    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
   5331 }
   5332 
   5333 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
   5334                             IRAtom* baseaddr, Int offset )
   5335 {
   5336    void*    hFun;
   5337    HChar*   hName;
   5338    IRTemp   bTmp;
   5339    IRDirty* di;
   5340    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5341    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5342    IRAtom*  ea    = baseaddr;
   5343    if (offset != 0) {
   5344       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5345                                    : mkU64( (Long)(Int)offset );
   5346       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   5347    }
   5348    bTmp = newTemp(mce, mce->hWordTy, BSh);
   5349 
   5350    switch (szB) {
   5351       case 1: hFun  = (void*)&MC_(helperc_b_load1);
   5352               hName = "MC_(helperc_b_load1)";
   5353               break;
   5354       case 2: hFun  = (void*)&MC_(helperc_b_load2);
   5355               hName = "MC_(helperc_b_load2)";
   5356               break;
   5357       case 4: hFun  = (void*)&MC_(helperc_b_load4);
   5358               hName = "MC_(helperc_b_load4)";
   5359               break;
   5360       case 8: hFun  = (void*)&MC_(helperc_b_load8);
   5361               hName = "MC_(helperc_b_load8)";
   5362               break;
   5363       case 16: hFun  = (void*)&MC_(helperc_b_load16);
   5364                hName = "MC_(helperc_b_load16)";
   5365                break;
   5366       default:
   5367          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
   5368          tl_assert(0);
   5369    }
   5370    di = unsafeIRDirty_1_N(
   5371            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
   5372            mkIRExprVec_1( ea )
   5373         );
   5374    /* no need to mess with any annotations.  This call accesses
   5375       neither guest state nor guest memory. */
   5376    stmt( 'B', mce, IRStmt_Dirty(di) );
   5377    if (mce->hWordTy == Ity_I64) {
   5378       /* 64-bit host */
   5379       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
   5380       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
   5381       return mkexpr(bTmp32);
   5382    } else {
   5383       /* 32-bit host */
   5384       return mkexpr(bTmp);
   5385    }
   5386 }
   5387 
   5388 /* Generate a shadow store.  guard :: Ity_I1 controls whether the
   5389    store really happens; NULL means it unconditionally does. */
   5390 static void gen_store_b ( MCEnv* mce, Int szB,
   5391                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
   5392                           IRAtom* guard )
   5393 {
   5394    void*    hFun;
   5395    HChar*   hName;
   5396    IRDirty* di;
   5397    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5398    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5399    IRAtom*  ea    = baseaddr;
   5400    if (guard) {
   5401       tl_assert(isOriginalAtom(mce, guard));
   5402       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   5403    }
   5404    if (offset != 0) {
   5405       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5406                                    : mkU64( (Long)(Int)offset );
   5407       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
   5408    }
   5409    if (mce->hWordTy == Ity_I64)
   5410       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
   5411 
   5412    switch (szB) {
   5413       case 1: hFun  = (void*)&MC_(helperc_b_store1);
   5414               hName = "MC_(helperc_b_store1)";
   5415               break;
   5416       case 2: hFun  = (void*)&MC_(helperc_b_store2);
   5417               hName = "MC_(helperc_b_store2)";
   5418               break;
   5419       case 4: hFun  = (void*)&MC_(helperc_b_store4);
   5420               hName = "MC_(helperc_b_store4)";
   5421               break;
   5422       case 8: hFun  = (void*)&MC_(helperc_b_store8);
   5423               hName = "MC_(helperc_b_store8)";
   5424               break;
   5425       case 16: hFun  = (void*)&MC_(helperc_b_store16);
   5426                hName = "MC_(helperc_b_store16)";
   5427                break;
   5428       default:
   5429          tl_assert(0);
   5430    }
   5431    di = unsafeIRDirty_0_N( 2/*regparms*/,
   5432            hName, VG_(fnptr_to_fnentry)( hFun ),
   5433            mkIRExprVec_2( ea, dataB )
   5434         );
   5435    /* no need to mess with any annotations.  This call accesses
   5436       neither guest state nor guest memory. */
   5437    if (guard) di->guard = guard;
   5438    stmt( 'B', mce, IRStmt_Dirty(di) );
   5439 }
   5440 
   5441 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   5442    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5443    if (eTy == Ity_I64)
   5444       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   5445    if (eTy == Ity_I32)
   5446       return e;
   5447    tl_assert(0);
   5448 }
   5449 
   5450 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   5451    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5452    tl_assert(eTy == Ity_I32);
   5453    if (dstTy == Ity_I64)
   5454       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   5455    tl_assert(0);
   5456 }
   5457 
   5458 
   5459 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
   5460 {
   5461    tl_assert(MC_(clo_mc_level) == 3);
   5462 
   5463    switch (e->tag) {
   5464 
   5465       case Iex_GetI: {
   5466          IRRegArray* descr_b;
   5467          IRAtom      *t1, *t2, *t3, *t4;
   5468          IRRegArray* descr      = e->Iex.GetI.descr;
   5469          IRType equivIntTy
   5470             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
   5471          /* If this array is unshadowable for whatever reason, use the
   5472             usual approximation. */
   5473          if (equivIntTy == Ity_INVALID)
   5474             return mkU32(0);
   5475          tl_assert(sizeofIRType(equivIntTy) >= 4);
   5476          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
   5477          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
   5478                                  equivIntTy, descr->nElems );
   5479          /* Do a shadow indexed get of the same size, giving t1.  Take
   5480             the bottom 32 bits of it, giving t2.  Compute into t3 the
   5481             origin for the index (almost certainly zero, but there's
   5482             no harm in being completely general here, since iropt will
   5483             remove any useless code), and fold it in, giving a final
   5484             value t4. */
   5485          t1 = assignNew( 'B', mce, equivIntTy,
   5486                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
   5487                                                 e->Iex.GetI.bias ));
   5488          t2 = narrowTo32( mce, t1 );
   5489          t3 = schemeE( mce, e->Iex.GetI.ix );
   5490          t4 = gen_maxU32( mce, t2, t3 );
   5491          return t4;
   5492       }
   5493       case Iex_CCall: {
   5494          Int i;
   5495          IRAtom*  here;
   5496          IRExpr** args = e->Iex.CCall.args;
   5497          IRAtom*  curr = mkU32(0);
   5498          for (i = 0; args[i]; i++) {
   5499             tl_assert(i < 32);
   5500             tl_assert(isOriginalAtom(mce, args[i]));
   5501             /* Only take notice of this arg if the callee's
   5502                mc-exclusion mask does not say it is to be excluded. */
   5503             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
   5504                /* the arg is to be excluded from definedness checking.
   5505                   Do nothing. */
   5506                if (0) VG_(printf)("excluding %s(%d)\n",
   5507                                   e->Iex.CCall.cee->name, i);
   5508             } else {
   5509                /* calculate the arg's definedness, and pessimistically
   5510                   merge it in. */
   5511                here = schemeE( mce, args[i] );
   5512                curr = gen_maxU32( mce, curr, here );
   5513             }
   5514          }
   5515          return curr;
   5516       }
   5517       case Iex_Load: {
   5518          Int dszB;
   5519          dszB = sizeofIRType(e->Iex.Load.ty);
   5520          /* assert that the B value for the address is already
   5521             available (somewhere) */
   5522          tl_assert(isIRAtom(e->Iex.Load.addr));
   5523          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
   5524          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
   5525       }
   5526       case Iex_Mux0X: {
   5527          IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
   5528          IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
   5529          IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
   5530          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
   5531       }
   5532       case Iex_Qop: {
   5533          IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
   5534          IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
   5535          IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
   5536          IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
   5537          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
   5538                                  gen_maxU32( mce, b3, b4 ) );
   5539       }
   5540       case Iex_Triop: {
   5541          IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
   5542          IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
   5543          IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
   5544          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
   5545       }
   5546       case Iex_Binop: {
   5547          switch (e->Iex.Binop.op) {
   5548             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   5549             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   5550             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   5551             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   5552                /* Just say these all produce a defined result,
   5553                   regardless of their arguments.  See
   5554                   COMMENT_ON_CasCmpEQ in this file. */
   5555                return mkU32(0);
   5556             default: {
   5557                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
   5558                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
   5559                return gen_maxU32( mce, b1, b2 );
   5560             }
   5561          }
   5562          tl_assert(0);
   5563          /*NOTREACHED*/
   5564       }
   5565       case Iex_Unop: {
   5566          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
   5567          return b1;
   5568       }
   5569       case Iex_Const:
   5570          return mkU32(0);
   5571       case Iex_RdTmp:
   5572          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
   5573       case Iex_Get: {
   5574          Int b_offset = MC_(get_otrack_shadow_offset)(
   5575                            e->Iex.Get.offset,
   5576                            sizeofIRType(e->Iex.Get.ty)
   5577                         );
   5578          tl_assert(b_offset >= -1
   5579                    && b_offset <= mce->layout->total_sizeB -4);
   5580          if (b_offset >= 0) {
   5581             /* FIXME: this isn't an atom! */
   5582             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
   5583                                Ity_I32 );
   5584          }
   5585          return mkU32(0);
   5586       }
   5587       default:
   5588          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
   5589          ppIRExpr(e);
   5590          VG_(tool_panic)("memcheck:schemeE");
   5591    }
   5592 }
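
        /* A small worked example, illustrative only: for the expression

              Add32(t5, GET:I32(off))

           where 'off' maps to a trackable shadow offset b_off, the cases
           above combine (via gen_maxU32) the B shadow temporary of t5 with
           a GET:I32 from the B-shadow area of the guest state at
           b_off + 2*total_sizeB, i.e. the result is the pessimistic max of
           the origin tags of the two operands. */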
   5593 
   5594 
   5595 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
   5596 {
   5597    // This is a hacked version of do_shadow_Dirty
   5598    Int       i, n, toDo, gSz, gOff;
   5599    IRAtom    *here, *curr;
   5600    IRTemp    dst;
   5601 
   5602    /* First check the guard. */
   5603    curr = schemeE( mce, d->guard );
   5604 
   5605    /* Now round up all inputs and maxU32 over them. */
   5606 
   5607    /* Inputs: unmasked args */
   5608    for (i = 0; d->args[i]; i++) {
   5609       if (d->cee->mcx_mask & (1<<i)) {
   5610          /* ignore this arg */
   5611       } else {
   5612          here = schemeE( mce, d->args[i] );
   5613          curr = gen_maxU32( mce, curr, here );
   5614       }
   5615    }
   5616 
   5617    /* Inputs: guest state that we read. */
   5618    for (i = 0; i < d->nFxState; i++) {
   5619       tl_assert(d->fxState[i].fx != Ifx_None);
   5620       if (d->fxState[i].fx == Ifx_Write)
   5621          continue;
   5622 
   5623       /* Ignore any sections marked as 'always defined'. */
   5624       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
   5625          if (0)
   5626             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   5627                         d->fxState[i].offset, d->fxState[i].size );
   5628          continue;
   5629       }
   5630 
   5631       /* This state element is read or modified.  So we need to
   5632          consider it.  If larger than 4 bytes, deal with it in 4-byte
   5633          chunks. */
   5634       gSz  = d->fxState[i].size;
   5635       gOff = d->fxState[i].offset;
   5636       tl_assert(gSz > 0);
   5637       while (True) {
   5638          Int b_offset;
   5639          if (gSz == 0) break;
   5640          n = gSz <= 4 ? gSz : 4;
   5641          /* update 'curr' with maxU32 of the state slice
   5642             gOff .. gOff+n-1 */
   5643          b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
   5644          if (b_offset != -1) {
   5645             here = assignNew( 'B',mce,
   5646                                Ity_I32,
   5647                                IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
   5648                                           Ity_I32));
   5649             curr = gen_maxU32( mce, curr, here );
   5650          }
   5651          gSz -= n;
   5652          gOff += n;
   5653       }
   5654 
   5655    }
   5656 
   5657    /* Inputs: memory */
   5658 
   5659    if (d->mFx != Ifx_None) {
   5660       /* Because we may do multiple shadow loads/stores from the same
   5661          base address, it's best to do a single test of its
   5662          definedness right now.  Post-instrumentation optimisation
   5663          should remove all but this test. */
   5664       tl_assert(d->mAddr);
   5665       here = schemeE( mce, d->mAddr );
   5666       curr = gen_maxU32( mce, curr, here );
   5667    }
   5668 
   5669    /* Deal with memory inputs (reads or modifies) */
   5670    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   5671       toDo   = d->mSize;
   5672       /* chew off 32-bit chunks.  We don't care about the endianness
   5673          since it's all going to be folded into a single 32-bit origin,
   5674          but nevertheless choose an endianness which is hopefully
   5675          native to the platform. */
   5676       while (toDo >= 4) {
   5677          here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
   5678          curr = gen_maxU32( mce, curr, here );
   5679          toDo -= 4;
   5680       }
   5681       /* handle possible 16-bit excess */
   5682       while (toDo >= 2) {
   5683          here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
   5684          curr = gen_maxU32( mce, curr, here );
   5685          toDo -= 2;
   5686       }
   5687       tl_assert(toDo == 0); /* TODO: handle a possible 1-byte excess; for now assert it cannot occur */
   5688    }
   5689 
   5690    /* Whew!  So curr is a 32-bit B-value which should give an origin
   5691       of some use if any of the inputs to the helper are undefined.
   5692       Now we need to re-distribute the results to all destinations. */
   5693 
   5694    /* Outputs: the destination temporary, if there is one. */
   5695    if (d->tmp != IRTemp_INVALID) {
   5696       dst   = findShadowTmpB(mce, d->tmp);
   5697       assign( 'B', mce, dst, curr );
   5698    }
   5699 
   5700    /* Outputs: guest state that we write or modify. */
   5701    for (i = 0; i < d->nFxState; i++) {
   5702       tl_assert(d->fxState[i].fx != Ifx_None);
   5703       if (d->fxState[i].fx == Ifx_Read)
   5704          continue;
   5705 
   5706       /* Ignore any sections marked as 'always defined'. */
   5707       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
   5708          continue;
   5709 
   5710       /* This state element is written or modified.  So we need to
   5711          consider it.  If larger than 4 bytes, deal with it in 4-byte
   5712          chunks. */
   5713       gSz  = d->fxState[i].size;
   5714       gOff = d->fxState[i].offset;
   5715       tl_assert(gSz > 0);
   5716       while (True) {
   5717          Int b_offset;
   5718          if (gSz == 0) break;
   5719          n = gSz <= 4 ? gSz : 4;
   5720          /* Write 'curr' to the state slice gOff .. gOff+n-1 */
   5721          b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
   5722          if (b_offset != -1) {
   5723             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
   5724                                        curr ));
   5725          }
   5726          gSz -= n;
   5727          gOff += n;
   5728       }
   5729    }
   5730 
   5731    /* Outputs: memory that we write or modify.  Same comments about
   5732       endianness as above apply. */
   5733    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   5734       toDo   = d->mSize;
   5735       /* chew off 32-bit chunks */
   5736       while (toDo >= 4) {
   5737          gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
   5738                       NULL/*guard*/ );
   5739          toDo -= 4;
   5740       }
   5741       /* handle possible 16-bit excess */
   5742       while (toDo >= 2) {
   5743          gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
   5744                       NULL/*guard*/ );
   5745          toDo -= 2;
   5746       }
   5747       tl_assert(toDo == 0); /* TODO: handle a possible 1-byte excess; for now assert it cannot occur */
   5748    }
   5749 }
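
        /* Summary sketch, with arbitrary names: for a call such as

              t9 = DIRTY g ::: foo(t1,t2)    (reads 8 bytes at mAddr)

           the code above folds into 'curr', via gen_maxU32, the origins of
           the guard g, of each non-masked argument, of each tracked 4-byte
           slice of guest state the helper reads, of mAddr itself, and of
           two 4-byte B loads from the read memory; 'curr' is then assigned
           to the B shadow of t9, and likewise written to any guest state
           and memory the helper modifies or writes. */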
   5750 
   5751 
   5752 static void do_origins_Store ( MCEnv* mce,
   5753                                IREndness stEnd,
   5754                                IRExpr* stAddr,
   5755                                IRExpr* stData )
   5756 {
   5757    Int     dszB;
   5758    IRAtom* dataB;
   5759    /* assert that the B value for the address is already available
   5760       (somewhere), since the call to schemeE will want to see it.
   5761       XXXX how does this actually ensure that?? */
   5762    tl_assert(isIRAtom(stAddr));
   5763    tl_assert(isIRAtom(stData));
   5764    dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   5765    dataB = schemeE( mce, stData );
   5766    gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
   5767                      NULL/*guard*/ );
   5768 }
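
        /* For instance, illustrative only: a store  STle(t2) = t7  with
           t7 :: Ity_I64 boils down to

              gen_store_b( mce, 8, t2, 0/*offset*/, schemeE(mce, t7),
                           NULL/*guard*/ )

           i.e. a dirty call to MC_(helperc_b_store8) passing t2 and the
           32-bit origin of t7 (widened to 64 bits on a 64-bit host). */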
   5769 
   5770 
   5771 static void schemeS ( MCEnv* mce, IRStmt* st )
   5772 {
   5773    tl_assert(MC_(clo_mc_level) == 3);
   5774 
   5775    switch (st->tag) {
   5776 
   5777       case Ist_AbiHint:
   5778          /* The value-check instrumenter handles this - by arranging
   5779             to pass the address of the next instruction to
   5780             MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
   5781             happen for origin tracking w.r.t. AbiHints.  So there is
   5782             nothing to do here. */
   5783          break;
   5784 
   5785       case Ist_PutI: {
   5786          IRRegArray* descr_b;
   5787          IRAtom      *t1, *t2, *t3, *t4;
   5788          IRRegArray* descr = st->Ist.PutI.descr;
   5789          IRType equivIntTy
   5790             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
   5791          /* If this array is unshadowable for whatever reason,
   5792             generate no code. */
   5793          if (equivIntTy == Ity_INVALID)
   5794             break;
   5795          tl_assert(sizeofIRType(equivIntTy) >= 4);
   5796          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
   5797          descr_b
   5798             = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
   5799                             equivIntTy, descr->nElems );
   5800          /* Compute a value to Put - the conjoinment of the origin for
   5801             the data to be Put-ted (obviously) and of the index value
   5802             (not so obviously). */
   5803          t1 = schemeE( mce, st->Ist.PutI.data );
   5804          t2 = schemeE( mce, st->Ist.PutI.ix );
   5805          t3 = gen_maxU32( mce, t1, t2 );
   5806          t4 = zWidenFrom32( mce, equivIntTy, t3 );
   5807          stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
   5808                                       st->Ist.PutI.bias, t4 ));
   5809          break;
   5810       }
   5811 
   5812       case Ist_Dirty:
   5813          do_origins_Dirty( mce, st->Ist.Dirty.details );
   5814          break;
   5815 
   5816       case Ist_Store:
   5817          do_origins_Store( mce, st->Ist.Store.end,
   5818                                 st->Ist.Store.addr,
   5819                                 st->Ist.Store.data );
   5820          break;
   5821 
   5822       case Ist_LLSC: {
   5823          /* In short: treat a load-linked like a normal load followed
   5824             by an assignment of the loaded (shadow) data to the result
   5825             temporary.  Treat a store-conditional like a normal store,
   5826             and mark the result temporary as defined. */
   5827          if (st->Ist.LLSC.storedata == NULL) {
   5828             /* Load Linked */
   5829             IRType resTy
   5830                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
   5831             IRExpr* vanillaLoad
   5832                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
   5833             tl_assert(resTy == Ity_I64 || resTy == Ity_I32
   5834                       || resTy == Ity_I16 || resTy == Ity_I8);
   5835             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
   5836                               schemeE(mce, vanillaLoad));
   5837          } else {
   5838             /* Store conditional */
   5839             do_origins_Store( mce, st->Ist.LLSC.end,
   5840                                    st->Ist.LLSC.addr,
   5841                                    st->Ist.LLSC.storedata );
   5842             /* For the rationale behind this, see comments at the
   5843                place where the V-shadow for .result is constructed, in
   5844                do_shadow_LLSC.  In short, we regard .result as
   5845                always-defined. */
   5846             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
   5847                               mkU32(0) );
   5848          }
   5849          break;
   5850       }
   5851 
   5852       case Ist_Put: {
   5853          Int b_offset
   5854             = MC_(get_otrack_shadow_offset)(
   5855                  st->Ist.Put.offset,
   5856                  sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
   5857               );
   5858          if (b_offset >= 0) {
   5859             /* FIXME: this isn't an atom! */
   5860             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
   5861                                        schemeE( mce, st->Ist.Put.data )) );
   5862          }
   5863          break;
   5864       }
   5865 
   5866       case Ist_WrTmp:
   5867          assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
   5868                            schemeE(mce, st->Ist.WrTmp.data) );
   5869          break;
   5870 
   5871       case Ist_MBE:
   5872       case Ist_NoOp:
   5873       case Ist_Exit:
   5874       case Ist_IMark:
   5875          break;
   5876 
   5877       default:
   5878          VG_(printf)("mc_translate.c: schemeS: unhandled: ");
   5879          ppIRStmt(st);
   5880          VG_(tool_panic)("memcheck:schemeS");
   5881    }
   5882 }
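
        /* For example, illustrative only: a statement  PUT(off) = t3, where
           'off' maps to a trackable shadow offset b_off, is instrumented as

              PUT(b_off + 2*total_sizeB) = schemeE(mce, t3)

           while statements with no origin-relevant effect here (IMark,
           NoOp, MBE, Exit) generate no extra code. */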
   5883 
   5884 
   5885 /*--------------------------------------------------------------------*/
   5886 /*--- end                                           mc_translate.c ---*/
   5887 /*--------------------------------------------------------------------*/
   5888