
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2010 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"


/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are and
   should be checked, why, and what function is responsible.


   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument).  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]


   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]


   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]


   In practice, 1 and 2 account for the vast majority of cases.
*/

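/* A concrete illustration of cases 1 and 3 (hypothetical client code,
   not part of this file):

      int i;               // never written: i's V bits are all 1s
      int a[10];
      if (i > 5)           // case 1: undefined branch condition
         a[i] = 0;         // case 3: undefined address in a store

   At both marked points the instrumentation below arranges a guarded
   call to one of the MC_(helperc_value_check*_fail_*) helpers, which
   report the familiar "Conditional jump or move depends on
   uninitialised value(s)" and "Use of uninitialised value" errors. */
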
/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value, and
   so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;


/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool    bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   (2) (more important reason): It is necessary to support making
   multiple assignments to a shadow -- specifically, after testing a
   shadow for definedness, it needs to be made defined.  But IR's SSA
   property disallows this.  Therefore, when a shadow needs to get a
   new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   Hence shadow IRTemps are allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_temps-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}


/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

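/* For instance (purely illustrative), flat IR represents the
   source-level expression (x + y) * z as a sequence of bindings

      t1 = Add32(t_x, t_y)     -- each operand is an atom
      t2 = Mul32(t1, t_z)      -- never Mul32(Add32(..), t_z)

   so every operand this instrumenter encounters is an IRExpr_RdTmp
   or an IRExpr_Const, and its V-value is likewise a tmp or a
   constant. */
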
/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

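/* A worked example of the encoding (illustrative only): V bits use
   0 = defined, 1 = undefined.  So, per byte,

      mkUifU8 (0x0F, 0xF0)  -->  Or8 (0x0F, 0xF0)  =  0xFF
         -- a result bit is undefined if EITHER input bit is
      mkDifD8 (0x0F, 0xF0)  -->  And8(0x0F, 0xF0)  =  0x00
         -- a result bit is defined if EITHER input bit is

   UifU is the safe default for combining operand shadows; DifD is
   used only where extra definedness can be justified. */
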
/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}

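/* Illustration (not normative): Iop_LeftN(x) behaves like x | -x,
   i.e. it smears the rightmost 1 bit of x leftwards.  E.g.

      mkLeft8 applied to vbits 0x04 (00000100)
         gives            0xFC (11111100)

   which pessimistically marks every bit at and above the lowest
   undefined bit as undefined -- the right shape for modelling carry
   propagation in additions and subtractions. */
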
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

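/* Sketch of how these combine (illustrative; the exact combination
   used for And/Or lives in the binop handling later in this file).
   For And, a defined 0 data bit forces a 0 result bit regardless of
   the other operand, so the naive UifU verdict can be improved by
   DifD'ing in the improvement terms, roughly

      vbits(x & y) = DifD( UifU(vx, vy),
                           DifD( ImproveAND(x, vx),
                                 ImproveAND(y, vy) ) )

   and dually for Or, where a defined 1 data bit forces the result. */
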
/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

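/* Worked example (illustrative only): a pessimising cast collapses a
   vbits value to all-defined or all-undefined.  For I32 -> I32:

      mkPCastTo(.., Ity_I32, 0x00000000)  -->  0x00000000  (defined)
      mkPCastTo(.., Ity_I32, 0x00000400)  -->  0xFFFFFFFF  (undefined)

   i.e. one undefined bit anywhere poisons the entire result.  This
   loses precision but is cheap, and is the fallback wherever exact
   V-bit propagation rules are not worth implementing. */
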
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}

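/* Worked instance (illustrative): suppose only bit 0 of xx and yy is
   defined (vxx = vyy = 0xFFFFFFFE), with xx bit 0 = 0 and yy bit 0 = 1.
   The naive term PCasts UifU32(vxx,vyy) = 0xFFFFFFFE to 1...1
   (undefined).  But in vec, bit 0 is

      Or( vxx[0]=0, vyy[0]=0, Not(Xor(xx[0],yy[0]))=0 )  =  0

   so vec != 1...1, the improvement term is 0...0 (defined), and the
   DifD forces the overall verdict to 'defined' -- correctly, since a
   defined, differing bit pair settles EQ/NE regardless of the rest. */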

/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}

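/* Worked instance (illustrative): consider CmpORD32S(x,0) where only
   the sign bit of x is defined, i.e. x# = 0x7FFFFFFF.  The fancy
   interpretation gives

      PCast(x#) & (3<<1)  =  0xFFFFFFFF & 6  =  6   -- GT#,EQ# undefined
      (x# >>u 31) << 3    =  0 << 3          =  0   -- LT# defined

   so the result shadow is 6: bits 2 and 1 (GT, EQ) are undefined, but
   bit 3 (LT) is defined, exactly as the comment above promises. */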

/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointer might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}

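/* To make the effect concrete (illustrative only), for a 4-byte tmp
   t5 with shadow t5#, the emitted code is roughly:

      t_cond = CmpNEZ32(t5#)                       -- via mkPCastTo
      if (t_cond) DIRTY MC_(helperc_value_check4_fail_no_o)()
      t5#' = 0x0                                   -- fresh shadow, now defined

   where t5#' replaces t5# in tmpMap, preserving SSA form. */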

/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely within such a region or completely outside it.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}

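/* Concretely (illustrative; 1024 is a made-up guest state size), for
   a guest with total_sizeB == 1024, an original statement

      PUT(16) = t3        -- write guest reg at offset 16

   gets the shadow statement

      PUT(1040) = t3#     -- write its V bits at offset 16 + 1024

   unless offsets [16 .. 16+size-1] lie in an always-defined section,
   in which case no shadow state is written at all. */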

/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr,
                      IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}

   1359 
   1360 /* 3-arg version of the above. */
   1361 static
   1362 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
   1363                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
   1364 {
   1365    IRAtom* at;
   1366    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1367    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1368    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1369    tl_assert(isShadowAtom(mce,va1));
   1370    tl_assert(isShadowAtom(mce,va2));
   1371    tl_assert(isShadowAtom(mce,va3));
   1372 
   1373    /* The general case is inefficient because PCast is an expensive
   1374       operation.  Here are some special cases which use PCast only
   1375       twice rather than three times. */
   1376 
   1377    /* I32 x I64 x I64 -> I64 */
   1378    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   1379    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1380        && finalVty == Ity_I64) {
   1381       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
   1382       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1383          mode indication which is fully defined, this should get
   1384          folded out later. */
   1385       at = mkPCastTo(mce, Ity_I64, va1);
   1386       /* Now fold in 2nd and 3rd args. */
   1387       at = mkUifU(mce, Ity_I64, at, va2);
   1388       at = mkUifU(mce, Ity_I64, at, va3);
   1389       /* and PCast once again. */
   1390       at = mkPCastTo(mce, Ity_I64, at);
   1391       return at;
   1392    }
   1393 
   1394    /* I32 x I64 x I64 -> I32 */
   1395    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1396        && finalVty == Ity_I32) {
   1397       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
   1398       at = mkPCastTo(mce, Ity_I64, va1);
   1399       at = mkUifU(mce, Ity_I64, at, va2);
   1400       at = mkUifU(mce, Ity_I64, at, va3);
   1401       at = mkPCastTo(mce, Ity_I32, at);
   1402       return at;
   1403    }
   1404 
   1405    /* I32 x I32 x I32 -> I32 */
   1406    /* 32-bit FP idiom, as (eg) happens on ARM */
   1407    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
   1408        && finalVty == Ity_I32) {
   1409       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
   1410       at = va1;
   1411       at = mkUifU(mce, Ity_I32, at, va2);
   1412       at = mkUifU(mce, Ity_I32, at, va3);
   1413       at = mkPCastTo(mce, Ity_I32, at);
   1414       return at;
   1415    }
   1416 
   1417    if (1) {
   1418       VG_(printf)("mkLazy3: ");
   1419       ppIRType(t1);
   1420       VG_(printf)(" x ");
   1421       ppIRType(t2);
   1422       VG_(printf)(" x ");
   1423       ppIRType(t3);
   1424       VG_(printf)(" -> ");
   1425       ppIRType(finalVty);
   1426       VG_(printf)("\n");
   1427    }
   1428 
   1429    tl_assert(0);
   1430    /* General case: force everything via 32-bit intermediaries. */
   1431    /*
   1432    at = mkPCastTo(mce, Ity_I32, va1);
   1433    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   1434    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   1435    at = mkPCastTo(mce, finalVty, at);
   1436    return at;
   1437    */
   1438 }
   1439 
   1440 
   1441 /* 4-arg version of the above. */
   1442 static
   1443 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
   1444                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
   1445 {
   1446    IRAtom* at;
   1447    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1448    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1449    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1450    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   1451    tl_assert(isShadowAtom(mce,va1));
   1452    tl_assert(isShadowAtom(mce,va2));
   1453    tl_assert(isShadowAtom(mce,va3));
   1454    tl_assert(isShadowAtom(mce,va4));
   1455 
   1456    /* The general case is inefficient because PCast is an expensive
   1457       operation.  Here are some special cases which use PCast only
    1458       twice rather than four times. */
   1459 
   1460    /* I32 x I64 x I64 x I64 -> I64 */
   1461    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   1462    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
   1463        && finalVty == Ity_I64) {
   1464       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
   1465       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1466          mode indication which is fully defined, this should get
   1467          folded out later. */
   1468       at = mkPCastTo(mce, Ity_I64, va1);
   1469       /* Now fold in 2nd, 3rd, 4th args. */
   1470       at = mkUifU(mce, Ity_I64, at, va2);
   1471       at = mkUifU(mce, Ity_I64, at, va3);
   1472       at = mkUifU(mce, Ity_I64, at, va4);
   1473       /* and PCast once again. */
   1474       at = mkPCastTo(mce, Ity_I64, at);
   1475       return at;
   1476    }
   1477 
   1478    if (1) {
   1479       VG_(printf)("mkLazy4: ");
   1480       ppIRType(t1);
   1481       VG_(printf)(" x ");
   1482       ppIRType(t2);
   1483       VG_(printf)(" x ");
   1484       ppIRType(t3);
   1485       VG_(printf)(" x ");
   1486       ppIRType(t4);
   1487       VG_(printf)(" -> ");
   1488       ppIRType(finalVty);
   1489       VG_(printf)("\n");
   1490    }
   1491 
   1492    tl_assert(0);
   1493 }
   1494 
   1495 
   1496 /* Do the lazy propagation game from a null-terminated vector of
    1497    atoms.  These are presumably the arguments to a helper call, so the
   1498    IRCallee info is also supplied in order that we can know which
   1499    arguments should be ignored (via the .mcx_mask field).
   1500 */
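         /* (Illustrative example: a helper registered with, say,
            mcx_mask = 1<<2 has bit 2 set, so its third argument (i == 2)
            is skipped by both loops below and its definedness never
            influences the result.) */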
   1501 static
   1502 IRAtom* mkLazyN ( MCEnv* mce,
   1503                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
   1504 {
   1505    Int     i;
   1506    IRAtom* here;
   1507    IRAtom* curr;
   1508    IRType  mergeTy;
    1509    Bool    mergeTy64 = True;
   1510 
   1511    /* Decide on the type of the merge intermediary.  If all relevant
   1512       args are I64, then it's I64.  In all other circumstances, use
   1513       I32. */
   1514    for (i = 0; exprvec[i]; i++) {
   1515       tl_assert(i < 32);
   1516       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1517       if (cee->mcx_mask & (1<<i))
   1518          continue;
   1519       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
   1520          mergeTy64 = False;
   1521    }
   1522 
   1523    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   1524    curr    = definedOfType(mergeTy);
   1525 
   1526    for (i = 0; exprvec[i]; i++) {
   1527       tl_assert(i < 32);
   1528       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1529       /* Only take notice of this arg if the callee's mc-exclusion
   1530          mask does not say it is to be excluded. */
   1531       if (cee->mcx_mask & (1<<i)) {
   1532          /* the arg is to be excluded from definedness checking.  Do
   1533             nothing. */
   1534          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
   1535       } else {
   1536          /* calculate the arg's definedness, and pessimistically merge
   1537             it in. */
   1538          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
   1539          curr = mergeTy64
   1540                    ? mkUifU64(mce, here, curr)
   1541                    : mkUifU32(mce, here, curr);
   1542       }
   1543    }
   1544    return mkPCastTo(mce, finalVtype, curr );
   1545 }
   1546 
   1547 
   1548 /*------------------------------------------------------------*/
   1549 /*--- Generating expensive sequences for exact carry-chain ---*/
   1550 /*--- propagation in add/sub and related operations.       ---*/
   1551 /*------------------------------------------------------------*/
   1552 
   1553 static
   1554 IRAtom* expensiveAddSub ( MCEnv*  mce,
   1555                           Bool    add,
   1556                           IRType  ty,
   1557                           IRAtom* qaa, IRAtom* qbb,
   1558                           IRAtom* aa,  IRAtom* bb )
   1559 {
   1560    IRAtom *a_min, *b_min, *a_max, *b_max;
   1561    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
   1562 
   1563    tl_assert(isShadowAtom(mce,qaa));
   1564    tl_assert(isShadowAtom(mce,qbb));
   1565    tl_assert(isOriginalAtom(mce,aa));
   1566    tl_assert(isOriginalAtom(mce,bb));
   1567    tl_assert(sameKindedAtoms(qaa,aa));
   1568    tl_assert(sameKindedAtoms(qbb,bb));
   1569 
   1570    switch (ty) {
   1571       case Ity_I32:
   1572          opAND = Iop_And32;
   1573          opOR  = Iop_Or32;
   1574          opXOR = Iop_Xor32;
   1575          opNOT = Iop_Not32;
   1576          opADD = Iop_Add32;
   1577          opSUB = Iop_Sub32;
   1578          break;
   1579       case Ity_I64:
   1580          opAND = Iop_And64;
   1581          opOR  = Iop_Or64;
   1582          opXOR = Iop_Xor64;
   1583          opNOT = Iop_Not64;
   1584          opADD = Iop_Add64;
   1585          opSUB = Iop_Sub64;
   1586          break;
   1587       default:
   1588          VG_(tool_panic)("expensiveAddSub");
   1589    }
   1590 
   1591    // a_min = aa & ~qaa
   1592    a_min = assignNew('V', mce,ty,
   1593                      binop(opAND, aa,
   1594                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
   1595 
   1596    // b_min = bb & ~qbb
   1597    b_min = assignNew('V', mce,ty,
   1598                      binop(opAND, bb,
   1599                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
   1600 
   1601    // a_max = aa | qaa
   1602    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
   1603 
   1604    // b_max = bb | qbb
   1605    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
   1606 
   1607    if (add) {
   1608       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   1609       return
   1610       assignNew('V', mce,ty,
   1611          binop( opOR,
   1612                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1613                 assignNew('V', mce,ty,
   1614                    binop( opXOR,
   1615                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
   1616                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
   1617                    )
   1618                 )
   1619          )
   1620       );
   1621    } else {
    1622       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
   1623       return
   1624       assignNew('V', mce,ty,
   1625          binop( opOR,
   1626                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1627                 assignNew('V', mce,ty,
   1628                    binop( opXOR,
   1629                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
   1630                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
   1631                    )
   1632                 )
   1633          )
   1634       );
   1635    }
   1636 
   1637 }
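
         /* A minimal standalone sketch of the bounding argument above,
            assuming plain C unsigned arithmetic; it is not part of
            Memcheck and the names are hypothetical.  a_min/a_max bracket
            the values each operand could take given its undefined bits,
            so any result bit on which the two extreme sums disagree may
            depend on undefined input and gets flagged, on top of the
            directly-undefined bits (qaa | qbb). */
         #if 0
         #include <stdio.h>
         #include <stdint.h>

         /* Hypothetical model of the 'add' case above: returns the
            computed vbits for a 32-bit addition. */
         static uint32_t vbits_of_add ( uint32_t aa, uint32_t qaa,
                                        uint32_t bb, uint32_t qbb )
         {
            uint32_t a_min = aa & ~qaa, b_min = bb & ~qbb;
            uint32_t a_max = aa | qaa,  b_max = bb | qbb;
            return (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max));
         }

         int main ( void )
         {
            /* aa = 1 with bit 0 undefined, bb = 1 fully defined: the
               carry out of bit 0 may or may not happen, so bit 1 of the
               sum is undefined too.  Prints 00000003. */
            printf("%08x\n", vbits_of_add(1, 1, 1, 0));
            return 0;
         }
         #endif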
   1638 
   1639 
   1640 /*------------------------------------------------------------*/
   1641 /*--- Scalar shifts.                                       ---*/
   1642 /*------------------------------------------------------------*/
   1643 
   1644 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   1645    idea is to shift the definedness bits by the original shift amount.
   1646    This introduces 0s ("defined") in new positions for left shifts and
   1647    unsigned right shifts, and copies the top definedness bit for
   1648    signed right shifts.  So, conveniently, applying the original shift
   1649    operator to the definedness bits for the left arg is exactly the
   1650    right thing to do:
   1651 
   1652       (qaa << bb)
   1653 
    1654    However, if the shift amount is undefined then the whole result
    1655    is undefined.  Hence we need:
   1656 
   1657       (qaa << bb) `UifU` PCast(qbb)
   1658 
    1659    If the shift amount bb is a literal then qbb will say 'all defined'
   1660    and the UifU and PCast will get folded out by post-instrumentation
   1661    optimisation.
   1662 */
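         /* (Worked example, illustrative: take aa = 0xF0 with qaa = 0x0F,
            i.e. low nibble undefined, and bb = 4, fully defined.  Then
            qaa << 4 = 0xF0, so exactly the result bits into which the
            undefined input bits were shifted are flagged; and since qbb
            is all zeroes, the PCast/UifU terms fold away.) */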
   1663 static IRAtom* scalarShift ( MCEnv*  mce,
   1664                              IRType  ty,
   1665                              IROp    original_op,
   1666                              IRAtom* qaa, IRAtom* qbb,
   1667                              IRAtom* aa,  IRAtom* bb )
   1668 {
   1669    tl_assert(isShadowAtom(mce,qaa));
   1670    tl_assert(isShadowAtom(mce,qbb));
   1671    tl_assert(isOriginalAtom(mce,aa));
   1672    tl_assert(isOriginalAtom(mce,bb));
   1673    tl_assert(sameKindedAtoms(qaa,aa));
   1674    tl_assert(sameKindedAtoms(qbb,bb));
   1675    return
   1676       assignNew(
   1677          'V', mce, ty,
   1678          mkUifU( mce, ty,
   1679                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
   1680                  mkPCastTo(mce, ty, qbb)
   1681          )
   1682    );
   1683 }
   1684 
   1685 
   1686 /*------------------------------------------------------------*/
   1687 /*--- Helpers for dealing with vector primops.             ---*/
   1688 /*------------------------------------------------------------*/
   1689 
   1690 /* Vector pessimisation -- pessimise within each lane individually. */
   1691 
   1692 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
   1693 {
   1694    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
   1695 }
   1696 
   1697 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
   1698 {
   1699    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
   1700 }
   1701 
   1702 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
   1703 {
   1704    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   1705 }
   1706 
   1707 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
   1708 {
   1709    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
   1710 }
   1711 
   1712 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
   1713 {
   1714    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
   1715 }
   1716 
   1717 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
   1718 {
   1719    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
   1720 }
   1721 
   1722 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
   1723 {
   1724    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
   1725 }
   1726 
   1727 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
   1728 {
   1729    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
   1730 }
   1731 
   1732 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
   1733 {
   1734    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
   1735 }
   1736 
   1737 
   1738 /* Here's a simple scheme capable of handling ops derived from SSE1
    1739    code, while only generating ops that can be efficiently
   1740    implemented in SSE1. */
   1741 
   1742 /* All-lanes versions are straightforward:
   1743 
   1744    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
   1745 
    1746    unary32Fx4(x)      ==> PCast32x4(x#)
   1747 
   1748    Lowest-lane-only versions are more complex:
   1749 
   1750    binary32F0x4(x,y)  ==> SetV128lo32(
   1751                              x#,
   1752                              PCast32(V128to32(UifUV128(x#,y#)))
   1753                           )
   1754 
   1755    This is perhaps not so obvious.  In particular, it's faster to
   1756    do a V128-bit UifU and then take the bottom 32 bits than the more
   1757    obvious scheme of taking the bottom 32 bits of each operand
    1758    and doing a 32-bit UifU.  Basically this is because UifU is fast
    1759    and chopping lanes off vector values is slow.
   1760 
   1761    Finally:
   1762 
   1763    unary32F0x4(x)     ==> SetV128lo32(
   1764                              x#,
   1765                              PCast32(V128to32(x#))
   1766                           )
   1767 
   1768    Where:
   1769 
   1770    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   1771    PCast32x4(v#) = CmpNEZ32x4(v#)
   1772 */
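         /* (Illustrative: instrumenting t3 = Add32F0x4(t1,t2) by this
            scheme therefore emits, roughly,
               t4  = OrV128(t1#,t2#)         -- UifUV128
               t5  = V128to32(t4)
               t6  = 1Sto32(CmpNE32(t5,0))   -- PCast32
               t3# = SetV128lo32(t1#,t6)
            which is what binary32F0x4 below constructs.) */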
   1773 
   1774 static
   1775 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1776 {
   1777    IRAtom* at;
   1778    tl_assert(isShadowAtom(mce, vatomX));
   1779    tl_assert(isShadowAtom(mce, vatomY));
   1780    at = mkUifUV128(mce, vatomX, vatomY);
   1781    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   1782    return at;
   1783 }
   1784 
   1785 static
   1786 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
   1787 {
   1788    IRAtom* at;
   1789    tl_assert(isShadowAtom(mce, vatomX));
   1790    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   1791    return at;
   1792 }
   1793 
   1794 static
   1795 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1796 {
   1797    IRAtom* at;
   1798    tl_assert(isShadowAtom(mce, vatomX));
   1799    tl_assert(isShadowAtom(mce, vatomY));
   1800    at = mkUifUV128(mce, vatomX, vatomY);
   1801    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   1802    at = mkPCastTo(mce, Ity_I32, at);
   1803    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1804    return at;
   1805 }
   1806 
   1807 static
   1808 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
   1809 {
   1810    IRAtom* at;
   1811    tl_assert(isShadowAtom(mce, vatomX));
   1812    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   1813    at = mkPCastTo(mce, Ity_I32, at);
   1814    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1815    return at;
   1816 }
   1817 
   1818 /* --- ... and ... 64Fx2 versions of the same ... --- */
   1819 
   1820 static
   1821 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1822 {
   1823    IRAtom* at;
   1824    tl_assert(isShadowAtom(mce, vatomX));
   1825    tl_assert(isShadowAtom(mce, vatomY));
   1826    at = mkUifUV128(mce, vatomX, vatomY);
   1827    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   1828    return at;
   1829 }
   1830 
   1831 static
   1832 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1833 {
   1834    IRAtom* at;
   1835    tl_assert(isShadowAtom(mce, vatomX));
   1836    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   1837    return at;
   1838 }
   1839 
   1840 static
   1841 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1842 {
   1843    IRAtom* at;
   1844    tl_assert(isShadowAtom(mce, vatomX));
   1845    tl_assert(isShadowAtom(mce, vatomY));
   1846    at = mkUifUV128(mce, vatomX, vatomY);
   1847    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   1848    at = mkPCastTo(mce, Ity_I64, at);
   1849    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1850    return at;
   1851 }
   1852 
   1853 static
   1854 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
   1855 {
   1856    IRAtom* at;
   1857    tl_assert(isShadowAtom(mce, vatomX));
   1858    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   1859    at = mkPCastTo(mce, Ity_I64, at);
   1860    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1861    return at;
   1862 }
   1863 
   1864 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
   1865 
   1866 static
   1867 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1868 {
   1869    IRAtom* at;
   1870    tl_assert(isShadowAtom(mce, vatomX));
   1871    tl_assert(isShadowAtom(mce, vatomY));
   1872    at = mkUifU64(mce, vatomX, vatomY);
   1873    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   1874    return at;
   1875 }
   1876 
   1877 static
   1878 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1879 {
   1880    IRAtom* at;
   1881    tl_assert(isShadowAtom(mce, vatomX));
   1882    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   1883    return at;
   1884 }
   1885 
   1886 /* --- --- Vector saturated narrowing --- --- */
   1887 
   1888 /* This is quite subtle.  What to do is simple:
   1889 
   1890    Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
   1891 
   1892       the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
   1893 
    1894    Why this is right is not so simple.  Consider a lane in either of
    1895    the args, vatom1 or vatom2; it doesn't matter which.
   1896 
   1897    After the PCast, that lane is all 0s (defined) or all
    1898    1s (undefined).
   1899 
   1900    Both signed and unsigned saturating narrowing of all 0s produces
   1901    all 0s, which is what we want.
   1902 
   1903    The all-1s case is more complex.  Unsigned narrowing interprets an
   1904    all-1s input as the largest unsigned integer, and so produces all
   1905    1s as a result since that is the largest unsigned value at the
   1906    smaller width.
   1907 
   1908    Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   1909    to -1, so we still wind up with all 1s at the smaller width.
   1910 
    1911    In short: pessimise the args, then apply the original narrowing
    1912    op.
   1913 */
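         /* (Concrete check, illustrative, for 16->8 signed narrowing: a
            fully undefined 16-bit lane PCasts to 0xFFFF, i.e. -1, which
            saturates/narrows to -1 = 0xFF at 8 bits, so the whole
            narrowed lane stays marked undefined.  A fully defined lane
            PCasts to 0x0000 and narrows to 0x00: still all defined.) */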
   1914 static
   1915 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
   1916                           IRAtom* vatom1, IRAtom* vatom2)
   1917 {
   1918    IRAtom *at1, *at2, *at3;
   1919    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   1920    switch (narrow_op) {
   1921       case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
   1922       case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
   1923       case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
   1924       case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
   1925       default: VG_(tool_panic)("vectorNarrowV128");
   1926    }
   1927    tl_assert(isShadowAtom(mce,vatom1));
   1928    tl_assert(isShadowAtom(mce,vatom2));
   1929    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   1930    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   1931    at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
   1932    return at3;
   1933 }
   1934 
   1935 static
   1936 IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
   1937                          IRAtom* vatom1, IRAtom* vatom2)
   1938 {
   1939    IRAtom *at1, *at2, *at3;
   1940    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   1941    switch (narrow_op) {
   1942       case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
   1943       case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
   1944       case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
   1945       default: VG_(tool_panic)("vectorNarrow64");
   1946    }
   1947    tl_assert(isShadowAtom(mce,vatom1));
   1948    tl_assert(isShadowAtom(mce,vatom2));
   1949    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   1950    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   1951    at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
   1952    return at3;
   1953 }
   1954 
   1955 static
   1956 IRAtom* vectorShortenV128 ( MCEnv* mce, IROp shorten_op,
   1957                           IRAtom* vatom1)
   1958 {
   1959    IRAtom *at1, *at2;
   1960    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   1961    switch (shorten_op) {
   1962       case Iop_Shorten16x8: pcast = mkPCast16x8; break;
   1963       case Iop_Shorten32x4: pcast = mkPCast32x4; break;
   1964       case Iop_Shorten64x2: pcast = mkPCast64x2; break;
   1965       case Iop_QShortenS16Sx8: pcast = mkPCast16x8; break;
   1966       case Iop_QShortenU16Sx8: pcast = mkPCast16x8; break;
   1967       case Iop_QShortenU16Ux8: pcast = mkPCast16x8; break;
   1968       case Iop_QShortenS32Sx4: pcast = mkPCast32x4; break;
   1969       case Iop_QShortenU32Sx4: pcast = mkPCast32x4; break;
   1970       case Iop_QShortenU32Ux4: pcast = mkPCast32x4; break;
   1971       case Iop_QShortenS64Sx2: pcast = mkPCast64x2; break;
   1972       case Iop_QShortenU64Sx2: pcast = mkPCast64x2; break;
   1973       case Iop_QShortenU64Ux2: pcast = mkPCast64x2; break;
   1974       default: VG_(tool_panic)("vectorShortenV128");
   1975    }
   1976    tl_assert(isShadowAtom(mce,vatom1));
   1977    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   1978    at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
   1979    return at2;
   1980 }
   1981 
   1982 static
   1983 IRAtom* vectorLongenI64 ( MCEnv* mce, IROp longen_op,
   1984                            IRAtom* vatom1)
   1985 {
   1986    IRAtom *at1, *at2;
   1987    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   1988    switch (longen_op) {
   1989       case Iop_Longen8Ux8: pcast = mkPCast16x8; break;
   1990       case Iop_Longen8Sx8: pcast = mkPCast16x8; break;
   1991       case Iop_Longen16Ux4: pcast = mkPCast32x4; break;
   1992       case Iop_Longen16Sx4: pcast = mkPCast32x4; break;
   1993       case Iop_Longen32Ux2: pcast = mkPCast64x2; break;
   1994       case Iop_Longen32Sx2: pcast = mkPCast64x2; break;
   1995       default: VG_(tool_panic)("vectorLongenI64");
   1996    }
   1997    tl_assert(isShadowAtom(mce,vatom1));
   1998    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   1999    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   2000    return at2;
   2001 }
   2002 
   2003 
   2004 /* --- --- Vector integer arithmetic --- --- */
   2005 
   2006 /* Simple ... UifU the args and per-lane pessimise the results. */
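         /* (Illustrative: for, say, Add8x16, one undefined bit anywhere
            in a byte lane marks that whole byte lane of the sum as
            undefined, but never leaks into neighbouring lanes.) */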
   2007 
   2008 /* --- V128-bit versions --- */
   2009 
   2010 static
   2011 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2012 {
   2013    IRAtom* at;
   2014    at = mkUifUV128(mce, vatom1, vatom2);
   2015    at = mkPCast8x16(mce, at);
   2016    return at;
   2017 }
   2018 
   2019 static
   2020 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2021 {
   2022    IRAtom* at;
   2023    at = mkUifUV128(mce, vatom1, vatom2);
   2024    at = mkPCast16x8(mce, at);
   2025    return at;
   2026 }
   2027 
   2028 static
   2029 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2030 {
   2031    IRAtom* at;
   2032    at = mkUifUV128(mce, vatom1, vatom2);
   2033    at = mkPCast32x4(mce, at);
   2034    return at;
   2035 }
   2036 
   2037 static
   2038 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2039 {
   2040    IRAtom* at;
   2041    at = mkUifUV128(mce, vatom1, vatom2);
   2042    at = mkPCast64x2(mce, at);
   2043    return at;
   2044 }
   2045 
   2046 /* --- 64-bit versions --- */
   2047 
   2048 static
   2049 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2050 {
   2051    IRAtom* at;
   2052    at = mkUifU64(mce, vatom1, vatom2);
   2053    at = mkPCast8x8(mce, at);
   2054    return at;
   2055 }
   2056 
   2057 static
   2058 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2059 {
   2060    IRAtom* at;
   2061    at = mkUifU64(mce, vatom1, vatom2);
   2062    at = mkPCast16x4(mce, at);
   2063    return at;
   2064 }
   2065 
   2066 static
   2067 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2068 {
   2069    IRAtom* at;
   2070    at = mkUifU64(mce, vatom1, vatom2);
   2071    at = mkPCast32x2(mce, at);
   2072    return at;
   2073 }
   2074 
   2075 static
   2076 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2077 {
   2078    IRAtom* at;
   2079    at = mkUifU64(mce, vatom1, vatom2);
   2080    at = mkPCastTo(mce, Ity_I64, at);
   2081    return at;
   2082 }
   2083 
   2084 /* --- 32-bit versions --- */
   2085 
   2086 static
   2087 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2088 {
   2089    IRAtom* at;
   2090    at = mkUifU32(mce, vatom1, vatom2);
   2091    at = mkPCast8x4(mce, at);
   2092    return at;
   2093 }
   2094 
   2095 static
   2096 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2097 {
   2098    IRAtom* at;
   2099    at = mkUifU32(mce, vatom1, vatom2);
   2100    at = mkPCast16x2(mce, at);
   2101    return at;
   2102 }
   2103 
   2104 
   2105 /*------------------------------------------------------------*/
   2106 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
   2107 /*------------------------------------------------------------*/
   2108 
   2109 static
   2110 IRAtom* expr2vbits_Qop ( MCEnv* mce,
   2111                          IROp op,
   2112                          IRAtom* atom1, IRAtom* atom2,
   2113                          IRAtom* atom3, IRAtom* atom4 )
   2114 {
   2115    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2116    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2117    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2118    IRAtom* vatom4 = expr2vbits( mce, atom4 );
   2119 
   2120    tl_assert(isOriginalAtom(mce,atom1));
   2121    tl_assert(isOriginalAtom(mce,atom2));
   2122    tl_assert(isOriginalAtom(mce,atom3));
   2123    tl_assert(isOriginalAtom(mce,atom4));
   2124    tl_assert(isShadowAtom(mce,vatom1));
   2125    tl_assert(isShadowAtom(mce,vatom2));
   2126    tl_assert(isShadowAtom(mce,vatom3));
   2127    tl_assert(isShadowAtom(mce,vatom4));
   2128    tl_assert(sameKindedAtoms(atom1,vatom1));
   2129    tl_assert(sameKindedAtoms(atom2,vatom2));
   2130    tl_assert(sameKindedAtoms(atom3,vatom3));
   2131    tl_assert(sameKindedAtoms(atom4,vatom4));
   2132    switch (op) {
   2133       case Iop_MAddF64:
   2134       case Iop_MAddF64r32:
   2135       case Iop_MSubF64:
   2136       case Iop_MSubF64r32:
   2137          /* I32(rm) x F64 x F64 x F64 -> F64 */
   2138          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
   2139       default:
   2140          ppIROp(op);
   2141          VG_(tool_panic)("memcheck:expr2vbits_Qop");
   2142    }
   2143 }
   2144 
   2145 
   2146 static
   2147 IRAtom* expr2vbits_Triop ( MCEnv* mce,
   2148                            IROp op,
   2149                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
   2150 {
   2151    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2152    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2153    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2154 
   2155    tl_assert(isOriginalAtom(mce,atom1));
   2156    tl_assert(isOriginalAtom(mce,atom2));
   2157    tl_assert(isOriginalAtom(mce,atom3));
   2158    tl_assert(isShadowAtom(mce,vatom1));
   2159    tl_assert(isShadowAtom(mce,vatom2));
   2160    tl_assert(isShadowAtom(mce,vatom3));
   2161    tl_assert(sameKindedAtoms(atom1,vatom1));
   2162    tl_assert(sameKindedAtoms(atom2,vatom2));
   2163    tl_assert(sameKindedAtoms(atom3,vatom3));
   2164    switch (op) {
   2165       case Iop_AddF64:
   2166       case Iop_AddF64r32:
   2167       case Iop_SubF64:
   2168       case Iop_SubF64r32:
   2169       case Iop_MulF64:
   2170       case Iop_MulF64r32:
   2171       case Iop_DivF64:
   2172       case Iop_DivF64r32:
   2173       case Iop_ScaleF64:
   2174       case Iop_Yl2xF64:
   2175       case Iop_Yl2xp1F64:
   2176       case Iop_AtanF64:
   2177       case Iop_PRemF64:
   2178       case Iop_PRem1F64:
   2179          /* I32(rm) x F64 x F64 -> F64 */
   2180          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
   2181       case Iop_PRemC3210F64:
   2182       case Iop_PRem1C3210F64:
   2183          /* I32(rm) x F64 x F64 -> I32 */
   2184          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2185       case Iop_AddF32:
   2186       case Iop_SubF32:
   2187       case Iop_MulF32:
   2188       case Iop_DivF32:
    2189          /* I32(rm) x F32 x F32 -> F32 */
   2190          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2191       case Iop_ExtractV128:
   2192          complainIfUndefined(mce, atom3);
   2193          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
   2194       case Iop_Extract64:
   2195          complainIfUndefined(mce, atom3);
   2196          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
   2197       case Iop_SetElem8x8:
   2198       case Iop_SetElem16x4:
   2199       case Iop_SetElem32x2:
   2200          complainIfUndefined(mce, atom2);
   2201          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
   2202       default:
   2203          ppIROp(op);
   2204          VG_(tool_panic)("memcheck:expr2vbits_Triop");
   2205    }
   2206 }
   2207 
   2208 
   2209 static
   2210 IRAtom* expr2vbits_Binop ( MCEnv* mce,
   2211                            IROp op,
   2212                            IRAtom* atom1, IRAtom* atom2 )
   2213 {
   2214    IRType  and_or_ty;
   2215    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   2216    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   2217    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
   2218 
   2219    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2220    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2221 
   2222    tl_assert(isOriginalAtom(mce,atom1));
   2223    tl_assert(isOriginalAtom(mce,atom2));
   2224    tl_assert(isShadowAtom(mce,vatom1));
   2225    tl_assert(isShadowAtom(mce,vatom2));
   2226    tl_assert(sameKindedAtoms(atom1,vatom1));
   2227    tl_assert(sameKindedAtoms(atom2,vatom2));
   2228    switch (op) {
   2229 
   2230       /* 32-bit SIMD */
   2231 
   2232       case Iop_Add16x2:
   2233       case Iop_HAdd16Ux2:
   2234       case Iop_HAdd16Sx2:
   2235       case Iop_Sub16x2:
   2236       case Iop_HSub16Ux2:
   2237       case Iop_HSub16Sx2:
   2238       case Iop_QAdd16Sx2:
   2239       case Iop_QSub16Sx2:
   2240          return binary16Ix2(mce, vatom1, vatom2);
   2241 
   2242       case Iop_Add8x4:
   2243       case Iop_HAdd8Ux4:
   2244       case Iop_HAdd8Sx4:
   2245       case Iop_Sub8x4:
   2246       case Iop_HSub8Ux4:
   2247       case Iop_HSub8Sx4:
   2248       case Iop_QSub8Ux4:
   2249       case Iop_QAdd8Ux4:
   2250       case Iop_QSub8Sx4:
   2251       case Iop_QAdd8Sx4:
   2252          return binary8Ix4(mce, vatom1, vatom2);
   2253 
   2254       /* 64-bit SIMD */
   2255 
   2256       case Iop_ShrN8x8:
   2257       case Iop_ShrN16x4:
   2258       case Iop_ShrN32x2:
   2259       case Iop_SarN8x8:
   2260       case Iop_SarN16x4:
   2261       case Iop_SarN32x2:
   2262       case Iop_ShlN16x4:
   2263       case Iop_ShlN32x2:
   2264       case Iop_ShlN8x8:
   2265          /* Same scheme as with all other shifts. */
   2266          complainIfUndefined(mce, atom2);
   2267          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   2268 
   2269       case Iop_QNarrow32Sx2:
   2270       case Iop_QNarrow16Sx4:
   2271       case Iop_QNarrow16Ux4:
   2272          return vectorNarrow64(mce, op, vatom1, vatom2);
   2273 
   2274       case Iop_Min8Ux8:
   2275       case Iop_Min8Sx8:
   2276       case Iop_Max8Ux8:
   2277       case Iop_Max8Sx8:
   2278       case Iop_Avg8Ux8:
   2279       case Iop_QSub8Sx8:
   2280       case Iop_QSub8Ux8:
   2281       case Iop_Sub8x8:
   2282       case Iop_CmpGT8Sx8:
   2283       case Iop_CmpGT8Ux8:
   2284       case Iop_CmpEQ8x8:
   2285       case Iop_QAdd8Sx8:
   2286       case Iop_QAdd8Ux8:
   2287       case Iop_QSal8x8:
   2288       case Iop_QShl8x8:
   2289       case Iop_Add8x8:
   2290       case Iop_Mul8x8:
   2291       case Iop_PolynomialMul8x8:
   2292          return binary8Ix8(mce, vatom1, vatom2);
   2293 
   2294       case Iop_Min16Sx4:
   2295       case Iop_Min16Ux4:
   2296       case Iop_Max16Sx4:
   2297       case Iop_Max16Ux4:
   2298       case Iop_Avg16Ux4:
   2299       case Iop_QSub16Ux4:
   2300       case Iop_QSub16Sx4:
   2301       case Iop_Sub16x4:
   2302       case Iop_Mul16x4:
   2303       case Iop_MulHi16Sx4:
   2304       case Iop_MulHi16Ux4:
   2305       case Iop_CmpGT16Sx4:
   2306       case Iop_CmpGT16Ux4:
   2307       case Iop_CmpEQ16x4:
   2308       case Iop_QAdd16Sx4:
   2309       case Iop_QAdd16Ux4:
   2310       case Iop_QSal16x4:
   2311       case Iop_QShl16x4:
   2312       case Iop_Add16x4:
   2313       case Iop_QDMulHi16Sx4:
   2314       case Iop_QRDMulHi16Sx4:
   2315          return binary16Ix4(mce, vatom1, vatom2);
   2316 
   2317       case Iop_Sub32x2:
   2318       case Iop_Mul32x2:
   2319       case Iop_Max32Sx2:
   2320       case Iop_Max32Ux2:
   2321       case Iop_Min32Sx2:
   2322       case Iop_Min32Ux2:
   2323       case Iop_CmpGT32Sx2:
   2324       case Iop_CmpGT32Ux2:
   2325       case Iop_CmpEQ32x2:
   2326       case Iop_Add32x2:
   2327       case Iop_QAdd32Ux2:
   2328       case Iop_QAdd32Sx2:
   2329       case Iop_QSub32Ux2:
   2330       case Iop_QSub32Sx2:
   2331       case Iop_QSal32x2:
   2332       case Iop_QShl32x2:
   2333       case Iop_QDMulHi32Sx2:
   2334       case Iop_QRDMulHi32Sx2:
   2335          return binary32Ix2(mce, vatom1, vatom2);
   2336 
   2337       case Iop_QSub64Ux1:
   2338       case Iop_QSub64Sx1:
   2339       case Iop_QAdd64Ux1:
   2340       case Iop_QAdd64Sx1:
   2341       case Iop_QSal64x1:
   2342       case Iop_QShl64x1:
   2343       case Iop_Sal64x1:
   2344          return binary64Ix1(mce, vatom1, vatom2);
   2345 
   2346       case Iop_QShlN8Sx8:
   2347       case Iop_QShlN8x8:
   2348       case Iop_QSalN8x8:
   2349          complainIfUndefined(mce, atom2);
   2350          return mkPCast8x8(mce, vatom1);
   2351 
   2352       case Iop_QShlN16Sx4:
   2353       case Iop_QShlN16x4:
   2354       case Iop_QSalN16x4:
   2355          complainIfUndefined(mce, atom2);
   2356          return mkPCast16x4(mce, vatom1);
   2357 
   2358       case Iop_QShlN32Sx2:
   2359       case Iop_QShlN32x2:
   2360       case Iop_QSalN32x2:
   2361          complainIfUndefined(mce, atom2);
   2362          return mkPCast32x2(mce, vatom1);
   2363 
   2364       case Iop_QShlN64Sx1:
   2365       case Iop_QShlN64x1:
   2366       case Iop_QSalN64x1:
   2367          complainIfUndefined(mce, atom2);
    2368          return mkPCastTo(mce, Ity_I64, vatom1);
   2369 
   2370       case Iop_PwMax32Sx2:
   2371       case Iop_PwMax32Ux2:
   2372       case Iop_PwMin32Sx2:
   2373       case Iop_PwMin32Ux2:
   2374       case Iop_PwMax32Fx2:
   2375       case Iop_PwMin32Fx2:
   2376          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
   2377                      mkPCast32x2(mce, vatom2)));
   2378 
   2379       case Iop_PwMax16Sx4:
   2380       case Iop_PwMax16Ux4:
   2381       case Iop_PwMin16Sx4:
   2382       case Iop_PwMin16Ux4:
   2383          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
   2384                      mkPCast16x4(mce, vatom2)));
   2385 
   2386       case Iop_PwMax8Sx8:
   2387       case Iop_PwMax8Ux8:
   2388       case Iop_PwMin8Sx8:
   2389       case Iop_PwMin8Ux8:
   2390          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
   2391                      mkPCast8x8(mce, vatom2)));
   2392 
   2393       case Iop_PwAdd32x2:
   2394       case Iop_PwAdd32Fx2:
   2395          return mkPCast32x2(mce,
   2396                assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
   2397                      mkPCast32x2(mce, vatom2))));
   2398 
   2399       case Iop_PwAdd16x4:
   2400          return mkPCast16x4(mce,
   2401                assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
   2402                      mkPCast16x4(mce, vatom2))));
   2403 
   2404       case Iop_PwAdd8x8:
   2405          return mkPCast8x8(mce,
   2406                assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
   2407                      mkPCast8x8(mce, vatom2))));
   2408 
   2409       case Iop_Shl8x8:
   2410       case Iop_Shr8x8:
   2411       case Iop_Sar8x8:
   2412       case Iop_Sal8x8:
   2413          return mkUifU64(mce,
   2414                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2415                    mkPCast8x8(mce,vatom2)
   2416                 );
   2417 
   2418       case Iop_Shl16x4:
   2419       case Iop_Shr16x4:
   2420       case Iop_Sar16x4:
   2421       case Iop_Sal16x4:
   2422          return mkUifU64(mce,
   2423                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2424                    mkPCast16x4(mce,vatom2)
   2425                 );
   2426 
   2427       case Iop_Shl32x2:
   2428       case Iop_Shr32x2:
   2429       case Iop_Sar32x2:
   2430       case Iop_Sal32x2:
   2431          return mkUifU64(mce,
   2432                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2433                    mkPCast32x2(mce,vatom2)
   2434                 );
   2435 
   2436       /* 64-bit data-steering */
   2437       case Iop_InterleaveLO32x2:
   2438       case Iop_InterleaveLO16x4:
   2439       case Iop_InterleaveLO8x8:
   2440       case Iop_InterleaveHI32x2:
   2441       case Iop_InterleaveHI16x4:
   2442       case Iop_InterleaveHI8x8:
   2443       case Iop_CatOddLanes8x8:
   2444       case Iop_CatEvenLanes8x8:
   2445       case Iop_CatOddLanes16x4:
   2446       case Iop_CatEvenLanes16x4:
   2447       case Iop_InterleaveOddLanes8x8:
   2448       case Iop_InterleaveEvenLanes8x8:
   2449       case Iop_InterleaveOddLanes16x4:
   2450       case Iop_InterleaveEvenLanes16x4:
   2451          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   2452 
   2453       case Iop_GetElem8x8:
   2454          complainIfUndefined(mce, atom2);
   2455          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   2456       case Iop_GetElem16x4:
   2457          complainIfUndefined(mce, atom2);
   2458          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   2459       case Iop_GetElem32x2:
   2460          complainIfUndefined(mce, atom2);
   2461          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   2462 
   2463       /* Perm8x8: rearrange values in left arg using steering values
   2464         from right arg.  So rearrange the vbits in the same way but
   2465         pessimise wrt steering values. */
   2466       case Iop_Perm8x8:
   2467          return mkUifU64(
   2468                    mce,
   2469                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2470                    mkPCast8x8(mce, vatom2)
   2471                 );
   2472 
   2473       /* V128-bit SIMD */
   2474 
   2475       case Iop_ShrN8x16:
   2476       case Iop_ShrN16x8:
   2477       case Iop_ShrN32x4:
   2478       case Iop_ShrN64x2:
   2479       case Iop_SarN8x16:
   2480       case Iop_SarN16x8:
   2481       case Iop_SarN32x4:
   2482       case Iop_SarN64x2:
   2483       case Iop_ShlN8x16:
   2484       case Iop_ShlN16x8:
   2485       case Iop_ShlN32x4:
   2486       case Iop_ShlN64x2:
   2487          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
   2488             this is wrong now, scalar shifts are done properly lazily.
   2489             Vector shifts should be fixed too. */
   2490          complainIfUndefined(mce, atom2);
   2491          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   2492 
   2493       /* V x V shifts/rotates are done using the standard lazy scheme. */
   2494       case Iop_Shl8x16:
   2495       case Iop_Shr8x16:
   2496       case Iop_Sar8x16:
   2497       case Iop_Sal8x16:
   2498       case Iop_Rol8x16:
   2499          return mkUifUV128(mce,
   2500                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2501                    mkPCast8x16(mce,vatom2)
   2502                 );
   2503 
   2504       case Iop_Shl16x8:
   2505       case Iop_Shr16x8:
   2506       case Iop_Sar16x8:
   2507       case Iop_Sal16x8:
   2508       case Iop_Rol16x8:
   2509          return mkUifUV128(mce,
   2510                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2511                    mkPCast16x8(mce,vatom2)
   2512                 );
   2513 
   2514       case Iop_Shl32x4:
   2515       case Iop_Shr32x4:
   2516       case Iop_Sar32x4:
   2517       case Iop_Sal32x4:
   2518       case Iop_Rol32x4:
   2519          return mkUifUV128(mce,
   2520                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2521                    mkPCast32x4(mce,vatom2)
   2522                 );
   2523 
   2524       case Iop_Shl64x2:
   2525       case Iop_Shr64x2:
   2526       case Iop_Sar64x2:
   2527       case Iop_Sal64x2:
   2528          return mkUifUV128(mce,
   2529                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2530                    mkPCast64x2(mce,vatom2)
   2531                 );
   2532 
   2533       case Iop_F32ToFixed32Ux4_RZ:
   2534       case Iop_F32ToFixed32Sx4_RZ:
   2535       case Iop_Fixed32UToF32x4_RN:
   2536       case Iop_Fixed32SToF32x4_RN:
   2537          complainIfUndefined(mce, atom2);
   2538          return mkPCast32x4(mce, vatom1);
   2539 
   2540       case Iop_F32ToFixed32Ux2_RZ:
   2541       case Iop_F32ToFixed32Sx2_RZ:
   2542       case Iop_Fixed32UToF32x2_RN:
   2543       case Iop_Fixed32SToF32x2_RN:
   2544          complainIfUndefined(mce, atom2);
   2545          return mkPCast32x2(mce, vatom1);
   2546 
   2547       case Iop_QSub8Ux16:
   2548       case Iop_QSub8Sx16:
   2549       case Iop_Sub8x16:
   2550       case Iop_Min8Ux16:
   2551       case Iop_Min8Sx16:
   2552       case Iop_Max8Ux16:
   2553       case Iop_Max8Sx16:
   2554       case Iop_CmpGT8Sx16:
   2555       case Iop_CmpGT8Ux16:
   2556       case Iop_CmpEQ8x16:
   2557       case Iop_Avg8Ux16:
   2558       case Iop_Avg8Sx16:
   2559       case Iop_QAdd8Ux16:
   2560       case Iop_QAdd8Sx16:
   2561       case Iop_QSal8x16:
   2562       case Iop_QShl8x16:
   2563       case Iop_Add8x16:
   2564       case Iop_Mul8x16:
   2565       case Iop_PolynomialMul8x16:
   2566          return binary8Ix16(mce, vatom1, vatom2);
   2567 
   2568       case Iop_QSub16Ux8:
   2569       case Iop_QSub16Sx8:
   2570       case Iop_Sub16x8:
   2571       case Iop_Mul16x8:
   2572       case Iop_MulHi16Sx8:
   2573       case Iop_MulHi16Ux8:
   2574       case Iop_Min16Sx8:
   2575       case Iop_Min16Ux8:
   2576       case Iop_Max16Sx8:
   2577       case Iop_Max16Ux8:
   2578       case Iop_CmpGT16Sx8:
   2579       case Iop_CmpGT16Ux8:
   2580       case Iop_CmpEQ16x8:
   2581       case Iop_Avg16Ux8:
   2582       case Iop_Avg16Sx8:
   2583       case Iop_QAdd16Ux8:
   2584       case Iop_QAdd16Sx8:
   2585       case Iop_QSal16x8:
   2586       case Iop_QShl16x8:
   2587       case Iop_Add16x8:
   2588       case Iop_QDMulHi16Sx8:
   2589       case Iop_QRDMulHi16Sx8:
   2590          return binary16Ix8(mce, vatom1, vatom2);
   2591 
   2592       case Iop_Sub32x4:
   2593       case Iop_CmpGT32Sx4:
   2594       case Iop_CmpGT32Ux4:
   2595       case Iop_CmpEQ32x4:
   2596       case Iop_QAdd32Sx4:
   2597       case Iop_QAdd32Ux4:
   2598       case Iop_QSub32Sx4:
   2599       case Iop_QSub32Ux4:
   2600       case Iop_QSal32x4:
   2601       case Iop_QShl32x4:
   2602       case Iop_Avg32Ux4:
   2603       case Iop_Avg32Sx4:
   2604       case Iop_Add32x4:
   2605       case Iop_Max32Ux4:
   2606       case Iop_Max32Sx4:
   2607       case Iop_Min32Ux4:
   2608       case Iop_Min32Sx4:
   2609       case Iop_Mul32x4:
   2610       case Iop_QDMulHi32Sx4:
   2611       case Iop_QRDMulHi32Sx4:
   2612          return binary32Ix4(mce, vatom1, vatom2);
   2613 
   2614       case Iop_Sub64x2:
   2615       case Iop_Add64x2:
   2616       case Iop_CmpGT64Sx2:
   2617       case Iop_QSal64x2:
   2618       case Iop_QShl64x2:
   2619       case Iop_QAdd64Ux2:
   2620       case Iop_QAdd64Sx2:
   2621       case Iop_QSub64Ux2:
   2622       case Iop_QSub64Sx2:
   2623          return binary64Ix2(mce, vatom1, vatom2);
   2624 
   2625       case Iop_QNarrow32Sx4:
   2626       case Iop_QNarrow32Ux4:
   2627       case Iop_QNarrow16Sx8:
   2628       case Iop_QNarrow16Ux8:
   2629          return vectorNarrowV128(mce, op, vatom1, vatom2);
   2630 
   2631       case Iop_Sub64Fx2:
   2632       case Iop_Mul64Fx2:
   2633       case Iop_Min64Fx2:
   2634       case Iop_Max64Fx2:
   2635       case Iop_Div64Fx2:
   2636       case Iop_CmpLT64Fx2:
   2637       case Iop_CmpLE64Fx2:
   2638       case Iop_CmpEQ64Fx2:
   2639       case Iop_CmpUN64Fx2:
   2640       case Iop_Add64Fx2:
   2641          return binary64Fx2(mce, vatom1, vatom2);
   2642 
   2643       case Iop_Sub64F0x2:
   2644       case Iop_Mul64F0x2:
   2645       case Iop_Min64F0x2:
   2646       case Iop_Max64F0x2:
   2647       case Iop_Div64F0x2:
   2648       case Iop_CmpLT64F0x2:
   2649       case Iop_CmpLE64F0x2:
   2650       case Iop_CmpEQ64F0x2:
   2651       case Iop_CmpUN64F0x2:
   2652       case Iop_Add64F0x2:
   2653          return binary64F0x2(mce, vatom1, vatom2);
   2654 
   2655       case Iop_Sub32Fx4:
   2656       case Iop_Mul32Fx4:
   2657       case Iop_Min32Fx4:
   2658       case Iop_Max32Fx4:
   2659       case Iop_Div32Fx4:
   2660       case Iop_CmpLT32Fx4:
   2661       case Iop_CmpLE32Fx4:
   2662       case Iop_CmpEQ32Fx4:
   2663       case Iop_CmpUN32Fx4:
   2664       case Iop_CmpGT32Fx4:
   2665       case Iop_CmpGE32Fx4:
   2666       case Iop_Add32Fx4:
   2667       case Iop_Recps32Fx4:
   2668       case Iop_Rsqrts32Fx4:
   2669          return binary32Fx4(mce, vatom1, vatom2);
   2670 
   2671       case Iop_Sub32Fx2:
   2672       case Iop_Mul32Fx2:
   2673       case Iop_Min32Fx2:
   2674       case Iop_Max32Fx2:
   2675       case Iop_CmpEQ32Fx2:
   2676       case Iop_CmpGT32Fx2:
   2677       case Iop_CmpGE32Fx2:
   2678       case Iop_Add32Fx2:
   2679       case Iop_Recps32Fx2:
   2680       case Iop_Rsqrts32Fx2:
   2681          return binary32Fx2(mce, vatom1, vatom2);
   2682 
   2683       case Iop_Sub32F0x4:
   2684       case Iop_Mul32F0x4:
   2685       case Iop_Min32F0x4:
   2686       case Iop_Max32F0x4:
   2687       case Iop_Div32F0x4:
   2688       case Iop_CmpLT32F0x4:
   2689       case Iop_CmpLE32F0x4:
   2690       case Iop_CmpEQ32F0x4:
   2691       case Iop_CmpUN32F0x4:
   2692       case Iop_Add32F0x4:
   2693          return binary32F0x4(mce, vatom1, vatom2);
   2694 
   2695       case Iop_QShlN8Sx16:
   2696       case Iop_QShlN8x16:
   2697       case Iop_QSalN8x16:
   2698          complainIfUndefined(mce, atom2);
   2699          return mkPCast8x16(mce, vatom1);
   2700 
   2701       case Iop_QShlN16Sx8:
   2702       case Iop_QShlN16x8:
   2703       case Iop_QSalN16x8:
   2704          complainIfUndefined(mce, atom2);
   2705          return mkPCast16x8(mce, vatom1);
   2706 
   2707       case Iop_QShlN32Sx4:
   2708       case Iop_QShlN32x4:
   2709       case Iop_QSalN32x4:
   2710          complainIfUndefined(mce, atom2);
   2711          return mkPCast32x4(mce, vatom1);
   2712 
   2713       case Iop_QShlN64Sx2:
   2714       case Iop_QShlN64x2:
   2715       case Iop_QSalN64x2:
   2716          complainIfUndefined(mce, atom2);
    2717          return mkPCast64x2(mce, vatom1);
   2718 
   2719       case Iop_Mull32Sx2:
   2720       case Iop_Mull32Ux2:
   2721       case Iop_QDMulLong32Sx2:
   2722          return vectorLongenI64(mce, Iop_Longen32Sx2,
   2723                mkUifU64(mce, vatom1, vatom2));
   2724 
   2725       case Iop_Mull16Sx4:
   2726       case Iop_Mull16Ux4:
   2727       case Iop_QDMulLong16Sx4:
   2728          return vectorLongenI64(mce, Iop_Longen16Sx4,
   2729                mkUifU64(mce, vatom1, vatom2));
   2730 
   2731       case Iop_Mull8Sx8:
   2732       case Iop_Mull8Ux8:
   2733       case Iop_PolynomialMull8x8:
   2734          return vectorLongenI64(mce, Iop_Longen8Sx8,
   2735                mkUifU64(mce, vatom1, vatom2));
   2736 
   2737       case Iop_PwAdd32x4:
   2738          return mkPCast32x4(mce,
   2739                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
   2740                      mkPCast32x4(mce, vatom2))));
   2741 
   2742       case Iop_PwAdd16x8:
   2743          return mkPCast16x8(mce,
   2744                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
   2745                      mkPCast16x8(mce, vatom2))));
   2746 
   2747       case Iop_PwAdd8x16:
   2748          return mkPCast8x16(mce,
   2749                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
   2750                      mkPCast8x16(mce, vatom2))));
   2751 
   2752       /* V128-bit data-steering */
   2753       case Iop_SetV128lo32:
   2754       case Iop_SetV128lo64:
   2755       case Iop_64HLtoV128:
   2756       case Iop_InterleaveLO64x2:
   2757       case Iop_InterleaveLO32x4:
   2758       case Iop_InterleaveLO16x8:
   2759       case Iop_InterleaveLO8x16:
   2760       case Iop_InterleaveHI64x2:
   2761       case Iop_InterleaveHI32x4:
   2762       case Iop_InterleaveHI16x8:
   2763       case Iop_InterleaveHI8x16:
   2764       case Iop_CatOddLanes8x16:
   2765       case Iop_CatOddLanes16x8:
   2766       case Iop_CatOddLanes32x4:
   2767       case Iop_CatEvenLanes8x16:
   2768       case Iop_CatEvenLanes16x8:
   2769       case Iop_CatEvenLanes32x4:
   2770       case Iop_InterleaveOddLanes8x16:
   2771       case Iop_InterleaveOddLanes16x8:
   2772       case Iop_InterleaveOddLanes32x4:
   2773       case Iop_InterleaveEvenLanes8x16:
   2774       case Iop_InterleaveEvenLanes16x8:
   2775       case Iop_InterleaveEvenLanes32x4:
   2776          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
   2777 
   2778       case Iop_GetElem8x16:
   2779          complainIfUndefined(mce, atom2);
   2780          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   2781       case Iop_GetElem16x8:
   2782          complainIfUndefined(mce, atom2);
   2783          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   2784       case Iop_GetElem32x4:
   2785          complainIfUndefined(mce, atom2);
   2786          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   2787       case Iop_GetElem64x2:
   2788          complainIfUndefined(mce, atom2);
   2789          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   2790 
   2791      /* Perm8x16: rearrange values in left arg using steering values
   2792         from right arg.  So rearrange the vbits in the same way but
   2793         pessimise wrt steering values. */
   2794       case Iop_Perm8x16:
   2795          return mkUifUV128(
   2796                    mce,
   2797                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2798                    mkPCast8x16(mce, vatom2)
   2799                 );
   2800 
    2801      /* These two take the lower half of each 32-bit lane, sign/zero
   2802         extend it to 32, and multiply together, producing a 32x4
   2803         result (and implicitly ignoring half the operand bits).  So
   2804         treat it as a bunch of independent 16x8 operations, but then
   2805         do 32-bit shifts left-right to copy the lower half results
   2806         (which are all 0s or all 1s due to PCasting in binary16Ix8)
   2807         into the upper half of each result lane. */
   2808       case Iop_MullEven16Ux8:
   2809       case Iop_MullEven16Sx8: {
   2810          IRAtom* at;
   2811          at = binary16Ix8(mce,vatom1,vatom2);
   2812          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
   2813          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
    2814          return at;
   2815       }
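               /* (Illustrative: if the low 16 bits of a 32-bit result
                  lane are 0xFFFF after binary16Ix8, ShlN32x4 by 16 gives
                  0xFFFF0000 and the arithmetic SarN32x4 by 16 gives
                  0xFFFFFFFF, marking the whole widened lane undefined;
                  an all-0s half likewise stays all 0s.) */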
   2816 
   2817       /* Same deal as Iop_MullEven16{S,U}x8 */
   2818       case Iop_MullEven8Ux16:
   2819       case Iop_MullEven8Sx16: {
   2820          IRAtom* at;
   2821          at = binary8Ix16(mce,vatom1,vatom2);
   2822          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
   2823          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
   2824          return at;
   2825       }
   2826 
   2827       /* Narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
   2828          32x4 -> 16x8 laneage, discarding the upper half of each lane.
   2829          Simply apply the same op to the V bits, since this is really
   2830          no more than a data-steering operation. */
   2831       case Iop_Narrow32x4:
   2832       case Iop_Narrow16x8:
   2833          return assignNew('V', mce, Ity_V128,
   2834                                     binop(op, vatom1, vatom2));
   2835 
   2836       case Iop_ShrV128:
   2837       case Iop_ShlV128:
   2838          /* Same scheme as with all other shifts.  Note (10 Nov 05):
   2839             this is now wrong, since scalar shifts are done properly
   2840             (lazily); vector shifts should be fixed the same way. */
   2841          complainIfUndefined(mce, atom2);
   2842          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   2843 
   2844       /* I128-bit data-steering */
   2845       case Iop_64HLto128:
   2846          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
   2847 
   2848       /* Scalar floating point */
   2849 
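              /* For all of these, be fully lazy: mkLazy2 (roughly) PCasts
                 each operand's vbits to the result type and UifUs them, so
                 a single undefined bit in either the rounding mode or the
                 data makes the entire result undefined. */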
   2850       case Iop_RoundF64toInt:
   2851       case Iop_RoundF64toF32:
   2852       case Iop_F64toI64S:
   2853       case Iop_I64StoF64:
   2854       case Iop_SinF64:
   2855       case Iop_CosF64:
   2856       case Iop_TanF64:
   2857       case Iop_2xm1F64:
   2858       case Iop_SqrtF64:
   2859          /* I32(rm) x I64/F64 -> I64/F64 */
   2860          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   2861 
   2862       case Iop_RoundF32toInt:
   2863       case Iop_SqrtF32:
   2864          /* I32(rm) x I32/F32 -> I32/F32 */
   2865          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   2866 
   2867       case Iop_F64toI32U:
   2868       case Iop_F64toI32S:
   2869       case Iop_F64toF32:
   2870          /* First arg is I32 (rounding mode), second is F64 (data). */
   2871          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   2872 
   2873       case Iop_F64toI16S:
   2874          /* First arg is I32 (rounding mode), second is F64 (data). */
   2875          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
   2876 
   2877       case Iop_CmpF64:
   2878          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   2879 
   2880       /* non-FP after here */
   2881 
   2882       case Iop_DivModU64to32:
   2883       case Iop_DivModS64to32:
   2884          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   2885 
   2886       case Iop_DivModU128to64:
   2887       case Iop_DivModS128to64:
   2888          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   2889 
   2890       case Iop_16HLto32:
   2891          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
   2892       case Iop_32HLto64:
   2893          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   2894 
   2895       case Iop_MullS64:
   2896       case Iop_MullU64: {
   2897          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   2898          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
   2899          return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
   2900       }
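              /* Sketch of the scheme above: the low half gets the usual
                 left-propagation of the UifU'd operand vbits (as for
                 Add/Mul), and the high half is the PCast of that, so any
                 undefined operand bit pessimistically trashes the whole
                 high half. */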
   2901 
   2902       case Iop_MullS32:
   2903       case Iop_MullU32: {
   2904          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   2905          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
   2906          return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
   2907       }
   2908 
   2909       case Iop_MullS16:
   2910       case Iop_MullU16: {
   2911          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   2912          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
   2913          return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
   2914       }
   2915 
   2916       case Iop_MullS8:
   2917       case Iop_MullU8: {
   2918          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   2919          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
   2920          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
   2921       }
   2922 
   2923       case Iop_Sad8Ux4: /* maybe we could do better?  For the moment, do mkLazy2. */
   2924       case Iop_DivS32:
   2925       case Iop_DivU32:
   2926          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   2927 
   2928       case Iop_DivS64:
   2929       case Iop_DivU64:
   2930          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   2931 
   2932       case Iop_Add32:
   2933          if (mce->bogusLiterals)
   2934             return expensiveAddSub(mce,True,Ity_I32,
   2935                                    vatom1,vatom2, atom1,atom2);
   2936          else
   2937             goto cheap_AddSub32;
   2938       case Iop_Sub32:
   2939          if (mce->bogusLiterals)
   2940             return expensiveAddSub(mce,False,Ity_I32,
   2941                                    vatom1,vatom2, atom1,atom2);
   2942          else
   2943             goto cheap_AddSub32;
   2944 
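              /* The cheap scheme models carry/borrow propagation towards
                 the MSB only: mkLeft32 computes (v | -v), so e.g. (sketch)
                 vbits 0x00000100 (bit 8 undefined) become 0xFFFFFF00
                 (bits 8..31 undefined). */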
   2945       cheap_AddSub32:
   2946       case Iop_Mul32:
   2947          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   2948 
   2949       case Iop_CmpORD32S:
   2950       case Iop_CmpORD32U:
   2951       case Iop_CmpORD64S:
   2952       case Iop_CmpORD64U:
   2953          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
   2954 
   2955       case Iop_Add64:
   2956          if (mce->bogusLiterals)
   2957             return expensiveAddSub(mce,True,Ity_I64,
   2958                                    vatom1,vatom2, atom1,atom2);
   2959          else
   2960             goto cheap_AddSub64;
   2961       case Iop_Sub64:
   2962          if (mce->bogusLiterals)
   2963             return expensiveAddSub(mce,False,Ity_I64,
   2964                                    vatom1,vatom2, atom1,atom2);
   2965          else
   2966             goto cheap_AddSub64;
   2967 
   2968       cheap_AddSub64:
   2969       case Iop_Mul64:
   2970          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   2971 
   2972       case Iop_Mul16:
   2973       case Iop_Add16:
   2974       case Iop_Sub16:
   2975          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   2976 
   2977       case Iop_Sub8:
   2978       case Iop_Add8:
   2979          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   2980 
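              /* Equality comparisons: if the block contains suspicious
                 ('bogus') literals, use the more precise scheme in
                 expensiveCmpEQorNE; otherwise pessimistically PCast the
                 UifU of the operand vbits down to I1, so a single undefined
                 input bit makes the whole result undefined. */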
   2981       case Iop_CmpEQ64:
   2982       case Iop_CmpNE64:
   2983          if (mce->bogusLiterals)
   2984             return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
   2985          else
   2986             goto cheap_cmp64;
   2987       cheap_cmp64:
   2988       case Iop_CmpLE64S: case Iop_CmpLE64U:
   2989       case Iop_CmpLT64U: case Iop_CmpLT64S:
   2990          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
   2991 
   2992       case Iop_CmpEQ32:
   2993       case Iop_CmpNE32:
   2994          if (mce->bogusLiterals)
   2995             return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
   2996          else
   2997             goto cheap_cmp32;
   2998       cheap_cmp32:
   2999       case Iop_CmpLE32S: case Iop_CmpLE32U:
   3000       case Iop_CmpLT32U: case Iop_CmpLT32S:
   3001          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
   3002 
   3003       case Iop_CmpEQ16: case Iop_CmpNE16:
   3004          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
   3005 
   3006       case Iop_CmpEQ8: case Iop_CmpNE8:
   3007          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
   3008 
   3009       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   3010       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   3011       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   3012       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   3013          /* Just say these all produce a defined result, regardless
   3014             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
   3015          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
   3016 
   3017       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
   3018          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
   3019 
   3020       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
   3021          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
   3022 
   3023       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
   3024          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
   3025 
   3026       case Iop_Shl8: case Iop_Shr8:
   3027          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
   3028 
   3029       case Iop_AndV128:
   3030          uifu = mkUifUV128; difd = mkDifDV128;
   3031          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
   3032       case Iop_And64:
   3033          uifu = mkUifU64; difd = mkDifD64;
   3034          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
   3035       case Iop_And32:
   3036          uifu = mkUifU32; difd = mkDifD32;
   3037          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
   3038       case Iop_And16:
   3039          uifu = mkUifU16; difd = mkDifD16;
   3040          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
   3041       case Iop_And8:
   3042          uifu = mkUifU8; difd = mkDifD8;
   3043          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
   3044 
   3045       case Iop_OrV128:
   3046          uifu = mkUifUV128; difd = mkDifDV128;
   3047          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
   3048       case Iop_Or64:
   3049          uifu = mkUifU64; difd = mkDifD64;
   3050          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
   3051       case Iop_Or32:
   3052          uifu = mkUifU32; difd = mkDifD32;
   3053          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
   3054       case Iop_Or16:
   3055          uifu = mkUifU16; difd = mkDifD16;
   3056          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
   3057       case Iop_Or8:
   3058          uifu = mkUifU8; difd = mkDifD8;
   3059          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
   3060 
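              /* And/Or can be improved beyond plain UifU: a defined 0
                 AND anything is a defined 0, and a defined 1 OR anything
                 is a defined 1.  For AND the scheme below computes
                 (v1 | v2) & (a1 | v1) & (a2 | v2); worked example (8-bit):
                 a1 = 0x00 defined (v1 = 0x00) against a fully undefined
                 operand (v2 = 0xFF) yields 0x00, i.e. fully defined. */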
   3061       do_And_Or:
   3062          return
   3063          assignNew(
   3064             'V', mce,
   3065             and_or_ty,
   3066             difd(mce, uifu(mce, vatom1, vatom2),
   3067                       difd(mce, improve(mce, atom1, vatom1),
   3068                                 improve(mce, atom2, vatom2) ) ) );
   3069 
   3070       case Iop_Xor8:
   3071          return mkUifU8(mce, vatom1, vatom2);
   3072       case Iop_Xor16:
   3073          return mkUifU16(mce, vatom1, vatom2);
   3074       case Iop_Xor32:
   3075          return mkUifU32(mce, vatom1, vatom2);
   3076       case Iop_Xor64:
   3077          return mkUifU64(mce, vatom1, vatom2);
   3078       case Iop_XorV128:
   3079          return mkUifUV128(mce, vatom1, vatom2);
   3080 
   3081       default:
   3082          ppIROp(op);
   3083          VG_(tool_panic)("memcheck:expr2vbits_Binop");
   3084    }
   3085 }
   3086 
   3087 
   3088 static
   3089 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
   3090 {
   3091    IRAtom* vatom = expr2vbits( mce, atom );
   3092    tl_assert(isOriginalAtom(mce,atom));
   3093    switch (op) {
   3094 
   3095       case Iop_Sqrt64Fx2:
   3096          return unary64Fx2(mce, vatom);
   3097 
   3098       case Iop_Sqrt64F0x2:
   3099          return unary64F0x2(mce, vatom);
   3100 
   3101       case Iop_Sqrt32Fx4:
   3102       case Iop_RSqrt32Fx4:
   3103       case Iop_Recip32Fx4:
   3104       case Iop_I32UtoFx4:
   3105       case Iop_I32StoFx4:
   3106       case Iop_QFtoI32Ux4_RZ:
   3107       case Iop_QFtoI32Sx4_RZ:
   3108       case Iop_RoundF32x4_RM:
   3109       case Iop_RoundF32x4_RP:
   3110       case Iop_RoundF32x4_RN:
   3111       case Iop_RoundF32x4_RZ:
   3112       case Iop_Recip32x4:
   3113       case Iop_Abs32Fx4:
   3114       case Iop_Neg32Fx4:
   3115       case Iop_Rsqrte32Fx4:
   3116          return unary32Fx4(mce, vatom);
   3117 
   3118       case Iop_I32UtoFx2:
   3119       case Iop_I32StoFx2:
   3120       case Iop_Recip32Fx2:
   3121       case Iop_Recip32x2:
   3122       case Iop_Abs32Fx2:
   3123       case Iop_Neg32Fx2:
   3124       case Iop_Rsqrte32Fx2:
   3125          return unary32Fx2(mce, vatom);
   3126 
   3127       case Iop_Sqrt32F0x4:
   3128       case Iop_RSqrt32F0x4:
   3129       case Iop_Recip32F0x4:
   3130          return unary32F0x4(mce, vatom);
   3131 
   3132       case Iop_32UtoV128:
   3133       case Iop_64UtoV128:
   3134       case Iop_Dup8x16:
   3135       case Iop_Dup16x8:
   3136       case Iop_Dup32x4:
   3137       case Iop_Reverse16_8x16:
   3138       case Iop_Reverse32_8x16:
   3139       case Iop_Reverse32_16x8:
   3140       case Iop_Reverse64_8x16:
   3141       case Iop_Reverse64_16x8:
   3142       case Iop_Reverse64_32x4:
   3143          return assignNew('V', mce, Ity_V128, unop(op, vatom));
   3144 
   3145       case Iop_F32toF64:
   3146       case Iop_I32StoF64:
   3147       case Iop_I32UtoF64:
   3148       case Iop_NegF64:
   3149       case Iop_AbsF64:
   3150       case Iop_Est5FRSqrt:
   3151       case Iop_RoundF64toF64_NEAREST:
   3152       case Iop_RoundF64toF64_NegINF:
   3153       case Iop_RoundF64toF64_PosINF:
   3154       case Iop_RoundF64toF64_ZERO:
   3155       case Iop_Clz64:
   3156       case Iop_Ctz64:
   3157          return mkPCastTo(mce, Ity_I64, vatom);
   3158 
   3159       case Iop_Clz32:
   3160       case Iop_Ctz32:
   3161       case Iop_TruncF64asF32:
   3162       case Iop_NegF32:
   3163       case Iop_AbsF32:
   3164          return mkPCastTo(mce, Ity_I32, vatom);
   3165 
   3166       case Iop_1Uto64:
   3167       case Iop_8Uto64:
   3168       case Iop_8Sto64:
   3169       case Iop_16Uto64:
   3170       case Iop_16Sto64:
   3171       case Iop_32Sto64:
   3172       case Iop_32Uto64:
   3173       case Iop_V128to64:
   3174       case Iop_V128HIto64:
   3175       case Iop_128HIto64:
   3176       case Iop_128to64:
   3177       case Iop_Dup8x8:
   3178       case Iop_Dup16x4:
   3179       case Iop_Dup32x2:
   3180       case Iop_Reverse16_8x8:
   3181       case Iop_Reverse32_8x8:
   3182       case Iop_Reverse32_16x4:
   3183       case Iop_Reverse64_8x8:
   3184       case Iop_Reverse64_16x4:
   3185       case Iop_Reverse64_32x2:
   3186          return assignNew('V', mce, Ity_I64, unop(op, vatom));
   3187 
   3188       case Iop_64to32:
   3189       case Iop_64HIto32:
   3190       case Iop_1Uto32:
   3191       case Iop_1Sto32:
   3192       case Iop_8Uto32:
   3193       case Iop_16Uto32:
   3194       case Iop_16Sto32:
   3195       case Iop_8Sto32:
   3196       case Iop_V128to32:
   3197          return assignNew('V', mce, Ity_I32, unop(op, vatom));
   3198 
   3199       case Iop_8Sto16:
   3200       case Iop_8Uto16:
   3201       case Iop_32to16:
   3202       case Iop_32HIto16:
   3203       case Iop_64to16:
   3204          return assignNew('V', mce, Ity_I16, unop(op, vatom));
   3205 
   3206       case Iop_1Uto8:
   3207       case Iop_16to8:
   3208       case Iop_16HIto8:
   3209       case Iop_32to8:
   3210       case Iop_64to8:
   3211          return assignNew('V', mce, Ity_I8, unop(op, vatom));
   3212 
   3213       case Iop_32to1:
   3214          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
   3215 
   3216       case Iop_64to1:
   3217          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
   3218 
   3219       case Iop_ReinterpF64asI64:
   3220       case Iop_ReinterpI64asF64:
   3221       case Iop_ReinterpI32asF32:
   3222       case Iop_ReinterpF32asI32:
   3223       case Iop_NotV128:
   3224       case Iop_Not64:
   3225       case Iop_Not32:
   3226       case Iop_Not16:
   3227       case Iop_Not8:
   3228       case Iop_Not1:
   3229          return vatom;
   3230 
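              /* The following are lanewise: each output lane depends only
                 on the corresponding input lane, so PCast within each
                 lane.  E.g. (sketch, 8x8): a lane with vbits 0x01 becomes
                 0xFF (wholly undefined); a lane with vbits 0x00 stays 0x00
                 (wholly defined). */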
   3231       case Iop_CmpNEZ8x8:
   3232       case Iop_Cnt8x8:
   3233       case Iop_Clz8Sx8:
   3234       case Iop_Cls8Sx8:
   3235       case Iop_Abs8x8:
   3236          return mkPCast8x8(mce, vatom);
   3237 
   3238       case Iop_CmpNEZ8x16:
   3239       case Iop_Cnt8x16:
   3240       case Iop_Clz8Sx16:
   3241       case Iop_Cls8Sx16:
   3242       case Iop_Abs8x16:
   3243          return mkPCast8x16(mce, vatom);
   3244 
   3245       case Iop_CmpNEZ16x4:
   3246       case Iop_Clz16Sx4:
   3247       case Iop_Cls16Sx4:
   3248       case Iop_Abs16x4:
   3249          return mkPCast16x4(mce, vatom);
   3250 
   3251       case Iop_CmpNEZ16x8:
   3252       case Iop_Clz16Sx8:
   3253       case Iop_Cls16Sx8:
   3254       case Iop_Abs16x8:
   3255          return mkPCast16x8(mce, vatom);
   3256 
   3257       case Iop_CmpNEZ32x2:
   3258       case Iop_Clz32Sx2:
   3259       case Iop_Cls32Sx2:
   3260       case Iop_FtoI32Ux2_RZ:
   3261       case Iop_FtoI32Sx2_RZ:
   3262       case Iop_Abs32x2:
   3263          return mkPCast32x2(mce, vatom);
   3264 
   3265       case Iop_CmpNEZ32x4:
   3266       case Iop_Clz32Sx4:
   3267       case Iop_Cls32Sx4:
   3268       case Iop_FtoI32Ux4_RZ:
   3269       case Iop_FtoI32Sx4_RZ:
   3270       case Iop_Abs32x4:
   3271          return mkPCast32x4(mce, vatom);
   3272 
   3273       case Iop_CmpwNEZ64:
   3274          return mkPCastTo(mce, Ity_I64, vatom);
   3275 
   3276       case Iop_CmpNEZ64x2:
   3277          return mkPCast64x2(mce, vatom);
   3278 
   3279       case Iop_Shorten16x8:
   3280       case Iop_Shorten32x4:
   3281       case Iop_Shorten64x2:
   3282       case Iop_QShortenS16Sx8:
   3283       case Iop_QShortenU16Sx8:
   3284       case Iop_QShortenU16Ux8:
   3285       case Iop_QShortenS32Sx4:
   3286       case Iop_QShortenU32Sx4:
   3287       case Iop_QShortenU32Ux4:
   3288       case Iop_QShortenS64Sx2:
   3289       case Iop_QShortenU64Sx2:
   3290       case Iop_QShortenU64Ux2:
   3291          return vectorShortenV128(mce, op, vatom);
   3292 
   3293       case Iop_Longen8Sx8:
   3294       case Iop_Longen8Ux8:
   3295       case Iop_Longen16Sx4:
   3296       case Iop_Longen16Ux4:
   3297       case Iop_Longen32Sx2:
   3298       case Iop_Longen32Ux2:
   3299          return vectorLongenI64(mce, op, vatom);
   3300 
   3301       case Iop_PwAddL32Ux2:
   3302       case Iop_PwAddL32Sx2:
   3303          return mkPCastTo(mce, Ity_I64,
   3304                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
   3305 
   3306       case Iop_PwAddL16Ux4:
   3307       case Iop_PwAddL16Sx4:
   3308          return mkPCast32x2(mce,
   3309                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
   3310 
   3311       case Iop_PwAddL8Ux8:
   3312       case Iop_PwAddL8Sx8:
   3313          return mkPCast16x4(mce,
   3314                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
   3315 
   3316       case Iop_PwAddL32Ux4:
   3317       case Iop_PwAddL32Sx4:
   3318          return mkPCast64x2(mce,
   3319                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
   3320 
   3321       case Iop_PwAddL16Ux8:
   3322       case Iop_PwAddL16Sx8:
   3323          return mkPCast32x4(mce,
   3324                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
   3325 
   3326       case Iop_PwAddL8Ux16:
   3327       case Iop_PwAddL8Sx16:
   3328          return mkPCast16x8(mce,
   3329                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
   3330 
   3331       default:
   3332          ppIROp(op);
   3333          VG_(tool_panic)("memcheck:expr2vbits_Unop");
   3334    }
   3335 }
   3336 
   3337 
   3338 /* Worker function; do not call directly. */
   3339 static
   3340 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
   3341                               IREndness end, IRType ty,
   3342                               IRAtom* addr, UInt bias )
   3343 {
   3344    void*    helper;
   3345    Char*    hname;
   3346    IRDirty* di;
   3347    IRTemp   datavbits;
   3348    IRAtom*  addrAct;
   3349 
   3350    tl_assert(isOriginalAtom(mce,addr));
   3351    tl_assert(end == Iend_LE || end == Iend_BE);
   3352 
   3353    /* First, emit a definedness test for the address.  This also sets
   3354       the address (shadow) to 'defined' following the test. */
   3355    complainIfUndefined( mce, addr );
   3356 
   3357    /* Now cook up a call to the relevant helper function, to read the
   3358       data V bits from shadow memory. */
   3359    ty = shadowTypeV(ty);
   3360 
   3361    if (end == Iend_LE) {
   3362       switch (ty) {
   3363          case Ity_I64: helper = &MC_(helperc_LOADV64le);
   3364                        hname = "MC_(helperc_LOADV64le)";
   3365                        break;
   3366          case Ity_I32: helper = &MC_(helperc_LOADV32le);
   3367                        hname = "MC_(helperc_LOADV32le)";
   3368                        break;
   3369          case Ity_I16: helper = &MC_(helperc_LOADV16le);
   3370                        hname = "MC_(helperc_LOADV16le)";
   3371                        break;
   3372          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3373                        hname = "MC_(helperc_LOADV8)";
   3374                        break;
   3375          default:      ppIRType(ty);
   3376                        VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
   3377       }
   3378    } else {
   3379       switch (ty) {
   3380          case Ity_I64: helper = &MC_(helperc_LOADV64be);
   3381                        hname = "MC_(helperc_LOADV64be)";
   3382                        break;
   3383          case Ity_I32: helper = &MC_(helperc_LOADV32be);
   3384                        hname = "MC_(helperc_LOADV32be)";
   3385                        break;
   3386          case Ity_I16: helper = &MC_(helperc_LOADV16be);
   3387                        hname = "MC_(helperc_LOADV16be)";
   3388                        break;
   3389          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3390                        hname = "MC_(helperc_LOADV8)";
   3391                        break;
   3392          default:      ppIRType(ty);
   3393                        VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
   3394       }
   3395    }
   3396 
   3397    /* Generate the actual address into addrAct. */
   3398    if (bias == 0) {
   3399       addrAct = addr;
   3400    } else {
   3401       IROp    mkAdd;
   3402       IRAtom* eBias;
   3403       IRType  tyAddr  = mce->hWordTy;
   3404       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   3405       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   3406       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   3407       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   3408    }
   3409 
   3410    /* We need to have a place to park the V bits we're just about to
   3411       read. */
   3412    datavbits = newTemp(mce, ty, VSh);
   3413    di = unsafeIRDirty_1_N( datavbits,
   3414                            1/*regparms*/,
   3415                            hname, VG_(fnptr_to_fnentry)( helper ),
   3416                            mkIRExprVec_1( addrAct ));
   3417    setHelperAnns( mce, di );
   3418    stmt( 'V', mce, IRStmt_Dirty(di) );
   3419 
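           /* Sketch of the shadow IR emitted above for, say, a 32-bit LE
              load (temp name illustrative only):
                 t_vbits = MC_(helperc_LOADV32le)(addrAct)   [via IRDirty]
              so t_vbits holds the V bits of the loaded data. */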
   3420    return mkexpr(datavbits);
   3421 }
   3422 
   3423 
   3424 static
   3425 IRAtom* expr2vbits_Load ( MCEnv* mce,
   3426                           IREndness end, IRType ty,
   3427                           IRAtom* addr, UInt bias )
   3428 {
   3429    IRAtom *v64hi, *v64lo;
   3430    tl_assert(end == Iend_LE || end == Iend_BE);
   3431    switch (shadowTypeV(ty)) {
   3432       case Ity_I8:
   3433       case Ity_I16:
   3434       case Ity_I32:
   3435       case Ity_I64:
   3436          return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
   3437       case Ity_V128:
   3438          if (end == Iend_LE) {
   3439             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
   3440             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3441          } else {
   3442             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
   3443             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3444          }
   3445          return assignNew( 'V', mce,
   3446                            Ity_V128,
   3447                            binop(Iop_64HLtoV128, v64hi, v64lo));
   3448       default:
   3449          VG_(tool_panic)("expr2vbits_Load");
   3450    }
   3451 }
   3452 
   3453 
   3454 static
   3455 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
   3456                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
   3457 {
   3458    IRAtom *vbitsC, *vbits0, *vbitsX;
   3459    IRType ty;
   3460    /* Given Mux0X(cond,expr0,exprX), generate
   3461          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
   3462       That is, steer the V bits like the originals, but trash the
   3463       result if the steering value is undefined.  This gives
   3464       lazy propagation. */
   3465    tl_assert(isOriginalAtom(mce, cond));
   3466    tl_assert(isOriginalAtom(mce, expr0));
   3467    tl_assert(isOriginalAtom(mce, exprX));
   3468 
   3469    vbitsC = expr2vbits(mce, cond);
   3470    vbits0 = expr2vbits(mce, expr0);
   3471    vbitsX = expr2vbits(mce, exprX);
   3472    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
   3473 
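           /* E.g. (sketch): if cond# is wholly undefined, the PCast term
              is all ones, so the result shadow is wholly undefined
              regardless of vbits0/vbitsX; if cond# is defined, the PCast
              term is zero and the result shadow is just the selected
              operand's vbits. */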
   3474    return
   3475       mkUifU(mce, ty, assignNew('V', mce, ty,
   3476                                      IRExpr_Mux0X(cond, vbits0, vbitsX)),
   3477                       mkPCastTo(mce, ty, vbitsC) );
   3478 }
   3479 
   3480 /* --------- This is the main expression-handling function. --------- */
   3481 
   3482 static
   3483 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
   3484 {
   3485    switch (e->tag) {
   3486 
   3487       case Iex_Get:
   3488          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
   3489 
   3490       case Iex_GetI:
   3491          return shadow_GETI( mce, e->Iex.GetI.descr,
   3492                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   3493 
   3494       case Iex_RdTmp:
   3495          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
   3496 
   3497       case Iex_Const:
   3498          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
   3499 
   3500       case Iex_Qop:
   3501          return expr2vbits_Qop(
   3502                    mce,
   3503                    e->Iex.Qop.op,
   3504                    e->Iex.Qop.arg1, e->Iex.Qop.arg2,
   3505                    e->Iex.Qop.arg3, e->Iex.Qop.arg4
   3506                 );
   3507 
   3508       case Iex_Triop:
   3509          return expr2vbits_Triop(
   3510                    mce,
   3511                    e->Iex.Triop.op,
   3512                    e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
   3513                 );
   3514 
   3515       case Iex_Binop:
   3516          return expr2vbits_Binop(
   3517                    mce,
   3518                    e->Iex.Binop.op,
   3519                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
   3520                 );
   3521 
   3522       case Iex_Unop:
   3523          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
   3524 
   3525       case Iex_Load:
   3526          return expr2vbits_Load( mce, e->Iex.Load.end,
   3527                                       e->Iex.Load.ty,
   3528                                       e->Iex.Load.addr, 0/*addr bias*/ );
   3529 
   3530       case Iex_CCall:
   3531          return mkLazyN( mce, e->Iex.CCall.args,
   3532                               e->Iex.CCall.retty,
   3533                               e->Iex.CCall.cee );
   3534 
   3535       case Iex_Mux0X:
   3536          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
   3537                                        e->Iex.Mux0X.exprX);
   3538 
   3539       default:
   3540          VG_(printf)("\n");
   3541          ppIRExpr(e);
   3542          VG_(printf)("\n");
   3543          VG_(tool_panic)("memcheck: expr2vbits");
   3544    }
   3545 }
   3546 
   3547 /*------------------------------------------------------------*/
   3548 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
   3549 /*------------------------------------------------------------*/
   3550 
   3551 /* Widen a value to the host word size. */
   3552 
   3553 static
   3554 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
   3555 {
   3556    IRType ty, tyH;
   3557 
   3558    /* vatom is a vbits-value and as such can only have a shadow type. */
   3559    tl_assert(isShadowAtom(mce,vatom));
   3560 
   3561    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
   3562    tyH = mce->hWordTy;
   3563 
   3564    if (tyH == Ity_I32) {
   3565       switch (ty) {
   3566          case Ity_I32:
   3567             return vatom;
   3568          case Ity_I16:
   3569             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
   3570          case Ity_I8:
   3571             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
   3572          default:
   3573             goto unhandled;
   3574       }
   3575    } else
   3576    if (tyH == Ity_I64) {
   3577       switch (ty) {
   3578          case Ity_I32:
   3579             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
   3580          case Ity_I16:
   3581             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   3582                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
   3583          case Ity_I8:
   3584             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   3585                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
   3586          default:
   3587             goto unhandled;
   3588       }
   3589    } else {
   3590       goto unhandled;
   3591    }
   3592   unhandled:
   3593    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   3594    VG_(tool_panic)("zwidenToHostWord");
   3595 }
   3596 
   3597 
   3598 /* Generate a shadow store.  addr is always the original address atom.
   3599    You can pass in either originals or V-bits for the data atom, but
   3600    obviously not both.  guard :: Ity_I1 controls whether the store
   3601    really happens; NULL means it unconditionally does.  Note that
   3602    guard itself is not checked for definedness; the caller of this
   3603    function must do that if necessary. */
   3604 
   3605 static
   3606 void do_shadow_Store ( MCEnv* mce,
   3607                        IREndness end,
   3608                        IRAtom* addr, UInt bias,
   3609                        IRAtom* data, IRAtom* vdata,
   3610                        IRAtom* guard )
   3611 {
   3612    IROp     mkAdd;
   3613    IRType   ty, tyAddr;
   3614    void*    helper = NULL;
   3615    Char*    hname = NULL;
   3616    IRConst* c;
   3617 
   3618    tyAddr = mce->hWordTy;
   3619    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   3620    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   3621    tl_assert( end == Iend_LE || end == Iend_BE );
   3622 
   3623    if (data) {
   3624       tl_assert(!vdata);
   3625       tl_assert(isOriginalAtom(mce, data));
   3626       tl_assert(bias == 0);
   3627       vdata = expr2vbits( mce, data );
   3628    } else {
   3629       tl_assert(vdata);
   3630    }
   3631 
   3632    tl_assert(isOriginalAtom(mce,addr));
   3633    tl_assert(isShadowAtom(mce,vdata));
   3634 
   3635    if (guard) {
   3636       tl_assert(isOriginalAtom(mce, guard));
   3637       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   3638    }
   3639 
   3640    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
   3641 
   3642    // If we're not doing undefined value checking, pretend that this value
   3643    // is "all valid".  That lets Vex's optimiser remove some of the V bit
   3644    // shadow computation ops that precede it.
   3645    if (MC_(clo_mc_level) == 1) {
   3646       switch (ty) {
   3647          case Ity_V128: // V128 constants: a 16-bit mask, 1 bit per byte lane
   3648                         c = IRConst_V128(V_BITS16_DEFINED); break;
   3649          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
   3650          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
   3651          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
   3652          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
   3653          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   3654       }
   3655       vdata = IRExpr_Const( c );
   3656    }
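           /* (Reminder of the encoding: 0-bits mean 'defined', so e.g.
              V_BITS32_DEFINED is the all-zeroes I32 constant.) */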
   3657 
   3658    /* First, emit a definedness test for the address.  This also sets
   3659       the address (shadow) to 'defined' following the test. */
   3660    complainIfUndefined( mce, addr );
   3661 
   3662    /* Now decide which helper function to call to write the data V
   3663       bits into shadow memory. */
   3664    if (end == Iend_LE) {
   3665       switch (ty) {
   3666          case Ity_V128: /* we'll use the helper twice */
   3667          case Ity_I64: helper = &MC_(helperc_STOREV64le);
   3668                        hname = "MC_(helperc_STOREV64le)";
   3669                        break;
   3670          case Ity_I32: helper = &MC_(helperc_STOREV32le);
   3671                        hname = "MC_(helperc_STOREV32le)";
   3672                        break;
   3673          case Ity_I16: helper = &MC_(helperc_STOREV16le);
   3674                        hname = "MC_(helperc_STOREV16le)";
   3675                        break;
   3676          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   3677                        hname = "MC_(helperc_STOREV8)";
   3678                        break;
   3679          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   3680       }
   3681    } else {
   3682       switch (ty) {
   3683          case Ity_V128: /* we'll use the helper twice */
   3684          case Ity_I64: helper = &MC_(helperc_STOREV64be);
   3685                        hname = "MC_(helperc_STOREV64be)";
   3686                        break;
   3687          case Ity_I32: helper = &MC_(helperc_STOREV32be);
   3688                        hname = "MC_(helperc_STOREV32be)";
   3689                        break;
   3690          case Ity_I16: helper = &MC_(helperc_STOREV16be);
   3691                        hname = "MC_(helperc_STOREV16be)";
   3692                        break;
   3693          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   3694                        hname = "MC_(helperc_STOREV8)";
   3695                        break;
   3696          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
   3697       }
   3698    }
   3699 
   3700    if (ty == Ity_V128) {
   3701 
   3702       /* V128-bit case */
   3703       /* See comment in next clause re 64-bit regparms */
   3704       /* also, need to be careful about endianness */
   3705 
   3706       Int     offLo64, offHi64;
   3707       IRDirty *diLo64, *diHi64;
   3708       IRAtom  *addrLo64, *addrHi64;
   3709       IRAtom  *vdataLo64, *vdataHi64;
   3710       IRAtom  *eBiasLo64, *eBiasHi64;
   3711 
   3712       if (end == Iend_LE) {
   3713          offLo64 = 0;
   3714          offHi64 = 8;
   3715       } else {
   3716          offLo64 = 8;
   3717          offHi64 = 0;
   3718       }
   3719 
   3720       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
   3721       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
   3722       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
   3723       diLo64    = unsafeIRDirty_0_N(
   3724                      1/*regparms*/,
   3725                      hname, VG_(fnptr_to_fnentry)( helper ),
   3726                      mkIRExprVec_2( addrLo64, vdataLo64 )
   3727                   );
   3728       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
   3729       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
   3730       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
   3731       diHi64    = unsafeIRDirty_0_N(
   3732                      1/*regparms*/,
   3733                      hname, VG_(fnptr_to_fnentry)( helper ),
   3734                      mkIRExprVec_2( addrHi64, vdataHi64 )
   3735                   );
   3736       if (guard) diLo64->guard = guard;
   3737       if (guard) diHi64->guard = guard;
   3738       setHelperAnns( mce, diLo64 );
   3739       setHelperAnns( mce, diHi64 );
   3740       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
   3741       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
   3742 
   3743    } else {
   3744 
   3745       IRDirty *di;
   3746       IRAtom  *addrAct;
   3747 
   3748       /* 8/16/32/64-bit cases */
   3749       /* Generate the actual address into addrAct. */
   3750       if (bias == 0) {
   3751          addrAct = addr;
   3752       } else {
   3753          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   3754          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
   3755       }
   3756 
   3757       if (ty == Ity_I64) {
   3758          /* We can't do this with regparm 2 on 32-bit platforms, since
   3759             the back ends aren't clever enough to handle 64-bit
   3760             regparm args.  Therefore be different. */
   3761          di = unsafeIRDirty_0_N(
   3762                  1/*regparms*/,
   3763                  hname, VG_(fnptr_to_fnentry)( helper ),
   3764                  mkIRExprVec_2( addrAct, vdata )
   3765               );
   3766       } else {
   3767          di = unsafeIRDirty_0_N(
   3768                  2/*regparms*/,
   3769                  hname, VG_(fnptr_to_fnentry)( helper ),
   3770                  mkIRExprVec_2( addrAct,
   3771                                 zwidenToHostWord( mce, vdata ))
   3772               );
   3773       }
   3774       if (guard) di->guard = guard;
   3775       setHelperAnns( mce, di );
   3776       stmt( 'V', mce, IRStmt_Dirty(di) );
   3777    }
   3778 
   3779 }
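        /* Typical use (a sketch of how an IRStmt_Store would be handled;
           the exact call sites are elsewhere in this file): pass the
           original data and let this function compute the vbits itself:

              do_shadow_Store( mce, st->Ist.Store.end,
                               st->Ist.Store.addr, 0/*bias*/,
                               st->Ist.Store.data, NULL/*vdata*/,
                               NULL/*guard*/ );
        */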
   3780 
   3781 
   3782 /* Do lazy pessimistic propagation through a dirty helper call, by
   3783    looking at the annotations on it.  This is the most complex part of
   3784    Memcheck. */
   3785 
   3786 static IRType szToITy ( Int n )
   3787 {
   3788    switch (n) {
   3789       case 1: return Ity_I8;
   3790       case 2: return Ity_I16;
   3791       case 4: return Ity_I32;
   3792       case 8: return Ity_I64;
   3793       default: VG_(tool_panic)("szToITy(memcheck)");
   3794    }
   3795 }
   3796 
   3797 static
   3798 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
   3799 {
   3800    Int       i, n, toDo, gSz, gOff;
   3801    IRAtom    *src, *here, *curr;
   3802    IRType    tySrc, tyDst;
   3803    IRTemp    dst;
   3804    IREndness end;
   3805 
   3806    /* What's the native endianness?  We need to know this. */
   3807 #  if defined(VG_BIGENDIAN)
   3808    end = Iend_BE;
   3809 #  elif defined(VG_LITTLEENDIAN)
   3810    end = Iend_LE;
   3811 #  else
   3812 #    error "Unknown endianness"
   3813 #  endif
   3814 
   3815    /* First check the guard. */
   3816    complainIfUndefined(mce, d->guard);
   3817 
   3818    /* Now round up all inputs and PCast over them. */
   3819    curr = definedOfType(Ity_I32);
   3820 
   3821    /* Inputs: unmasked args */
   3822    for (i = 0; d->args[i]; i++) {
   3823       if (d->cee->mcx_mask & (1<<i)) {
   3824          /* ignore this arg */
   3825       } else {
   3826          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
   3827          curr = mkUifU32(mce, here, curr);
   3828       }
   3829    }
   3830 
   3831    /* Inputs: guest state that we read. */
   3832    for (i = 0; i < d->nFxState; i++) {
   3833       tl_assert(d->fxState[i].fx != Ifx_None);
   3834       if (d->fxState[i].fx == Ifx_Write)
   3835          continue;
   3836 
   3837       /* Ignore any sections marked as 'always defined'. */
   3838       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
   3839          if (0)
   3840          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   3841                      d->fxState[i].offset, d->fxState[i].size );
   3842          continue;
   3843       }
   3844 
   3845       /* This state element is read or modified.  So we need to
   3846          consider it.  If larger than 8 bytes, deal with it in 8-byte
   3847          chunks. */
   3848       gSz  = d->fxState[i].size;
   3849       gOff = d->fxState[i].offset;
   3850       tl_assert(gSz > 0);
   3851       while (True) {
   3852          if (gSz == 0) break;
   3853          n = gSz <= 8 ? gSz : 8;
   3854          /* update 'curr' with UifU of the state slice
   3855             gOff .. gOff+n-1 */
   3856          tySrc = szToITy( n );
   3857          src   = assignNew( 'V', mce, tySrc,
   3858                                  shadow_GET(mce, gOff, tySrc ) );
   3859          here = mkPCastTo( mce, Ity_I32, src );
   3860          curr = mkUifU32(mce, here, curr);
   3861          gSz -= n;
   3862          gOff += n;
   3863       }
   3864 
   3865    }
   3866 
   3867    /* Inputs: memory.  First set up some info needed regardless of
   3868       whether we're doing reads or writes. */
   3869 
   3870    if (d->mFx != Ifx_None) {
   3871       /* Because we may do multiple shadow loads/stores from the same
   3872          base address, it's best to do a single test of its
   3873          definedness right now.  Post-instrumentation optimisation
   3874          should remove all but this test. */
   3875       IRType tyAddr;
   3876       tl_assert(d->mAddr);
   3877       complainIfUndefined(mce, d->mAddr);
   3878 
   3879       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
   3880       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   3881       tl_assert(tyAddr == mce->hWordTy); /* not really right */
   3882    }
   3883 
   3884    /* Deal with memory inputs (reads or modifies) */
   3885    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   3886       toDo   = d->mSize;
   3887       /* chew off 32-bit chunks.  We don't care about the endianness
   3888          since it's all going to be condensed down to a single bit,
   3889          but nevertheless choose an endianness which is hopefully
   3890          native to the platform. */
   3891       while (toDo >= 4) {
   3892          here = mkPCastTo(
   3893                    mce, Ity_I32,
   3894                    expr2vbits_Load ( mce, end, Ity_I32,
   3895                                      d->mAddr, d->mSize - toDo )
   3896                 );
   3897          curr = mkUifU32(mce, here, curr);
   3898          toDo -= 4;
   3899       }
   3900       /* chew off 16-bit chunks */
   3901       while (toDo >= 2) {
   3902          here = mkPCastTo(
   3903                    mce, Ity_I32,
   3904                    expr2vbits_Load ( mce, end, Ity_I16,
   3905                                      d->mAddr, d->mSize - toDo )
   3906                 );
   3907          curr = mkUifU32(mce, here, curr);
   3908          toDo -= 2;
   3909       }
   3910       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   3911    }
   3912 
   3913    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
   3914       all the inputs to the helper.  Now we need to re-distribute the
   3915       results to all destinations. */
   3916 
   3917    /* Outputs: the destination temporary, if there is one. */
   3918    if (d->tmp != IRTemp_INVALID) {
   3919       dst   = findShadowTmpV(mce, d->tmp);
   3920       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
   3921       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   3922    }
   3923 
   3924    /* Outputs: guest state that we write or modify. */
   3925    for (i = 0; i < d->nFxState; i++) {
   3926       tl_assert(d->fxState[i].fx != Ifx_None);
   3927       if (d->fxState[i].fx == Ifx_Read)
   3928          continue;
   3929       /* Ignore any sections marked as 'always defined'. */
   3930       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
   3931          continue;
   3932       /* This state element is written or modified.  So we need to
   3933          consider it.  If larger than 8 bytes, deal with it in 8-byte
   3934          chunks. */
   3935       gSz  = d->fxState[i].size;
   3936       gOff = d->fxState[i].offset;
   3937       tl_assert(gSz > 0);
   3938       while (True) {
   3939          if (gSz == 0) break;
   3940          n = gSz <= 8 ? gSz : 8;
   3941          /* Write suitably-casted 'curr' to the state slice
   3942             gOff .. gOff+n-1 */
   3943          tyDst = szToITy( n );
   3944          do_shadow_PUT( mce, gOff,
   3945                              NULL, /* original atom */
   3946                              mkPCastTo( mce, tyDst, curr ) );
   3947          gSz -= n;
   3948          gOff += n;
   3949       }
   3950    }
   3951 
   3952    /* Outputs: memory that we write or modify.  Same comments about
   3953       endianness as above apply. */
   3954    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   3955       toDo   = d->mSize;
   3956       /* chew off 32-bit chunks */
   3957       while (toDo >= 4) {
   3958          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   3959                           NULL, /* original data */
   3960                           mkPCastTo( mce, Ity_I32, curr ),
   3961                           NULL/*guard*/ );
   3962          toDo -= 4;
   3963       }
   3964       /* chew off 16-bit chunks */
   3965       while (toDo >= 2) {
   3966          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   3967                           NULL, /* original data */
   3968                           mkPCastTo( mce, Ity_I16, curr ),
   3969                           NULL/*guard*/ );
   3970          toDo -= 2;
   3971       }
   3972       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   3973    }
   3974 
   3975 }
   3976 
   3977 
   3978 /* We have an ABI hint telling us that [base .. base+len-1] is to
   3979    become undefined ("writable").  Generate code to call a helper to
   3980    notify the A/V bit machinery of this fact.
   3981 
   3982    We call
   3983    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
   3984                                                     Addr nia );
   3985 */
   3986 static
   3987 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
   3988 {
   3989    IRDirty* di;
   3990    /* Minor optimisation: if not doing origin tracking, ignore the
   3991       supplied nia and pass zero instead.  This is on the basis that
   3992       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
   3993       almost always generate a shorter instruction to put zero into a
   3994       register than any other value. */
   3995    if (MC_(clo_mc_level) < 3)
   3996       nia = mkIRExpr_HWord(0);
   3997 
   3998    di = unsafeIRDirty_0_N(
   3999            0/*regparms*/,
   4000            "MC_(helperc_MAKE_STACK_UNINIT)",
   4001            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
   4002            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
   4003         );
   4004    stmt( 'V', mce, IRStmt_Dirty(di) );
   4005 }
   4006 
   4007 
   4008 /* ------ Dealing with IRCAS (big and complex) ------ */
   4009 
   4010 /* FWDS */
   4011 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
   4012                              IRAtom* baseaddr, Int offset );
   4013 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
   4014 static void    gen_store_b ( MCEnv* mce, Int szB,
   4015                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
   4016                              IRAtom* guard );
   4017 
   4018 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
   4019 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
   4020 
   4021 
   4022 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
   4023    IRExpr.Consts, else this asserts.  If they are both Consts, it
   4024    doesn't do anything.  So that just leaves the RdTmp case.
   4025 
   4026    In which case: this assigns the shadow value SHADOW to the IR
   4027    shadow temporary associated with ORIG.  That is, ORIG, being an
   4028    original temporary, will have a shadow temporary associated with
   4029    it.  However, in the case envisaged here, there will so far have
   4030    been no IR emitted to actually write a shadow value into that
   4031    temporary.  What this routine does is to (emit IR to) copy the
   4032    value in SHADOW into said temporary, so that after this call,
   4033    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
   4034    value in SHADOW.
   4035 
   4036    Point is to allow callers to compute "by hand" a shadow value for
   4037    ORIG, and force it to be associated with ORIG.
   4038 
   4039    How do we know that the shadow associated with ORIG has not so far
   4040    been assigned to?  Well, we don't per se know that, but suppose it
   4041    had been.  Then this routine would create a second assignment to it,
   4042    and later the IR sanity checker would barf.  But that never
   4043    happens.  QED.
   4044 */
   4045 static void bind_shadow_tmp_to_orig ( UChar how,
   4046                                       MCEnv* mce,
   4047                                       IRAtom* orig, IRAtom* shadow )
   4048 {
   4049    tl_assert(isOriginalAtom(mce, orig));
   4050    tl_assert(isShadowAtom(mce, shadow));
   4051    switch (orig->tag) {
   4052       case Iex_Const:
   4053          tl_assert(shadow->tag == Iex_Const);
   4054          break;
   4055       case Iex_RdTmp:
   4056          tl_assert(shadow->tag == Iex_RdTmp);
   4057          if (how == 'V') {
   4058             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
   4059                    shadow);
   4060          } else {
   4061             tl_assert(how == 'B');
   4062             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
   4063                    shadow);
   4064          }
   4065          break;
   4066       default:
   4067          tl_assert(0);
   4068    }
   4069 }
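        /* Typical use (sketch): after computing voldLo by hand in
           do_shadow_CAS_single below, associate it with the CAS's 'old'
           temporary:

              bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
        */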
   4070 
   4071 
   4072 static
   4073 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
   4074 {
   4075    /* Scheme is (both single- and double- cases):
   4076 
   4077       1. fetch data#,dataB (the proposed new value)
   4078 
   4079       2. fetch expd#,expdB (what we expect to see at the address)
   4080 
   4081       3. check definedness of address
   4082 
   4083       4. load old#,oldB from shadow memory; this also checks
   4084          addressability of the address
   4085 
   4086       5. the CAS itself
   4087 
   4088       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
   4089 
   4090       7. if "expected == old" (as computed by (6))
   4091             store data#,dataB to shadow memory
   4092 
   4093       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
   4094       'data' but 7 stores 'data#'.  Hence it is possible for the
   4095       shadow data to be incorrectly checked and/or updated:
   4096 
   4097       * 7 is at least gated correctly, since the 'expected == old'
   4098         condition is derived from outputs of 5.  However, the shadow
   4099         write could happen too late: imagine after 5 we are
   4100         descheduled, a different thread runs, writes a different
   4101         (shadow) value at the address, and then we resume, hence
   4102         overwriting the shadow value written by the other thread.
   4103 
   4104       Because the original memory access is atomic, there's no way to
   4105       make both the original and shadow accesses into a single atomic
   4106       thing, hence this is unavoidable.
   4107 
   4108       At least as Valgrind stands, I don't think it's a problem, since
   4109       we're single threaded *and* we guarantee that there are no
   4110       context switches during the execution of any specific superblock
   4111       -- context switches can only happen at superblock boundaries.
   4112 
   4113       If Valgrind ever becomes MT in the future, then it might be more
   4114       of a problem.  A possible kludge would be to artificially
   4115       associate a lock with the location, which we must acquire and
   4116       release around the transaction as a whole.  Hmm, that probably
   4117       wouldn't work properly, since it only guards us against other
   4118       threads doing CASs on the same location, not against other
   4119       threads doing normal reads and writes.
   4120 
   4121       ------------------------------------------------------------
   4122 
   4123       COMMENT_ON_CasCmpEQ:
   4124 
   4125       Note two things.  Firstly, in the sequence above, we compute
   4126       "expected == old", but we don't check definedness of it.  Why
   4127       not?  Also, the x86 and amd64 front ends use
   4128       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
   4129       determination (expected == old ?) for themselves, and we also
   4130       don't check definedness for those primops; we just say that the
   4131       result is defined.  Why?  Details follow.
   4132 
   4133       x86/amd64 contains various forms of locked insns:
   4134       * a lock prefix before basic arithmetic insns;
   4135         eg lock xorl %reg1,(%reg2)
   4136       * atomic exchange reg-mem
   4137       * compare-and-swaps
   4138 
   4139       Rather than attempt to represent them all, which would be a
   4140       royal PITA, I used a result from Maurice Herlihy
   4141       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
   4142       demonstrates that compare-and-swap is a primitive more general
   4143       than the other two, and so can be used to represent all of them.
   4144       So the translation scheme for (eg) lock incl (%reg) is as
   4145       follows:
   4146 
   4147         again:
   4148          old = * %reg
   4149          new = old + 1
   4150          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
   4151 
   4152       The "atomically" is the CAS bit.  The scheme is always the same:
   4153       get old value from memory, compute new value, atomically stuff
   4154       new value back in memory iff the old value has not changed (iow,
   4155       no other thread modified it in the meantime).  If it has changed
   4156       then we've been out-raced and we have to start over.
   4157 
   4158       Now that's all very neat, but it has the bad side effect of
   4159       introducing an explicit equality test into the translation.
   4160       Consider the behaviour of said code on a memory location which
   4161       is uninitialised.  We will wind up doing a comparison on
   4162       uninitialised data, and mc duly complains.
   4163 
   4164       What's difficult about this is that the common case is an
   4165       uncontended location, and so we're usually comparing the same
   4166       value (* %reg) with itself.  So we shouldn't complain even if it
   4167       is undefined.  But mc doesn't know that.
   4168 
   4169       My solution is to mark the == in the IR specially, so as to tell
   4170       mc that it almost certainly compares a value with itself, and we
   4171       should just regard the result as always defined.  Rather than
   4172       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
   4173       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
   4174 
   4175       So there's always the question of, can this give a false
   4176       negative?  eg, imagine that initially, * %reg is defined; and we
   4177       read that; but then in the gap between the read and the CAS, a
   4178       different thread writes an undefined (and different) value at
   4179       the location.  Then the CAS in this thread will fail and we will
   4180       go back to "again:", but without knowing that the trip back
   4181       there was based on an undefined comparison.  No matter; at least
   4182       the other thread won the race and the location is correctly
   4183       marked as undefined.  What if it wrote an uninitialised version
   4184       of the same value that was there originally, though?
   4185 
   4186       etc etc.  So there is a small corner case in which we might
   4187       lose the fact that something's defined -- we're out-raced in
   4188       between the "old = * reg" and the "atomically {", _and_ the
   4189       other thread writes in an undefined version of what's already
   4190       there.  Well, that seems pretty unlikely.
   4191 
   4192       ---
   4193 
   4194       If we ever need to reinstate it: the code which generated a
   4195       definedness test for "expected == old" was removed at r10432 of
   4196       this file.
   4197    */
   4198    if (cas->oldHi == IRTemp_INVALID) {
   4199       do_shadow_CAS_single( mce, cas );
   4200    } else {
   4201       do_shadow_CAS_double( mce, cas );
   4202    }
   4203 }
   4204 
   4205 
   4206 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
   4207 {
   4208    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4209    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4210    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4211    IRAtom *expd_eq_old = NULL;
   4212    IROp   opCasCmpEQ;
   4213    Int    elemSzB;
   4214    IRType elemTy;
   4215    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4216 
   4217    /* single CAS */
   4218    tl_assert(cas->oldHi == IRTemp_INVALID);
   4219    tl_assert(cas->expdHi == NULL);
   4220    tl_assert(cas->dataHi == NULL);
   4221 
   4222    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4223    switch (elemTy) {
   4224       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
   4225       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
   4226       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
   4227       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
   4228       default: tl_assert(0); /* IR defn disallows any other types */
   4229    }
   4230 
   4231    /* 1. fetch data# (the proposed new value) */
   4232    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4233    vdataLo
   4234       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4235    tl_assert(isShadowAtom(mce, vdataLo));
   4236    if (otrak) {
   4237       bdataLo
   4238          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4239       tl_assert(isShadowAtom(mce, bdataLo));
   4240    }
   4241 
   4242    /* 2. fetch expected# (what we expect to see at the address) */
   4243    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4244    vexpdLo
   4245       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   4246    tl_assert(isShadowAtom(mce, vexpdLo));
   4247    if (otrak) {
   4248       bexpdLo
   4249          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   4250       tl_assert(isShadowAtom(mce, bexpdLo));
   4251    }
   4252 
   4253    /* 3. check definedness of address */
   4254    /* 4. fetch old# from shadow memory; this also checks
   4255          addressability of the address */
   4256    voldLo
   4257       = assignNew(
   4258            'V', mce, elemTy,
   4259            expr2vbits_Load(
   4260               mce,
   4261               cas->end, elemTy, cas->addr, 0/*Addr bias*/
   4262         ));
   4263    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   4264    if (otrak) {
   4265       boldLo
   4266          = assignNew('B', mce, Ity_I32,
   4267                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
   4268       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   4269    }
   4270 
   4271    /* 5. the CAS itself */
   4272    stmt( 'C', mce, IRStmt_CAS(cas) );
   4273 
   4274    /* 6. compute "expected == old" */
   4275    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   4276    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   4277       tree, but it's not copied from the input block. */
   4278    expd_eq_old
   4279       = assignNew('C', mce, Ity_I1,
   4280                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
   4281 
   4282    /* 7. if "expected == old"
   4283             store data# to shadow memory */
   4284    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
   4285                     NULL/*data*/, vdataLo/*vdata*/,
   4286                     expd_eq_old/*guard for store*/ );
   4287    if (otrak) {
   4288       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
   4289                    bdataLo/*bdata*/,
   4290                    expd_eq_old/*guard for store*/ );
   4291    }
   4292 }
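
        /* To summarise: for a 32-bit single CAS, the sequence added to the
           output block is roughly as follows (the shadow names here are
           illustrative only):

              vdata = V-bits of data            -- step 1
              vexpd = V-bits of expd            -- step 2
              vold  = shadow-load(addr)         -- steps 3/4; checks addr too
              old#  := vold
              old = CAS(addr, expd, data)       -- step 5, the original CAS
              eq  = CasCmpEQ32(expd, old)       -- step 6; always "defined"
              if (eq) shadow-store(addr, vdata) -- step 7

           plus the corresponding B (origin) operations when otrak holds. */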
   4293 
   4294 
   4295 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
   4296 {
   4297    IRAtom *vdataHi = NULL, *bdataHi = NULL;
   4298    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4299    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   4300    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4301    IRAtom *voldHi  = NULL, *boldHi  = NULL;
   4302    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4303    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   4304    IRAtom *expd_eq_old = NULL, *zero = NULL;
   4305    IROp   opCasCmpEQ, opOr, opXor;
   4306    Int    elemSzB, memOffsLo, memOffsHi;
   4307    IRType elemTy;
   4308    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4309 
   4310    /* double CAS */
   4311    tl_assert(cas->oldHi != IRTemp_INVALID);
   4312    tl_assert(cas->expdHi != NULL);
   4313    tl_assert(cas->dataHi != NULL);
   4314 
   4315    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4316    switch (elemTy) {
   4317       case Ity_I8:
   4318          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
   4319          elemSzB = 1; zero = mkU8(0);
   4320          break;
   4321       case Ity_I16:
   4322          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
   4323          elemSzB = 2; zero = mkU16(0);
   4324          break;
   4325       case Ity_I32:
   4326          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
   4327          elemSzB = 4; zero = mkU32(0);
   4328          break;
   4329       case Ity_I64:
   4330          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
   4331          elemSzB = 8; zero = mkU64(0);
   4332          break;
   4333       default:
   4334          tl_assert(0); /* IR defn disallows any other types */
   4335    }
   4336 
   4337    /* 1. fetch data# (the proposed new value) */
   4338    tl_assert(isOriginalAtom(mce, cas->dataHi));
   4339    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4340    vdataHi
   4341       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   4342    vdataLo
   4343       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4344    tl_assert(isShadowAtom(mce, vdataHi));
   4345    tl_assert(isShadowAtom(mce, vdataLo));
   4346    if (otrak) {
   4347       bdataHi
   4348          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
   4349       bdataLo
   4350          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4351       tl_assert(isShadowAtom(mce, bdataHi));
   4352       tl_assert(isShadowAtom(mce, bdataLo));
   4353    }
   4354 
   4355    /* 2. fetch expected# (what we expect to see at the address) */
   4356    tl_assert(isOriginalAtom(mce, cas->expdHi));
   4357    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4358    vexpdHi
   4359       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   4360    vexpdLo
   4361       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   4362    tl_assert(isShadowAtom(mce, vexpdHi));
   4363    tl_assert(isShadowAtom(mce, vexpdLo));
   4364    if (otrak) {
   4365       bexpdHi
   4366          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
   4367       bexpdLo
   4368          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   4369       tl_assert(isShadowAtom(mce, bexpdHi));
   4370       tl_assert(isShadowAtom(mce, bexpdLo));
   4371    }
   4372 
   4373    /* 3. check definedness of address */
   4374    /* 4. fetch old# from shadow memory; this also checks
   4375          addressability of the address */
   4376    if (cas->end == Iend_LE) {
   4377       memOffsLo = 0;
   4378       memOffsHi = elemSzB;
   4379    } else {
   4380       tl_assert(cas->end == Iend_BE);
   4381       memOffsLo = elemSzB;
   4382       memOffsHi = 0;
   4383    }
   4384    voldHi
   4385       = assignNew(
   4386            'V', mce, elemTy,
   4387            expr2vbits_Load(
   4388               mce,
   4389               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
   4390         ));
   4391    voldLo
   4392       = assignNew(
   4393            'V', mce, elemTy,
   4394            expr2vbits_Load(
   4395               mce,
   4396               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
   4397         ));
   4398    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   4399    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   4400    if (otrak) {
   4401       boldHi
   4402          = assignNew('B', mce, Ity_I32,
   4403                      gen_load_b(mce, elemSzB, cas->addr,
   4404                                 memOffsHi/*addr bias*/));
   4405       boldLo
   4406          = assignNew('B', mce, Ity_I32,
   4407                      gen_load_b(mce, elemSzB, cas->addr,
   4408                                 memOffsLo/*addr bias*/));
   4409       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
   4410       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   4411    }
   4412 
   4413    /* 5. the CAS itself */
   4414    stmt( 'C', mce, IRStmt_CAS(cas) );
   4415 
   4416    /* 6. compute "expected == old" */
   4417    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   4418    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   4419       tree, but it's not copied from the input block. */
   4420    /*
   4421       xHi = oldHi ^ expdHi;
   4422       xLo = oldLo ^ expdLo;
   4423       xHL = xHi | xLo;
   4424       expd_eq_old = xHL == 0;
   4425    */
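           /* Folding the two halves together like this lets one CasCmpEQ
              perform the (expdHi == oldHi && expdLo == oldLo) test, so the
              "treat the result as defined" special-casing stays confined
              to a single comparison. */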
   4426    xHi = assignNew('C', mce, elemTy,
   4427                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   4428    xLo = assignNew('C', mce, elemTy,
   4429                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   4430    xHL = assignNew('C', mce, elemTy,
   4431                    binop(opOr, xHi, xLo));
   4432    expd_eq_old
   4433       = assignNew('C', mce, Ity_I1,
   4434                   binop(opCasCmpEQ, xHL, zero));
   4435 
   4436    /* 7. if "expected == old"
   4437             store data# to shadow memory */
   4438    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
   4439                     NULL/*data*/, vdataHi/*vdata*/,
   4440                     expd_eq_old/*guard for store*/ );
   4441    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
   4442                     NULL/*data*/, vdataLo/*vdata*/,
   4443                     expd_eq_old/*guard for store*/ );
   4444    if (otrak) {
   4445       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
   4446                    bdataHi/*bdata*/,
   4447                    expd_eq_old/*guard for store*/ );
   4448       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
   4449                    bdataLo/*bdata*/,
   4450                    expd_eq_old/*guard for store*/ );
   4451    }
   4452 }
   4453 
   4454 
   4455 /* ------ Dealing with LL/SC (not difficult) ------ */
   4456 
   4457 static void do_shadow_LLSC ( MCEnv*    mce,
   4458                              IREndness stEnd,
   4459                              IRTemp    stResult,
   4460                              IRExpr*   stAddr,
   4461                              IRExpr*   stStoredata )
   4462 {
   4463    /* In short: treat a load-linked like a normal load followed by an
   4464       assignment of the loaded (shadow) data to the result temporary.
   4465       Treat a store-conditional like a normal store, and mark the
   4466       result temporary as defined. */
   4467    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   4468    IRTemp resTmp = findShadowTmpV(mce, stResult);
   4469 
   4470    tl_assert(isIRAtom(stAddr));
   4471    if (stStoredata)
   4472       tl_assert(isIRAtom(stStoredata));
   4473 
   4474    if (stStoredata == NULL) {
   4475       /* Load Linked */
   4476       /* Just treat this as a normal load, followed by an assignment of
   4477          the value to .result. */
   4478       /* Stay sane */
   4479       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
   4480                 || resTy == Ity_I16 || resTy == Ity_I8);
   4481       assign( 'V', mce, resTmp,
   4482                    expr2vbits_Load(
   4483                       mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   4484    } else {
   4485       /* Store Conditional */
   4486       /* Stay sane */
   4487       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
   4488                                    stStoredata);
   4489       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
   4490                 || dataTy == Ity_I16 || dataTy == Ity_I8);
   4491       do_shadow_Store( mce, stEnd,
   4492                             stAddr, 0/* addr bias */,
   4493                             stStoredata,
   4494                             NULL /* shadow data */,
   4495                             NULL/*guard*/ );
   4496       /* This is a store conditional, so it writes to .result a value
   4497          indicating whether or not the store succeeded.  Just claim
   4498          this value is always defined.  In the PowerPC interpretation
   4499          of store-conditional, definedness of the success indication
   4500          depends on whether the address of the store matches the
   4501          reservation address.  But we can't tell that here (and
   4502          anyway, we're not being PowerPC-specific).  At least we are
   4503          guaranteed that the definedness of the store address, and its
   4504          addressability, will be checked as per normal.  So it seems
   4505          pretty safe to just say that the success indication is always
   4506          defined.
   4507 
   4508          In schemeS, for origin tracking, we must correspondingly set
   4509          a no-origin value for the origin shadow of .result.
   4510       */
   4511       tl_assert(resTy == Ity_I1);
   4512       assign( 'V', mce, resTmp, definedOfType(resTy) );
   4513    }
   4514 }
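
        /* Illustratively (assumed IR shapes, made-up shadow names), a
           load-linked

              t1 = LDle-Linked:I32(addr)

           gets t1# = shadow-load(addr), exactly as a plain load would,
           while a store-conditional

              t2 = ( STle-Cond(addr) = t3 )

           gets an unguarded shadow store of t3# plus t2# = "defined". */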
   4515 
   4516 
   4517 /*------------------------------------------------------------*/
   4518 /*--- Memcheck main                                        ---*/
   4519 /*------------------------------------------------------------*/
   4520 
   4521 static void schemeS ( MCEnv* mce, IRStmt* st );
   4522 
   4523 static Bool isBogusAtom ( IRAtom* at )
   4524 {
   4525    ULong n = 0;
   4526    IRConst* con;
   4527    tl_assert(isIRAtom(at));
   4528    if (at->tag == Iex_RdTmp)
   4529       return False;
   4530    tl_assert(at->tag == Iex_Const);
   4531    con = at->Iex.Const.con;
   4532    switch (con->tag) {
   4533       case Ico_U1:   return False;
   4534       case Ico_U8:   n = (ULong)con->Ico.U8; break;
   4535       case Ico_U16:  n = (ULong)con->Ico.U16; break;
   4536       case Ico_U32:  n = (ULong)con->Ico.U32; break;
   4537       case Ico_U64:  n = (ULong)con->Ico.U64; break;
   4538       case Ico_F64:  return False;
   4539       case Ico_F64i: return False;
   4540       case Ico_V128: return False;
   4541       default: ppIRExpr(at); tl_assert(0);
   4542    }
   4543    /* VG_(printf)("%llx\n", n); */
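           /* These appear to be the magic constants of word-at-a-time
              string routines (the 0x01010101 / 0x80808080 family used by
              optimised strlen and friends), which is what makes them
              worth treating as suspicious. */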
   4544    return (/*32*/    n == 0xFEFEFEFFULL
   4545            /*32*/ || n == 0x80808080ULL
   4546            /*32*/ || n == 0x7F7F7F7FULL
   4547            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
   4548            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
   4549            /*64*/ || n == 0x0000000000008080ULL
   4550            /*64*/ || n == 0x8080808080808080ULL
   4551            /*64*/ || n == 0x0101010101010101ULL
   4552           );
   4553 }
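
        /* A hedged, self-contained illustration of the kind of guest code
           that produces such literals -- the classic word-at-a-time
           zero-byte test (not code this file generates or calls):

              static int has_zero_byte ( unsigned int w )
              {
                 return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
              }

           Word-wise scans built on this may read uninitialised bytes
           beyond the terminating NUL, and only bit-precise propagation
           can see that the final result doesn't actually depend on them.
           Spotting these constants is what triggers the extra-detailed
           instrumentation mentioned in MC_(instrument) below. */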
   4554 
   4555 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
   4556 {
   4557    Int      i;
   4558    IRExpr*  e;
   4559    IRDirty* d;
   4560    IRCAS*   cas;
   4561    switch (st->tag) {
   4562       case Ist_WrTmp:
   4563          e = st->Ist.WrTmp.data;
   4564          switch (e->tag) {
   4565             case Iex_Get:
   4566             case Iex_RdTmp:
   4567                return False;
   4568             case Iex_Const:
   4569                return isBogusAtom(e);
   4570             case Iex_Unop:
   4571                return isBogusAtom(e->Iex.Unop.arg);
   4572             case Iex_GetI:
   4573                return isBogusAtom(e->Iex.GetI.ix);
   4574             case Iex_Binop:
   4575                return isBogusAtom(e->Iex.Binop.arg1)
   4576                       || isBogusAtom(e->Iex.Binop.arg2);
   4577             case Iex_Triop:
   4578                return isBogusAtom(e->Iex.Triop.arg1)
   4579                       || isBogusAtom(e->Iex.Triop.arg2)
   4580                       || isBogusAtom(e->Iex.Triop.arg3);
   4581             case Iex_Qop:
   4582                return isBogusAtom(e->Iex.Qop.arg1)
   4583                       || isBogusAtom(e->Iex.Qop.arg2)
   4584                       || isBogusAtom(e->Iex.Qop.arg3)
   4585                       || isBogusAtom(e->Iex.Qop.arg4);
   4586             case Iex_Mux0X:
   4587                return isBogusAtom(e->Iex.Mux0X.cond)
   4588                       || isBogusAtom(e->Iex.Mux0X.expr0)
   4589                       || isBogusAtom(e->Iex.Mux0X.exprX);
   4590             case Iex_Load:
   4591                return isBogusAtom(e->Iex.Load.addr);
   4592             case Iex_CCall:
   4593                for (i = 0; e->Iex.CCall.args[i]; i++)
   4594                   if (isBogusAtom(e->Iex.CCall.args[i]))
   4595                      return True;
   4596                return False;
   4597             default:
   4598                goto unhandled;
   4599          }
   4600       case Ist_Dirty:
   4601          d = st->Ist.Dirty.details;
   4602          for (i = 0; d->args[i]; i++)
   4603             if (isBogusAtom(d->args[i]))
   4604                return True;
   4605          if (d->guard && isBogusAtom(d->guard))
   4606             return True;
   4607          if (d->mAddr && isBogusAtom(d->mAddr))
   4608             return True;
   4609          return False;
   4610       case Ist_Put:
   4611          return isBogusAtom(st->Ist.Put.data);
   4612       case Ist_PutI:
   4613          return isBogusAtom(st->Ist.PutI.ix)
   4614                 || isBogusAtom(st->Ist.PutI.data);
   4615       case Ist_Store:
   4616          return isBogusAtom(st->Ist.Store.addr)
   4617                 || isBogusAtom(st->Ist.Store.data);
   4618       case Ist_Exit:
   4619          return isBogusAtom(st->Ist.Exit.guard);
   4620       case Ist_AbiHint:
   4621          return isBogusAtom(st->Ist.AbiHint.base)
   4622                 || isBogusAtom(st->Ist.AbiHint.nia);
   4623       case Ist_NoOp:
   4624       case Ist_IMark:
   4625       case Ist_MBE:
   4626          return False;
   4627       case Ist_CAS:
   4628          cas = st->Ist.CAS.details;
   4629          return isBogusAtom(cas->addr)
   4630                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
   4631                 || isBogusAtom(cas->expdLo)
   4632                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
   4633                 || isBogusAtom(cas->dataLo);
   4634       case Ist_LLSC:
   4635          return isBogusAtom(st->Ist.LLSC.addr)
   4636                 || (st->Ist.LLSC.storedata
   4637                        ? isBogusAtom(st->Ist.LLSC.storedata)
   4638                        : False);
   4639       default:
   4640       unhandled:
   4641          ppIRStmt(st);
   4642          VG_(tool_panic)("checkForBogusLiterals");
   4643    }
   4644 }
   4645 
   4646 
   4647 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
   4648                         IRSB* sb_in,
   4649                         VexGuestLayout* layout,
   4650                         VexGuestExtents* vge,
   4651                         IRType gWordTy, IRType hWordTy )
   4652 {
   4653    Bool    verboze = 0||False;
   4654    Bool    bogus;
   4655    Int     i, j, first_stmt;
   4656    IRStmt* st;
   4657    MCEnv   mce;
   4658    IRSB*   sb_out;
   4659 
   4660    if (gWordTy != hWordTy) {
   4661       /* We don't currently support this case. */
   4662       VG_(tool_panic)("host/guest word size mismatch");
   4663    }
   4664 
   4665    /* Check we're not completely nuts */
   4666    tl_assert(sizeof(UWord)  == sizeof(void*));
   4667    tl_assert(sizeof(Word)   == sizeof(void*));
   4668    tl_assert(sizeof(Addr)   == sizeof(void*));
   4669    tl_assert(sizeof(ULong)  == 8);
   4670    tl_assert(sizeof(Long)   == 8);
   4671    tl_assert(sizeof(Addr64) == 8);
   4672    tl_assert(sizeof(UInt)   == 4);
   4673    tl_assert(sizeof(Int)    == 4);
   4674 
   4675    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
   4676 
   4677    /* Set up SB */
   4678    sb_out = deepCopyIRSBExceptStmts(sb_in);
   4679 
   4680    /* Set up the running environment.  Both .sb and .tmpMap are
   4681       modified as we go along.  Note that tmps are added to both
   4682       .sb->tyenv and .tmpMap together, so the valid index-set for
   4683       those two arrays should always be identical. */
   4684    VG_(memset)(&mce, 0, sizeof(mce));
   4685    mce.sb             = sb_out;
   4686    mce.trace          = verboze;
   4687    mce.layout         = layout;
   4688    mce.hWordTy        = hWordTy;
   4689    mce.bogusLiterals  = False;
   4690 
   4691    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
   4692                             sizeof(TempMapEnt));
   4693    for (i = 0; i < sb_in->tyenv->types_used; i++) {
   4694       TempMapEnt ent;
   4695       ent.kind    = Orig;
   4696       ent.shadowV = IRTemp_INVALID;
   4697       ent.shadowB = IRTemp_INVALID;
   4698       VG_(addToXA)( mce.tmpMap, &ent );
   4699    }
   4700    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
   4701 
   4702    /* Make a preliminary inspection of the statements, to see if there
   4703       are any dodgy-looking literals.  If there are, we generate
   4704       extra-detailed (hence extra-expensive) instrumentation in
   4705       places.  Scan the whole bb even if dodgyness is found earlier,
   4706       so that the flatness assertion is applied to all stmts. */
   4707 
   4708    bogus = False;
   4709 
   4710    for (i = 0; i < sb_in->stmts_used; i++) {
   4711 
   4712       st = sb_in->stmts[i];
   4713       tl_assert(st);
   4714       tl_assert(isFlatIRStmt(st));
   4715 
   4716       if (!bogus) {
   4717          bogus = checkForBogusLiterals(st);
   4718          if (0 && bogus) {
   4719             VG_(printf)("bogus: ");
   4720             ppIRStmt(st);
   4721             VG_(printf)("\n");
   4722          }
   4723       }
   4724 
   4725    }
   4726 
   4727    mce.bogusLiterals = bogus;
   4728 
   4729    /* Copy verbatim any IR preamble preceding the first IMark */
   4730 
   4731    tl_assert(mce.sb == sb_out);
   4732    tl_assert(mce.sb != sb_in);
   4733 
   4734    i = 0;
   4735    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
   4736 
   4737       st = sb_in->stmts[i];
   4738       tl_assert(st);
   4739       tl_assert(isFlatIRStmt(st));
   4740 
   4741       stmt( 'C', &mce, sb_in->stmts[i] );
   4742       i++;
   4743    }
   4744 
   4745    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
   4746       cause the IR following the preamble to contain references to IR
   4747       temporaries defined in the preamble.  Because the preamble isn't
   4748       instrumented, these temporaries don't have any shadows.
   4749       Nevertheless uses of them following the preamble will cause
   4750       memcheck to generate references to their shadows.  End effect is
   4751       to cause IR sanity check failures, due to references to
   4752       non-existent shadows.  This is only evident for the complex
   4753       preambles used for function wrapping on TOC-afflicted platforms
   4754       (ppc64-linux, ppc32-aix5, ppc64-aix5).
   4755 
   4756       The following loop therefore scans the preamble looking for
   4757       assignments to temporaries.  For each one found it creates an
   4758       assignment to the corresponding (V) shadow temp, marking it as
   4759       'defined'.  This is the same resulting IR as if the main
   4760       instrumentation loop below had been applied to the statement
   4761       'tmp = CONSTANT'.
   4762 
   4763       Similarly, if origin tracking is enabled, we must generate an
   4764       assignment for the corresponding origin (B) shadow, claiming
   4765       no-origin, as appropriate for a defined value.
   4766    */
   4767    for (j = 0; j < i; j++) {
   4768       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
   4769          /* findShadowTmpV checks its arg is an original tmp;
   4770             no need to assert that here. */
   4771          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
   4772          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
   4773          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
   4774          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
   4775          if (MC_(clo_mc_level) == 3) {
   4776             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
   4777             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
   4778             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
   4779          }
   4780          if (0) {
   4781             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
   4782             ppIRType( ty_v );
   4783             VG_(printf)("\n");
   4784          }
   4785       }
   4786    }
   4787 
   4788    /* Iterate over the remaining stmts to generate instrumentation. */
   4789 
   4790    tl_assert(sb_in->stmts_used > 0);
   4791    tl_assert(i >= 0);
   4792    tl_assert(i < sb_in->stmts_used);
   4793    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
   4794 
   4795    for (/* use current i*/; i < sb_in->stmts_used; i++) {
   4796 
   4797       st = sb_in->stmts[i];
   4798       first_stmt = sb_out->stmts_used;
   4799 
   4800       if (verboze) {
   4801          VG_(printf)("\n");
   4802          ppIRStmt(st);
   4803          VG_(printf)("\n");
   4804       }
   4805 
   4806       if (MC_(clo_mc_level) == 3) {
   4807          /* See comments on case Ist_CAS below. */
   4808          if (st->tag != Ist_CAS)
   4809             schemeS( &mce, st );
   4810       }
   4811 
   4812       /* Generate instrumentation code for each stmt ... */
   4813 
   4814       switch (st->tag) {
   4815 
   4816          case Ist_WrTmp:
   4817             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
   4818                                expr2vbits( &mce, st->Ist.WrTmp.data) );
   4819             break;
   4820 
   4821          case Ist_Put:
   4822             do_shadow_PUT( &mce,
   4823                            st->Ist.Put.offset,
   4824                            st->Ist.Put.data,
   4825                            NULL /* shadow atom */ );
   4826             break;
   4827 
   4828          case Ist_PutI:
   4829             do_shadow_PUTI( &mce,
   4830                             st->Ist.PutI.descr,
   4831                             st->Ist.PutI.ix,
   4832                             st->Ist.PutI.bias,
   4833                             st->Ist.PutI.data );
   4834             break;
   4835 
   4836          case Ist_Store:
   4837             do_shadow_Store( &mce, st->Ist.Store.end,
   4838                                    st->Ist.Store.addr, 0/* addr bias */,
   4839                                    st->Ist.Store.data,
   4840                                    NULL /* shadow data */,
   4841                                    NULL/*guard*/ );
   4842             break;
   4843 
   4844          case Ist_Exit:
   4845             complainIfUndefined( &mce, st->Ist.Exit.guard );
   4846             break;
   4847 
   4848          case Ist_IMark:
   4849             break;
   4850 
   4851          case Ist_NoOp:
   4852          case Ist_MBE:
   4853             break;
   4854 
   4855          case Ist_Dirty:
   4856             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
   4857             break;
   4858 
   4859          case Ist_AbiHint:
   4860             do_AbiHint( &mce, st->Ist.AbiHint.base,
   4861                               st->Ist.AbiHint.len,
   4862                               st->Ist.AbiHint.nia );
   4863             break;
   4864 
   4865          case Ist_CAS:
   4866             do_shadow_CAS( &mce, st->Ist.CAS.details );
   4867             /* Note, do_shadow_CAS copies the CAS itself to the output
   4868                block, because it needs to add instrumentation both
   4869                before and after it.  Hence skip the copy below.  Also
   4870                skip the origin-tracking stuff (call to schemeS) above,
   4871                since that's all tangled up with it too; do_shadow_CAS
   4872                does it all. */
   4873             break;
   4874 
   4875          case Ist_LLSC:
   4876             do_shadow_LLSC( &mce,
   4877                             st->Ist.LLSC.end,
   4878                             st->Ist.LLSC.result,
   4879                             st->Ist.LLSC.addr,
   4880                             st->Ist.LLSC.storedata );
   4881             break;
   4882 
   4883          default:
   4884             VG_(printf)("\n");
   4885             ppIRStmt(st);
   4886             VG_(printf)("\n");
   4887             VG_(tool_panic)("memcheck: unhandled IRStmt");
   4888 
   4889       } /* switch (st->tag) */
   4890 
   4891       if (0 && verboze) {
   4892          for (j = first_stmt; j < sb_out->stmts_used; j++) {
   4893             VG_(printf)("   ");
   4894             ppIRStmt(sb_out->stmts[j]);
   4895             VG_(printf)("\n");
   4896          }
   4897          VG_(printf)("\n");
   4898       }
   4899 
   4900       /* ... and finally copy the stmt itself to the output.  Except,
   4901          skip the copy of IRCASs; see comments on case Ist_CAS
   4902          above. */
   4903       if (st->tag != Ist_CAS)
   4904          stmt('C', &mce, st);
   4905    }
   4906 
   4907    /* Now we need to complain if the jump target is undefined. */
   4908    first_stmt = sb_out->stmts_used;
   4909 
   4910    if (verboze) {
   4911       VG_(printf)("sb_in->next = ");
   4912       ppIRExpr(sb_in->next);
   4913       VG_(printf)("\n\n");
   4914    }
   4915 
   4916    complainIfUndefined( &mce, sb_in->next );
   4917 
   4918    if (0 && verboze) {
   4919       for (j = first_stmt; j < sb_out->stmts_used; j++) {
   4920          VG_(printf)("   ");
   4921          ppIRStmt(sb_out->stmts[j]);
   4922          VG_(printf)("\n");
   4923       }
   4924       VG_(printf)("\n");
   4925    }
   4926 
   4927    /* If this fails, there's been some serious snafu with tmp management,
   4928       which should be investigated. */
   4929    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   4930    VG_(deleteXA)( mce.tmpMap );
   4931 
   4932    tl_assert(mce.sb == sb_out);
   4933    return sb_out;
   4934 }
   4935 
   4936 /*------------------------------------------------------------*/
   4937 /*--- Post-tree-build final tidying                        ---*/
   4938 /*------------------------------------------------------------*/
   4939 
   4940 /* This exploits the observation that Memcheck often produces
   4941    repeated conditional calls of the form
   4942 
   4943    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
   4944 
   4945    with the same guard expression G guarding the same helper call.
   4946    The second and subsequent calls are redundant.  This usually
   4947    results from instrumentation of guest code containing multiple
   4948    memory references at different constant offsets from the same base
   4949    register.  After optimisation of the instrumentation, you get a
   4950    test for the definedness of the base register for each memory
   4951    reference, which is kinda pointless.  MC_(final_tidy) therefore
   4952    looks for such repeated calls and removes all but the first. */
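
        /* Schematically (illustrative IR, not verbatim), the transformation
           is:

              t1 = <definedness test of the base register>
              if (t1) DIRTY MC_(helperc_value_check4_fail)(...)
              ...
              if (t1) DIRTY MC_(helperc_value_check4_fail)(...)  <-- NoOp'd

           where the second and later calls are overwritten with
           IRStmt_NoOp, as done at the bottom of MC_(final_tidy). */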
   4953 
   4954 /* A struct for recording which (helper, guard) pairs we have already
   4955    seen. */
   4956 typedef
   4957    struct { void* entry; IRExpr* guard; }
   4958    Pair;
   4959 
   4960 /* Return True if e1 and e2 definitely denote the same value (used to
   4961    compare guards).  Return False if unknown; False is the safe
   4962    answer.  Since guest registers and guest memory do not have the
   4963    SSA property we must return False if any Gets or Loads appear in
   4964    the expression. */
   4965 
   4966 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
   4967 {
   4968    if (e1->tag != e2->tag)
   4969       return False;
   4970    switch (e1->tag) {
   4971       case Iex_Const:
   4972          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
   4973       case Iex_Binop:
   4974          return e1->Iex.Binop.op == e2->Iex.Binop.op
   4975                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
   4976                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
   4977       case Iex_Unop:
   4978          return e1->Iex.Unop.op == e2->Iex.Unop.op
   4979                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
   4980       case Iex_RdTmp:
   4981          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
   4982       case Iex_Mux0X:
   4983          return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
   4984                 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
   4985                 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
   4986       case Iex_Qop:
   4987       case Iex_Triop:
   4988       case Iex_CCall:
   4989          /* be lazy.  Could define equality for these, but they never
   4990             appear to be used. */
   4991          return False;
   4992       case Iex_Get:
   4993       case Iex_GetI:
   4994       case Iex_Load:
   4995          /* be conservative - these may not give the same value each
   4996             time */
   4997          return False;
   4998       case Iex_Binder:
   4999          /* should never see this */
   5000          /* fallthrough */
   5001       default:
   5002          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
   5003          ppIRExpr(e1);
   5004          VG_(tool_panic)("memcheck:sameIRValue");
   5005          return False;
   5006    }
   5007 }
   5008 
   5009 /* See if 'pairs' already has an entry for (entry, guard).  Return
   5010    True if so.  If not, add an entry. */
   5011 
   5012 static
   5013 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
   5014 {
   5015    Pair  p;
   5016    Pair* pp;
   5017    Int   i, n = VG_(sizeXA)( pairs );
   5018    for (i = 0; i < n; i++) {
   5019       pp = VG_(indexXA)( pairs, i );
   5020       if (pp->entry == entry && sameIRValue(pp->guard, guard))
   5021          return True;
   5022    }
   5023    p.guard = guard;
   5024    p.entry = entry;
   5025    VG_(addToXA)( pairs, &p );
   5026    return False;
   5027 }
   5028 
   5029 static Bool is_helperc_value_checkN_fail ( HChar* name )
   5030 {
   5031    return
   5032       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
   5033       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
   5034       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
   5035       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
   5036       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
   5037       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
   5038       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
   5039       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
   5040 }
   5041 
   5042 IRSB* MC_(final_tidy) ( IRSB* sb_in )
   5043 {
   5044    Int i;
   5045    IRStmt*   st;
   5046    IRDirty*  di;
   5047    IRExpr*   guard;
   5048    IRCallee* cee;
   5049    Bool      alreadyPresent;
   5050    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
   5051                                  VG_(free), sizeof(Pair) );
   5052    /* Scan forwards through the statements.  Each time a call to one
   5053       of the relevant helpers is seen, check if we have made a
   5054       previous call to the same helper using the same guard
   5055       expression, and if so, delete the call. */
   5056    for (i = 0; i < sb_in->stmts_used; i++) {
   5057       st = sb_in->stmts[i];
   5058       tl_assert(st);
   5059       if (st->tag != Ist_Dirty)
   5060          continue;
   5061       di = st->Ist.Dirty.details;
   5062       guard = di->guard;
   5063       if (!guard)
   5064          continue;
   5065       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
   5066       cee = di->cee;
   5067       if (!is_helperc_value_checkN_fail( cee->name ))
   5068          continue;
   5069       /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
   5070          guard 'guard'.  Check if we have already seen a call to this
   5071          function with the same guard.  If so, delete it.  If not,
   5072          add it to the set of calls we do know about. */
   5073       alreadyPresent = check_or_add( pairs, guard, cee->addr );
   5074       if (alreadyPresent) {
   5075          sb_in->stmts[i] = IRStmt_NoOp();
   5076          if (0) VG_(printf)("XX\n");
   5077       }
   5078    }
   5079    VG_(deleteXA)( pairs );
   5080    return sb_in;
   5081 }
   5082 
   5083 
   5084 /*------------------------------------------------------------*/
   5085 /*--- Origin tracking stuff                                ---*/
   5086 /*------------------------------------------------------------*/
   5087 
   5088 /* Almost identical to findShadowTmpV. */
   5089 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
   5090 {
   5091    TempMapEnt* ent;
   5092    /* VG_(indexXA) range-checks 'orig', hence no need to check
   5093       here. */
   5094    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5095    tl_assert(ent->kind == Orig);
   5096    if (ent->shadowB == IRTemp_INVALID) {
   5097       IRTemp tmpB
   5098         = newTemp( mce, Ity_I32, BSh );
   5099       /* newTemp may cause mce->tmpMap to resize, hence previous results
   5100          from VG_(indexXA) are invalid. */
   5101       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5102       tl_assert(ent->kind == Orig);
   5103       tl_assert(ent->shadowB == IRTemp_INVALID);
   5104       ent->shadowB = tmpB;
   5105    }
   5106    return ent->shadowB;
   5107 }
   5108 
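        /* Why Max32U in gen_maxU32 below?  Origin tags are 32-bit values
           in which 0 means "no origin" (cf the UNKNOWN ORIGIN assignment
           in MC_(instrument) above), so unsigned max is a cheap,
           deterministic way to merge two tags: a real origin always beats
           no-origin, and when two real origins meet, one is kept
           consistently. */
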
   5109 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
   5110 {
   5111    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
   5112 }
   5113 
   5114 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
   5115                             IRAtom* baseaddr, Int offset )
   5116 {
   5117    void*    hFun;
   5118    HChar*   hName;
   5119    IRTemp   bTmp;
   5120    IRDirty* di;
   5121    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5122    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5123    IRAtom*  ea    = baseaddr;
   5124    if (offset != 0) {
   5125       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5126                                    : mkU64( (Long)(Int)offset );
   5127       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   5128    }
   5129    bTmp = newTemp(mce, mce->hWordTy, BSh);
   5130 
   5131    switch (szB) {
   5132       case 1: hFun  = (void*)&MC_(helperc_b_load1);
   5133               hName = "MC_(helperc_b_load1)";
   5134               break;
   5135       case 2: hFun  = (void*)&MC_(helperc_b_load2);
   5136               hName = "MC_(helperc_b_load2)";
   5137               break;
   5138       case 4: hFun  = (void*)&MC_(helperc_b_load4);
   5139               hName = "MC_(helperc_b_load4)";
   5140               break;
   5141       case 8: hFun  = (void*)&MC_(helperc_b_load8);
   5142               hName = "MC_(helperc_b_load8)";
   5143               break;
   5144       case 16: hFun  = (void*)&MC_(helperc_b_load16);
   5145                hName = "MC_(helperc_b_load16)";
   5146                break;
   5147       default:
   5148          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
   5149          tl_assert(0);
   5150    }
   5151    di = unsafeIRDirty_1_N(
   5152            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
   5153            mkIRExprVec_1( ea )
   5154         );
   5155    /* no need to mess with any annotations.  This call accesses
   5156       neither guest state nor guest memory. */
   5157    stmt( 'B', mce, IRStmt_Dirty(di) );
   5158    if (mce->hWordTy == Ity_I64) {
   5159       /* 64-bit host */
   5160       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
   5161       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
   5162       return mkexpr(bTmp32);
   5163    } else {
   5164       /* 32-bit host */
   5165       return mkexpr(bTmp);
   5166    }
   5167 }
   5168 
   5169 /* Generate a shadow store.  guard :: Ity_I1 controls whether the
   5170    store really happens; NULL means it unconditionally does. */
   5171 static void gen_store_b ( MCEnv* mce, Int szB,
   5172                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
   5173                           IRAtom* guard )
   5174 {
   5175    void*    hFun;
   5176    HChar*   hName;
   5177    IRDirty* di;
   5178    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5179    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5180    IRAtom*  ea    = baseaddr;
   5181    if (guard) {
   5182       tl_assert(isOriginalAtom(mce, guard));
   5183       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   5184    }
   5185    if (offset != 0) {
   5186       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5187                                    : mkU64( (Long)(Int)offset );
   5188       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
   5189    }
   5190    if (mce->hWordTy == Ity_I64)
   5191       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
   5192 
   5193    switch (szB) {
   5194       case 1: hFun  = (void*)&MC_(helperc_b_store1);
   5195               hName = "MC_(helperc_b_store1)";
   5196               break;
   5197       case 2: hFun  = (void*)&MC_(helperc_b_store2);
   5198               hName = "MC_(helperc_b_store2)";
   5199               break;
   5200       case 4: hFun  = (void*)&MC_(helperc_b_store4);
   5201               hName = "MC_(helperc_b_store4)";
   5202               break;
   5203       case 8: hFun  = (void*)&MC_(helperc_b_store8);
   5204               hName = "MC_(helperc_b_store8)";
   5205               break;
   5206       case 16: hFun  = (void*)&MC_(helperc_b_store16);
   5207                hName = "MC_(helperc_b_store16)";
   5208                break;
   5209       default:
   5210          tl_assert(0);
   5211    }
   5212    di = unsafeIRDirty_0_N( 2/*regparms*/,
   5213            hName, VG_(fnptr_to_fnentry)( hFun ),
   5214            mkIRExprVec_2( ea, dataB )
   5215         );
   5216    /* no need to mess with any annotations.  This call accesses
   5217       neither guest state nor guest memory. */
   5218    if (guard) di->guard = guard;
   5219    stmt( 'B', mce, IRStmt_Dirty(di) );
   5220 }
   5221 
   5222 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   5223    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5224    if (eTy == Ity_I64)
   5225       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   5226    if (eTy == Ity_I32)
   5227       return e;
   5228    tl_assert(0);
   5229 }
   5230 
   5231 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   5232    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5233    tl_assert(eTy == Ity_I32);
   5234    if (dstTy == Ity_I64)
   5235       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   5236    tl_assert(0);
   5237 }
   5238 
   5239 
   5240 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
   5241 {
   5242    tl_assert(MC_(clo_mc_level) == 3);
   5243 
   5244    switch (e->tag) {
   5245 
   5246       case Iex_GetI: {
   5247          IRRegArray* descr_b;
   5248          IRAtom      *t1, *t2, *t3, *t4;
   5249          IRRegArray* descr      = e->Iex.GetI.descr;
   5250          IRType equivIntTy
   5251             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
   5252          /* If this array is unshadowable for whatever reason, use the
   5253             usual approximation. */
   5254          if (equivIntTy == Ity_INVALID)
   5255             return mkU32(0);
   5256          tl_assert(sizeofIRType(equivIntTy) >= 4);
   5257          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
   5258          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
   5259                                  equivIntTy, descr->nElems );
   5260          /* Do a shadow indexed get of the same size, giving t1.  Take
   5261             the bottom 32 bits of it, giving t2.  Compute into t3 the
   5262             origin for the index (almost certainly zero, but there's
   5263             no harm in being completely general here, since iropt will
   5264             remove any useless code), and fold it in, giving a final
   5265             value t4. */
   5266          t1 = assignNew( 'B', mce, equivIntTy,
   5267                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
   5268                                                 e->Iex.GetI.bias ));
   5269          t2 = narrowTo32( mce, t1 );
   5270          t3 = schemeE( mce, e->Iex.GetI.ix );
   5271          t4 = gen_maxU32( mce, t2, t3 );
   5272          return t4;
   5273       }
   5274       case Iex_CCall: {
   5275          Int i;
   5276          IRAtom*  here;
   5277          IRExpr** args = e->Iex.CCall.args;
   5278          IRAtom*  curr = mkU32(0);
   5279          for (i = 0; args[i]; i++) {
   5280             tl_assert(i < 32);
   5281             tl_assert(isOriginalAtom(mce, args[i]));
   5282             /* Only take notice of this arg if the callee's
   5283                mc-exclusion mask does not say it is to be excluded. */
   5284             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
   5285                /* the arg is to be excluded from definedness checking.
   5286                   Do nothing. */
   5287                if (0) VG_(printf)("excluding %s(%d)\n",
   5288                                   e->Iex.CCall.cee->name, i);
   5289             } else {
   5290                /* calculate the arg's definedness, and pessimistically
   5291                   merge it in. */
   5292                here = schemeE( mce, args[i] );
   5293                curr = gen_maxU32( mce, curr, here );
   5294             }
   5295          }
   5296          return curr;
   5297       }
   5298       case Iex_Load: {
   5299          Int dszB;
   5300          dszB = sizeofIRType(e->Iex.Load.ty);
   5301          /* assert that the B value for the address is already
   5302             available (somewhere) */
   5303          tl_assert(isIRAtom(e->Iex.Load.addr));
   5304          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
   5305          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
   5306       }
   5307       case Iex_Mux0X: {
   5308          IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
   5309          IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
   5310          IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
   5311          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
   5312       }
   5313       case Iex_Qop: {
   5314          IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
   5315          IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
   5316          IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
   5317          IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
   5318          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
   5319                                  gen_maxU32( mce, b3, b4 ) );
   5320       }
   5321       case Iex_Triop: {
   5322          IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
   5323          IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
   5324          IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
   5325          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
   5326       }
   5327       case Iex_Binop: {
   5328          switch (e->Iex.Binop.op) {
   5329             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   5330             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   5331             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   5332             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   5333                /* Just say these all produce a defined result,
   5334                   regardless of their arguments.  See
   5335                   COMMENT_ON_CasCmpEQ in this file. */
   5336                return mkU32(0);
   5337             default: {
   5338                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
   5339                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
   5340                return gen_maxU32( mce, b1, b2 );
   5341             }
   5342          }
   5343          tl_assert(0);
   5344          /*NOTREACHED*/
   5345       }
   5346       case Iex_Unop: {
   5347          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
   5348          return b1;
   5349       }
   5350       case Iex_Const:
   5351          return mkU32(0);
   5352       case Iex_RdTmp:
   5353          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
   5354       case Iex_Get: {
   5355          Int b_offset = MC_(get_otrack_shadow_offset)(
   5356                            e->Iex.Get.offset,
   5357                            sizeofIRType(e->Iex.Get.ty)
   5358                         );
   5359          tl_assert(b_offset >= -1
   5360                    && b_offset <= mce->layout->total_sizeB -4);
   5361          if (b_offset >= 0) {
   5362             /* FIXME: this isn't an atom! */
   5363             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
   5364                                Ity_I32 );
   5365          }
   5366          return mkU32(0);
   5367       }
   5368       default:
   5369          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
   5370          ppIRExpr(e);
   5371          VG_(tool_panic)("memcheck:schemeE");
   5372    }
   5373 }
   5374 
   5375 
   5376 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
   5377 {
   5378    // This is a hacked version of do_shadow_Dirty
   5379    Int       i, n, toDo, gSz, gOff;
   5380    IRAtom    *here, *curr;
   5381    IRTemp    dst;
   5382 
   5383    /* First check the guard. */
   5384    curr = schemeE( mce, d->guard );
   5385 
   5386    /* Now round up all inputs and maxU32 over them. */
   5387 
   5388    /* Inputs: unmasked args */
   5389    for (i = 0; d->args[i]; i++) {
   5390       if (d->cee->mcx_mask & (1<<i)) {
   5391          /* ignore this arg */
   5392       } else {
   5393          here = schemeE( mce, d->args[i] );
   5394          curr = gen_maxU32( mce, curr, here );
   5395       }
   5396    }
   5397 
   5398    /* Inputs: guest state that we read. */
   5399    for (i = 0; i < d->nFxState; i++) {
   5400       tl_assert(d->fxState[i].fx != Ifx_None);
   5401       if (d->fxState[i].fx == Ifx_Write)
   5402          continue;
   5403 
   5404       /* Ignore any sections marked as 'always defined'. */
   5405       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
   5406          if (0)
   5407          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   5408                      d->fxState[i].offset, d->fxState[i].size );
   5409          continue;
   5410       }
   5411 
   5412       /* This state element is read or modified.  So we need to
   5413          consider it.  If larger than 4 bytes, deal with it in 4-byte
   5414          chunks. */
   5415       gSz  = d->fxState[i].size;
   5416       gOff = d->fxState[i].offset;
   5417       tl_assert(gSz > 0);
   5418       while (True) {
   5419          Int b_offset;
   5420          if (gSz == 0) break;
   5421          n = gSz <= 4 ? gSz : 4;
   5422          /* update 'curr' with maxU32 of the state slice
   5423             gOff .. gOff+n-1 */
   5424          b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
   5425          if (b_offset != -1) {
   5426             here = assignNew( 'B',mce,
   5427                                Ity_I32,
   5428                                IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
   5429                                           Ity_I32));
   5430             curr = gen_maxU32( mce, curr, here );
   5431          }
   5432          gSz -= n;
   5433          gOff += n;
   5434       }
   5435 
   5436    }
   5437 
   5438    /* Inputs: memory */
   5439 
   5440    if (d->mFx != Ifx_None) {
   5441       /* Because we may do multiple shadow loads/stores from the same
   5442          base address, it's best to do a single test of its
   5443          definedness right now.  Post-instrumentation optimisation
   5444          should remove all but this test. */
   5445       tl_assert(d->mAddr);
   5446       here = schemeE( mce, d->mAddr );
   5447       curr = gen_maxU32( mce, curr, here );
   5448    }
   5449 
   5450    /* Deal with memory inputs (reads or modifies) */
   5451    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   5452       toDo   = d->mSize;
   5453       /* chew off 32-bit chunks.  We don't care about the endianness
   5454          since it's all going to be condensed down to a single origin
   5455          value, but nevertheless choose an endianness which is hopefully
   5456          native to the platform. */
   5457       while (toDo >= 4) {
   5458          here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
   5459          curr = gen_maxU32( mce, curr, here );
   5460          toDo -= 4;
   5461       }
   5462       /* handle possible 16-bit excess */
   5463       while (toDo >= 2) {
   5464          here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
   5465          curr = gen_maxU32( mce, curr, here );
   5466          toDo -= 2;
   5467       }
   5468       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
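              /* Eg, mSize == 10 gives 4-byte loads at offsets 0 and 4,
                 then a 2-byte load at offset 8; an odd mSize would trip
                 the assertion above. */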
   5469    }
   5470 
   5471    /* Whew!  So curr is a 32-bit B-value which should give an origin
   5472       of some use if any of the inputs to the helper are undefined.
   5473       Now we need to re-distribute the results to all destinations. */
   5474 
   5475    /* Outputs: the destination temporary, if there is one. */
   5476    if (d->tmp != IRTemp_INVALID) {
   5477       dst   = findShadowTmpB(mce, d->tmp);
   5478       assign( 'B', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;

      /* This state element is written or modified.  So we need to
         consider it.  If larger than 4 bytes, deal with it in 4-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         Int b_offset;
         if (gSz == 0) break;
         n = gSz <= 4 ? gSz : 4;
         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
         if (b_offset != -1) {
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       curr ));
         }
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 2;
      }
      /* handle possible 8-bit excess */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      NULL/*guard*/ );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
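
/* Design note (inferred from the code above, stated here for
   clarity): gen_maxU32 emits a single Iop_Max32U binop, so merging N
   input origin tags into 'curr' costs N-1 unsigned-max operations.
   An origin tag of zero means "no origin", which is why unsigned max
   is a safe way to combine tags: any real origin dominates the
   no-origin value. */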


static void do_origins_Store ( MCEnv* mce,
                               IREndness stEnd,
                               IRExpr* stAddr,
                               IRExpr* stData )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
                NULL/*guard*/ );
}
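
/* Schematic view (illustrative only) of what do_origins_Store emits
   for a 4-byte store "ST(addr) = data":

      dataB = schemeE(mce, data);                -- origin tag of data
      gen_store_b(mce, 4, addr, 0, dataB, NULL); -- write B-shadow

   i.e. the B-shadow of the stored-to memory simply receives the
   origin tag of the stored data. */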


static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the maxU32 of the origin for the
            data being Put-ted (obviously) and of the index value (not
            so obviously: an undefined index taints the whole written
            element). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }
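
      /* Sketch (illustrative only, taking equivIntTy == Ity_I64) of
         the IR the PutI case above emits, roughly:

            t1 = <B-value of data>
            t2 = <B-value of ix>
            t3 = Max32U(t1, t2)
            t4 = 32Uto64(t3)
            PutI(descr_b, ix, bias, t4)

         so an undefined index propagates an origin into the whole
         written element, not only an undefined data value. */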

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }
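
      /* Note: by analogy with the b_offset != -1 checks in
         do_origins_Dirty above, a negative result from
         MC_(get_otrack_shadow_offset) means this guest-state slice
         has no origin-tag shadow, so no Put is emitted for it. */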

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/
   5669