Home | History | Annotate | Download | only in priv
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*---------------------------------------------------------------*/
      4 /*--- begin                                          ir_opt.c ---*/
      5 /*---------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2004-2013 OpenWorks LLP
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #include "libvex_basictypes.h"
     38 #include "libvex_ir.h"
     39 #include "libvex.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "ir_opt.h"
     44 
     45 
     46 /* Set to 1 for lots of debugging output. */
     47 #define DEBUG_IROPT 0
     48 
     49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
     50 #define STATS_IROPT 0
     51 
     52 
     53 /* What iropt does, 29 Dec 04.
     54 
     55    It takes an IRSB and produces a new one with the same meaning,
     56    defined thus:
     57 
     58    After execution of the new BB, all guest state and guest memory is
     59    the same as after execution of the original.  This is true
     60    regardless of how the block was exited (at the end vs side exit).
     61 
     62    In addition, parts of the guest state will be identical to that
     63    created by execution of the original at the following observation
     64    points:
     65 
     66    * In a dirty helper call, any parts of the guest state that the
     67      helper states that it reads or modifies will be up to date.
     68      Also, guest memory will be up to date.  Parts of the guest state
     69      not marked as being read or modified by the helper cannot be
     70      assumed to be up-to-date at the point where the helper is called.
     71 
     72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     73      The guest state is only up to date only as explained above
     74      (i.e. at SB exits and as specified by dirty helper call).
     75      Also, the stack pointer register is up to date at memory
     76      exception points (as this is needed for the stack extension
     77      logic in m_signals.c).
     78 
     79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     80      Immediately prior to any load or store, those parts of the guest
     81      state marked as requiring precise exceptions will be up to date.
     82      Also, guest memory will be up to date.  Parts of the guest state
     83      not marked as requiring precise exceptions cannot be assumed to
     84      be up-to-date at the point of the load/store.
     85 
     86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     87      Same as minimal, but all the guest state is up to date at memory
     88      exception points.
     89 
     90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     91      Guest state is up to date at each instruction.
     92 
     93    The relative order of loads and stores (including loads/stores of
     94    guest memory done by dirty helpers annotated as such) is not
     95    changed.  However, the relative order of loads with no intervening
     96    stores/modifies may be changed.
     97 
     98    Transformation order
     99    ~~~~~~~~~~~~~~~~~~~~
    100 
    101    There are three levels of optimisation, controlled by
    102    vex_control.iropt_level.  Define first:
    103 
    104    "Cheap transformations" are the following sequence:
    105       * Redundant-Get removal
    106       * Redundant-Put removal
    107       * Constant propagation/folding
    108       * Dead code removal
    109       * Specialisation of clean helper functions
    110       * Dead code removal
    111 
    112    "Expensive transformations" are the following sequence:
    113       * CSE
    114       * Folding of add/sub chains
    115       * Redundant-GetI removal
    116       * Redundant-PutI removal
    117       * Dead code removal
    118 
    119    Then the transformations are as follows, as defined by
    120    vex_control.iropt_level:
    121 
    122    Level 0:
    123       * Flatten into atomic form.
    124 
    125    Level 1: the following sequence:
    126       * Flatten into atomic form.
    127       * Cheap transformations.
    128 
    129    Level 2: the following sequence
    130       * Flatten into atomic form.
    131       * Cheap transformations.
    132       * If block contains any floating or vector types, CSE.
    133       * If block contains GetI or PutI, Expensive transformations.
    134       * Try unrolling loops.  Three possible outcomes:
    135         - No effect: do nothing more.
    136         - Unrolled a loop, and block does not contain GetI or PutI:
    137           Do: * CSE
    138               * Dead code removal
    139         - Unrolled a loop, and block contains GetI or PutI:
    140           Do: * Expensive transformations
    141               * Cheap transformations
    142 */
    143 
    144 /* Implementation notes, 29 Dec 04.
    145 
    146    TODO (important): I think rPutI removal ignores precise exceptions
    147    and is therefore in a sense, wrong.  In the sense that PutIs are
    148    assumed not to write parts of the guest state that we need to have
    149    up-to-date at loads/stores.  So far on x86 guest that has not
    150    mattered since indeed only the x87 FP registers and tags are
    151    accessed using GetI/PutI, and there is no need so far for them to
    152    be up to date at mem exception points.  The rPutI pass should be
    153    fixed.
    154 
    155    TODO: improve pessimistic handling of precise exceptions
    156      in the tree builder.
    157 
    158    TODO: check interaction of rGetI and dirty helpers.
    159 
    160    F64i constants are treated differently from other constants.
    161    They are not regarded as atoms, and instead lifted off and
    162    bound to temps.  This allows them to participate in CSE, which
    163    is important for getting good performance for x86 guest code.
    164 
    165    CSE up F64 literals (already doing F64is)
    166 
    167    CSE: consider carefully the requirement for precise exns
    168         prior to making CSE any more aggressive.  */
    169 
    170 
    171 /*---------------------------------------------------------------*/
    172 /*--- Finite mappery, of a sort                               ---*/
    173 /*---------------------------------------------------------------*/
    174 
    175 /* General map from HWord-sized thing HWord-sized thing.  Could be by
    176    hashing, but it's not clear whether or not this would really be any
    177    faster. */
    178 
    179 typedef
    180    struct {
    181       Bool*  inuse;
    182       HWord* key;
    183       HWord* val;
    184       Int    size;
    185       Int    used;
    186    }
    187    HashHW;
    188 
    189 static HashHW* newHHW ( void )
    190 {
    191    HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
    192    h->size   = 8;
    193    h->used   = 0;
    194    h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
    195    h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    196    h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    197    return h;
    198 }
    199 
    200 
    201 /* Look up key in the map. */
    202 
    203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    204 {
    205    Int i;
    206    /* vex_printf("lookupHHW(%llx)\n", key ); */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          if (val)
    210             *val = h->val[i];
    211          return True;
    212       }
    213    }
    214    return False;
    215 }
    216 
    217 
    218 /* Add key->val to the map.  Replaces any existing binding for key. */
    219 
    220 static void addToHHW ( HashHW* h, HWord key, HWord val )
    221 {
    222    Int i, j;
    223    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    224 
    225    /* Find and replace existing binding, if any. */
    226    for (i = 0; i < h->used; i++) {
    227       if (h->inuse[i] && h->key[i] == key) {
    228          h->val[i] = val;
    229          return;
    230       }
    231    }
    232 
    233    /* Ensure a space is available. */
    234    if (h->used == h->size) {
    235       /* Copy into arrays twice the size. */
    236       Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
    237       HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    238       HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    239       for (i = j = 0; i < h->size; i++) {
    240          if (!h->inuse[i]) continue;
    241          inuse2[j] = True;
    242          key2[j] = h->key[i];
    243          val2[j] = h->val[i];
    244          j++;
    245       }
    246       h->used = j;
    247       h->size *= 2;
    248       h->inuse = inuse2;
    249       h->key = key2;
    250       h->val = val2;
    251    }
    252 
    253    /* Finally, add it. */
    254    vassert(h->used < h->size);
    255    h->inuse[h->used] = True;
    256    h->key[h->used] = key;
    257    h->val[h->used] = val;
    258    h->used++;
    259 }
    260 
    261 
    262 /*---------------------------------------------------------------*/
    263 /*--- Flattening out a BB into atomic SSA form                ---*/
    264 /*---------------------------------------------------------------*/
    265 
    266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    267    copies made by flattening.  If in doubt return False. */
    268 
    269 static Bool isFlat ( IRExpr* e )
    270 {
    271    if (e->tag == Iex_Get)
    272       return True;
    273    if (e->tag == Iex_Binop)
    274       return toBool( isIRAtom(e->Iex.Binop.arg1)
    275                      && isIRAtom(e->Iex.Binop.arg2) );
    276    if (e->tag == Iex_Load)
    277       return isIRAtom(e->Iex.Load.addr);
    278    return False;
    279 }
    280 
    281 /* Flatten out 'ex' so it is atomic, returning a new expression with
    282    the same value, after having appended extra IRTemp assignments to
    283    the end of 'bb'. */
    284 
    285 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
    286 {
    287    Int i;
    288    IRExpr** newargs;
    289    IRType ty = typeOfIRExpr(bb->tyenv, ex);
    290    IRTemp t1;
    291 
    292    switch (ex->tag) {
    293 
    294       case Iex_GetI:
    295          t1 = newIRTemp(bb->tyenv, ty);
    296          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    297             IRExpr_GetI(ex->Iex.GetI.descr,
    298                         flatten_Expr(bb, ex->Iex.GetI.ix),
    299                         ex->Iex.GetI.bias)));
    300          return IRExpr_RdTmp(t1);
    301 
    302       case Iex_Get:
    303          t1 = newIRTemp(bb->tyenv, ty);
    304          addStmtToIRSB(bb,
    305             IRStmt_WrTmp(t1, ex));
    306          return IRExpr_RdTmp(t1);
    307 
    308       case Iex_Qop: {
    309          IRQop* qop = ex->Iex.Qop.details;
    310          t1 = newIRTemp(bb->tyenv, ty);
    311          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    312             IRExpr_Qop(qop->op,
    313                          flatten_Expr(bb, qop->arg1),
    314                          flatten_Expr(bb, qop->arg2),
    315                          flatten_Expr(bb, qop->arg3),
    316                          flatten_Expr(bb, qop->arg4))));
    317          return IRExpr_RdTmp(t1);
    318       }
    319 
    320       case Iex_Triop: {
    321          IRTriop* triop = ex->Iex.Triop.details;
    322          t1 = newIRTemp(bb->tyenv, ty);
    323          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    324             IRExpr_Triop(triop->op,
    325                          flatten_Expr(bb, triop->arg1),
    326                          flatten_Expr(bb, triop->arg2),
    327                          flatten_Expr(bb, triop->arg3))));
    328          return IRExpr_RdTmp(t1);
    329       }
    330 
    331       case Iex_Binop:
    332          t1 = newIRTemp(bb->tyenv, ty);
    333          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    334             IRExpr_Binop(ex->Iex.Binop.op,
    335                          flatten_Expr(bb, ex->Iex.Binop.arg1),
    336                          flatten_Expr(bb, ex->Iex.Binop.arg2))));
    337          return IRExpr_RdTmp(t1);
    338 
    339       case Iex_Unop:
    340          t1 = newIRTemp(bb->tyenv, ty);
    341          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    342             IRExpr_Unop(ex->Iex.Unop.op,
    343                         flatten_Expr(bb, ex->Iex.Unop.arg))));
    344          return IRExpr_RdTmp(t1);
    345 
    346       case Iex_Load:
    347          t1 = newIRTemp(bb->tyenv, ty);
    348          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    349             IRExpr_Load(ex->Iex.Load.end,
    350                         ex->Iex.Load.ty,
    351                         flatten_Expr(bb, ex->Iex.Load.addr))));
    352          return IRExpr_RdTmp(t1);
    353 
    354       case Iex_CCall:
    355          newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
    356          for (i = 0; newargs[i]; i++)
    357             newargs[i] = flatten_Expr(bb, newargs[i]);
    358          t1 = newIRTemp(bb->tyenv, ty);
    359          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    360             IRExpr_CCall(ex->Iex.CCall.cee,
    361                          ex->Iex.CCall.retty,
    362                          newargs)));
    363          return IRExpr_RdTmp(t1);
    364 
    365       case Iex_ITE:
    366          t1 = newIRTemp(bb->tyenv, ty);
    367          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    368             IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
    369                        flatten_Expr(bb, ex->Iex.ITE.iftrue),
    370                        flatten_Expr(bb, ex->Iex.ITE.iffalse))));
    371          return IRExpr_RdTmp(t1);
    372 
    373       case Iex_Const:
    374          /* Lift F64i constants out onto temps so they can be CSEd
    375             later. */
    376          if (ex->Iex.Const.con->tag == Ico_F64i) {
    377             t1 = newIRTemp(bb->tyenv, ty);
    378             addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    379                IRExpr_Const(ex->Iex.Const.con)));
    380             return IRExpr_RdTmp(t1);
    381          } else {
    382             /* Leave all other constants alone. */
    383             return ex;
    384          }
    385 
    386       case Iex_RdTmp:
    387          return ex;
    388 
    389       default:
    390          vex_printf("\n");
    391          ppIRExpr(ex);
    392          vex_printf("\n");
    393          vpanic("flatten_Expr");
    394    }
    395 }
    396 
    397 
    398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    399 
    400 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
    401 {
    402    Int i;
    403    IRExpr   *e1, *e2, *e3, *e4, *e5;
    404    IRDirty  *d,  *d2;
    405    IRCAS    *cas, *cas2;
    406    IRPutI   *puti, *puti2;
    407    IRLoadG  *lg;
    408    IRStoreG *sg;
    409    switch (st->tag) {
    410       case Ist_Put:
    411          if (isIRAtom(st->Ist.Put.data)) {
    412             /* optimisation to reduce the amount of heap wasted
    413                by the flattener */
    414             addStmtToIRSB(bb, st);
    415          } else {
    416             /* general case, always correct */
    417             e1 = flatten_Expr(bb, st->Ist.Put.data);
    418             addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
    419          }
    420          break;
    421       case Ist_PutI:
    422          puti = st->Ist.PutI.details;
    423          e1 = flatten_Expr(bb, puti->ix);
    424          e2 = flatten_Expr(bb, puti->data);
    425          puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
    426          addStmtToIRSB(bb, IRStmt_PutI(puti2));
    427          break;
    428       case Ist_WrTmp:
    429          if (isFlat(st->Ist.WrTmp.data)) {
    430             /* optimisation, to reduce the number of tmp-tmp
    431                copies generated */
    432             addStmtToIRSB(bb, st);
    433          } else {
    434             /* general case, always correct */
    435             e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
    436             addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
    437          }
    438          break;
    439       case Ist_Store:
    440          e1 = flatten_Expr(bb, st->Ist.Store.addr);
    441          e2 = flatten_Expr(bb, st->Ist.Store.data);
    442          addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
    443          break;
    444       case Ist_StoreG:
    445          sg = st->Ist.StoreG.details;
    446          e1 = flatten_Expr(bb, sg->addr);
    447          e2 = flatten_Expr(bb, sg->data);
    448          e3 = flatten_Expr(bb, sg->guard);
    449          addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
    450          break;
    451       case Ist_LoadG:
    452          lg = st->Ist.LoadG.details;
    453          e1 = flatten_Expr(bb, lg->addr);
    454          e2 = flatten_Expr(bb, lg->alt);
    455          e3 = flatten_Expr(bb, lg->guard);
    456          addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
    457                                         e1, e2, e3));
    458          break;
    459       case Ist_CAS:
    460          cas  = st->Ist.CAS.details;
    461          e1   = flatten_Expr(bb, cas->addr);
    462          e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
    463          e3   = flatten_Expr(bb, cas->expdLo);
    464          e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
    465          e5   = flatten_Expr(bb, cas->dataLo);
    466          cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
    467                          e1, e2, e3, e4, e5 );
    468          addStmtToIRSB(bb, IRStmt_CAS(cas2));
    469          break;
    470       case Ist_LLSC:
    471          e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
    472          e2 = st->Ist.LLSC.storedata
    473                  ? flatten_Expr(bb, st->Ist.LLSC.storedata)
    474                  : NULL;
    475          addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
    476                                        st->Ist.LLSC.result, e1, e2));
    477          break;
    478       case Ist_Dirty:
    479          d = st->Ist.Dirty.details;
    480          d2 = emptyIRDirty();
    481          *d2 = *d;
    482          d2->args = shallowCopyIRExprVec(d2->args);
    483          if (d2->mFx != Ifx_None) {
    484             d2->mAddr = flatten_Expr(bb, d2->mAddr);
    485          } else {
    486             vassert(d2->mAddr == NULL);
    487          }
    488          d2->guard = flatten_Expr(bb, d2->guard);
    489          for (i = 0; d2->args[i]; i++) {
    490             IRExpr* arg = d2->args[i];
    491             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
    492                d2->args[i] = flatten_Expr(bb, arg);
    493          }
    494          addStmtToIRSB(bb, IRStmt_Dirty(d2));
    495          break;
    496       case Ist_NoOp:
    497       case Ist_MBE:
    498       case Ist_IMark:
    499          addStmtToIRSB(bb, st);
    500          break;
    501       case Ist_AbiHint:
    502          e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
    503          e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
    504          addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
    505          break;
    506       case Ist_Exit:
    507          e1 = flatten_Expr(bb, st->Ist.Exit.guard);
    508          addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
    509                                        st->Ist.Exit.dst,
    510                                        st->Ist.Exit.offsIP));
    511          break;
    512       default:
    513          vex_printf("\n");
    514          ppIRStmt(st);
    515          vex_printf("\n");
    516          vpanic("flatten_Stmt");
    517    }
    518 }
    519 
    520 
    521 static IRSB* flatten_BB ( IRSB* in )
    522 {
    523    Int   i;
    524    IRSB* out;
    525    out = emptyIRSB();
    526    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    527    for (i = 0; i < in->stmts_used; i++)
    528       if (in->stmts[i])
    529          flatten_Stmt( out, in->stmts[i] );
    530    out->next     = flatten_Expr( out, in->next );
    531    out->jumpkind = in->jumpkind;
    532    out->offsIP   = in->offsIP;
    533    return out;
    534 }
    535 
    536 
    537 /*---------------------------------------------------------------*/
    538 /*--- In-place removal of redundant GETs                      ---*/
    539 /*---------------------------------------------------------------*/
    540 
    541 /* Scan forwards, building up an environment binding (min offset, max
    542    offset) pairs to values, which will either be temps or constants.
    543 
    544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    545    env and if it matches, replace the Get with the stored value.  If
    546    there is no match, add a (minoff,maxoff) :-> t binding.
    547 
    548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    549    any binding which fully or partially overlaps with (minoff,maxoff).
    550    Then add a new (minoff,maxoff) :-> t or c binding.  */
    551 
    552 /* Extract the min/max offsets from a guest state array descriptor. */
    553 
    554 inline
    555 static void getArrayBounds ( IRRegArray* descr,
    556                              UInt* minoff, UInt* maxoff )
    557 {
    558    *minoff = descr->base;
    559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    560    vassert((*minoff & ~0xFFFF) == 0);
    561    vassert((*maxoff & ~0xFFFF) == 0);
    562    vassert(*minoff <= *maxoff);
    563 }
    564 
    565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    566 
    567 static UInt mk_key_GetPut ( Int offset, IRType ty )
    568 {
    569    /* offset should fit in 16 bits. */
    570    UInt minoff = offset;
    571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    572    vassert((minoff & ~0xFFFF) == 0);
    573    vassert((maxoff & ~0xFFFF) == 0);
    574    return (minoff << 16) | maxoff;
    575 }
    576 
    577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    578 {
    579    UInt minoff, maxoff;
    580    getArrayBounds( descr, &minoff, &maxoff );
    581    vassert((minoff & ~0xFFFF) == 0);
    582    vassert((maxoff & ~0xFFFF) == 0);
    583    return (minoff << 16) | maxoff;
    584 }
    585 
    586 /* Supposing h has keys of the form generated by mk_key_GetPut and
    587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    588    .. k_hi).
    589 */
    590 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    591 {
    592    Int  j;
    593    UInt e_lo, e_hi;
    594    vassert(k_lo <= k_hi);
    595    /* invalidate any env entries which in any way overlap (k_lo
    596       .. k_hi) */
    597    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    598 
    599    for (j = 0; j < h->used; j++) {
    600       if (!h->inuse[j])
    601          continue;
    602       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    603       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    604       vassert(e_lo <= e_hi);
    605       if (e_hi < k_lo || k_hi < e_lo)
    606          continue; /* no overlap possible */
    607       else
    608          /* overlap; invalidate */
    609          h->inuse[j] = False;
    610    }
    611 }
    612 
    613 
    614 static void redundant_get_removal_BB ( IRSB* bb )
    615 {
    616    HashHW* env = newHHW();
    617    UInt    key = 0; /* keep gcc -O happy */
    618    Int     i, j;
    619    HWord   val;
    620 
    621    for (i = 0; i < bb->stmts_used; i++) {
    622       IRStmt* st = bb->stmts[i];
    623 
    624       if (st->tag == Ist_NoOp)
    625          continue;
    626 
    627       /* Deal with Gets */
    628       if (st->tag == Ist_WrTmp
    629           && st->Ist.WrTmp.data->tag == Iex_Get) {
    630          /* st is 't = Get(...)'.  Look up in the environment and see
    631             if the Get can be replaced. */
    632          IRExpr* get = st->Ist.WrTmp.data;
    633          key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
    634                                      get->Iex.Get.ty );
    635          if (lookupHHW(env, &val, (HWord)key)) {
    636             /* found it */
    637             /* Note, we could do better here.  If the types are
    638                different we don't do the substitution, since doing so
    639                could lead to invalidly-typed IR.  An improvement would
    640                be to stick in a reinterpret-style cast, although that
    641                would make maintaining flatness more difficult. */
    642             IRExpr* valE    = (IRExpr*)val;
    643             Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
    644                                       == st->Ist.WrTmp.data->Iex.Get.ty );
    645             if (typesOK && DEBUG_IROPT) {
    646                vex_printf("rGET: "); ppIRExpr(get);
    647                vex_printf("  ->  "); ppIRExpr(valE);
    648                vex_printf("\n");
    649             }
    650             if (typesOK)
    651                bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
    652          } else {
    653             /* Not found, but at least we know that t and the Get(...)
    654                are now associated.  So add a binding to reflect that
    655                fact. */
    656             addToHHW( env, (HWord)key,
    657                            (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
    658          }
    659       }
    660 
    661       /* Deal with Puts: invalidate any env entries overlapped by this
    662          Put */
    663       if (st->tag == Ist_Put || st->tag == Ist_PutI) {
    664          UInt k_lo, k_hi;
    665          if (st->tag == Ist_Put) {
    666             key = mk_key_GetPut( st->Ist.Put.offset,
    667                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    668          } else {
    669             vassert(st->tag == Ist_PutI);
    670             key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
    671          }
    672 
    673          k_lo = (key >> 16) & 0xFFFF;
    674          k_hi = key & 0xFFFF;
    675          invalidateOverlaps(env, k_lo, k_hi);
    676       }
    677       else
    678       if (st->tag == Ist_Dirty) {
    679          /* Deal with dirty helpers which write or modify guest state.
    680             Invalidate the entire env.  We could do a lot better
    681             here. */
    682          IRDirty* d      = st->Ist.Dirty.details;
    683          Bool     writes = False;
    684          for (j = 0; j < d->nFxState; j++) {
    685             if (d->fxState[j].fx == Ifx_Modify
    686                 || d->fxState[j].fx == Ifx_Write)
    687             writes = True;
    688          }
    689          if (writes) {
    690             /* dump the entire env (not clever, but correct ...) */
    691             for (j = 0; j < env->used; j++)
    692                env->inuse[j] = False;
    693             if (0) vex_printf("rGET: trash env due to dirty helper\n");
    694          }
    695       }
    696 
    697       /* add this one to the env, if appropriate */
    698       if (st->tag == Ist_Put) {
    699          vassert(isIRAtom(st->Ist.Put.data));
    700          addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
    701       }
    702 
    703    } /* for (i = 0; i < bb->stmts_used; i++) */
    704 
    705 }
    706 
    707 
    708 /*---------------------------------------------------------------*/
    709 /*--- In-place removal of redundant PUTs                      ---*/
    710 /*---------------------------------------------------------------*/
    711 
    712 /* Find any Get uses in st and invalidate any partially or fully
    713    overlapping ranges listed in env.  Due to the flattening phase, the
    714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    715 
    716 static void handle_gets_Stmt (
    717                HashHW* env,
    718                IRStmt* st,
    719                Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
    720                VexRegisterUpdates pxControl
    721             )
    722 {
    723    Int     j;
    724    UInt    key = 0; /* keep gcc -O happy */
    725    Bool    isGet;
    726    Bool    memRW = False;
    727    IRExpr* e;
    728 
    729    switch (st->tag) {
    730 
    731       /* This is the only interesting case.  Deal with Gets in the RHS
    732          expression. */
    733       case Ist_WrTmp:
    734          e = st->Ist.WrTmp.data;
    735          switch (e->tag) {
    736             case Iex_Get:
    737                isGet = True;
    738                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    739                break;
    740             case Iex_GetI:
    741                isGet = True;
    742                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    743                break;
    744             case Iex_Load:
    745                isGet = False;
    746                memRW = True;
    747                break;
    748             default:
    749                isGet = False;
    750          }
    751          if (isGet) {
    752             UInt k_lo, k_hi;
    753             k_lo = (key >> 16) & 0xFFFF;
    754             k_hi = key & 0xFFFF;
    755             invalidateOverlaps(env, k_lo, k_hi);
    756          }
    757          break;
    758 
    759       /* Be very conservative for dirty helper calls; dump the entire
    760          environment.  The helper might read guest state, in which
    761          case it needs to be flushed first.  Also, the helper might
    762          access guest memory, in which case all parts of the guest
    763          state requiring precise exceptions needs to be flushed.  The
    764          crude solution is just to flush everything; we could easily
    765          enough do a lot better if needed. */
    766       /* Probably also overly-conservative, but also dump everything
    767          if we hit a memory bus event (fence, lock, unlock).  Ditto
    768          AbiHints, CASs, LLs and SCs. */
    769       case Ist_AbiHint:
    770          vassert(isIRAtom(st->Ist.AbiHint.base));
    771          vassert(isIRAtom(st->Ist.AbiHint.nia));
    772          /* fall through */
    773       case Ist_MBE:
    774       case Ist_Dirty:
    775       case Ist_CAS:
    776       case Ist_LLSC:
    777          for (j = 0; j < env->used; j++)
    778             env->inuse[j] = False;
    779          break;
    780 
    781       /* all other cases are boring. */
    782       case Ist_Store:
    783          vassert(isIRAtom(st->Ist.Store.addr));
    784          vassert(isIRAtom(st->Ist.Store.data));
    785          memRW = True;
    786          break;
    787       case Ist_StoreG: {
    788          IRStoreG* sg = st->Ist.StoreG.details;
    789          vassert(isIRAtom(sg->addr));
    790          vassert(isIRAtom(sg->data));
    791          vassert(isIRAtom(sg->guard));
    792          memRW = True;
    793          break;
    794       }
    795       case Ist_LoadG: {
    796          IRLoadG* lg = st->Ist.LoadG.details;
    797          vassert(isIRAtom(lg->addr));
    798          vassert(isIRAtom(lg->alt));
    799          vassert(isIRAtom(lg->guard));
    800          memRW = True;
    801          break;
    802       }
    803       case Ist_Exit:
    804          vassert(isIRAtom(st->Ist.Exit.guard));
    805          break;
    806 
    807       case Ist_Put:
    808          vassert(isIRAtom(st->Ist.Put.data));
    809          break;
    810 
    811       case Ist_PutI:
    812          vassert(isIRAtom(st->Ist.PutI.details->ix));
    813          vassert(isIRAtom(st->Ist.PutI.details->data));
    814          break;
    815 
    816       case Ist_NoOp:
    817       case Ist_IMark:
    818          break;
    819 
    820       default:
    821          vex_printf("\n");
    822          ppIRStmt(st);
    823          vex_printf("\n");
    824          vpanic("handle_gets_Stmt");
    825    }
    826 
    827    if (memRW) {
    828       /* This statement accesses memory.  So we might need to dump all parts
    829          of the environment corresponding to guest state that may not
    830          be reordered with respect to memory references.  That means
    831          at least the stack pointer. */
    832       switch (pxControl) {
    833          case VexRegUpdAllregsAtMemAccess:
    834             /* Precise exceptions required at mem access.
    835                Flush all guest state. */
    836             for (j = 0; j < env->used; j++)
    837                env->inuse[j] = False;
    838             break;
    839          case VexRegUpdSpAtMemAccess:
    840             /* We need to dump the stack pointer
    841                (needed for stack extension in m_signals.c).
    842                preciseMemExnsFn will use vex_control.iropt_register_updates
    843                to verify only the sp is to be checked. */
    844             /* fallthrough */
    845          case VexRegUpdUnwindregsAtMemAccess:
    846             for (j = 0; j < env->used; j++) {
    847                if (!env->inuse[j])
    848                   continue;
    849                /* Just flush the minimal amount required, as computed by
    850                   preciseMemExnsFn. */
    851                HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    852                HWord k_hi = env->key[j] & 0xFFFF;
    853                if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
    854                   env->inuse[j] = False;
    855             }
    856             break;
    857          case VexRegUpdAllregsAtEachInsn:
    858             // VexRegUpdAllregsAtEachInsn cannot happen here.
    859             // fall through
    860          case VexRegUpd_INVALID:
    861          default:
    862             vassert(0);
    863       }
    864    } /* if (memRW) */
    865 
    866 }
    867 
    868 
    869 /* Scan backwards, building up a set of (min offset, max
    870    offset) pairs, indicating those parts of the guest state
    871    for which the next event is a write.
    872 
    873    On seeing a conditional exit, empty the set.
    874 
    875    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    876    completely within the set, remove the Put.  Otherwise, add
    877    (minoff,maxoff) to the set.
    878 
    879    On seeing 'Get (minoff,maxoff)', remove any part of the set
    880    overlapping (minoff,maxoff).  The same has to happen for any events
    881    which implicitly read parts of the guest state: dirty helper calls
    882    and loads/stores.
    883 */
    884 
    885 static void redundant_put_removal_BB (
    886                IRSB* bb,
    887                Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
    888                VexRegisterUpdates pxControl
    889             )
    890 {
    891    Int     i, j;
    892    Bool    isPut;
    893    IRStmt* st;
    894    UInt    key = 0; /* keep gcc -O happy */
    895 
    896    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
    897 
    898    HashHW* env = newHHW();
    899 
    900    /* Initialise the running env with the fact that the final exit
    901       writes the IP (or, whatever it claims to write.  We don't
    902       care.) */
    903    key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
    904    addToHHW(env, (HWord)key, 0);
    905 
    906    /* And now scan backwards through the statements. */
    907    for (i = bb->stmts_used-1; i >= 0; i--) {
    908       st = bb->stmts[i];
    909 
    910       if (st->tag == Ist_NoOp)
    911          continue;
    912 
    913       /* Deal with conditional exits. */
    914       if (st->tag == Ist_Exit) {
    915          //Bool re_add;
    916          /* Need to throw out from the env, any part of it which
    917             doesn't overlap with the guest state written by this exit.
    918             Since the exit only writes one section, it's simplest to
    919             do this: (1) check whether env contains a write that
    920             completely overlaps the write done by this exit; (2) empty
    921             out env; and (3) if (1) was true, add the write done by
    922             this exit.
    923 
    924             To make (1) a bit simpler, merely search for a write that
    925             exactly matches the one done by this exit.  That's safe
    926             because it will fail as often or more often than a full
    927             overlap check, and failure to find an overlapping write in
    928             env is the safe case (we just nuke env if that
    929             happens). */
    930          //vassert(isIRAtom(st->Ist.Exit.guard));
    931          /* (1) */
    932          //key = mk_key_GetPut(st->Ist.Exit.offsIP,
    933          //                    typeOfIRConst(st->Ist.Exit.dst));
    934          //re_add = lookupHHW(env, NULL, key);
    935          /* (2) */
    936          for (j = 0; j < env->used; j++)
    937             env->inuse[j] = False;
    938          /* (3) */
    939          //if (0 && re_add)
    940          //   addToHHW(env, (HWord)key, 0);
    941          continue;
    942       }
    943 
    944       /* Deal with Puts */
    945       switch (st->tag) {
    946          case Ist_Put:
    947             isPut = True;
    948             key = mk_key_GetPut( st->Ist.Put.offset,
    949                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    950             vassert(isIRAtom(st->Ist.Put.data));
    951             break;
    952          case Ist_PutI:
    953             isPut = True;
    954             key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
    955             vassert(isIRAtom(st->Ist.PutI.details->ix));
    956             vassert(isIRAtom(st->Ist.PutI.details->data));
    957             break;
    958          default:
    959             isPut = False;
    960       }
    961       if (isPut && st->tag != Ist_PutI) {
    962          /* See if any single entry in env overlaps this Put.  This is
    963             simplistic in that the transformation is valid if, say, two
    964             or more entries in the env overlap this Put, but the use of
    965             lookupHHW will only find a single entry which exactly
    966             overlaps this Put.  This is suboptimal but safe. */
    967          if (lookupHHW(env, NULL, (HWord)key)) {
    968             /* This Put is redundant because a later one will overwrite
    969                it.  So NULL (nop) it out. */
    970             if (DEBUG_IROPT) {
    971                vex_printf("rPUT: "); ppIRStmt(st);
    972                vex_printf("\n");
    973             }
    974             bb->stmts[i] = IRStmt_NoOp();
    975          } else {
    976             /* We can't demonstrate that this Put is redundant, so add it
    977                to the running collection. */
    978             addToHHW(env, (HWord)key, 0);
    979          }
    980          continue;
    981       }
    982 
    983       /* Deal with Gets.  These remove bits of the environment since
    984          appearance of a Get means that the next event for that slice
    985          of the guest state is no longer a write, but a read.  Also
    986          deals with implicit reads of guest state needed to maintain
    987          precise exceptions. */
    988       handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
    989    }
    990 }
    991 
    992 
    993 /*---------------------------------------------------------------*/
    994 /*--- Constant propagation and folding                        ---*/
    995 /*---------------------------------------------------------------*/
    996 
    997 #if STATS_IROPT
    998 /* How often sameIRExprs was invoked */
    999 static UInt invocation_count;
   1000 /* How often sameIRExprs recursed through IRTemp assignments */
   1001 static UInt recursion_count;
   1002 /* How often sameIRExprs found identical IRExprs */
   1003 static UInt success_count;
   1004 /* How often recursing through assignments to IRTemps helped
   1005    establishing equality. */
   1006 static UInt recursion_success_count;
   1007 /* Whether or not recursing through an IRTemp assignment helped
   1008    establishing IRExpr equality for a given sameIRExprs invocation. */
   1009 static Bool recursion_helped;
   1010 /* Whether or not a given sameIRExprs invocation recursed through an
   1011    IRTemp assignment */
   1012 static Bool recursed;
   1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */
   1014 static UInt max_nodes_visited;
   1015 #endif /* STATS_IROPT */
   1016 
   1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */
   1018 static UInt num_nodes_visited;
   1019 
   1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   1021    This is to guard against performance degradation by visiting large
   1022    trees without success. */
   1023 #define NODE_LIMIT 30
   1024 
   1025 
   1026 /* The env in this section is a map from IRTemp to IRExpr*,
   1027    that is, an array indexed by IRTemp. */
   1028 
   1029 /* Do both expressions compute the same value? The answer is generally
   1030    conservative, i.e. it will report that the expressions do not compute
   1031    the same value when in fact they do. The reason is that we do not
   1032    keep track of changes in the guest state and memory. Thusly, two
   1033    Get's, GetI's or Load's, even when accessing the same location, will be
   1034    assumed to compute different values. After all the accesses may happen
   1035    at different times and the guest state / memory can have changed in
   1036    the meantime.
   1037 
   1038    XXX IMPORTANT XXX the two expressions must have the same IR type.
   1039    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
   1040 
   1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   1042    wrapper that deals with the common tags-don't-match case, and a
   1043    slower out of line general case.  Saves a few insns. */
   1044 
   1045 __attribute__((noinline))
   1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
   1047 
   1048 inline
   1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1050 {
   1051    if (e1->tag != e2->tag) return False;
   1052    return sameIRExprs_aux2(env, e1, e2);
   1053 }
   1054 
   1055 __attribute__((noinline))
   1056 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1057 {
   1058    if (num_nodes_visited++ > NODE_LIMIT) return False;
   1059 
   1060    switch (e1->tag) {
   1061       case Iex_RdTmp:
   1062          if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;
   1063 
   1064          if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
   1065             Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
   1066                                         env[e2->Iex.RdTmp.tmp]);
   1067 #if STATS_IROPT
   1068             recursed = True;
   1069             if (same) recursion_helped = True;
   1070 #endif
   1071             return same;
   1072          }
   1073          return False;
   1074 
   1075       case Iex_Get:
   1076       case Iex_GetI:
   1077       case Iex_Load:
   1078          /* Guest state / memory could have changed in the meantime. */
   1079          return False;
   1080 
   1081       case Iex_Binop:
   1082          return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
   1083                         && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
   1084                                                  e2->Iex.Binop.arg1 )
   1085                         && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
   1086                                                  e2->Iex.Binop.arg2 ));
   1087 
   1088       case Iex_Unop:
   1089          return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
   1090                         && sameIRExprs_aux( env, e1->Iex.Unop.arg,
   1091                                                  e2->Iex.Unop.arg ));
   1092 
   1093       case Iex_Const: {
   1094          IRConst *c1 = e1->Iex.Const.con;
   1095          IRConst *c2 = e2->Iex.Const.con;
   1096          vassert(c1->tag == c2->tag);
   1097          switch (c1->tag) {
   1098             case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
   1099             case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
   1100             case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
   1101             case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
   1102             case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
   1103             default: break;
   1104          }
   1105          return False;
   1106       }
   1107 
   1108       case Iex_Triop: {
   1109          IRTriop *tri1 = e1->Iex.Triop.details;
   1110          IRTriop *tri2 = e2->Iex.Triop.details;
   1111          return toBool( tri1->op == tri2->op
   1112                         && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
   1113                         && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
   1114                         && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
   1115       }
   1116 
   1117       case Iex_ITE:
   1118          return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
   1119                                                  e2->Iex.ITE.cond )
   1120                         && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
   1121                                                  e2->Iex.ITE.iftrue )
   1122                         && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
   1123                                                  e2->Iex.ITE.iffalse ));
   1124 
   1125       default:
   1126          /* Not very likely to be "same". */
   1127          break;
   1128    }
   1129 
   1130    return False;
   1131 }
   1132 
   1133 inline
   1134 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1135 {
   1136    Bool same;
   1137 
   1138    num_nodes_visited = 0;
   1139    same = sameIRExprs_aux(env, e1, e2);
   1140 
   1141 #if STATS_IROPT
   1142    ++invocation_count;
   1143    if (recursed) ++recursion_count;
   1144    success_count += same;
   1145    if (same && recursion_helped)
   1146       ++recursion_success_count;
   1147    if (num_nodes_visited > max_nodes_visited)
   1148       max_nodes_visited = num_nodes_visited;
   1149    recursed = False; /* reset */
   1150    recursion_helped = False;  /* reset */
   1151 #endif /* STATS_IROPT */
   1152 
   1153    return same;
   1154 }
   1155 
   1156 
   1157 /* Debugging-only hack (not used in production runs): make a guess
   1158    whether sameIRExprs might assert due to the two args being of
   1159    different types.  If in doubt return False.  Is only used when
   1160    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   1161    Bad because it duplicates functionality from typeOfIRExpr.  See
   1162    comment on the single use point below for rationale. */
   1163 static
   1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   1165 {
   1166    if (e1->tag != e2->tag) return False;
   1167    switch (e1->tag) {
   1168       case Iex_Const: {
   1169          /* The only interesting case */
   1170          IRConst *c1 = e1->Iex.Const.con;
   1171          IRConst *c2 = e2->Iex.Const.con;
   1172          return c1->tag != c2->tag;
   1173       }
   1174       default:
   1175          break;
   1176    }
   1177    return False;
   1178 }
   1179 
   1180 
   1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
   1182 static Bool isZeroU32 ( IRExpr* e )
   1183 {
   1184    return toBool( e->tag == Iex_Const
   1185                   && e->Iex.Const.con->tag == Ico_U32
   1186                   && e->Iex.Const.con->Ico.U32 == 0);
   1187 }
   1188 
   1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   1190    Currently unused; commented out to avoid compiler warning */
   1191 #if 0
   1192 static Bool isZeroU64 ( IRExpr* e )
   1193 {
   1194    return toBool( e->tag == Iex_Const
   1195                   && e->Iex.Const.con->tag == Ico_U64
   1196                   && e->Iex.Const.con->Ico.U64 == 0);
   1197 }
   1198 #endif
   1199 
   1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
   1201 static Bool isZeroV128 ( IRExpr* e )
   1202 {
   1203    return toBool( e->tag == Iex_Const
   1204                   && e->Iex.Const.con->tag == Ico_V128
   1205                   && e->Iex.Const.con->Ico.V128 == 0x0000);
   1206 }
   1207 
   1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
   1209 static Bool isZeroV256 ( IRExpr* e )
   1210 {
   1211    return toBool( e->tag == Iex_Const
   1212                   && e->Iex.Const.con->tag == Ico_V256
   1213                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
   1214 }
   1215 
   1216 /* Is this an integer constant with value 0 ? */
   1217 static Bool isZeroU ( IRExpr* e )
   1218 {
   1219    if (e->tag != Iex_Const) return False;
   1220    switch (e->Iex.Const.con->tag) {
   1221       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
   1222       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
   1223       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
   1224       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
   1225       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
   1226       default: vpanic("isZeroU");
   1227    }
   1228 }
   1229 
   1230 /* Is this an integer constant with value 1---1b ? */
   1231 static Bool isOnesU ( IRExpr* e )
   1232 {
   1233    if (e->tag != Iex_Const) return False;
   1234    switch (e->Iex.Const.con->tag) {
   1235       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
   1236       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
   1237       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
   1238                                      == 0xFFFFFFFF);
   1239       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
   1240                                      == 0xFFFFFFFFFFFFFFFFULL);
   1241       default: ppIRExpr(e); vpanic("isOnesU");
   1242    }
   1243 }
   1244 
   1245 static Bool notBool ( Bool b )
   1246 {
   1247    if (b == True) return False;
   1248    if (b == False) return True;
   1249    vpanic("notBool");
   1250 }
   1251 
   1252 /* Make a zero which has the same type as the result of the given
   1253    primop. */
   1254 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
   1255 {
   1256    switch (op) {
   1257       case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
   1258       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
   1259       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
   1260       case Iop_Sub32:
   1261       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
   1262       case Iop_And64:
   1263       case Iop_Sub64:
   1264       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
   1265       case Iop_XorV128:
   1266       case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
   1267       case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
   1268       default: vpanic("mkZeroOfPrimopResultType: bad primop");
   1269    }
   1270 }
   1271 
   1272 /* Make a value containing all 1-bits, which has the same type as the
   1273    result of the given primop. */
   1274 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
   1275 {
   1276    switch (op) {
   1277       case Iop_CmpEQ32:
   1278       case Iop_CmpEQ64:
   1279          return IRExpr_Const(IRConst_U1(toBool(1)));
   1280       case Iop_Or8:
   1281          return IRExpr_Const(IRConst_U8(0xFF));
   1282       case Iop_Or16:
   1283          return IRExpr_Const(IRConst_U16(0xFFFF));
   1284       case Iop_Or32:
   1285          return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
   1286       case Iop_CmpEQ8x8:
   1287       case Iop_Or64:
   1288          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
   1289       case Iop_CmpEQ8x16:
   1290       case Iop_CmpEQ16x8:
   1291       case Iop_CmpEQ32x4:
   1292          return IRExpr_Const(IRConst_V128(0xFFFF));
   1293       default:
   1294          ppIROp(op);
   1295          vpanic("mkOnesOfPrimopResultType: bad primop");
   1296    }
   1297 }
   1298 
   1299 /* Helpers for folding Clz32/64. */
   1300 static UInt fold_Clz64 ( ULong value )
   1301 {
   1302    UInt i;
   1303    vassert(value != 0ULL); /* no defined semantics for arg==0 */
   1304    for (i = 0; i < 64; ++i) {
   1305       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   1306    }
   1307    vassert(0);
   1308    /*NOTREACHED*/
   1309    return 0;
   1310 }
   1311 
   1312 static UInt fold_Clz32 ( UInt value )
   1313 {
   1314    UInt i;
   1315    vassert(value != 0); /* no defined semantics for arg==0 */
   1316    for (i = 0; i < 32; ++i) {
   1317       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   1318    }
   1319    vassert(0);
   1320    /*NOTREACHED*/
   1321    return 0;
   1322 }
   1323 
   1324 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   1325    the corresponding real constant. */
   1326 //XXX re-check this before use
   1327 //static ULong de_summarise_V64 ( UChar v64 )
   1328 //{
   1329 //   ULong r = 0;
   1330 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
   1331 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
   1332 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
   1333 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
   1334 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
   1335 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
   1336 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
   1337 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
   1338 //   return r;
   1339 //}
   1340 
   1341 /* Helper for arbitrary expression pattern matching in flat IR.  If
   1342    'e' is a reference to a tmp, look it up in env -- repeatedly, if
   1343    necessary -- until it resolves to a non-tmp.  Note that this can
   1344    return NULL if it can't resolve 'e' to a new expression, which will
   1345    be the case if 'e' is instead defined by an IRStmt (IRDirty or
   1346    LLSC). */
   1347 static IRExpr* chase ( IRExpr** env, IRExpr* e )
   1348 {
   1349    /* Why is this loop guaranteed to terminate?  Because all tmps must
   1350       have definitions before use, hence a tmp cannot be bound
   1351       (directly or indirectly) to itself. */
   1352    while (e->tag == Iex_RdTmp) {
   1353       if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
   1354       e = env[(Int)e->Iex.RdTmp.tmp];
   1355       if (e == NULL) break;
   1356    }
   1357    return e;
   1358 }
   1359 
   1360 /* Similar to |chase|, but follows at most one level of tmp reference. */
   1361 static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
   1362 {
   1363    if (e == NULL || e->tag != Iex_RdTmp)
   1364       return e;
   1365    else
   1366       return env[(Int)e->Iex.RdTmp.tmp];
   1367 }
   1368 
   1369 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
   1370 {
   1371    Int     shift;
   1372    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1373 
   1374    switch (e->tag) {
   1375    case Iex_Unop:
   1376       /* UNARY ops */
   1377       if (e->Iex.Unop.arg->tag == Iex_Const) {
   1378          switch (e->Iex.Unop.op) {
   1379          case Iop_1Uto8:
   1380             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1381                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1382                     ? 1 : 0)));
   1383             break;
   1384          case Iop_1Uto32:
   1385             e2 = IRExpr_Const(IRConst_U32(
   1386                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1387                     ? 1 : 0));
   1388             break;
   1389          case Iop_1Uto64:
   1390             e2 = IRExpr_Const(IRConst_U64(
   1391                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1392                     ? 1 : 0));
   1393             break;
   1394 
   1395          case Iop_1Sto8:
   1396             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1397                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1398                     ? 0xFF : 0)));
   1399             break;
   1400          case Iop_1Sto16:
   1401             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1402                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1403                     ? 0xFFFF : 0)));
   1404             break;
   1405          case Iop_1Sto32:
   1406             e2 = IRExpr_Const(IRConst_U32(
   1407                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1408                     ? 0xFFFFFFFF : 0));
   1409             break;
   1410          case Iop_1Sto64:
   1411             e2 = IRExpr_Const(IRConst_U64(
   1412                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1413                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1414             break;
   1415 
   1416          case Iop_8Sto32: {
   1417             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1418             u32 <<= 24;
   1419             u32 = (Int)u32 >> 24;   /* signed shift */
   1420             e2 = IRExpr_Const(IRConst_U32(u32));
   1421             break;
   1422          }
   1423          case Iop_16Sto32: {
   1424             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1425             u32 <<= 16;
   1426             u32 = (Int)u32 >> 16;   /* signed shift */
   1427             e2 = IRExpr_Const(IRConst_U32(u32));
   1428             break;
   1429          }
   1430          case Iop_8Uto64:
   1431             e2 = IRExpr_Const(IRConst_U64(
   1432                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1433             break;
   1434          case Iop_16Uto64:
   1435             e2 = IRExpr_Const(IRConst_U64(
   1436                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1437             break;
   1438          case Iop_8Uto32:
   1439             e2 = IRExpr_Const(IRConst_U32(
   1440                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1441             break;
   1442          case Iop_8Sto16: {
   1443             UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1444             u16 <<= 8;
   1445             u16 = (Short)u16 >> 8;  /* signed shift */
   1446             e2 = IRExpr_Const(IRConst_U16(u16));
   1447             break;
   1448          }
   1449          case Iop_8Uto16:
   1450             e2 = IRExpr_Const(IRConst_U16(
   1451                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1452             break;
   1453          case Iop_16Uto32:
   1454             e2 = IRExpr_Const(IRConst_U32(
   1455                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1456             break;
   1457          case Iop_32to16:
   1458             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1459                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1460             break;
   1461          case Iop_32to8:
   1462             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1463                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1464             break;
   1465          case Iop_32to1:
   1466             e2 = IRExpr_Const(IRConst_U1(toBool(
   1467                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1468                  )));
   1469             break;
   1470          case Iop_64to1:
   1471             e2 = IRExpr_Const(IRConst_U1(toBool(
   1472                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1473                  )));
   1474             break;
   1475 
   1476          case Iop_NotV128:
   1477             e2 = IRExpr_Const(IRConst_V128(
   1478                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
   1479             break;
   1480          case Iop_Not64:
   1481             e2 = IRExpr_Const(IRConst_U64(
   1482                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1483             break;
   1484          case Iop_Not32:
   1485             e2 = IRExpr_Const(IRConst_U32(
   1486                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1487             break;
   1488          case Iop_Not16:
   1489             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1490                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1491             break;
   1492          case Iop_Not8:
   1493             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1494                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1495             break;
   1496 
   1497          case Iop_Not1:
   1498             e2 = IRExpr_Const(IRConst_U1(
   1499                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1500             break;
   1501 
   1502          case Iop_64to8: {
   1503             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1504             w64 &= 0xFFULL;
   1505             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1506             break;
   1507          }
   1508          case Iop_64to16: {
   1509             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1510             w64 &= 0xFFFFULL;
   1511             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1512             break;
   1513          }
   1514          case Iop_64to32: {
   1515             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1516             w64 &= 0x00000000FFFFFFFFULL;
   1517             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1518             break;
   1519          }
   1520          case Iop_64HIto32: {
   1521             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1522             w64 >>= 32;
   1523             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1524             break;
   1525          }
   1526          case Iop_32Uto64:
   1527             e2 = IRExpr_Const(IRConst_U64(
   1528                     0xFFFFFFFFULL
   1529                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1530             break;
   1531          case Iop_16Sto64: {
   1532             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1533             u64 <<= 48;
   1534             u64 = (Long)u64 >> 48;   /* signed shift */
   1535             e2 = IRExpr_Const(IRConst_U64(u64));
   1536             break;
   1537          }
   1538          case Iop_32Sto64: {
   1539             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1540             u64 <<= 32;
   1541             u64 = (Long)u64 >> 32;   /* signed shift */
   1542             e2 = IRExpr_Const(IRConst_U64(u64));
   1543             break;
   1544          }
   1545 
   1546          case Iop_16to8: {
   1547             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1548             w16 &= 0xFF;
   1549             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1550             break;
   1551          }
   1552          case Iop_16HIto8: {
   1553             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1554             w16 >>= 8;
   1555             w16 &= 0xFF;
   1556             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1557             break;
   1558          }
   1559 
   1560          case Iop_CmpNEZ8:
   1561             e2 = IRExpr_Const(IRConst_U1(toBool(
   1562                     0 !=
   1563                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1564                  )));
   1565             break;
   1566          case Iop_CmpNEZ32:
   1567             e2 = IRExpr_Const(IRConst_U1(toBool(
   1568                     0 !=
   1569                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1570                  )));
   1571             break;
   1572          case Iop_CmpNEZ64:
   1573             e2 = IRExpr_Const(IRConst_U1(toBool(
   1574                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1575                  )));
   1576             break;
   1577 
   1578          case Iop_CmpwNEZ32: {
   1579             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1580             if (w32 == 0)
   1581                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1582             else
   1583                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1584             break;
   1585          }
   1586          case Iop_CmpwNEZ64: {
   1587             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1588             if (w64 == 0)
   1589                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1590             else
   1591                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1592             break;
   1593          }
   1594 
   1595          case Iop_Left32: {
   1596             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1597             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1598             s32 = (s32 | (-s32));
   1599             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1600             break;
   1601          }
   1602 
   1603          case Iop_Left64: {
   1604             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1605             Long  s64 = (Long)u64;
   1606             s64 = (s64 | (-s64));
   1607             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1608             break;
   1609          }
   1610 
   1611          case Iop_Clz32: {
   1612             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1613             if (u32 != 0)
   1614                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
   1615             break;
   1616          }
   1617          case Iop_Clz64: {
   1618             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1619             if (u64 != 0ULL)
   1620                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
   1621             break;
   1622          }
   1623 
   1624          /* For these vector ones, can't fold all cases, but at least
   1625             do the most obvious one.  Could do better here using
   1626             summarise/desummarise of vector constants, but too
   1627             difficult to verify; hence just handle the zero cases. */
   1628          case Iop_32UtoV128: {
   1629             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1630             if (0 == u32) {
   1631                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1632             } else {
   1633                goto unhandled;
   1634             }
   1635             break;
   1636          }
   1637          case Iop_V128to64: {
   1638             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1639             if (0 == ((v128 >> 0) & 0xFF)) {
   1640                e2 = IRExpr_Const(IRConst_U64(0));
   1641             } else {
   1642                goto unhandled;
   1643             }
   1644             break;
   1645          }
   1646          case Iop_V128HIto64: {
   1647             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1648             if (0 == ((v128 >> 8) & 0xFF)) {
   1649                e2 = IRExpr_Const(IRConst_U64(0));
   1650             } else {
   1651                goto unhandled;
   1652             }
   1653             break;
   1654          }
   1655          case Iop_64UtoV128: {
   1656             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1657             if (0 == u64) {
   1658                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1659             } else {
   1660                goto unhandled;
   1661             }
   1662             break;
   1663          }
   1664 
   1665          /* Even stupider (although still correct ..) */
   1666          case Iop_V256to64_0: case Iop_V256to64_1:
   1667          case Iop_V256to64_2: case Iop_V256to64_3: {
   1668             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
   1669             if (v256 == 0x00000000) {
   1670                e2 = IRExpr_Const(IRConst_U64(0));
   1671             } else {
   1672                goto unhandled;
   1673             }
   1674             break;
   1675          }
   1676 
   1677          case Iop_ZeroHI64ofV128: {
   1678             /* Could do better here -- only need to look at the bottom 64 bits
   1679                of the argument, really. */
   1680             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1681             if (v128 == 0x0000) {
   1682                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1683             } else {
   1684                goto unhandled;
   1685             }
   1686             break;
   1687          }
   1688 
   1689          default:
   1690             goto unhandled;
   1691       }
   1692       }
   1693       break;
   1694 
   1695    case Iex_Binop:
   1696       /* BINARY ops */
   1697       if (e->Iex.Binop.arg1->tag == Iex_Const
   1698           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1699          /* cases where both args are consts */
   1700          switch (e->Iex.Binop.op) {
   1701 
   1702             /* -- Or -- */
   1703             case Iop_Or8:
   1704                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1705                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1706                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1707                break;
   1708             case Iop_Or16:
   1709                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1710                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1711                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1712                break;
   1713             case Iop_Or32:
   1714                e2 = IRExpr_Const(IRConst_U32(
   1715                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1716                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1717                break;
   1718             case Iop_Or64:
   1719                e2 = IRExpr_Const(IRConst_U64(
   1720                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1721                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1722                break;
   1723             case Iop_OrV128:
   1724                e2 = IRExpr_Const(IRConst_V128(
   1725                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1726                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1727                break;
   1728 
   1729             /* -- Xor -- */
   1730             case Iop_Xor8:
   1731                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1732                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1733                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1734                break;
   1735             case Iop_Xor16:
   1736                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1737                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1738                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1739                break;
   1740             case Iop_Xor32:
   1741                e2 = IRExpr_Const(IRConst_U32(
   1742                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1743                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1744                break;
   1745             case Iop_Xor64:
   1746                e2 = IRExpr_Const(IRConst_U64(
   1747                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1748                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1749                break;
   1750             case Iop_XorV128:
   1751                e2 = IRExpr_Const(IRConst_V128(
   1752                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1753                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1754                break;
   1755 
   1756             /* -- And -- */
   1757             case Iop_And8:
   1758                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1759                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1760                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1761                break;
   1762             case Iop_And16:
   1763                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1764                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1765                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1766                break;
   1767             case Iop_And32:
   1768                e2 = IRExpr_Const(IRConst_U32(
   1769                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1770                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1771                break;
   1772             case Iop_And64:
   1773                e2 = IRExpr_Const(IRConst_U64(
   1774                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1775                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1776                break;
   1777             case Iop_AndV128:
   1778                e2 = IRExpr_Const(IRConst_V128(
   1779                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1780                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1781                break;
   1782 
   1783             /* -- Add -- */
   1784             case Iop_Add8:
   1785                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1786                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1787                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1788                break;
   1789             case Iop_Add32:
   1790                e2 = IRExpr_Const(IRConst_U32(
   1791                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1792                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1793                break;
   1794             case Iop_Add64:
   1795                e2 = IRExpr_Const(IRConst_U64(
   1796                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1797                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1798                break;
   1799 
   1800             /* -- Sub -- */
   1801             case Iop_Sub8:
   1802                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1803                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1804                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1805                break;
   1806             case Iop_Sub32:
   1807                e2 = IRExpr_Const(IRConst_U32(
   1808                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1809                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1810                break;
   1811             case Iop_Sub64:
   1812                e2 = IRExpr_Const(IRConst_U64(
   1813                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1814                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1815                break;
   1816 
   1817             /* -- Max32U -- */
   1818             case Iop_Max32U: {
   1819                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1820                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1821                UInt res  = u32a > u32b ? u32a : u32b;
   1822                e2 = IRExpr_Const(IRConst_U32(res));
   1823                break;
   1824             }
   1825 
   1826             /* -- Mul -- */
   1827             case Iop_Mul32:
   1828                e2 = IRExpr_Const(IRConst_U32(
   1829                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1830                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1831                break;
   1832             case Iop_Mul64:
   1833                e2 = IRExpr_Const(IRConst_U64(
   1834                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1835                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1836                break;
   1837 
   1838             case Iop_MullS32: {
   1839                /* very paranoid */
   1840                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1841                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1842                Int   s32a = (Int)u32a;
   1843                Int   s32b = (Int)u32b;
   1844                Long  s64a = (Long)s32a;
   1845                Long  s64b = (Long)s32b;
   1846                Long  sres = s64a * s64b;
   1847                ULong ures = (ULong)sres;
   1848                e2 = IRExpr_Const(IRConst_U64(ures));
   1849                break;
   1850             }
   1851 
   1852             /* -- Shl -- */
   1853             case Iop_Shl32:
   1854                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1855                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1856                if (shift >= 0 && shift <= 31)
   1857                   e2 = IRExpr_Const(IRConst_U32(
   1858                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1859                            << shift)));
   1860                break;
   1861             case Iop_Shl64:
   1862                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1863                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1864                if (shift >= 0 && shift <= 63)
   1865                   e2 = IRExpr_Const(IRConst_U64(
   1866                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1867                            << shift)));
   1868                break;
   1869 
   1870             /* -- Sar -- */
   1871             case Iop_Sar32: {
   1872                /* paranoid ... */
   1873                /*signed*/ Int s32;
   1874                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1875                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1876                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1877                if (shift >= 0 && shift <= 31) {
   1878                   s32 >>=/*signed*/ shift;
   1879                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1880                }
   1881                break;
   1882             }
   1883             case Iop_Sar64: {
   1884                /* paranoid ... */
   1885                /*signed*/ Long s64;
   1886                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1887                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1888                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1889                if (shift >= 0 && shift <= 63) {
   1890                   s64 >>=/*signed*/ shift;
   1891                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1892                }
   1893                break;
   1894             }
   1895 
   1896             /* -- Shr -- */
   1897             case Iop_Shr32: {
   1898                /* paranoid ... */
   1899                /*unsigned*/ UInt u32;
   1900                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1901                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1902                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1903                if (shift >= 0 && shift <= 31) {
   1904                   u32 >>=/*unsigned*/ shift;
   1905                   e2 = IRExpr_Const(IRConst_U32(u32));
   1906                }
   1907                break;
   1908             }
   1909             case Iop_Shr64: {
   1910                /* paranoid ... */
   1911                /*unsigned*/ ULong u64;
   1912                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1913                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1914                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1915                if (shift >= 0 && shift <= 63) {
   1916                   u64 >>=/*unsigned*/ shift;
   1917                   e2 = IRExpr_Const(IRConst_U64(u64));
   1918                }
   1919                break;
   1920             }
   1921 
   1922             /* -- CmpEQ -- */
   1923             case Iop_CmpEQ32:
   1924                e2 = IRExpr_Const(IRConst_U1(toBool(
   1925                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1926                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1927                break;
   1928             case Iop_CmpEQ64:
   1929                e2 = IRExpr_Const(IRConst_U1(toBool(
   1930                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1931                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1932                break;
   1933 
   1934             /* -- CmpNE -- */
   1935             case Iop_CmpNE8:
   1936             case Iop_CasCmpNE8:
   1937             case Iop_ExpCmpNE8:
   1938                e2 = IRExpr_Const(IRConst_U1(toBool(
   1939                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1940                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1941                break;
   1942             case Iop_CmpNE32:
   1943             case Iop_CasCmpNE32:
   1944             case Iop_ExpCmpNE32:
   1945                e2 = IRExpr_Const(IRConst_U1(toBool(
   1946                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1947                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1948                break;
   1949             case Iop_CmpNE64:
   1950             case Iop_CasCmpNE64:
   1951             case Iop_ExpCmpNE64:
   1952                e2 = IRExpr_Const(IRConst_U1(toBool(
   1953                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1954                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1955                break;
   1956 
   1957             /* -- CmpLEU -- */
   1958             case Iop_CmpLE32U:
   1959                e2 = IRExpr_Const(IRConst_U1(toBool(
   1960                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1961                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1962                break;
   1963             case Iop_CmpLE64U:
   1964                e2 = IRExpr_Const(IRConst_U1(toBool(
   1965                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1966                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1967                break;
   1968 
   1969             /* -- CmpLES -- */
   1970             case Iop_CmpLE32S:
   1971                e2 = IRExpr_Const(IRConst_U1(toBool(
   1972                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1973                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1974                break;
   1975             case Iop_CmpLE64S:
   1976                e2 = IRExpr_Const(IRConst_U1(toBool(
   1977                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1978                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1979                break;
   1980 
   1981             /* -- CmpLTS -- */
   1982             case Iop_CmpLT32S:
   1983                e2 = IRExpr_Const(IRConst_U1(toBool(
   1984                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1985                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1986                break;
   1987             case Iop_CmpLT64S:
   1988                e2 = IRExpr_Const(IRConst_U1(toBool(
   1989                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1990                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1991                break;
   1992 
   1993             /* -- CmpLTU -- */
   1994             case Iop_CmpLT32U:
   1995                e2 = IRExpr_Const(IRConst_U1(toBool(
   1996                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1997                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1998                break;
   1999             case Iop_CmpLT64U:
   2000                e2 = IRExpr_Const(IRConst_U1(toBool(
   2001                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   2002                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   2003                break;
   2004 
   2005             /* -- CmpORD -- */
   2006             case Iop_CmpORD32S: {
   2007                /* very paranoid */
   2008                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   2009                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   2010                Int   s32a = (Int)u32a;
   2011                Int   s32b = (Int)u32b;
   2012                Int   r = 0x2; /* EQ */
   2013                if (s32a < s32b) {
   2014                   r = 0x8; /* LT */
   2015                }
   2016                else if (s32a > s32b) {
   2017                   r = 0x4; /* GT */
   2018                }
   2019                e2 = IRExpr_Const(IRConst_U32(r));
   2020                break;
   2021             }
   2022 
   2023             /* -- nHLto2n -- */
   2024             case Iop_32HLto64:
   2025                e2 = IRExpr_Const(IRConst_U64(
   2026                        (((ULong)(e->Iex.Binop.arg1
   2027                                   ->Iex.Const.con->Ico.U32)) << 32)
   2028                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   2029                     ));
   2030                break;
   2031             case Iop_64HLto128:
   2032                /* We can't fold this, because there is no way to
   2033                   express he result in IR, but at least pretend to
   2034                   handle it, so as to stop getting blasted with
   2035                   no-rule-for-this-primop messages. */
   2036                break;
   2037             /* For this vector one, can't fold all cases, but at
   2038                least do the most obvious one.  Could do better here
   2039                using summarise/desummarise of vector constants, but
   2040                too difficult to verify; hence just handle the zero
   2041                cases. */
   2042             case Iop_64HLtoV128: {
   2043                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
   2044                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   2045                if (0 == argHi && 0 == argLo) {
   2046                   e2 = IRExpr_Const(IRConst_V128(0));
   2047                } else {
   2048                   goto unhandled;
   2049                }
   2050                break;
   2051             }
   2052             /* Same reasoning for the 256-bit version. */
   2053             case Iop_V128HLtoV256: {
   2054                IRExpr* argHi = e->Iex.Binop.arg1;
   2055                IRExpr* argLo = e->Iex.Binop.arg2;
   2056                if (isZeroV128(argHi) && isZeroV128(argLo)) {
   2057                   e2 = IRExpr_Const(IRConst_V256(0));
   2058                } else {
   2059                   goto unhandled;
   2060                }
   2061                break;
   2062             }
   2063 
   2064             /* -- V128 stuff -- */
   2065             case Iop_InterleaveLO8x16: {
   2066                /* This turns up a lot in Memcheck instrumentation of
   2067                   Icc generated code.  I don't know why. */
   2068                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
   2069                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
   2070                if (0 == arg1 && 0 == arg2) {
   2071                   e2 = IRExpr_Const(IRConst_V128(0));
   2072                } else {
   2073                   goto unhandled;
   2074                }
   2075                break;
   2076             }
   2077 
   2078             default:
   2079                goto unhandled;
   2080          }
   2081 
   2082       } else {
   2083 
   2084          /* other cases (identities, etc) */
   2085          switch (e->Iex.Binop.op) {
   2086 
   2087             case Iop_Shl32:
   2088             case Iop_Shl64:
   2089             case Iop_Shr64:
   2090             case Iop_Sar64:
   2091                /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
   2092                if (isZeroU(e->Iex.Binop.arg2)) {
   2093                   e2 = e->Iex.Binop.arg1;
   2094                   break;
   2095                }
   2096                /* Shl32/Shl64/Shr64(0,x) ==> 0 */
   2097                if (isZeroU(e->Iex.Binop.arg1)) {
   2098                   e2 = e->Iex.Binop.arg1;
   2099                   break;
   2100                }
   2101                break;
   2102 
   2103             case Iop_Sar32:
   2104             case Iop_Shr32:
   2105                /* Shr32/Sar32(x,0) ==> x */
   2106                if (isZeroU(e->Iex.Binop.arg2)) {
   2107                   e2 = e->Iex.Binop.arg1;
   2108                   break;
   2109                }
   2110                break;
   2111 
   2112             case Iop_Or8:
   2113             case Iop_Or16:
   2114             case Iop_Or32:
   2115             case Iop_Or64:
   2116             case Iop_Max32U:
   2117                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
   2118                if (isZeroU(e->Iex.Binop.arg2)) {
   2119                   e2 = e->Iex.Binop.arg1;
   2120                   break;
   2121                }
   2122                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
   2123                if (isZeroU(e->Iex.Binop.arg1)) {
   2124                   e2 = e->Iex.Binop.arg2;
   2125                   break;
   2126                }
   2127                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
   2128                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
   2129                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
   2130                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2131                   break;
   2132                }
   2133                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
   2134                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2135                   e2 = e->Iex.Binop.arg1;
   2136                   break;
   2137                }
   2138                break;
   2139 
   2140             case Iop_Add8:
   2141                /* Add8(t,t) ==> t << 1.
   2142                   Memcheck doesn't understand that
   2143                   x+x produces a defined least significant bit, and it seems
   2144                   simplest just to get rid of the problem by rewriting it
   2145                   out, since the opportunity to do so exists. */
   2146                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2147                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
   2148                                     IRExpr_Const(IRConst_U8(1)));
   2149                   break;
   2150                }
   2151                break;
   2152 
   2153                /* NB no Add16(t,t) case yet as no known test case exists */
   2154 
   2155             case Iop_Add32:
   2156             case Iop_Add64:
   2157                /* Add32/Add64(x,0) ==> x */
   2158                if (isZeroU(e->Iex.Binop.arg2)) {
   2159                   e2 = e->Iex.Binop.arg1;
   2160                   break;
   2161                }
   2162                /* Add32/Add64(0,x) ==> x */
   2163                if (isZeroU(e->Iex.Binop.arg1)) {
   2164                   e2 = e->Iex.Binop.arg2;
   2165                   break;
   2166                }
   2167                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
   2168                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2169                   e2 = IRExpr_Binop(
   2170                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
   2171                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
   2172                   break;
   2173                }
   2174                break;
   2175 
   2176             case Iop_Sub32:
   2177             case Iop_Sub64:
   2178                /* Sub32/Sub64(x,0) ==> x */
   2179                if (isZeroU(e->Iex.Binop.arg2)) {
   2180                   e2 = e->Iex.Binop.arg1;
   2181                   break;
   2182                }
   2183                /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
   2184                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2185                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2186                   break;
   2187                }
   2188                break;
   2189             case Iop_Sub8x16:
   2190                /* Sub8x16(x,0) ==> x */
   2191                if (isZeroV128(e->Iex.Binop.arg2)) {
   2192                   e2 = e->Iex.Binop.arg1;
   2193                   break;
   2194                }
   2195                break;
   2196 
   2197             case Iop_And8:
   2198             case Iop_And16:
   2199             case Iop_And32:
   2200             case Iop_And64:
   2201                /* And8/And16/And32/And64(x,1---1b) ==> x */
   2202                if (isOnesU(e->Iex.Binop.arg2)) {
   2203                   e2 = e->Iex.Binop.arg1;
   2204                   break;
   2205                }
   2206                /* And8/And16/And32/And64(1---1b,x) ==> x */
   2207                if (isOnesU(e->Iex.Binop.arg1)) {
   2208                   e2 = e->Iex.Binop.arg2;
   2209                   break;
   2210                }
   2211                /* And8/And16/And32/And64(x,0) ==> 0 */
   2212                if (isZeroU(e->Iex.Binop.arg2)) {
   2213                   e2 = e->Iex.Binop.arg2;
   2214                   break;
   2215                }
   2216                /* And8/And16/And32/And64(0,x) ==> 0 */
   2217                if (isZeroU(e->Iex.Binop.arg1)) {
   2218                   e2 = e->Iex.Binop.arg1;
   2219                   break;
   2220                }
   2221                /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
   2222                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2223                   e2 = e->Iex.Binop.arg1;
   2224                   break;
   2225                }
   2226                break;
   2227 
   2228             case Iop_AndV128:
   2229             case Iop_AndV256:
   2230                /* And8/And16/AndV128/AndV256(t,t)
   2231                   ==> t, for some IRTemp t */
   2232                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2233                   e2 = e->Iex.Binop.arg1;
   2234                   break;
   2235                }
   2236                /* Deal with either arg zero.  Could handle other And
   2237                   cases here too. */
   2238                if (e->Iex.Binop.op == Iop_AndV256
   2239                    && (isZeroV256(e->Iex.Binop.arg1)
   2240                        || isZeroV256(e->Iex.Binop.arg2))) {
   2241                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2242                   break;
   2243                } else if (e->Iex.Binop.op == Iop_AndV128
   2244                           && (isZeroV128(e->Iex.Binop.arg1)
   2245                               || isZeroV128(e->Iex.Binop.arg2))) {
   2246                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2247                   break;
   2248                }
   2249                break;
   2250 
   2251             case Iop_OrV128:
   2252             case Iop_OrV256:
   2253                /* V128/V256(t,t) ==> t, for some IRTemp t */
   2254                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2255                   e2 = e->Iex.Binop.arg1;
   2256                   break;
   2257                }
   2258                /* OrV128(t,0) ==> t */
   2259                if (e->Iex.Binop.op == Iop_OrV128) {
   2260                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2261                      e2 = e->Iex.Binop.arg1;
   2262                      break;
   2263                   }
   2264                   if (isZeroV128(e->Iex.Binop.arg1)) {
   2265                      e2 = e->Iex.Binop.arg2;
   2266                      break;
   2267                   }
   2268                }
   2269                /* OrV256(t,0) ==> t */
   2270                if (e->Iex.Binop.op == Iop_OrV256) {
   2271                   if (isZeroV256(e->Iex.Binop.arg2)) {
   2272                      e2 = e->Iex.Binop.arg1;
   2273                      break;
   2274                   }
   2275                   //Disabled because there's no known test case right now.
   2276                   //if (isZeroV256(e->Iex.Binop.arg1)) {
   2277                   //   e2 = e->Iex.Binop.arg2;
   2278                   //   break;
   2279                   //}
   2280                }
   2281                break;
   2282 
   2283             case Iop_Xor8:
   2284             case Iop_Xor16:
   2285             case Iop_Xor32:
   2286             case Iop_Xor64:
   2287             case Iop_XorV128:
   2288                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   2289                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2290                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2291                   break;
   2292                }
   2293                /* XorV128(t,0) ==> t */
   2294                if (e->Iex.Binop.op == Iop_XorV128) {
   2295                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2296                      e2 = e->Iex.Binop.arg1;
   2297                      break;
   2298                   }
   2299                   //Disabled because there's no known test case right now.
   2300                   //if (isZeroV128(e->Iex.Binop.arg1)) {
   2301                   //   e2 = e->Iex.Binop.arg2;
   2302                   //   break;
   2303                   //}
   2304                } else {
   2305                   /* Xor8/16/32/64(0,t) ==> t */
   2306                   if (isZeroU(e->Iex.Binop.arg1)) {
   2307                      e2 = e->Iex.Binop.arg2;
   2308                      break;
   2309                   }
   2310                   /* Xor8/16/32/64(t,0) ==> t */
   2311                   if (isZeroU(e->Iex.Binop.arg2)) {
   2312                      e2 = e->Iex.Binop.arg1;
   2313                      break;
   2314                   }
   2315                }
   2316                break;
   2317 
   2318             case Iop_CmpNE32:
   2319                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
   2320                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2321                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2322                   break;
   2323                }
   2324                /* CmpNE32(1Uto32(b), 0) ==> b */
   2325                if (isZeroU32(e->Iex.Binop.arg2)) {
   2326                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
   2327                   if (a1 && a1->tag == Iex_Unop
   2328                          && a1->Iex.Unop.op == Iop_1Uto32) {
   2329                      e2 = a1->Iex.Unop.arg;
   2330                      break;
   2331                   }
   2332                }
   2333                break;
   2334 
   2335             case Iop_CmpEQ32:
   2336             case Iop_CmpEQ64:
   2337             case Iop_CmpEQ8x8:
   2338             case Iop_CmpEQ8x16:
   2339             case Iop_CmpEQ16x8:
   2340             case Iop_CmpEQ32x4:
   2341                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2342                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2343                   break;
   2344                }
   2345                break;
   2346 
   2347             default:
   2348                break;
   2349          }
   2350       }
   2351       break;
   2352 
   2353    case Iex_ITE:
   2354       /* ITE */
   2355       /* is the discriminant is a constant? */
   2356       if (e->Iex.ITE.cond->tag == Iex_Const) {
   2357          /* assured us by the IR type rules */
   2358          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
   2359          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
   2360                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
   2361       }
   2362       else
   2363       /* are the arms identical? (pretty weedy test) */
   2364       if (sameIRExprs(env, e->Iex.ITE.iftrue,
   2365                            e->Iex.ITE.iffalse)) {
   2366          e2 = e->Iex.ITE.iffalse;
   2367       }
   2368       break;
   2369 
   2370    default:
   2371       /* not considered */
   2372       break;
   2373    }
   2374 
   2375    /* Show cases where we've found but not folded 'op(t,t)'.  Be
   2376       careful not to call sameIRExprs with values of different types,
   2377       though, else it will assert (and so it should!).  We can't
   2378       conveniently call typeOfIRExpr on the two args without a whole
   2379       bunch of extra plumbing to pass in a type env, so just use a
   2380       hacky test to check the arguments are not anything that might
   2381       sameIRExprs to assert.  This is only OK because this kludge is
   2382       only used for debug printing, not for "real" operation.  For
   2383       "real" operation (ie, all other calls to sameIRExprs), it is
   2384       essential that the to args have the same type.
   2385 
   2386       The "right" solution is to plumb the containing block's
   2387       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
   2388       that's a bunch of extra parameter passing which will just slow
   2389       down the normal case, for no purpose. */
   2390    if (vex_control.iropt_verbosity > 0
   2391        && e == e2
   2392        && e->tag == Iex_Binop
   2393        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
   2394                                                     e->Iex.Binop.arg2)
   2395        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2396       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
   2397       ppIRExpr(e);
   2398       vex_printf("\n");
   2399    }
   2400 
   2401    /* Show the overall results of folding. */
   2402    if (DEBUG_IROPT && e2 != e) {
   2403       vex_printf("FOLD: ");
   2404       ppIRExpr(e); vex_printf("  ->  ");
   2405       ppIRExpr(e2); vex_printf("\n");
   2406    }
   2407 
   2408    return e2;
   2409 
   2410  unhandled:
   2411 #  if 0
   2412    vex_printf("\n\n");
   2413    ppIRExpr(e);
   2414    vpanic("fold_Expr: no rule for the above");
   2415 #  else
   2416    if (vex_control.iropt_verbosity > 0) {
   2417       vex_printf("vex iropt: fold_Expr: no const rule for: ");
   2418       ppIRExpr(e);
   2419       vex_printf("\n");
   2420    }
   2421    return e2;
   2422 #  endif
   2423 }
   2424 
   2425 
   2426 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   2427    1-level due to previous flattening pass. */
   2428 
   2429 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   2430 {
   2431    switch (ex->tag) {
   2432       case Iex_RdTmp:
   2433          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   2434             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
   2435             if (rhs->tag == Iex_RdTmp)
   2436                return rhs;
   2437             if (rhs->tag == Iex_Const
   2438                 && rhs->Iex.Const.con->tag != Ico_F64i)
   2439                return rhs;
   2440          }
   2441          /* not bound in env */
   2442          return ex;
   2443 
   2444       case Iex_Const:
   2445       case Iex_Get:
   2446          return ex;
   2447 
   2448       case Iex_GetI:
   2449          vassert(isIRAtom(ex->Iex.GetI.ix));
   2450          return IRExpr_GetI(
   2451             ex->Iex.GetI.descr,
   2452             subst_Expr(env, ex->Iex.GetI.ix),
   2453             ex->Iex.GetI.bias
   2454          );
   2455 
   2456       case Iex_Qop: {
   2457          IRQop* qop = ex->Iex.Qop.details;
   2458          vassert(isIRAtom(qop->arg1));
   2459          vassert(isIRAtom(qop->arg2));
   2460          vassert(isIRAtom(qop->arg3));
   2461          vassert(isIRAtom(qop->arg4));
   2462          return IRExpr_Qop(
   2463                    qop->op,
   2464                    subst_Expr(env, qop->arg1),
   2465                    subst_Expr(env, qop->arg2),
   2466                    subst_Expr(env, qop->arg3),
   2467                    subst_Expr(env, qop->arg4)
   2468                 );
   2469       }
   2470 
   2471       case Iex_Triop: {
   2472          IRTriop* triop = ex->Iex.Triop.details;
   2473          vassert(isIRAtom(triop->arg1));
   2474          vassert(isIRAtom(triop->arg2));
   2475          vassert(isIRAtom(triop->arg3));
   2476          return IRExpr_Triop(
   2477                    triop->op,
   2478                    subst_Expr(env, triop->arg1),
   2479                    subst_Expr(env, triop->arg2),
   2480                    subst_Expr(env, triop->arg3)
   2481                 );
   2482       }
   2483 
   2484       case Iex_Binop:
   2485          vassert(isIRAtom(ex->Iex.Binop.arg1));
   2486          vassert(isIRAtom(ex->Iex.Binop.arg2));
   2487          return IRExpr_Binop(
   2488                    ex->Iex.Binop.op,
   2489                    subst_Expr(env, ex->Iex.Binop.arg1),
   2490                    subst_Expr(env, ex->Iex.Binop.arg2)
   2491                 );
   2492 
   2493       case Iex_Unop:
   2494          vassert(isIRAtom(ex->Iex.Unop.arg));
   2495          return IRExpr_Unop(
   2496                    ex->Iex.Unop.op,
   2497                    subst_Expr(env, ex->Iex.Unop.arg)
   2498                 );
   2499 
   2500       case Iex_Load:
   2501          vassert(isIRAtom(ex->Iex.Load.addr));
   2502          return IRExpr_Load(
   2503                    ex->Iex.Load.end,
   2504                    ex->Iex.Load.ty,
   2505                    subst_Expr(env, ex->Iex.Load.addr)
   2506                 );
   2507 
   2508       case Iex_CCall: {
   2509          Int      i;
   2510          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   2511          for (i = 0; args2[i]; i++) {
   2512             vassert(isIRAtom(args2[i]));
   2513             args2[i] = subst_Expr(env, args2[i]);
   2514          }
   2515          return IRExpr_CCall(
   2516                    ex->Iex.CCall.cee,
   2517                    ex->Iex.CCall.retty,
   2518                    args2
   2519                 );
   2520       }
   2521 
   2522       case Iex_ITE:
   2523          vassert(isIRAtom(ex->Iex.ITE.cond));
   2524          vassert(isIRAtom(ex->Iex.ITE.iftrue));
   2525          vassert(isIRAtom(ex->Iex.ITE.iffalse));
   2526          return IRExpr_ITE(
   2527                    subst_Expr(env, ex->Iex.ITE.cond),
   2528                    subst_Expr(env, ex->Iex.ITE.iftrue),
   2529                    subst_Expr(env, ex->Iex.ITE.iffalse)
   2530                 );
   2531 
   2532       default:
   2533          vex_printf("\n\n"); ppIRExpr(ex);
   2534          vpanic("subst_Expr");
   2535 
   2536    }
   2537 }
   2538 
   2539 
   2540 /* Apply the subst to stmt, then fold the result as much as possible.
   2541    Much simplified due to stmt being previously flattened.  As a
   2542    result of this, the stmt may wind up being turned into a no-op.
   2543 */
   2544 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
   2545 {
   2546 #  if 0
   2547    vex_printf("\nsubst and fold stmt\n");
   2548    ppIRStmt(st);
   2549    vex_printf("\n");
   2550 #  endif
   2551 
   2552    switch (st->tag) {
   2553       case Ist_AbiHint:
   2554          vassert(isIRAtom(st->Ist.AbiHint.base));
   2555          vassert(isIRAtom(st->Ist.AbiHint.nia));
   2556          return IRStmt_AbiHint(
   2557                    fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
   2558                    st->Ist.AbiHint.len,
   2559                    fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
   2560                 );
   2561       case Ist_Put:
   2562          vassert(isIRAtom(st->Ist.Put.data));
   2563          return IRStmt_Put(
   2564                    st->Ist.Put.offset,
   2565                    fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
   2566                 );
   2567 
   2568       case Ist_PutI: {
   2569          IRPutI *puti, *puti2;
   2570          puti = st->Ist.PutI.details;
   2571          vassert(isIRAtom(puti->ix));
   2572          vassert(isIRAtom(puti->data));
   2573          puti2 = mkIRPutI(puti->descr,
   2574                           fold_Expr(env, subst_Expr(env, puti->ix)),
   2575                           puti->bias,
   2576                           fold_Expr(env, subst_Expr(env, puti->data)));
   2577          return IRStmt_PutI(puti2);
   2578       }
   2579 
   2580       case Ist_WrTmp:
   2581          /* This is the one place where an expr (st->Ist.WrTmp.data) is
   2582             allowed to be more than just a constant or a tmp. */
   2583          return IRStmt_WrTmp(
   2584                    st->Ist.WrTmp.tmp,
   2585                    fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
   2586                 );
   2587 
   2588       case Ist_Store:
   2589          vassert(isIRAtom(st->Ist.Store.addr));
   2590          vassert(isIRAtom(st->Ist.Store.data));
   2591          return IRStmt_Store(
   2592                    st->Ist.Store.end,
   2593                    fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
   2594                    fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
   2595                 );
   2596 
   2597       case Ist_StoreG: {
   2598          IRStoreG* sg = st->Ist.StoreG.details;
   2599          vassert(isIRAtom(sg->addr));
   2600          vassert(isIRAtom(sg->data));
   2601          vassert(isIRAtom(sg->guard));
   2602          IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
   2603          IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
   2604          IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
   2605          if (fguard->tag == Iex_Const) {
   2606             /* The condition on this store has folded down to a constant. */
   2607             vassert(fguard->Iex.Const.con->tag == Ico_U1);
   2608             if (fguard->Iex.Const.con->Ico.U1 == False) {
   2609                return IRStmt_NoOp();
   2610             } else {
   2611                vassert(fguard->Iex.Const.con->Ico.U1 == True);
   2612                return IRStmt_Store(sg->end, faddr, fdata);
   2613             }
   2614          }
   2615          return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
   2616       }
   2617 
   2618       case Ist_LoadG: {
   2619          /* This is complicated.  If the guard folds down to 'false',
   2620             we can replace it with an assignment 'dst := alt', but if
   2621             the guard folds down to 'true', we can't conveniently
   2622             replace it with an unconditional load, because doing so
   2623             requires generating a new temporary, and that is not easy
   2624             to do at this point. */
   2625          IRLoadG* lg = st->Ist.LoadG.details;
   2626          vassert(isIRAtom(lg->addr));
   2627          vassert(isIRAtom(lg->alt));
   2628          vassert(isIRAtom(lg->guard));
   2629          IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
   2630          IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
   2631          IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
   2632          if (fguard->tag == Iex_Const) {
   2633             /* The condition on this load has folded down to a constant. */
   2634             vassert(fguard->Iex.Const.con->tag == Ico_U1);
   2635             if (fguard->Iex.Const.con->Ico.U1 == False) {
   2636                /* The load is not going to happen -- instead 'alt' is
   2637                   assigned to 'dst'.  */
   2638                return IRStmt_WrTmp(lg->dst, falt);
   2639             } else {
   2640                vassert(fguard->Iex.Const.con->Ico.U1 == True);
   2641                /* The load is always going to happen.  We want to
   2642                   convert to an unconditional load and assign to 'dst'
   2643                   (IRStmt_WrTmp).  Problem is we need an extra temp to
   2644                   hold the loaded value, but none is available.
   2645                   Instead, reconstitute the conditional load (with
   2646                   folded args, of course) and let the caller of this
   2647                   routine deal with the problem. */
   2648             }
   2649          }
   2650          return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
   2651       }
   2652 
   2653       case Ist_CAS: {
   2654          IRCAS *cas, *cas2;
   2655          cas = st->Ist.CAS.details;
   2656          vassert(isIRAtom(cas->addr));
   2657          vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   2658          vassert(isIRAtom(cas->expdLo));
   2659          vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   2660          vassert(isIRAtom(cas->dataLo));
   2661          cas2 = mkIRCAS(
   2662                    cas->oldHi, cas->oldLo, cas->end,
   2663                    fold_Expr(env, subst_Expr(env, cas->addr)),
   2664                    cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
   2665                                : NULL,
   2666                    fold_Expr(env, subst_Expr(env, cas->expdLo)),
   2667                    cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
   2668                                : NULL,
   2669                    fold_Expr(env, subst_Expr(env, cas->dataLo))
   2670                 );
   2671          return IRStmt_CAS(cas2);
   2672       }
   2673 
   2674       case Ist_LLSC:
   2675          vassert(isIRAtom(st->Ist.LLSC.addr));
   2676          if (st->Ist.LLSC.storedata)
   2677             vassert(isIRAtom(st->Ist.LLSC.storedata));
   2678          return IRStmt_LLSC(
   2679                    st->Ist.LLSC.end,
   2680                    st->Ist.LLSC.result,
   2681                    fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
   2682                    st->Ist.LLSC.storedata
   2683                       ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
   2684                       : NULL
   2685                 );
   2686 
   2687       case Ist_Dirty: {
   2688          Int     i;
   2689          IRDirty *d, *d2;
   2690          d = st->Ist.Dirty.details;
   2691          d2 = emptyIRDirty();
   2692          *d2 = *d;
   2693          d2->args = shallowCopyIRExprVec(d2->args);
   2694          if (d2->mFx != Ifx_None) {
   2695             vassert(isIRAtom(d2->mAddr));
   2696             d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
   2697          }
   2698          vassert(isIRAtom(d2->guard));
   2699          d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
   2700          for (i = 0; d2->args[i]; i++) {
   2701             IRExpr* arg = d2->args[i];
   2702             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
   2703                vassert(isIRAtom(arg));
   2704                d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
   2705             }
   2706          }
   2707          return IRStmt_Dirty(d2);
   2708       }
   2709 
   2710       case Ist_IMark:
   2711          return IRStmt_IMark(st->Ist.IMark.addr,
   2712                              st->Ist.IMark.len,
   2713                              st->Ist.IMark.delta);
   2714 
   2715       case Ist_NoOp:
   2716          return IRStmt_NoOp();
   2717 
   2718       case Ist_MBE:
   2719          return IRStmt_MBE(st->Ist.MBE.event);
   2720 
   2721       case Ist_Exit: {
   2722          IRExpr* fcond;
   2723          vassert(isIRAtom(st->Ist.Exit.guard));
   2724          fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
   2725          if (fcond->tag == Iex_Const) {
   2726             /* Interesting.  The condition on this exit has folded down to
   2727                a constant. */
   2728             vassert(fcond->Iex.Const.con->tag == Ico_U1);
   2729             if (fcond->Iex.Const.con->Ico.U1 == False) {
   2730                /* exit is never going to happen, so dump the statement. */
   2731                return IRStmt_NoOp();
   2732             } else {
   2733                vassert(fcond->Iex.Const.con->Ico.U1 == True);
   2734                /* Hmmm.  The exit has become unconditional.  Leave it
   2735                   as it is for now, since we'd have to truncate the BB
   2736                   at this point, which is tricky.  Such truncation is
   2737                   done later by the dead-code elimination pass. */
   2738                /* fall out into the reconstruct-the-exit code. */
   2739                if (vex_control.iropt_verbosity > 0)
   2740                   /* really a misuse of vex_control.iropt_verbosity */
   2741                   vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
   2742             }
   2743          }
   2744          return IRStmt_Exit(fcond, st->Ist.Exit.jk,
   2745                                    st->Ist.Exit.dst, st->Ist.Exit.offsIP);
   2746       }
   2747 
   2748    default:
   2749       vex_printf("\n"); ppIRStmt(st);
   2750       vpanic("subst_and_fold_Stmt");
   2751    }
   2752 }
   2753 
   2754 
   2755 IRSB* cprop_BB ( IRSB* in )
   2756 {
   2757    Int      i;
   2758    IRSB*    out;
   2759    IRStmt*  st2;
   2760    Int      n_tmps = in->tyenv->types_used;
   2761    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   2762    /* Keep track of IRStmt_LoadGs that we need to revisit after
   2763       processing all the other statements. */
   2764    const Int N_FIXUPS = 16;
   2765    Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   2766    Int n_fixups = 0;
   2767 
   2768    out = emptyIRSB();
   2769    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   2770 
   2771    /* Set up the env with which travels forward.  This holds a
   2772       substitution, mapping IRTemps to IRExprs. The environment
   2773       is to be applied as we move along.  Keys are IRTemps.
   2774       Values are IRExpr*s.
   2775    */
   2776    for (i = 0; i < n_tmps; i++)
   2777       env[i] = NULL;
   2778 
   2779    /* For each original SSA-form stmt ... */
   2780    for (i = 0; i < in->stmts_used; i++) {
   2781 
   2782       /* First apply the substitution to the current stmt.  This
   2783          propagates in any constants and tmp-tmp assignments
   2784          accumulated prior to this point.  As part of the subst_Stmt
   2785          call, also then fold any constant expressions resulting. */
   2786 
   2787       st2 = in->stmts[i];
   2788 
   2789       /* perhaps st2 is already a no-op? */
   2790       if (st2->tag == Ist_NoOp) continue;
   2791 
   2792       st2 = subst_and_fold_Stmt( env, st2 );
   2793 
   2794       /* Deal with some post-folding special cases. */
   2795       switch (st2->tag) {
   2796 
   2797          /* If the statement has been folded into a no-op, forget
   2798             it. */
   2799          case Ist_NoOp:
   2800             continue;
   2801 
   2802          /* If the statement assigns to an IRTemp add it to the
   2803             running environment. This is for the benefit of copy
   2804             propagation and to allow sameIRExpr look through
   2805             IRTemps. */
   2806          case Ist_WrTmp: {
   2807             vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
   2808             env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   2809 
   2810             /* 't1 = t2' -- don't add to BB; will be optimized out */
   2811             if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
   2812                continue;
   2813 
   2814             /* 't = const' && 'const != F64i' -- don't add to BB
   2815                Note, we choose not to propagate const when const is an
   2816                F64i, so that F64i literals can be CSE'd later.  This
   2817                helps x86 floating point code generation. */
   2818             if (st2->Ist.WrTmp.data->tag == Iex_Const
   2819                 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
   2820                continue;
   2821             }
   2822             /* else add it to the output, as normal */
   2823             break;
   2824          }
   2825 
   2826          case Ist_LoadG: {
   2827             IRLoadG* lg    = st2->Ist.LoadG.details;
   2828             IRExpr*  guard = lg->guard;
   2829             if (guard->tag == Iex_Const) {
   2830                /* The guard has folded to a constant, and that
   2831                   constant must be 1:I1, since subst_and_fold_Stmt
   2832                   folds out the case 0:I1 by itself. */
   2833                vassert(guard->Iex.Const.con->tag == Ico_U1);
   2834                vassert(guard->Iex.Const.con->Ico.U1 == True);
   2835                /* Add a NoOp here as a placeholder, and make a note of
   2836                   where it is in the output block.  Afterwards we'll
   2837                   come back here and transform the NoOp and the LoadG
   2838                   into a load-convert pair.  The fixups[] entry
   2839                   refers to the inserted NoOp, and we expect to find
   2840                   the relevant LoadG immediately after it. */
   2841                vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   2842                if (n_fixups < N_FIXUPS) {
   2843                   fixups[n_fixups++] = out->stmts_used;
   2844                   addStmtToIRSB( out, IRStmt_NoOp() );
   2845                }
   2846             }
   2847             /* And always add the LoadG to the output, regardless. */
   2848             break;
   2849          }
   2850 
   2851       default:
   2852          break;
   2853       }
   2854 
   2855       /* Not interesting, copy st2 into the output block. */
   2856       addStmtToIRSB( out, st2 );
   2857    }
   2858 
   2859 #  if STATS_IROPT
   2860    vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
   2861               invocation_count, recursion_count, success_count,
   2862               recursion_success_count, max_nodes_visited);
   2863 #  endif
   2864 
   2865    out->next     = subst_Expr( env, in->next );
   2866    out->jumpkind = in->jumpkind;
   2867    out->offsIP   = in->offsIP;
   2868 
   2869    /* Process any leftover unconditional LoadGs that we noticed
   2870       in the main pass. */
   2871    vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   2872    for (i = 0; i < n_fixups; i++) {
   2873       Int ix = fixups[i];
   2874       /* Carefully verify that the LoadG has the expected form. */
   2875       vassert(ix >= 0 && ix+1 < out->stmts_used);
   2876       IRStmt* nop = out->stmts[ix];
   2877       IRStmt* lgu = out->stmts[ix+1];
   2878       vassert(nop->tag == Ist_NoOp);
   2879       vassert(lgu->tag == Ist_LoadG);
   2880       IRLoadG* lg    = lgu->Ist.LoadG.details;
   2881       IRExpr*  guard = lg->guard;
   2882       vassert(guard->Iex.Const.con->tag == Ico_U1);
   2883       vassert(guard->Iex.Const.con->Ico.U1 == True);
   2884       /* Figure out the load and result types, and the implied
   2885          conversion operation. */
   2886       IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
   2887       typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
   2888       IROp cvtOp = Iop_INVALID;
   2889       switch (lg->cvt) {
   2890          case ILGop_Ident32: break;
   2891          case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
   2892          case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
   2893          case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
   2894          case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
   2895          default: vpanic("cprop_BB: unhandled ILGOp");
   2896       }
   2897       /* Replace the placeholder NoOp by the required unconditional
   2898          load. */
   2899       IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
   2900       out->stmts[ix]
   2901          = IRStmt_WrTmp(tLoaded,
   2902                         IRExpr_Load(lg->end, cvtArg, lg->addr));
   2903       /* Replace the LoadG by a conversion from the loaded value's
   2904          type to the required result type. */
   2905       out->stmts[ix+1]
   2906          = IRStmt_WrTmp(
   2907               lg->dst, cvtOp == Iop_INVALID
   2908                           ? IRExpr_RdTmp(tLoaded)
   2909                           : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   2910    }
   2911 
   2912    return out;
   2913 }
   2914 
   2915 
   2916 /*---------------------------------------------------------------*/
   2917 /*--- Dead code (t = E) removal                               ---*/
   2918 /*---------------------------------------------------------------*/
   2919 
   2920 /* As a side effect, also removes all code following an unconditional
   2921    side exit. */
   2922 
   2923 /* The type of the HashHW map is: a map from IRTemp to nothing
   2924    -- really just operating a set or IRTemps.
   2925 */
   2926 
   2927 inline
   2928 static void addUses_Temp ( Bool* set, IRTemp tmp )
   2929 {
   2930    set[(Int)tmp] = True;
   2931 }
   2932 
   2933 static void addUses_Expr ( Bool* set, IRExpr* e )
   2934 {
   2935    Int i;
   2936    switch (e->tag) {
   2937       case Iex_GetI:
   2938          addUses_Expr(set, e->Iex.GetI.ix);
   2939          return;
   2940       case Iex_ITE:
   2941          addUses_Expr(set, e->Iex.ITE.cond);
   2942          addUses_Expr(set, e->Iex.ITE.iftrue);
   2943          addUses_Expr(set, e->Iex.ITE.iffalse);
   2944          return;
   2945       case Iex_CCall:
   2946          for (i = 0; e->Iex.CCall.args[i]; i++)
   2947             addUses_Expr(set, e->Iex.CCall.args[i]);
   2948          return;
   2949       case Iex_Load:
   2950          addUses_Expr(set, e->Iex.Load.addr);
   2951          return;
   2952       case Iex_Qop:
   2953          addUses_Expr(set, e->Iex.Qop.details->arg1);
   2954          addUses_Expr(set, e->Iex.Qop.details->arg2);
   2955          addUses_Expr(set, e->Iex.Qop.details->arg3);
   2956          addUses_Expr(set, e->Iex.Qop.details->arg4);
   2957          return;
   2958       case Iex_Triop:
   2959          addUses_Expr(set, e->Iex.Triop.details->arg1);
   2960          addUses_Expr(set, e->Iex.Triop.details->arg2);
   2961          addUses_Expr(set, e->Iex.Triop.details->arg3);
   2962          return;
   2963       case Iex_Binop:
   2964          addUses_Expr(set, e->Iex.Binop.arg1);
   2965          addUses_Expr(set, e->Iex.Binop.arg2);
   2966          return;
   2967       case Iex_Unop:
   2968          addUses_Expr(set, e->Iex.Unop.arg);
   2969          return;
   2970       case Iex_RdTmp:
   2971          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2972          return;
   2973       case Iex_Const:
   2974       case Iex_Get:
   2975          return;
   2976       default:
   2977          vex_printf("\n");
   2978          ppIRExpr(e);
   2979          vpanic("addUses_Expr");
   2980    }
   2981 }
   2982 
   2983 static void addUses_Stmt ( Bool* set, IRStmt* st )
   2984 {
   2985    Int      i;
   2986    IRDirty* d;
   2987    IRCAS*   cas;
   2988    switch (st->tag) {
   2989       case Ist_AbiHint:
   2990          addUses_Expr(set, st->Ist.AbiHint.base);
   2991          addUses_Expr(set, st->Ist.AbiHint.nia);
   2992          return;
   2993       case Ist_PutI:
   2994          addUses_Expr(set, st->Ist.PutI.details->ix);
   2995          addUses_Expr(set, st->Ist.PutI.details->data);
   2996          return;
   2997       case Ist_WrTmp:
   2998          addUses_Expr(set, st->Ist.WrTmp.data);
   2999          return;
   3000       case Ist_Put:
   3001          addUses_Expr(set, st->Ist.Put.data);
   3002          return;
   3003       case Ist_Store:
   3004          addUses_Expr(set, st->Ist.Store.addr);
   3005          addUses_Expr(set, st->Ist.Store.data);
   3006          return;
   3007       case Ist_StoreG: {
   3008          IRStoreG* sg = st->Ist.StoreG.details;
   3009          addUses_Expr(set, sg->addr);
   3010          addUses_Expr(set, sg->data);
   3011          addUses_Expr(set, sg->guard);
   3012          return;
   3013       }
   3014       case Ist_LoadG: {
   3015          IRLoadG* lg = st->Ist.LoadG.details;
   3016          addUses_Expr(set, lg->addr);
   3017          addUses_Expr(set, lg->alt);
   3018          addUses_Expr(set, lg->guard);
   3019          return;
   3020       }
   3021       case Ist_CAS:
   3022          cas = st->Ist.CAS.details;
   3023          addUses_Expr(set, cas->addr);
   3024          if (cas->expdHi)
   3025             addUses_Expr(set, cas->expdHi);
   3026          addUses_Expr(set, cas->expdLo);
   3027          if (cas->dataHi)
   3028             addUses_Expr(set, cas->dataHi);
   3029          addUses_Expr(set, cas->dataLo);
   3030          return;
   3031       case Ist_LLSC:
   3032          addUses_Expr(set, st->Ist.LLSC.addr);
   3033          if (st->Ist.LLSC.storedata)
   3034             addUses_Expr(set, st->Ist.LLSC.storedata);
   3035          return;
   3036       case Ist_Dirty:
   3037          d = st->Ist.Dirty.details;
   3038          if (d->mFx != Ifx_None)
   3039             addUses_Expr(set, d->mAddr);
   3040          addUses_Expr(set, d->guard);
   3041          for (i = 0; d->args[i] != NULL; i++) {
   3042             IRExpr* arg = d->args[i];
   3043             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   3044                addUses_Expr(set, arg);
   3045          }
   3046          return;
   3047       case Ist_NoOp:
   3048       case Ist_IMark:
   3049       case Ist_MBE:
   3050          return;
   3051       case Ist_Exit:
   3052          addUses_Expr(set, st->Ist.Exit.guard);
   3053          return;
   3054       default:
   3055          vex_printf("\n");
   3056          ppIRStmt(st);
   3057          vpanic("addUses_Stmt");
   3058    }
   3059 }
   3060 
   3061 
   3062 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   3063 static Bool isZeroU1 ( IRExpr* e )
   3064 {
   3065    return toBool( e->tag == Iex_Const
   3066                   && e->Iex.Const.con->tag == Ico_U1
   3067                   && e->Iex.Const.con->Ico.U1 == False );
   3068 }
   3069 
   3070 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   3071 static Bool isOneU1 ( IRExpr* e )
   3072 {
   3073    return toBool( e->tag == Iex_Const
   3074                   && e->Iex.Const.con->tag == Ico_U1
   3075                   && e->Iex.Const.con->Ico.U1 == True );
   3076 }
   3077 
   3078 
   3079 /* Note, this destructively modifies the given IRSB. */
   3080 
   3081 /* Scan backwards through statements, carrying a set of IRTemps which
   3082    are known to be used after the current point.  On encountering 't =
   3083    E', delete the binding if it is not used.  Otherwise, add any temp
   3084    uses to the set and keep on moving backwards.
   3085 
   3086    As an enhancement, the first (backwards) pass searches for IR exits
   3087    with always-taken conditions and notes the location of the earliest
   3088    one in the block.  If any such are found, a second pass copies the
   3089    exit destination and jump kind to the bb-end.  Then, the exit and
   3090    all statements following it are turned into no-ops.
   3091 */
   3092 
   3093 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
   3094 {
   3095    Int     i, i_unconditional_exit;
   3096    Int     n_tmps = bb->tyenv->types_used;
   3097    Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   3098    IRStmt* st;
   3099 
   3100    for (i = 0; i < n_tmps; i++)
   3101       set[i] = False;
   3102 
   3103    /* start off by recording IRTemp uses in the next field. */
   3104    addUses_Expr(set, bb->next);
   3105 
   3106    /* First pass */
   3107 
   3108    /* Work backwards through the stmts */
   3109    i_unconditional_exit = -1;
   3110    for (i = bb->stmts_used-1; i >= 0; i--) {
   3111       st = bb->stmts[i];
   3112       if (st->tag == Ist_NoOp)
   3113          continue;
   3114       /* take note of any unconditional exits */
   3115       if (st->tag == Ist_Exit
   3116           && isOneU1(st->Ist.Exit.guard))
   3117          i_unconditional_exit = i;
   3118       if (st->tag == Ist_WrTmp
   3119           && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
   3120           /* it's an IRTemp which never got used.  Delete it. */
   3121          if (DEBUG_IROPT) {
   3122             vex_printf("DEAD: ");
   3123             ppIRStmt(st);
   3124             vex_printf("\n");
   3125          }
   3126          bb->stmts[i] = IRStmt_NoOp();
   3127       }
   3128       else
   3129       if (st->tag == Ist_Dirty
   3130           && st->Ist.Dirty.details->guard
   3131           && isZeroU1(st->Ist.Dirty.details->guard)) {
   3132          /* This is a dirty helper which will never get called.
   3133             Delete it. */
   3134          bb->stmts[i] = IRStmt_NoOp();
   3135        }
   3136        else {
   3137          /* Note any IRTemp uses made by the current statement. */
   3138          addUses_Stmt(set, st);
   3139       }
   3140    }
   3141 
   3142    /* Optional second pass: if any unconditional exits were found,
   3143       delete them and all following statements. */
   3144 
   3145    if (i_unconditional_exit != -1) {
   3146       if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
   3147                         i_unconditional_exit);
   3148       vassert(i_unconditional_exit >= 0
   3149               && i_unconditional_exit < bb->stmts_used);
   3150       bb->next
   3151          = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
   3152       bb->jumpkind
   3153          = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
   3154       bb->offsIP
   3155          = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
   3156       for (i = i_unconditional_exit; i < bb->stmts_used; i++)
   3157          bb->stmts[i] = IRStmt_NoOp();
   3158    }
   3159 }
   3160 
   3161 
   3162 /*---------------------------------------------------------------*/
   3163 /*--- Specialisation of helper function calls, in             ---*/
   3164 /*--- collaboration with the front end                        ---*/
   3165 /*---------------------------------------------------------------*/
   3166 
   3167 static
   3168 IRSB* spec_helpers_BB(
   3169          IRSB* bb,
   3170          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
   3171       )
   3172 {
   3173    Int     i;
   3174    IRStmt* st;
   3175    IRExpr* ex;
   3176    Bool    any = False;
   3177 
   3178    for (i = bb->stmts_used-1; i >= 0; i--) {
   3179       st = bb->stmts[i];
   3180 
   3181       if (st->tag != Ist_WrTmp
   3182           || st->Ist.WrTmp.data->tag != Iex_CCall)
   3183          continue;
   3184 
   3185       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
   3186                           st->Ist.WrTmp.data->Iex.CCall.args,
   3187                           &bb->stmts[0], i );
   3188       if (!ex)
   3189         /* the front end can't think of a suitable replacement */
   3190         continue;
   3191 
   3192       /* We got something better.  Install it in the bb. */
   3193       any = True;
   3194       bb->stmts[i]
   3195          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
   3196 
   3197       if (0) {
   3198          vex_printf("SPEC: ");
   3199          ppIRExpr(st->Ist.WrTmp.data);
   3200          vex_printf("  -->  ");
   3201          ppIRExpr(ex);
   3202          vex_printf("\n");
   3203       }
   3204    }
   3205 
   3206    if (any)
   3207       bb = flatten_BB(bb);
   3208    return bb;
   3209 }
   3210 
   3211 
   3212 /*---------------------------------------------------------------*/
   3213 /*--- Determination of guest state aliasing relationships     ---*/
   3214 /*---------------------------------------------------------------*/
   3215 
   3216 /* These are helper functions for CSE and GetI/PutI transformations.
   3217 
   3218    Determine, to the extent possible, the relationship between two
   3219    guest state accesses.  The possible outcomes are:
   3220 
   3221    * Exact alias.  These two accesses denote precisely the same
   3222      piece of the guest state.
   3223 
   3224    * Definitely no alias.  These two accesses are guaranteed not to
   3225      overlap any part of the guest state.
   3226 
   3227    * Unknown -- if neither of the above can be established.
   3228 
   3229    If in doubt, return Unknown.  */
   3230 
   3231 typedef
   3232    enum { ExactAlias, NoAlias, UnknownAlias }
   3233    GSAliasing;
   3234 
   3235 
   3236 /* Produces the alias relation between an indexed guest
   3237    state access and a non-indexed access. */
   3238 
   3239 static
   3240 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   3241                                     Int offset2, IRType ty2 )
   3242 {
   3243    UInt minoff1, maxoff1, minoff2, maxoff2;
   3244 
   3245    getArrayBounds( descr1, &minoff1, &maxoff1 );
   3246    minoff2 = offset2;
   3247    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   3248 
   3249    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   3250       return NoAlias;
   3251 
   3252    /* Could probably do better here if required.  For the moment
   3253       however just claim not to know anything more. */
   3254    return UnknownAlias;
   3255 }
   3256 
   3257 
   3258 /* Produces the alias relation between two indexed guest state
   3259    accesses. */
   3260 
   3261 static
   3262 GSAliasing getAliasingRelation_II (
   3263               IRRegArray* descr1, IRExpr* ix1, Int bias1,
   3264               IRRegArray* descr2, IRExpr* ix2, Int bias2
   3265            )
   3266 {
   3267    UInt minoff1, maxoff1, minoff2, maxoff2;
   3268    Int  iters;
   3269 
   3270    /* First try hard to show they don't alias. */
   3271    getArrayBounds( descr1, &minoff1, &maxoff1 );
   3272    getArrayBounds( descr2, &minoff2, &maxoff2 );
   3273    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   3274       return NoAlias;
   3275 
   3276    /* So the two arrays at least partially overlap.  To get any
   3277       further we'll have to be sure that the descriptors are
   3278       identical. */
   3279    if (!eqIRRegArray(descr1, descr2))
   3280       return UnknownAlias;
   3281 
   3282    /* The descriptors are identical.  Now the only difference can be
   3283       in the index expressions.  If they cannot be shown to be
   3284       identical, we have to say we don't know what the aliasing
   3285       relation will be.  Now, since the IR is flattened, the index
   3286       expressions should be atoms -- either consts or tmps.  So that
   3287       makes the comparison simple. */
   3288    vassert(isIRAtom(ix1));
   3289    vassert(isIRAtom(ix2));
   3290    if (!eqIRAtom(ix1,ix2))
   3291       return UnknownAlias;
   3292 
   3293    /* Ok, the index expressions are identical.  So now the only way
   3294       they can be different is in the bias.  Normalise this
   3295       paranoidly, to reliably establish equality/non-equality. */
   3296 
   3297    /* So now we know that the GetI and PutI index the same array
   3298       with the same base.  Are the offsets the same, modulo the
   3299       array size?  Do this paranoidly. */
   3300    vassert(descr1->nElems == descr2->nElems);
   3301    vassert(descr1->elemTy == descr2->elemTy);
   3302    vassert(descr1->base   == descr2->base);
   3303    iters = 0;
   3304    while (bias1 < 0 || bias2 < 0) {
   3305       bias1 += descr1->nElems;
   3306       bias2 += descr1->nElems;
   3307       iters++;
   3308       if (iters > 10)
   3309          vpanic("getAliasingRelation: iters");
   3310    }
   3311    vassert(bias1 >= 0 && bias2 >= 0);
   3312    bias1 %= descr1->nElems;
   3313    bias2 %= descr1->nElems;
   3314    vassert(bias1 >= 0 && bias1 < descr1->nElems);
   3315    vassert(bias2 >= 0 && bias2 < descr1->nElems);
   3316 
   3317    /* Finally, biasP and biasG are normalised into the range
   3318       0 .. descrP/G->nElems - 1.  And so we can establish
   3319       equality/non-equality. */
   3320 
   3321    return bias1==bias2 ? ExactAlias : NoAlias;
   3322 }
   3323 
   3324 
   3325 /*---------------------------------------------------------------*/
   3326 /*--- Common Subexpression Elimination                        ---*/
   3327 /*---------------------------------------------------------------*/
   3328 
   3329 /* Expensive in time and space. */
   3330 
   3331 /* Uses two environments:
   3332    a IRTemp -> IRTemp mapping
   3333    a mapping from AvailExpr* to IRTemp
   3334 */
   3335 
   3336 typedef
   3337    struct {
   3338       enum { TCc, TCt } tag;
   3339       union { IRTemp tmp; IRConst* con; } u;
   3340    }
   3341    TmpOrConst;
   3342 
   3343 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
   3344 {
   3345    if (tc1->tag != tc2->tag)
   3346       return False;
   3347    switch (tc1->tag) {
   3348       case TCc:
   3349          return eqIRConst(tc1->u.con, tc2->u.con);
   3350       case TCt:
   3351          return tc1->u.tmp == tc2->u.tmp;
   3352       default:
   3353          vpanic("eqTmpOrConst");
   3354    }
   3355 }
   3356 
   3357 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
   3358 {
   3359    Bool eq = cee1->addr == cee2->addr;
   3360    if (eq) {
   3361       vassert(cee1->regparms == cee2->regparms);
   3362       vassert(cee1->mcx_mask == cee2->mcx_mask);
   3363       /* Names should be the same too, but we don't bother to
   3364          check. */
   3365    }
   3366    return eq;
   3367 }
   3368 
   3369 /* Convert an atomic IRExpr* to a TmpOrConst. */
   3370 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
   3371 {
   3372    switch (e->tag) {
   3373       case Iex_RdTmp:
   3374          tc->tag   = TCt;
   3375          tc->u.tmp = e->Iex.RdTmp.tmp;
   3376          break;
   3377       case Iex_Const:
   3378          tc->tag   = TCc;
   3379          tc->u.con = e->Iex.Const.con;
   3380          break;
   3381       default:
   3382          /* Getting here is a serious error.  It means that the
   3383             presented arg isn't an IR atom, as it should be. */
   3384          vpanic("irExpr_to_TmpOrConst");
   3385    }
   3386 }
   3387 
   3388 /* Convert a TmpOrConst to an atomic IRExpr*. */
   3389 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
   3390 {
   3391    switch (tc->tag) {
   3392       case TCc: return IRExpr_Const(tc->u.con);
   3393       case TCt: return IRExpr_RdTmp(tc->u.tmp);
   3394       default:  vpanic("tmpOrConst_to_IRExpr");
   3395    }
   3396 }
   3397 
   3398 /* Convert a NULL terminated IRExpr* vector to an array of
   3399    TmpOrConsts, and a length. */
   3400 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
   3401                                        /*OUT*/Int* nOuts,
   3402                                        IRExpr** ins )
   3403 {
   3404    Int i, n;
   3405    /* We have to make two passes, one to count, one to copy. */
   3406    for (n = 0; ins[n]; n++)
   3407       ;
   3408    *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
   3409    *nOuts = n;
   3410    /* and now copy .. */
   3411    for (i = 0; i < n; i++) {
   3412       IRExpr*     arg = ins[i];
   3413       TmpOrConst* dst = &(*outs)[i];
   3414       irExpr_to_TmpOrConst(dst, arg);
   3415    }
   3416 }
   3417 
   3418 typedef
   3419    struct {
   3420       enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
   3421              CCall, Load
   3422       } tag;
   3423       union {
   3424          /* unop(tmp) */
   3425          struct {
   3426             IROp   op;
   3427             IRTemp arg;
   3428          } Ut;
   3429          /* binop(tmp,tmp) */
   3430          struct {
   3431             IROp   op;
   3432             IRTemp arg1;
   3433             IRTemp arg2;
   3434          } Btt;
   3435          /* binop(tmp,const) */
   3436          struct {
   3437             IROp    op;
   3438             IRTemp  arg1;
   3439             IRConst con2;
   3440          } Btc;
   3441          /* binop(const,tmp) */
   3442          struct {
   3443             IROp    op;
   3444             IRConst con1;
   3445             IRTemp  arg2;
   3446          } Bct;
   3447          /* F64i-style const */
   3448          struct {
   3449             ULong f64i;
   3450          } Cf64i;
   3451          /* ITE(tmp,tmp,tmp) */
   3452          struct {
   3453             IRTemp co;
   3454             IRTemp e1;
   3455             IRTemp e0;
   3456          } Ittt;
   3457          /* ITE(tmp,tmp,const) */
   3458          struct {
   3459             IRTemp  co;
   3460             IRTemp  e1;
   3461             IRConst con0;
   3462          } Ittc;
   3463          /* ITE(tmp,const,tmp) */
   3464          struct {
   3465             IRTemp  co;
   3466             IRConst con1;
   3467             IRTemp  e0;
   3468          } Itct;
   3469          /* ITE(tmp,const,const) */
   3470          struct {
   3471             IRTemp  co;
   3472             IRConst con1;
   3473             IRConst con0;
   3474          } Itcc;
   3475          /* GetI(descr,tmp,bias)*/
   3476          struct {
   3477             IRRegArray* descr;
   3478             IRTemp      ix;
   3479             Int         bias;
   3480          } GetIt;
   3481          /* Clean helper call */
   3482          struct {
   3483             IRCallee*   cee;
   3484             TmpOrConst* args;
   3485             Int         nArgs;
   3486             IRType      retty;
   3487          } CCall;
   3488          /* Load(end,ty,addr) */
   3489          struct {
   3490             IREndness  end;
   3491             IRType     ty;
   3492             TmpOrConst addr;
   3493          } Load;
   3494       } u;
   3495    }
   3496    AvailExpr;
   3497 
   3498 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
   3499 {
   3500    if (LIKELY(a1->tag != a2->tag))
   3501       return False;
   3502    switch (a1->tag) {
   3503       case Ut:
   3504          return toBool(
   3505                 a1->u.Ut.op == a2->u.Ut.op
   3506                 && a1->u.Ut.arg == a2->u.Ut.arg);
   3507       case Btt:
   3508          return toBool(
   3509                 a1->u.Btt.op == a2->u.Btt.op
   3510                 && a1->u.Btt.arg1 == a2->u.Btt.arg1
   3511                 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
   3512       case Btc:
   3513          return toBool(
   3514                 a1->u.Btc.op == a2->u.Btc.op
   3515                 && a1->u.Btc.arg1 == a2->u.Btc.arg1
   3516                 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
   3517       case Bct:
   3518          return toBool(
   3519                 a1->u.Bct.op == a2->u.Bct.op
   3520                 && a1->u.Bct.arg2 == a2->u.Bct.arg2
   3521                 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
   3522       case Cf64i:
   3523          return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
   3524       case Ittt:
   3525          return toBool(a1->u.Ittt.co == a2->u.Ittt.co
   3526                        && a1->u.Ittt.e1 == a2->u.Ittt.e1
   3527                        && a1->u.Ittt.e0 == a2->u.Ittt.e0);
   3528       case Ittc:
   3529          return toBool(a1->u.Ittc.co == a2->u.Ittc.co
   3530                        && a1->u.Ittc.e1 == a2->u.Ittc.e1
   3531                        && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
   3532       case Itct:
   3533          return toBool(a1->u.Itct.co == a2->u.Itct.co
   3534                        && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
   3535                        && a1->u.Itct.e0 == a2->u.Itct.e0);
   3536       case Itcc:
   3537          return toBool(a1->u.Itcc.co == a2->u.Itcc.co
   3538                        && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
   3539                        && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
   3540       case GetIt:
   3541          return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
   3542                        && a1->u.GetIt.ix == a2->u.GetIt.ix
   3543                        && a1->u.GetIt.bias == a2->u.GetIt.bias);
   3544       case CCall: {
   3545          Int  i, n;
   3546          Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
   3547                    && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
   3548          if (eq) {
   3549             n = a1->u.CCall.nArgs;
   3550             for (i = 0; i < n; i++) {
   3551                if (!eqTmpOrConst( &a1->u.CCall.args[i],
   3552                                   &a2->u.CCall.args[i] )) {
   3553                   eq = False;
   3554                   break;
   3555                }
   3556             }
   3557          }
   3558          if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
   3559          return eq;
   3560       }
   3561       case Load: {
   3562          Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
   3563                           && a1->u.Load.ty == a2->u.Load.ty
   3564                           && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
   3565          return eq;
   3566       }
   3567       default:
   3568          vpanic("eq_AvailExpr");
   3569    }
   3570 }
   3571 
   3572 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
   3573 {
   3574    IRConst *con, *con0, *con1;
   3575    switch (ae->tag) {
   3576       case Ut:
   3577          return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
   3578       case Btt:
   3579          return IRExpr_Binop( ae->u.Btt.op,
   3580                               IRExpr_RdTmp(ae->u.Btt.arg1),
   3581                               IRExpr_RdTmp(ae->u.Btt.arg2) );
   3582       case Btc:
   3583          con = LibVEX_Alloc_inline(sizeof(IRConst));
   3584          *con = ae->u.Btc.con2;
   3585          return IRExpr_Binop( ae->u.Btc.op,
   3586                               IRExpr_RdTmp(ae->u.Btc.arg1),
   3587                               IRExpr_Const(con) );
   3588       case Bct:
   3589          con = LibVEX_Alloc_inline(sizeof(IRConst));
   3590          *con = ae->u.Bct.con1;
   3591          return IRExpr_Binop( ae->u.Bct.op,
   3592                               IRExpr_Const(con),
   3593                               IRExpr_RdTmp(ae->u.Bct.arg2) );
   3594       case Cf64i:
   3595          return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
   3596       case Ittt:
   3597          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
   3598                            IRExpr_RdTmp(ae->u.Ittt.e1),
   3599                            IRExpr_RdTmp(ae->u.Ittt.e0));
   3600       case Ittc:
   3601          con0 = LibVEX_Alloc_inline(sizeof(IRConst));
   3602          *con0 = ae->u.Ittc.con0;
   3603          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
   3604                            IRExpr_RdTmp(ae->u.Ittc.e1),
   3605                            IRExpr_Const(con0));
   3606       case Itct:
   3607          con1 = LibVEX_Alloc_inline(sizeof(IRConst));
   3608          *con1 = ae->u.Itct.con1;
   3609          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
   3610                            IRExpr_Const(con1),
   3611                            IRExpr_RdTmp(ae->u.Itct.e0));
   3612 
   3613       case Itcc:
   3614          con0 = LibVEX_Alloc_inline(sizeof(IRConst));
   3615          con1 = LibVEX_Alloc_inline(sizeof(IRConst));
   3616          *con0 = ae->u.Itcc.con0;
   3617          *con1 = ae->u.Itcc.con1;
   3618          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
   3619                            IRExpr_Const(con1),
   3620                            IRExpr_Const(con0));
   3621       case GetIt:
   3622          return IRExpr_GetI(ae->u.GetIt.descr,
   3623                             IRExpr_RdTmp(ae->u.GetIt.ix),
   3624                             ae->u.GetIt.bias);
   3625       case CCall: {
   3626          Int i, n = ae->u.CCall.nArgs;
   3627          vassert(n >= 0);
   3628          IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
   3629          vec[n] = NULL;
   3630          for (i = 0; i < n; i++) {
   3631             vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
   3632          }
   3633          return IRExpr_CCall(ae->u.CCall.cee,
   3634                              ae->u.CCall.retty,
   3635                              vec);
   3636       }
   3637       case Load:
   3638          return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
   3639                             tmpOrConst_to_IRExpr(&ae->u.Load.addr));
   3640       default:
   3641          vpanic("availExpr_to_IRExpr");
   3642    }
   3643 }
   3644 
   3645 inline
   3646 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   3647 {
   3648    HWord res;
   3649    /* env :: IRTemp -> IRTemp */
   3650    if (lookupHHW( env, &res, (HWord)tmp ))
   3651       return (IRTemp)res;
   3652    else
   3653       return tmp;
   3654 }
   3655 
   3656 inline
   3657 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
   3658                                           HashHW* env )
   3659 {
   3660    /* env :: IRTemp -> IRTemp */
   3661    if (tc->tag == TCt) {
   3662       tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   3663    }
   3664 }
   3665 
   3666 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   3667 {
   3668    /* env :: IRTemp -> IRTemp */
   3669    switch (ae->tag) {
   3670       case Ut:
   3671          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   3672          break;
   3673       case Btt:
   3674          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   3675          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   3676          break;
   3677       case Btc:
   3678          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   3679          break;
   3680       case Bct:
   3681          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   3682          break;
   3683       case Cf64i:
   3684          break;
   3685       case Ittt:
   3686          ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
   3687          ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
   3688          ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
   3689          break;
   3690       case Ittc:
   3691          ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
   3692          ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
   3693          break;
   3694       case Itct:
   3695          ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
   3696          ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
   3697          break;
   3698       case Itcc:
   3699          ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
   3700          break;
   3701       case GetIt:
   3702          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   3703          break;
   3704       case CCall: {
   3705          Int i, n = ae->u.CCall.nArgs;;
   3706          for (i = 0; i < n; i++) {
   3707             subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
   3708          }
   3709          break;
   3710       }
   3711       case Load:
   3712          subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
   3713          break;
   3714       default:
   3715          vpanic("subst_AvailExpr");
   3716    }
   3717 }
   3718 
   3719 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
   3720 {
   3721    AvailExpr* ae;
   3722 
   3723    switch (e->tag) {
   3724       case Iex_Unop:
   3725          if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
   3726             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3727             ae->tag      = Ut;
   3728             ae->u.Ut.op  = e->Iex.Unop.op;
   3729             ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
   3730             return ae;
   3731          }
   3732          break;
   3733 
   3734       case Iex_Binop:
   3735          if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
   3736             if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   3737                ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3738                ae->tag        = Btt;
   3739                ae->u.Btt.op   = e->Iex.Binop.op;
   3740                ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3741                ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   3742                return ae;
   3743             }
   3744             if (e->Iex.Binop.arg2->tag == Iex_Const) {
   3745                ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3746                ae->tag        = Btc;
   3747                ae->u.Btc.op   = e->Iex.Binop.op;
   3748                ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3749                ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
   3750                return ae;
   3751             }
   3752          } else if (e->Iex.Binop.arg1->tag == Iex_Const
   3753                     && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   3754             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3755             ae->tag        = Bct;
   3756             ae->u.Bct.op   = e->Iex.Binop.op;
   3757             ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   3758             ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
   3759             return ae;
   3760          }
   3761          break;
   3762 
   3763       case Iex_Const:
   3764          if (e->Iex.Const.con->tag == Ico_F64i) {
   3765             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3766             ae->tag          = Cf64i;
   3767             ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
   3768             return ae;
   3769          }
   3770          break;
   3771 
   3772       case Iex_ITE:
   3773          if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
   3774             if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
   3775                if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
   3776                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3777                   ae->tag       = Ittt;
   3778                   ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
   3779                   ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
   3780                   ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
   3781                   return ae;
   3782                }
   3783                if (e->Iex.ITE.iftrue->tag == Iex_Const) {
   3784                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3785                   ae->tag       = Itct;
   3786                   ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
   3787                   ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
   3788                   ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
   3789                   return ae;
   3790                }
   3791             } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
   3792                if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
   3793                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3794                   ae->tag       = Ittc;
   3795                   ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
   3796                   ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
   3797                   ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
   3798                   return ae;
   3799                }
   3800                if (e->Iex.ITE.iftrue->tag == Iex_Const) {
   3801                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3802                   ae->tag       = Itcc;
   3803                   ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
   3804                   ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
   3805                   ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
   3806                   return ae;
   3807                }
   3808             }
   3809          }
   3810          break;
   3811 
   3812       case Iex_GetI:
   3813          if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
   3814             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3815             ae->tag           = GetIt;
   3816             ae->u.GetIt.descr = e->Iex.GetI.descr;
   3817             ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
   3818             ae->u.GetIt.bias  = e->Iex.GetI.bias;
   3819             return ae;
   3820          }
   3821          break;
   3822 
   3823       case Iex_CCall:
   3824          ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3825          ae->tag = CCall;
   3826          /* Ok to share only the cee, since it is immutable. */
   3827          ae->u.CCall.cee   = e->Iex.CCall.cee;
   3828          ae->u.CCall.retty = e->Iex.CCall.retty;
   3829          /* irExprVec_to_TmpOrConsts will assert if the args are
   3830             neither tmps nor constants, but that's ok .. that's all they
   3831             should be. */
   3832          irExprVec_to_TmpOrConsts(
   3833                                   &ae->u.CCall.args, &ae->u.CCall.nArgs,
   3834                                   e->Iex.CCall.args
   3835                                  );
   3836          return ae;
   3837 
   3838       case Iex_Load:
   3839          /* If the caller of do_cse_BB has requested that loads also
   3840             be CSEd, convert them into AvailExprs.  If not, we'll just
   3841             return NULL here, and the load never becomes considered
   3842             "available", which effectively disables CSEing of them, as
   3843             desired. */
   3844          if (allowLoadsToBeCSEd) {
   3845             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
   3846             ae->tag        = Load;
   3847             ae->u.Load.end = e->Iex.Load.end;
   3848             ae->u.Load.ty  = e->Iex.Load.ty;
   3849             irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
   3850             return ae;
   3851          }
   3852          break;
   3853 
   3854       default:
   3855          break;
   3856    }
   3857 
   3858    return NULL;
   3859 }
   3860 
   3861 
   3862 /* The BB is modified in-place.  Returns True if any changes were
   3863    made.  The caller can choose whether or not loads should be CSEd.
   3864    In the normal course of things we don't do that, since CSEing loads
   3865    is something of a dodgy proposition if the guest program is doing
   3866    some screwy stuff to do with races and spinloops. */
   3867 
   3868 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
   3869 {
   3870    Int        i, j, paranoia;
   3871    IRTemp     t, q;
   3872    IRStmt*    st;
   3873    AvailExpr* eprime;
   3874    AvailExpr* ae;
   3875    Bool       invalidate;
   3876    Bool       anyDone = False;
   3877 
   3878    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   3879    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
   3880 
   3881    vassert(sizeof(IRTemp) <= sizeof(HWord));
   3882 
   3883    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
   3884 
   3885    /* Iterate forwards over the stmts.
   3886       On seeing "t = E", where E is one of the AvailExpr forms:
   3887          let E' = apply tenv substitution to E
   3888          search aenv for E'
   3889             if a mapping E' -> q is found,
   3890                replace this stmt by "t = q"
   3891                and add binding t -> q to tenv
   3892             else
   3893                add binding E' -> t to aenv
   3894                replace this stmt by "t = E'"
   3895 
   3896       Other statements are only interesting to the extent that they
   3897       might invalidate some of the expressions in aenv.  So there is
   3898       an invalidate-bindings check for each statement seen.
   3899    */
   3900    for (i = 0; i < bb->stmts_used; i++) {
   3901       st = bb->stmts[i];
   3902 
   3903       /* ------ BEGIN invalidate aenv bindings ------ */
   3904       /* This is critical: remove from aenv any E' -> .. bindings
   3905          which might be invalidated by this statement.  The only
   3906          vulnerable kind of bindings are the GetI and Load kinds.
   3907             Dirty call - dump (paranoia level -> 2)
   3908             Store      - dump (ditto)
   3909             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
   3910          Uses getAliasingRelation_IC and getAliasingRelation_II
   3911          to do the no-overlap assessments needed for Put/PutI.
   3912       */
   3913       switch (st->tag) {
   3914          case Ist_Dirty: case Ist_Store: case Ist_MBE:
   3915          case Ist_CAS: case Ist_LLSC:
   3916          case Ist_StoreG:
   3917             paranoia = 2; break;
   3918          case Ist_Put: case Ist_PutI:
   3919             paranoia = 1; break;
   3920          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
   3921          case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
   3922             paranoia = 0; break;
   3923          default:
   3924             vpanic("do_cse_BB(1)");
   3925       }
   3926 
   3927       if (paranoia > 0) {
   3928          for (j = 0; j < aenv->used; j++) {
   3929             if (!aenv->inuse[j])
   3930                continue;
   3931             ae = (AvailExpr*)aenv->key[j];
   3932             if (ae->tag != GetIt && ae->tag != Load)
   3933                continue;
   3934             invalidate = False;
   3935             if (paranoia >= 2) {
   3936                invalidate = True;
   3937             } else {
   3938                vassert(paranoia == 1);
   3939                if (ae->tag == Load) {
   3940                   /* Loads can be invalidated by anything that could
   3941                      possibly touch memory.  But in that case we
   3942                      should have |paranoia| == 2 and we won't get
   3943                      here.  So there's nothing to do; we don't have to
   3944                      invalidate the load. */
   3945                }
   3946                else
   3947                if (st->tag == Ist_Put) {
   3948                   if (getAliasingRelation_IC(
   3949                          ae->u.GetIt.descr,
   3950                          IRExpr_RdTmp(ae->u.GetIt.ix),
   3951                          st->Ist.Put.offset,
   3952                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
   3953                       ) != NoAlias)
   3954                      invalidate = True;
   3955                }
   3956                else
   3957                if (st->tag == Ist_PutI) {
   3958                   IRPutI *puti = st->Ist.PutI.details;
   3959                   if (getAliasingRelation_II(
   3960                          ae->u.GetIt.descr,
   3961                          IRExpr_RdTmp(ae->u.GetIt.ix),
   3962                          ae->u.GetIt.bias,
   3963                          puti->descr,
   3964                          puti->ix,
   3965                          puti->bias
   3966                       ) != NoAlias)
   3967                      invalidate = True;
   3968                }
   3969                else
   3970                   vpanic("do_cse_BB(2)");
   3971             }
   3972 
   3973             if (invalidate) {
   3974                aenv->inuse[j] = False;
   3975                aenv->key[j]   = (HWord)NULL;  /* be sure */
   3976             }
   3977          } /* for j */
   3978       } /* paranoia > 0 */
   3979 
   3980       /* ------ ENV invalidate aenv bindings ------ */
   3981 
   3982       /* ignore not-interestings */
   3983       if (st->tag != Ist_WrTmp)
   3984          continue;
   3985 
   3986       t = st->Ist.WrTmp.tmp;
   3987       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
   3988       /* ignore if not of AvailExpr form */
   3989       if (!eprime)
   3990          continue;
   3991 
   3992       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
   3993 
   3994       /* apply tenv */
   3995       subst_AvailExpr( tenv, eprime );
   3996 
   3997       /* search aenv for eprime, unfortunately the hard way */
   3998       for (j = 0; j < aenv->used; j++)
   3999          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
   4000             break;
   4001 
   4002       if (j < aenv->used) {
   4003          /* A binding E' -> q was found.  Replace stmt by "t = q" and
   4004             note the t->q binding in tenv. */
   4005          /* (this is the core of the CSE action) */
   4006          q = (IRTemp)aenv->val[j];
   4007          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
   4008          addToHHW( tenv, (HWord)t, (HWord)q );
   4009          anyDone = True;
   4010       } else {
   4011          /* No binding was found, so instead we add E' -> t to our
   4012             collection of available expressions, replace this stmt
   4013             with "t = E'", and move on. */
   4014          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
   4015          addToHHW( aenv, (HWord)eprime, (HWord)t );
   4016       }
   4017    }
   4018 
   4019    /*
   4020    ppIRSB(bb);
   4021    sanityCheckIRSB(bb, Ity_I32);
   4022    vex_printf("\n\n");
   4023    */
   4024    return anyDone;
   4025 }
   4026 
   4027 
   4028 /*---------------------------------------------------------------*/
   4029 /*--- Add32/Sub32 chain collapsing                            ---*/
   4030 /*---------------------------------------------------------------*/
   4031 
   4032 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   4033 
   4034 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   4035    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   4036    root node is Add32, not Sub32. */
   4037 
   4038 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   4039 {
   4040    if (e->tag != Iex_Binop)
   4041       return False;
   4042    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   4043       return False;
   4044    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   4045       return False;
   4046    if (e->Iex.Binop.arg2->tag != Iex_Const)
   4047       return False;
   4048    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   4049    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   4050    if (e->Iex.Binop.op == Iop_Sub32)
   4051       *i32 = -*i32;
   4052    return True;
   4053 }
   4054 
   4055 
   4056 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   4057    other tmp2.  Scan backwards from the specified start point -- an
   4058    optimisation. */
   4059 
   4060 static Bool collapseChain ( IRSB* bb, Int startHere,
   4061                             IRTemp tmp,
   4062                             IRTemp* tmp2, Int* i32 )
   4063 {
   4064    Int     j, ii;
   4065    IRTemp  vv;
   4066    IRStmt* st;
   4067    IRExpr* e;
   4068 
   4069    /* the (var, con) pair contain the current 'representation' for
   4070       'tmp'.  We start with 'tmp + 0'.  */
   4071    IRTemp var = tmp;
   4072    Int    con = 0;
   4073 
   4074    /* Scan backwards to see if tmp can be replaced by some other tmp
   4075      +/- a constant. */
   4076    for (j = startHere; j >= 0; j--) {
   4077       st = bb->stmts[j];
   4078       if (st->tag != Ist_WrTmp)
   4079          continue;
   4080       if (st->Ist.WrTmp.tmp != var)
   4081          continue;
   4082       e = st->Ist.WrTmp.data;
   4083       if (!isAdd32OrSub32(e, &vv, &ii))
   4084          break;
   4085       var = vv;
   4086       con += ii;
   4087    }
   4088    if (j == -1)
   4089       /* no earlier binding for var .. ill-formed IR */
   4090       vpanic("collapseChain");
   4091 
   4092    /* so, did we find anything interesting? */
   4093    if (var == tmp)
   4094       return False; /* no .. */
   4095 
   4096    *tmp2 = var;
   4097    *i32  = con;
   4098    return True;
   4099 }
   4100 
   4101 
   4102 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   4103 
   4104 static void collapse_AddSub_chains_BB ( IRSB* bb )
   4105 {
   4106    IRStmt *st;
   4107    IRTemp var, var2;
   4108    Int    i, con, con2;
   4109 
   4110    for (i = bb->stmts_used-1; i >= 0; i--) {
   4111       st = bb->stmts[i];
   4112       if (st->tag == Ist_NoOp)
   4113          continue;
   4114 
   4115       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   4116 
   4117       if (st->tag == Ist_WrTmp
   4118           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   4119 
   4120          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   4121             Find out if var can be expressed as var2 + con2. */
   4122          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   4123             if (DEBUG_IROPT) {
   4124                vex_printf("replacing1 ");
   4125                ppIRStmt(st);
   4126                vex_printf(" with ");
   4127             }
   4128             con2 += con;
   4129             bb->stmts[i]
   4130                = IRStmt_WrTmp(
   4131                     st->Ist.WrTmp.tmp,
   4132                     (con2 >= 0)
   4133                       ? IRExpr_Binop(Iop_Add32,
   4134                                      IRExpr_RdTmp(var2),
   4135                                      IRExpr_Const(IRConst_U32(con2)))
   4136                       : IRExpr_Binop(Iop_Sub32,
   4137                                      IRExpr_RdTmp(var2),
   4138                                      IRExpr_Const(IRConst_U32(-con2)))
   4139                  );
   4140             if (DEBUG_IROPT) {
   4141                ppIRStmt(bb->stmts[i]);
   4142                vex_printf("\n");
   4143             }
   4144          }
   4145 
   4146          continue;
   4147       }
   4148 
   4149       /* Try to collapse 't1 = GetI[t2, con]'. */
   4150 
   4151       if (st->tag == Ist_WrTmp
   4152           && st->Ist.WrTmp.data->tag == Iex_GetI
   4153           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   4154           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   4155                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   4156          if (DEBUG_IROPT) {
   4157             vex_printf("replacing3 ");
   4158             ppIRStmt(st);
   4159             vex_printf(" with ");
   4160          }
   4161          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   4162          bb->stmts[i]
   4163             = IRStmt_WrTmp(
   4164                  st->Ist.WrTmp.tmp,
   4165                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   4166                              IRExpr_RdTmp(var2),
   4167                              con2));
   4168          if (DEBUG_IROPT) {
   4169             ppIRStmt(bb->stmts[i]);
   4170             vex_printf("\n");
   4171          }
   4172          continue;
   4173       }
   4174 
   4175       /* Perhaps st is PutI[t, con] ? */
   4176       IRPutI *puti = st->Ist.PutI.details;
   4177       if (st->tag == Ist_PutI
   4178           && puti->ix->tag == Iex_RdTmp
   4179           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
   4180                                &var2, &con2)) {
   4181          if (DEBUG_IROPT) {
   4182             vex_printf("replacing2 ");
   4183             ppIRStmt(st);
   4184             vex_printf(" with ");
   4185          }
   4186          con2 += puti->bias;
   4187          bb->stmts[i]
   4188             = IRStmt_PutI(mkIRPutI(puti->descr,
   4189                                    IRExpr_RdTmp(var2),
   4190                                    con2,
   4191                                    puti->data));
   4192          if (DEBUG_IROPT) {
   4193             ppIRStmt(bb->stmts[i]);
   4194             vex_printf("\n");
   4195          }
   4196          continue;
   4197       }
   4198 
   4199    } /* for */
   4200 }
   4201 
   4202 
   4203 /*---------------------------------------------------------------*/
   4204 /*--- PutI/GetI transformations                               ---*/
   4205 /*---------------------------------------------------------------*/
   4206 
   4207 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   4208    the given starting point to find, if any, a PutI which writes
   4209    exactly the same piece of guest state, and so return the expression
   4210    that the PutI writes.  This is the core of PutI-GetI forwarding. */
   4211 
   4212 static
   4213 IRExpr* findPutI ( IRSB* bb, Int startHere,
   4214                    IRRegArray* descrG, IRExpr* ixG, Int biasG )
   4215 {
   4216    Int        j;
   4217    IRStmt*    st;
   4218    GSAliasing relation;
   4219 
   4220    if (0) {
   4221       vex_printf("\nfindPutI ");
   4222       ppIRRegArray(descrG);
   4223       vex_printf(" ");
   4224       ppIRExpr(ixG);
   4225       vex_printf(" %d\n", biasG);
   4226    }
   4227 
   4228    /* Scan backwards in bb from startHere to find a suitable PutI
   4229       binding for (descrG, ixG, biasG), if any. */
   4230 
   4231    for (j = startHere; j >= 0; j--) {
   4232       st = bb->stmts[j];
   4233       if (st->tag == Ist_NoOp)
   4234          continue;
   4235 
   4236       if (st->tag == Ist_Put) {
   4237          /* Non-indexed Put.  This can't give a binding, but we do
   4238             need to check it doesn't invalidate the search by
   4239             overlapping any part of the indexed guest state. */
   4240 
   4241          relation
   4242             = getAliasingRelation_IC(
   4243                  descrG, ixG,
   4244                  st->Ist.Put.offset,
   4245                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
   4246 
   4247          if (relation == NoAlias) {
   4248             /* we're OK; keep going */
   4249             continue;
   4250          } else {
   4251             /* relation == UnknownAlias || relation == ExactAlias */
   4252             /* If this assertion fails, we've found a Put which writes
   4253                an area of guest state which is read by a GetI.  Which
   4254                is unlikely (although not per se wrong). */
   4255             vassert(relation != ExactAlias);
   4256             /* This Put potentially writes guest state that the GetI
   4257                reads; we must fail. */
   4258             return NULL;
   4259          }
   4260       }
   4261 
   4262       if (st->tag == Ist_PutI) {
   4263          IRPutI *puti = st->Ist.PutI.details;
   4264 
   4265          relation = getAliasingRelation_II(
   4266                        descrG, ixG, biasG,
   4267                        puti->descr,
   4268                        puti->ix,
   4269                        puti->bias
   4270                     );
   4271 
   4272          if (relation == NoAlias) {
   4273             /* This PutI definitely doesn't overlap.  Ignore it and
   4274                keep going. */
   4275             continue; /* the for j loop */
   4276          }
   4277 
   4278          if (relation == UnknownAlias) {
   4279             /* We don't know if this PutI writes to the same guest
   4280                state that the GetI, or not.  So we have to give up. */
   4281             return NULL;
   4282          }
   4283 
   4284          /* Otherwise, we've found what we're looking for.  */
   4285          vassert(relation == ExactAlias);
   4286          return puti->data;
   4287 
   4288       } /* if (st->tag == Ist_PutI) */
   4289 
   4290       if (st->tag == Ist_Dirty) {
   4291          /* Be conservative.  If the dirty call has any guest effects at
   4292             all, give up.  We could do better -- only give up if there
   4293             are any guest writes/modifies. */
   4294          if (st->Ist.Dirty.details->nFxState > 0)
   4295             return NULL;
   4296       }
   4297 
   4298    } /* for */
   4299 
   4300    /* No valid replacement was found. */
   4301    return NULL;
   4302 }
   4303 
   4304 
   4305 
   4306 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   4307    that it writes exactly the same piece of guest state) ?  Safe
   4308    answer: False. */
   4309 
   4310 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   4311 {
   4312    vassert(pi->tag == Ist_PutI);
   4313    if (s2->tag != Ist_PutI)
   4314       return False;
   4315 
   4316    IRPutI *p1 = pi->Ist.PutI.details;
   4317    IRPutI *p2 = s2->Ist.PutI.details;
   4318 
   4319    return toBool(
   4320           getAliasingRelation_II(
   4321              p1->descr, p1->ix, p1->bias,
   4322              p2->descr, p2->ix, p2->bias
   4323           )
   4324           == ExactAlias
   4325           );
   4326 }
   4327 
   4328 
   4329 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   4330    overlap it?  Safe answer: True.  Note, we could do a lot better
   4331    than this if needed. */
   4332 
   4333 static
   4334 Bool guestAccessWhichMightOverlapPutI (
   4335         IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
   4336      )
   4337 {
   4338    GSAliasing relation;
   4339    UInt       minoffP, maxoffP;
   4340 
   4341    vassert(pi->tag == Ist_PutI);
   4342 
   4343    IRPutI *p1 = pi->Ist.PutI.details;
   4344 
   4345    getArrayBounds(p1->descr, &minoffP, &maxoffP);
   4346    switch (s2->tag) {
   4347 
   4348       case Ist_NoOp:
   4349       case Ist_IMark:
   4350          return False;
   4351 
   4352       case Ist_MBE:
   4353       case Ist_AbiHint:
   4354          /* just be paranoid ... these should be rare. */
   4355          return True;
   4356 
   4357       case Ist_CAS:
   4358          /* This is unbelievably lame, but it's probably not
   4359             significant from a performance point of view.  Really, a
   4360             CAS is a load-store op, so it should be safe to say False.
   4361             However .. */
   4362          return True;
   4363 
   4364       case Ist_Dirty:
   4365          /* If the dirty call has any guest effects at all, give up.
   4366             Probably could do better. */
   4367          if (s2->Ist.Dirty.details->nFxState > 0)
   4368             return True;
   4369          return False;
   4370 
   4371       case Ist_Put:
   4372          vassert(isIRAtom(s2->Ist.Put.data));
   4373          relation
   4374             = getAliasingRelation_IC(
   4375                  p1->descr, p1->ix,
   4376                  s2->Ist.Put.offset,
   4377                  typeOfIRExpr(tyenv,s2->Ist.Put.data)
   4378               );
   4379          goto have_relation;
   4380 
   4381       case Ist_PutI: {
   4382          IRPutI *p2 = s2->Ist.PutI.details;
   4383 
   4384          vassert(isIRAtom(p2->ix));
   4385          vassert(isIRAtom(p2->data));
   4386          relation
   4387             = getAliasingRelation_II(
   4388                  p1->descr, p1->ix, p1->bias,
   4389                  p2->descr, p2->ix, p2->bias
   4390               );
   4391          goto have_relation;
   4392       }
   4393 
   4394       case Ist_WrTmp:
   4395          if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
   4396             relation
   4397                = getAliasingRelation_II(
   4398                     p1->descr, p1->ix, p1->bias,
   4399                     s2->Ist.WrTmp.data->Iex.GetI.descr,
   4400                     s2->Ist.WrTmp.data->Iex.GetI.ix,
   4401                     s2->Ist.WrTmp.data->Iex.GetI.bias
   4402                  );
   4403             goto have_relation;
   4404          }
   4405          if (s2->Ist.WrTmp.data->tag == Iex_Get) {
   4406             relation
   4407                = getAliasingRelation_IC(
   4408                     p1->descr, p1->ix,
   4409                     s2->Ist.WrTmp.data->Iex.Get.offset,
   4410                     s2->Ist.WrTmp.data->Iex.Get.ty
   4411                  );
   4412             goto have_relation;
   4413          }
   4414          return False;
   4415 
   4416       case Ist_Store:
   4417          vassert(isIRAtom(s2->Ist.Store.addr));
   4418          vassert(isIRAtom(s2->Ist.Store.data));
   4419          return False;
   4420 
   4421       default:
   4422          vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
   4423          vpanic("guestAccessWhichMightOverlapPutI");
   4424    }
   4425 
   4426   have_relation:
   4427    if (relation == NoAlias)
   4428       return False;
   4429    else
   4430       return True; /* ExactAlias or UnknownAlias */
   4431 }
   4432 
   4433 
   4434 
   4435 /* ---------- PutI/GetI transformations main functions --------- */
   4436 
   4437 /* Remove redundant GetIs, to the extent that they can be detected.
   4438    bb is modified in-place. */
   4439 
   4440 static
   4441 void do_redundant_GetI_elimination ( IRSB* bb )
   4442 {
   4443    Int     i;
   4444    IRStmt* st;
   4445 
   4446    for (i = bb->stmts_used-1; i >= 0; i--) {
   4447       st = bb->stmts[i];
   4448       if (st->tag == Ist_NoOp)
   4449          continue;
   4450 
   4451       if (st->tag == Ist_WrTmp
   4452           && st->Ist.WrTmp.data->tag == Iex_GetI
   4453           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   4454          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   4455          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   4456          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   4457          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   4458          if (replacement
   4459              && isIRAtom(replacement)
   4460              /* Make sure we're doing a type-safe transformation! */
   4461              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   4462             if (DEBUG_IROPT) {
   4463                vex_printf("rGI:  ");
   4464                ppIRExpr(st->Ist.WrTmp.data);
   4465                vex_printf(" -> ");
   4466                ppIRExpr(replacement);
   4467                vex_printf("\n");
   4468             }
   4469             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   4470          }
   4471       }
   4472    }
   4473 
   4474 }
   4475 
   4476 
   4477 /* Remove redundant PutIs, to the extent which they can be detected.
   4478    bb is modified in-place. */
   4479 
   4480 static
   4481 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
   4482 {
   4483    Int    i, j;
   4484    Bool   delete;
   4485    IRStmt *st, *stj;
   4486 
   4487    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
   4488 
   4489    for (i = 0; i < bb->stmts_used; i++) {
   4490       st = bb->stmts[i];
   4491       if (st->tag != Ist_PutI)
   4492          continue;
   4493       /* Ok, search forwards from here to see if we can find another
   4494          PutI which makes this one redundant, and dodging various
   4495          hazards.  Search forwards:
   4496          * If conditional exit, give up (because anything after that
   4497            does not postdominate this put).
   4498          * If a Get which might overlap, give up (because this PutI
   4499            not necessarily dead).
   4500          * If a Put which is identical, stop with success.
   4501          * If a Put which might overlap, but is not identical, give up.
   4502          * If a dirty helper call which might write guest state, give up.
   4503          * If a Put which definitely doesn't overlap, or any other
   4504            kind of stmt, continue.
   4505       */
   4506       delete = False;
   4507       for (j = i+1; j < bb->stmts_used; j++) {
   4508          stj = bb->stmts[j];
   4509          if (stj->tag == Ist_NoOp)
   4510             continue;
   4511          if (identicalPutIs(st, stj)) {
   4512             /* success! */
   4513             delete = True;
   4514             break;
   4515          }
   4516          if (stj->tag == Ist_Exit)
   4517             /* give up */
   4518             break;
   4519          if (st->tag == Ist_Dirty)
   4520             /* give up; could do better here */
   4521             break;
   4522          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   4523             /* give up */
   4524            break;
   4525       }
   4526 
   4527       if (delete) {
   4528          if (DEBUG_IROPT) {
   4529             vex_printf("rPI:  ");
   4530             ppIRStmt(st);
   4531             vex_printf("\n");
   4532          }
   4533          bb->stmts[i] = IRStmt_NoOp();
   4534       }
   4535 
   4536    }
   4537 }
   4538 
   4539 
   4540 /*---------------------------------------------------------------*/
   4541 /*--- Loop unrolling                                          ---*/
   4542 /*---------------------------------------------------------------*/
   4543 
   4544 /* Adjust all tmp values (names) in e by delta.  e is destructively
   4545    modified. */
   4546 
   4547 static void deltaIRExpr ( IRExpr* e, Int delta )
   4548 {
   4549    Int i;
   4550    switch (e->tag) {
   4551       case Iex_RdTmp:
   4552          e->Iex.RdTmp.tmp += delta;
   4553          break;
   4554       case Iex_Get:
   4555       case Iex_Const:
   4556          break;
   4557       case Iex_GetI:
   4558          deltaIRExpr(e->Iex.GetI.ix, delta);
   4559          break;
   4560       case Iex_Qop:
   4561          deltaIRExpr(e->Iex.Qop.details->arg1, delta);
   4562          deltaIRExpr(e->Iex.Qop.details->arg2, delta);
   4563          deltaIRExpr(e->Iex.Qop.details->arg3, delta);
   4564          deltaIRExpr(e->Iex.Qop.details->arg4, delta);
   4565          break;
   4566       case Iex_Triop:
   4567          deltaIRExpr(e->Iex.Triop.details->arg1, delta);
   4568          deltaIRExpr(e->Iex.Triop.details->arg2, delta);
   4569          deltaIRExpr(e->Iex.Triop.details->arg3, delta);
   4570          break;
   4571       case Iex_Binop:
   4572          deltaIRExpr(e->Iex.Binop.arg1, delta);
   4573          deltaIRExpr(e->Iex.Binop.arg2, delta);
   4574          break;
   4575       case Iex_Unop:
   4576          deltaIRExpr(e->Iex.Unop.arg, delta);
   4577          break;
   4578       case Iex_Load:
   4579          deltaIRExpr(e->Iex.Load.addr, delta);
   4580          break;
   4581       case Iex_CCall:
   4582          for (i = 0; e->Iex.CCall.args[i]; i++)
   4583             deltaIRExpr(e->Iex.CCall.args[i], delta);
   4584          break;
   4585       case Iex_ITE:
   4586          deltaIRExpr(e->Iex.ITE.cond, delta);
   4587          deltaIRExpr(e->Iex.ITE.iftrue, delta);
   4588          deltaIRExpr(e->Iex.ITE.iffalse, delta);
   4589          break;
   4590       default:
   4591          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4592          vpanic("deltaIRExpr");
   4593    }
   4594 }
   4595 
   4596 /* Adjust all tmp values (names) in st by delta.  st is destructively
   4597    modified. */
   4598 
   4599 static void deltaIRStmt ( IRStmt* st, Int delta )
   4600 {
   4601    Int      i;
   4602    IRDirty* d;
   4603    switch (st->tag) {
   4604       case Ist_NoOp:
   4605       case Ist_IMark:
   4606       case Ist_MBE:
   4607          break;
   4608       case Ist_AbiHint:
   4609          deltaIRExpr(st->Ist.AbiHint.base, delta);
   4610          deltaIRExpr(st->Ist.AbiHint.nia, delta);
   4611          break;
   4612       case Ist_Put:
   4613          deltaIRExpr(st->Ist.Put.data, delta);
   4614          break;
   4615       case Ist_PutI:
   4616          deltaIRExpr(st->Ist.PutI.details->ix, delta);
   4617          deltaIRExpr(st->Ist.PutI.details->data, delta);
   4618          break;
   4619       case Ist_WrTmp:
   4620          st->Ist.WrTmp.tmp += delta;
   4621          deltaIRExpr(st->Ist.WrTmp.data, delta);
   4622          break;
   4623       case Ist_Exit:
   4624          deltaIRExpr(st->Ist.Exit.guard, delta);
   4625          break;
   4626       case Ist_Store:
   4627          deltaIRExpr(st->Ist.Store.addr, delta);
   4628          deltaIRExpr(st->Ist.Store.data, delta);
   4629          break;
   4630       case Ist_StoreG: {
   4631          IRStoreG* sg = st->Ist.StoreG.details;
   4632          deltaIRExpr(sg->addr, delta);
   4633          deltaIRExpr(sg->data, delta);
   4634          deltaIRExpr(sg->guard, delta);
   4635          break;
   4636       }
   4637       case Ist_LoadG: {
   4638          IRLoadG* lg = st->Ist.LoadG.details;
   4639          lg->dst += delta;
   4640          deltaIRExpr(lg->addr, delta);
   4641          deltaIRExpr(lg->alt, delta);
   4642          deltaIRExpr(lg->guard, delta);
   4643          break;
   4644       }
   4645       case Ist_CAS:
   4646          if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
   4647             st->Ist.CAS.details->oldHi += delta;
   4648          st->Ist.CAS.details->oldLo += delta;
   4649          deltaIRExpr(st->Ist.CAS.details->addr, delta);
   4650          if (st->Ist.CAS.details->expdHi)
   4651             deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
   4652          deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
   4653          if (st->Ist.CAS.details->dataHi)
   4654             deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
   4655          deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
   4656          break;
   4657       case Ist_LLSC:
   4658          st->Ist.LLSC.result += delta;
   4659          deltaIRExpr(st->Ist.LLSC.addr, delta);
   4660          if (st->Ist.LLSC.storedata)
   4661             deltaIRExpr(st->Ist.LLSC.storedata, delta);
   4662          break;
   4663       case Ist_Dirty:
   4664          d = st->Ist.Dirty.details;
   4665          deltaIRExpr(d->guard, delta);
   4666          for (i = 0; d->args[i]; i++) {
   4667             IRExpr* arg = d->args[i];
   4668             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   4669                deltaIRExpr(arg, delta);
   4670          }
   4671          if (d->tmp != IRTemp_INVALID)
   4672             d->tmp += delta;
   4673          if (d->mAddr)
   4674             deltaIRExpr(d->mAddr, delta);
   4675          break;
   4676       default:
   4677          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4678          vpanic("deltaIRStmt");
   4679    }
   4680 }
   4681 
   4682 
   4683 /* If possible, return a loop-unrolled version of bb0.  The original
   4684    is changed.  If not possible, return NULL.  */
   4685 
   4686 /* The two schemas considered are:
   4687 
   4688      X: BODY; goto X
   4689 
   4690      which unrolls to (eg)  X: BODY;BODY; goto X
   4691 
   4692    and
   4693 
   4694        X: BODY; if (c) goto X; goto Y
   4695    which trivially transforms to
   4696        X: BODY; if (!c) goto Y; goto X;
   4697    so it falls in the scope of the first case.
   4698 
   4699    X and Y must be literal (guest) addresses.
   4700 */
   4701 
   4702 static Int calc_unroll_factor( IRSB* bb )
   4703 {
   4704    Int n_stmts, i;
   4705 
   4706    n_stmts = 0;
   4707    for (i = 0; i < bb->stmts_used; i++) {
   4708       if (bb->stmts[i]->tag != Ist_NoOp)
   4709          n_stmts++;
   4710    }
   4711 
   4712    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   4713       if (vex_control.iropt_verbosity > 0)
   4714          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   4715                     n_stmts, 8* n_stmts);
   4716       return 8;
   4717    }
   4718    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   4719       if (vex_control.iropt_verbosity > 0)
   4720          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   4721                     n_stmts, 4* n_stmts);
   4722       return 4;
   4723    }
   4724 
   4725    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   4726       if (vex_control.iropt_verbosity > 0)
   4727          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   4728                     n_stmts, 2* n_stmts);
   4729       return 2;
   4730    }
   4731 
   4732    if (vex_control.iropt_verbosity > 0)
   4733       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   4734 
   4735    return 1;
   4736 }
   4737 
   4738 
   4739 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
   4740 {
   4741    Int      i, j, jmax, n_vars;
   4742    Bool     xxx_known;
   4743    Addr64   xxx_value, yyy_value;
   4744    IRExpr*  udst;
   4745    IRStmt*  st;
   4746    IRConst* con;
   4747    IRSB     *bb1, *bb2;
   4748    Int      unroll_factor;
   4749 
   4750    if (vex_control.iropt_unroll_thresh <= 0)
   4751       return NULL;
   4752 
   4753    /* First off, figure out if we can unroll this loop.  Do this
   4754       without modifying bb0. */
   4755 
   4756    if (bb0->jumpkind != Ijk_Boring)
   4757       return NULL;
   4758 
   4759    xxx_known = False;
   4760    xxx_value = 0;
   4761 
   4762    /* Extract the next-guest address.  If it isn't a literal, we
   4763       have to give up. */
   4764 
   4765    udst = bb0->next;
   4766    if (udst->tag == Iex_Const
   4767        && (udst->Iex.Const.con->tag == Ico_U32
   4768            || udst->Iex.Const.con->tag == Ico_U64)) {
   4769       /* The BB ends in a jump to a literal location. */
   4770       xxx_known = True;
   4771       xxx_value = udst->Iex.Const.con->tag == Ico_U64
   4772                     ?  udst->Iex.Const.con->Ico.U64
   4773                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
   4774    }
   4775 
   4776    if (!xxx_known)
   4777       return NULL;
   4778 
   4779    /* Now we know the BB ends to a jump to a literal location.  If
   4780       it's a jump to itself (viz, idiom #1), move directly to the
   4781       unrolling stage, first cloning the bb so the original isn't
   4782       modified. */
   4783    if (xxx_value == my_addr) {
   4784       unroll_factor = calc_unroll_factor( bb0 );
   4785       if (unroll_factor < 2)
   4786          return NULL;
   4787       bb1 = deepCopyIRSB( bb0 );
   4788       bb0 = NULL;
   4789       udst = NULL; /* is now invalid */
   4790       goto do_unroll;
   4791    }
   4792 
   4793    /* Search for the second idiomatic form:
   4794         X: BODY; if (c) goto X; goto Y
   4795       We know Y, but need to establish that the last stmt
   4796       is 'if (c) goto X'.
   4797    */
   4798    yyy_value = xxx_value;
   4799    for (i = bb0->stmts_used-1; i >= 0; i--)
   4800       if (bb0->stmts[i])
   4801          break;
   4802 
   4803    if (i < 0)
   4804       return NULL; /* block with no stmts.  Strange. */
   4805 
   4806    st = bb0->stmts[i];
   4807    if (st->tag != Ist_Exit)
   4808       return NULL;
   4809    if (st->Ist.Exit.jk != Ijk_Boring)
   4810       return NULL;
   4811 
   4812    con = st->Ist.Exit.dst;
   4813    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   4814 
   4815    xxx_value = con->tag == Ico_U64
   4816                   ? st->Ist.Exit.dst->Ico.U64
   4817                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
   4818 
   4819    /* If this assertion fails, we have some kind of type error. */
   4820    vassert(con->tag == udst->Iex.Const.con->tag);
   4821 
   4822    if (xxx_value != my_addr)
   4823       /* We didn't find either idiom.  Give up. */
   4824       return NULL;
   4825 
   4826    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
   4827       yyy values (which makes it look like idiom #1), and go into
   4828       unrolling proper.  This means finding (again) the last stmt, in
   4829       the copied BB. */
   4830 
   4831    unroll_factor = calc_unroll_factor( bb0 );
   4832    if (unroll_factor < 2)
   4833       return NULL;
   4834 
   4835    bb1 = deepCopyIRSB( bb0 );
   4836    bb0 = NULL;
   4837    udst = NULL; /* is now invalid */
   4838    for (i = bb1->stmts_used-1; i >= 0; i--)
   4839       if (bb1->stmts[i])
   4840          break;
   4841 
   4842    /* The next bunch of assertions should be true since we already
   4843       found and checked the last stmt in the original bb. */
   4844 
   4845    vassert(i >= 0);
   4846 
   4847    st = bb1->stmts[i];
   4848    vassert(st->tag == Ist_Exit);
   4849 
   4850    con = st->Ist.Exit.dst;
   4851    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   4852 
   4853    udst = bb1->next;
   4854    vassert(udst->tag == Iex_Const);
   4855    vassert(udst->Iex.Const.con->tag == Ico_U32
   4856           || udst->Iex.Const.con->tag == Ico_U64);
   4857    vassert(con->tag == udst->Iex.Const.con->tag);
   4858 
   4859    /* switch the xxx and yyy fields around */
   4860    if (con->tag == Ico_U64) {
   4861       udst->Iex.Const.con->Ico.U64 = xxx_value;
   4862       con->Ico.U64 = yyy_value;
   4863    } else {
   4864       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
   4865       con->Ico.U32 = (UInt)yyy_value;
   4866    }
   4867 
   4868    /* negate the test condition */
   4869    st->Ist.Exit.guard
   4870       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
   4871 
   4872    /* --- The unroller proper.  Both idioms are by now --- */
   4873    /* --- now converted to idiom 1. --- */
   4874 
   4875   do_unroll:
   4876 
   4877    vassert(unroll_factor == 2
   4878            || unroll_factor == 4
   4879            || unroll_factor == 8);
   4880 
   4881    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   4882    for (j = 1; j <= jmax; j++) {
   4883 
   4884       n_vars = bb1->tyenv->types_used;
   4885 
   4886       bb2 = deepCopyIRSB(bb1);
   4887       for (i = 0; i < n_vars; i++)
   4888          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
   4889 
   4890       for (i = 0; i < bb2->stmts_used; i++) {
   4891          /* deltaIRStmt destructively modifies the stmt, but
   4892             that's OK since bb2 is a complete fresh copy of bb1. */
   4893          deltaIRStmt(bb2->stmts[i], n_vars);
   4894          addStmtToIRSB(bb1, bb2->stmts[i]);
   4895       }
   4896    }
   4897 
   4898    if (DEBUG_IROPT) {
   4899       vex_printf("\nUNROLLED (%lx)\n", my_addr);
   4900       ppIRSB(bb1);
   4901       vex_printf("\n");
   4902    }
   4903 
   4904    /* Flattening; sigh.  The unroller succeeds in breaking flatness
   4905       by negating the test condition.  This should be fixed properly.
   4906       For the moment use this shotgun approach.  */
   4907    return flatten_BB(bb1);
   4908 }
   4909 
   4910 
   4911 /*---------------------------------------------------------------*/
   4912 /*--- The tree builder                                        ---*/
   4913 /*---------------------------------------------------------------*/
   4914 
   4915 /* This isn't part of IR optimisation.  Really it's a pass done prior
   4916    to instruction selection, which improves the code that the
   4917    instruction selector can produce. */
   4918 
   4919 /* --- The 'tmp' environment is the central data structure here --- */
   4920 
   4921 /* The number of outstanding bindings we're prepared to track.
   4922    The number of times the env becomes full and we have to dump
   4923    the oldest binding (hence reducing code quality) falls very
   4924    rapidly as the env size increases.  8 gives reasonable performance
   4925    under most circumstances. */
   4926 #define A_NENV 10
   4927 
   4928 /* An interval. Used to record the bytes in the guest state accessed
   4929    by a Put[I] statement or by (one or more) Get[I] expression(s). In
   4930    case of several Get[I] expressions, the lower/upper bounds are recorded.
   4931    This is conservative but cheap.
   4932    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   4933    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   4934    offset 200 would be recorded as [100,203] */
   4935 typedef
   4936    struct {
   4937       Bool present;
   4938       Int  low;
   4939       Int  high;
   4940    }
   4941    Interval;
   4942 
   4943 static inline Bool
   4944 intervals_overlap(Interval i1, Interval i2)
   4945 {
   4946    return (i1.low >= i2.low && i1.low <= i2.high) ||
   4947           (i2.low >= i1.low && i2.low <= i1.high);
   4948 }
   4949 
   4950 static inline void
   4951 update_interval(Interval *i, Int low, Int high)
   4952 {
   4953    vassert(low <= high);
   4954 
   4955    if (i->present) {
   4956       if (low  < i->low)  i->low  = low;
   4957       if (high > i->high) i->high = high;
   4958    } else {
   4959       i->present = True;
   4960       i->low  = low;
   4961       i->high = high;
   4962    }
   4963 }
   4964 
   4965 
   4966 /* bindee == NULL   ===  slot is not in use
   4967    bindee != NULL   ===  slot is in use
   4968 */
   4969 typedef
   4970    struct {
   4971       IRTemp  binder;
   4972       IRExpr* bindee;
   4973       Bool    doesLoad;
   4974       /* Record the bytes of the guest state BINDEE reads from. */
   4975       Interval getInterval;
   4976    }
   4977    ATmpInfo;
   4978 
   4979 __attribute__((unused))
   4980 static void ppAEnv ( ATmpInfo* env )
   4981 {
   4982    Int i;
   4983    for (i = 0; i < A_NENV; i++) {
   4984       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   4985       if (env[i].bindee)
   4986          ppIRExpr(env[i].bindee);
   4987       else
   4988          vex_printf("(null)");
   4989       vex_printf("\n");
   4990    }
   4991 }
   4992 
   4993 /* --- Tree-traversal fns --- */
   4994 
   4995 /* Traverse an expr, and detect if any part of it reads memory or does
   4996    a Get.  Be careful ... this really controls how much the
   4997    tree-builder can reorder the code, so getting it right is critical.
   4998 */
   4999 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
   5000 {
   5001    Int i;
   5002    switch (e->tag) {
   5003       case Iex_CCall:
   5004          for (i = 0; e->Iex.CCall.args[i]; i++)
   5005             setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
   5006          return;
   5007       case Iex_ITE:
   5008          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
   5009          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
   5010          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
   5011          return;
   5012       case Iex_Qop:
   5013          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
   5014          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
   5015          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
   5016          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
   5017          return;
   5018       case Iex_Triop:
   5019          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
   5020          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
   5021          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
   5022          return;
   5023       case Iex_Binop:
   5024          setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
   5025          setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
   5026          return;
   5027       case Iex_Unop:
   5028          setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
   5029          return;
   5030       case Iex_Load:
   5031          *doesLoad = True;
   5032          setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
   5033          return;
   5034       case Iex_Get: {
   5035          Int low = e->Iex.Get.offset;
   5036          Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
   5037          update_interval(getInterval, low, high);
   5038          return;
   5039       }
   5040       case Iex_GetI: {
   5041          IRRegArray *descr = e->Iex.GetI.descr;
   5042          Int size = sizeofIRType(descr->elemTy);
   5043          Int low  = descr->base;
   5044          Int high = low + descr->nElems * size - 1;
   5045          update_interval(getInterval, low, high);
   5046          setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
   5047          return;
   5048       }
   5049       case Iex_RdTmp:
   5050       case Iex_Const:
   5051          return;
   5052       default:
   5053          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5054          vpanic("setHints_Expr");
   5055    }
   5056 }
   5057 
   5058 
   5059 /* Add a binding to the front of the env and slide all the rest
   5060    backwards.  It should be the case that the last slot is free. */
   5061 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   5062 {
   5063    Int i;
   5064    vassert(env[A_NENV-1].bindee == NULL);
   5065    for (i = A_NENV-1; i >= 1; i--)
   5066       env[i] = env[i-1];
   5067    env[0].binder   = binder;
   5068    env[0].bindee   = bindee;
   5069    env[0].doesLoad = False; /* filled in later */
   5070    env[0].getInterval.present  = False; /* filled in later */
   5071    env[0].getInterval.low  = -1; /* filled in later */
   5072    env[0].getInterval.high = -1; /* filled in later */
   5073 }
   5074 
   5075 /* Given uses :: array of UShort, indexed by IRTemp
   5076    Add the use-occurrences of temps in this expression
   5077    to the env.
   5078 */
   5079 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   5080 {
   5081    Int i;
   5082 
   5083    switch (e->tag) {
   5084 
   5085       case Iex_RdTmp: /* the only interesting case */
   5086          uses[e->Iex.RdTmp.tmp]++;
   5087          return;
   5088 
   5089       case Iex_ITE:
   5090          aoccCount_Expr(uses, e->Iex.ITE.cond);
   5091          aoccCount_Expr(uses, e->Iex.ITE.iftrue);
   5092          aoccCount_Expr(uses, e->Iex.ITE.iffalse);
   5093          return;
   5094 
   5095       case Iex_Qop:
   5096          aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
   5097          aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
   5098          aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
   5099          aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
   5100          return;
   5101 
   5102       case Iex_Triop:
   5103          aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
   5104          aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
   5105          aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
   5106          return;
   5107 
   5108       case Iex_Binop:
   5109          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   5110          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   5111          return;
   5112 
   5113       case Iex_Unop:
   5114          aoccCount_Expr(uses, e->Iex.Unop.arg);
   5115          return;
   5116 
   5117       case Iex_Load:
   5118          aoccCount_Expr(uses, e->Iex.Load.addr);
   5119          return;
   5120 
   5121       case Iex_CCall:
   5122          for (i = 0; e->Iex.CCall.args[i]; i++)
   5123             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   5124          return;
   5125 
   5126       case Iex_GetI:
   5127          aoccCount_Expr(uses, e->Iex.GetI.ix);
   5128          return;
   5129 
   5130       case Iex_Const:
   5131       case Iex_Get:
   5132          return;
   5133 
   5134       default:
   5135          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5136          vpanic("aoccCount_Expr");
   5137     }
   5138 }
   5139 
   5140 
   5141 /* Given uses :: array of UShort, indexed by IRTemp
   5142    Add the use-occurrences of temps in this statement
   5143    to the env.
   5144 */
   5145 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   5146 {
   5147    Int      i;
   5148    IRDirty* d;
   5149    IRCAS*   cas;
   5150    switch (st->tag) {
   5151       case Ist_AbiHint:
   5152          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   5153          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   5154          return;
   5155       case Ist_WrTmp:
   5156          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   5157          return;
   5158       case Ist_Put:
   5159          aoccCount_Expr(uses, st->Ist.Put.data);
   5160          return;
   5161       case Ist_PutI:
   5162          aoccCount_Expr(uses, st->Ist.PutI.details->ix);
   5163          aoccCount_Expr(uses, st->Ist.PutI.details->data);
   5164          return;
   5165       case Ist_Store:
   5166          aoccCount_Expr(uses, st->Ist.Store.addr);
   5167          aoccCount_Expr(uses, st->Ist.Store.data);
   5168          return;
   5169       case Ist_StoreG: {
   5170          IRStoreG* sg = st->Ist.StoreG.details;
   5171          aoccCount_Expr(uses, sg->addr);
   5172          aoccCount_Expr(uses, sg->data);
   5173          aoccCount_Expr(uses, sg->guard);
   5174          return;
   5175       }
   5176       case Ist_LoadG: {
   5177          IRLoadG* lg = st->Ist.LoadG.details;
   5178          aoccCount_Expr(uses, lg->addr);
   5179          aoccCount_Expr(uses, lg->alt);
   5180          aoccCount_Expr(uses, lg->guard);
   5181          return;
   5182       }
   5183       case Ist_CAS:
   5184          cas = st->Ist.CAS.details;
   5185          aoccCount_Expr(uses, cas->addr);
   5186          if (cas->expdHi)
   5187             aoccCount_Expr(uses, cas->expdHi);
   5188          aoccCount_Expr(uses, cas->expdLo);
   5189          if (cas->dataHi)
   5190             aoccCount_Expr(uses, cas->dataHi);
   5191          aoccCount_Expr(uses, cas->dataLo);
   5192          return;
   5193       case Ist_LLSC:
   5194          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   5195          if (st->Ist.LLSC.storedata)
   5196             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   5197          return;
   5198       case Ist_Dirty:
   5199          d = st->Ist.Dirty.details;
   5200          if (d->mFx != Ifx_None)
   5201             aoccCount_Expr(uses, d->mAddr);
   5202          aoccCount_Expr(uses, d->guard);
   5203          for (i = 0; d->args[i]; i++) {
   5204             IRExpr* arg = d->args[i];
   5205             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   5206                aoccCount_Expr(uses, arg);
   5207          }
   5208          return;
   5209       case Ist_NoOp:
   5210       case Ist_IMark:
   5211       case Ist_MBE:
   5212          return;
   5213       case Ist_Exit:
   5214          aoccCount_Expr(uses, st->Ist.Exit.guard);
   5215          return;
   5216       default:
   5217          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   5218          vpanic("aoccCount_Stmt");
   5219    }
   5220 }
   5221 
   5222 /* Look up a binding for tmp in the env.  If found, return the bound
   5223    expression, and set the env's binding to NULL so it is marked as
   5224    used.  If not found, return NULL. */
   5225 
   5226 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   5227 {
   5228    Int i;
   5229    for (i = 0; i < A_NENV; i++) {
   5230       if (env[i].binder == tmp && env[i].bindee != NULL) {
   5231          IRExpr* bindee = env[i].bindee;
   5232          env[i].bindee = NULL;
   5233          return bindee;
   5234       }
   5235    }
   5236    return NULL;
   5237 }
   5238 
   5239 /* Traverse e, looking for temps.  For each observed temp, see if env
   5240    contains a binding for the temp, and if so return the bound value.
   5241    The env has the property that any binding it holds is
   5242    'single-shot', so once a binding is used, it is marked as no longer
   5243    available, by setting its .bindee field to NULL. */
   5244 
   5245 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   5246    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   5247 }
   5248 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   5249    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   5250 }
   5251 
   5252 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   5253 {
   5254    switch (op) {
   5255    case Iop_Or32:
   5256       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   5257       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   5258          return IRExpr_Unop( Iop_CmpwNEZ32,
   5259                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   5260                                                      a2->Iex.Unop.arg ) );
   5261       break;
   5262 
   5263    case Iop_CmpNE32:
   5264       /* Since X has type Ity_I1 we can simplify:
   5265          CmpNE32(1Uto32(X),0)) ==> X */
   5266       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   5267          return a1->Iex.Unop.arg;
   5268       break;
   5269 
   5270    default:
   5271       break;
   5272    }
   5273    /* no reduction rule applies */
   5274    return IRExpr_Binop( op, a1, a2 );
   5275 }
   5276 
   5277 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   5278 {
   5279    switch (op) {
   5280    case Iop_CmpwNEZ64:
   5281       /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
   5282       if (is_Unop(aa, Iop_CmpwNEZ64))
   5283          return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
   5284       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5285       if (is_Binop(aa, Iop_Or64)
   5286           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   5287          return fold_IRExpr_Unop(
   5288                    Iop_CmpwNEZ64,
   5289                    IRExpr_Binop(Iop_Or64,
   5290                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   5291                                 aa->Iex.Binop.arg2));
   5292       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5293       if (is_Binop(aa, Iop_Or64)
   5294           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   5295          return fold_IRExpr_Unop(
   5296                    Iop_CmpwNEZ64,
   5297                    IRExpr_Binop(Iop_Or64,
   5298                                 aa->Iex.Binop.arg1,
   5299                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   5300       break;
   5301    case Iop_CmpNEZ64:
   5302       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   5303       if (is_Unop(aa, Iop_Left64))
   5304          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   5305       /* CmpNEZ64( 1Uto64(X) ) --> X */
   5306       if (is_Unop(aa, Iop_1Uto64))
   5307          return aa->Iex.Unop.arg;
   5308       break;
   5309    case Iop_CmpwNEZ32:
   5310       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   5311       if (is_Unop(aa, Iop_CmpwNEZ32))
   5312          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   5313       break;
   5314    case Iop_CmpNEZ32:
   5315       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   5316       if (is_Unop(aa, Iop_Left32))
   5317          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   5318       /* CmpNEZ32( 1Uto32(X) ) --> X */
   5319       if (is_Unop(aa, Iop_1Uto32))
   5320          return aa->Iex.Unop.arg;
   5321       /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
   5322       if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
   5323          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
   5324       break;
   5325    case Iop_CmpNEZ8:
   5326       /* CmpNEZ8( 1Uto8(X) ) --> X */
   5327       if (is_Unop(aa, Iop_1Uto8))
   5328          return aa->Iex.Unop.arg;
   5329       break;
   5330    case Iop_Left32:
   5331       /* Left32( Left32(x) ) --> Left32(x) */
   5332       if (is_Unop(aa, Iop_Left32))
   5333          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   5334       break;
   5335    case Iop_Left64:
   5336       /* Left64( Left64(x) ) --> Left64(x) */
   5337       if (is_Unop(aa, Iop_Left64))
   5338          return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
   5339       break;
   5340    case Iop_ZeroHI64ofV128:
   5341       /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
   5342       if (is_Unop(aa, Iop_ZeroHI64ofV128))
   5343          return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
   5344       break;
   5345    case Iop_32to1:
   5346       /* 32to1( 1Uto32 ( x ) ) --> x */
   5347       if (is_Unop(aa, Iop_1Uto32))
   5348          return aa->Iex.Unop.arg;
   5349       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   5350       if (is_Unop(aa, Iop_CmpwNEZ32))
   5351          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   5352       break;
   5353    case Iop_64to1:
   5354       /* 64to1( 1Uto64 ( x ) ) --> x */
   5355       if (is_Unop(aa, Iop_1Uto64))
   5356          return aa->Iex.Unop.arg;
   5357       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   5358       if (is_Unop(aa, Iop_CmpwNEZ64))
   5359          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   5360       break;
   5361    case Iop_64to32:
   5362       /* 64to32( 32Uto64 ( x )) --> x */
   5363       if (is_Unop(aa, Iop_32Uto64))
   5364          return aa->Iex.Unop.arg;
   5365       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   5366       if (is_Unop(aa, Iop_8Uto64))
   5367          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   5368       break;
   5369 
   5370    case Iop_32Uto64:
   5371       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   5372       if (is_Unop(aa, Iop_8Uto32))
   5373          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   5374       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   5375       if (is_Unop(aa, Iop_16Uto32))
   5376          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   5377       /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
   5378                      --> Shr64( 32Uto64(64to32(x)), sh )) */
   5379       if (is_Unop(aa, Iop_64to32)
   5380           && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
   5381           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5382           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5383                      Iop_64to32)) {
   5384          return aa->Iex.Unop.arg;
   5385       }
   5386       /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
   5387          --> 32Uto64(64to32( Shl64(                x,   sh )) */
   5388       if (is_Unop(aa, Iop_64to32)
   5389           && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
   5390           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5391           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5392                      Iop_64to32)) {
   5393          return
   5394             IRExpr_Unop(
   5395                Iop_32Uto64,
   5396                IRExpr_Unop(
   5397                   Iop_64to32,
   5398                   IRExpr_Binop(
   5399                      Iop_Shl64,
   5400                      aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
   5401                      aa->Iex.Unop.arg->Iex.Binop.arg2
   5402             )));
   5403       }
   5404       break;
   5405 
   5406    case Iop_1Sto32:
   5407       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   5408       if (is_Unop(aa, Iop_CmpNEZ8)
   5409           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   5410           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   5411           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   5412                      Iop_CmpNEZ32)) {
   5413          return IRExpr_Unop( Iop_CmpwNEZ32,
   5414                              aa->Iex.Unop.arg->Iex.Unop.arg
   5415                                ->Iex.Unop.arg->Iex.Unop.arg);
   5416       }
   5417       break;
   5418 
   5419    default:
   5420       break;
   5421    }
   5422    /* no reduction rule applies */
   5423    return IRExpr_Unop( op, aa );
   5424 }
   5425 
   5426 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   5427 {
   5428    IRExpr*  e2;
   5429    IRExpr** args2;
   5430    Int      i;
   5431 
   5432    switch (e->tag) {
   5433 
   5434       case Iex_CCall:
   5435          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   5436          for (i = 0; args2[i]; i++)
   5437             args2[i] = atbSubst_Expr(env,args2[i]);
   5438          return IRExpr_CCall(
   5439                    e->Iex.CCall.cee,
   5440                    e->Iex.CCall.retty,
   5441                    args2
   5442                 );
   5443       case Iex_RdTmp:
   5444          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   5445          return e2 ? e2 : e;
   5446       case Iex_ITE:
   5447          return IRExpr_ITE(
   5448                    atbSubst_Expr(env, e->Iex.ITE.cond),
   5449                    atbSubst_Expr(env, e->Iex.ITE.iftrue),
   5450                    atbSubst_Expr(env, e->Iex.ITE.iffalse)
   5451                 );
   5452       case Iex_Qop:
   5453          return IRExpr_Qop(
   5454                    e->Iex.Qop.details->op,
   5455                    atbSubst_Expr(env, e->Iex.Qop.details->arg1),
   5456                    atbSubst_Expr(env, e->Iex.Qop.details->arg2),
   5457                    atbSubst_Expr(env, e->Iex.Qop.details->arg3),
   5458                    atbSubst_Expr(env, e->Iex.Qop.details->arg4)
   5459                 );
   5460       case Iex_Triop:
   5461          return IRExpr_Triop(
   5462                    e->Iex.Triop.details->op,
   5463                    atbSubst_Expr(env, e->Iex.Triop.details->arg1),
   5464                    atbSubst_Expr(env, e->Iex.Triop.details->arg2),
   5465                    atbSubst_Expr(env, e->Iex.Triop.details->arg3)
   5466                 );
   5467       case Iex_Binop:
   5468          return fold_IRExpr_Binop(
   5469                    e->Iex.Binop.op,
   5470                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   5471                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   5472                 );
   5473       case Iex_Unop:
   5474          return fold_IRExpr_Unop(
   5475                    e->Iex.Unop.op,
   5476                    atbSubst_Expr(env, e->Iex.Unop.arg)
   5477                 );
   5478       case Iex_Load:
   5479          return IRExpr_Load(
   5480                    e->Iex.Load.end,
   5481                    e->Iex.Load.ty,
   5482                    atbSubst_Expr(env, e->Iex.Load.addr)
   5483                 );
   5484       case Iex_GetI:
   5485          return IRExpr_GetI(
   5486                    e->Iex.GetI.descr,
   5487                    atbSubst_Expr(env, e->Iex.GetI.ix),
   5488                    e->Iex.GetI.bias
   5489                 );
   5490       case Iex_Const:
   5491       case Iex_Get:
   5492          return e;
   5493       default:
   5494          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5495          vpanic("atbSubst_Expr");
   5496    }
   5497 }
   5498 
   5499 /* Same deal as atbSubst_Expr, except for stmts. */
   5500 
   5501 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
   5502 {
   5503    Int     i;
   5504    IRDirty *d, *d2;
   5505    IRCAS   *cas, *cas2;
   5506    IRPutI  *puti, *puti2;
   5507 
   5508    switch (st->tag) {
   5509       case Ist_AbiHint:
   5510          return IRStmt_AbiHint(
   5511                    atbSubst_Expr(env, st->Ist.AbiHint.base),
   5512                    st->Ist.AbiHint.len,
   5513                    atbSubst_Expr(env, st->Ist.AbiHint.nia)
   5514                 );
   5515       case Ist_Store:
   5516          return IRStmt_Store(
   5517                    st->Ist.Store.end,
   5518                    atbSubst_Expr(env, st->Ist.Store.addr),
   5519                    atbSubst_Expr(env, st->Ist.Store.data)
   5520                 );
   5521       case Ist_StoreG: {
   5522          IRStoreG* sg = st->Ist.StoreG.details;
   5523          return IRStmt_StoreG(sg->end,
   5524                               atbSubst_Expr(env, sg->addr),
   5525                               atbSubst_Expr(env, sg->data),
   5526                               atbSubst_Expr(env, sg->guard));
   5527       }
   5528       case Ist_LoadG: {
   5529          IRLoadG* lg = st->Ist.LoadG.details;
   5530          return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
   5531                              atbSubst_Expr(env, lg->addr),
   5532                              atbSubst_Expr(env, lg->alt),
   5533                              atbSubst_Expr(env, lg->guard));
   5534       }
   5535       case Ist_WrTmp:
   5536          return IRStmt_WrTmp(
   5537                    st->Ist.WrTmp.tmp,
   5538                    atbSubst_Expr(env, st->Ist.WrTmp.data)
   5539                 );
   5540       case Ist_Put:
   5541          return IRStmt_Put(
   5542                    st->Ist.Put.offset,
   5543                    atbSubst_Expr(env, st->Ist.Put.data)
   5544                 );
   5545       case Ist_PutI:
   5546          puti  = st->Ist.PutI.details;
   5547          puti2 = mkIRPutI(puti->descr,
   5548                           atbSubst_Expr(env, puti->ix),
   5549                           puti->bias,
   5550                           atbSubst_Expr(env, puti->data));
   5551          return IRStmt_PutI(puti2);
   5552 
   5553       case Ist_Exit:
   5554          return IRStmt_Exit(
   5555                    atbSubst_Expr(env, st->Ist.Exit.guard),
   5556                    st->Ist.Exit.jk,
   5557                    st->Ist.Exit.dst,
   5558                    st->Ist.Exit.offsIP
   5559                 );
   5560       case Ist_IMark:
   5561          return IRStmt_IMark(st->Ist.IMark.addr,
   5562                              st->Ist.IMark.len,
   5563                              st->Ist.IMark.delta);
   5564       case Ist_NoOp:
   5565          return IRStmt_NoOp();
   5566       case Ist_MBE:
   5567          return IRStmt_MBE(st->Ist.MBE.event);
   5568       case Ist_CAS:
   5569          cas  = st->Ist.CAS.details;
   5570          cas2 = mkIRCAS(
   5571                    cas->oldHi, cas->oldLo, cas->end,
   5572                    atbSubst_Expr(env, cas->addr),
   5573                    cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
   5574                    atbSubst_Expr(env, cas->expdLo),
   5575                    cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
   5576                    atbSubst_Expr(env, cas->dataLo)
   5577                 );
   5578          return IRStmt_CAS(cas2);
   5579       case Ist_LLSC:
   5580          return IRStmt_LLSC(
   5581                    st->Ist.LLSC.end,
   5582                    st->Ist.LLSC.result,
   5583                    atbSubst_Expr(env, st->Ist.LLSC.addr),
   5584                    st->Ist.LLSC.storedata
   5585                       ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
   5586                 );
   5587       case Ist_Dirty:
   5588          d  = st->Ist.Dirty.details;
   5589          d2 = emptyIRDirty();
   5590          *d2 = *d;
   5591          if (d2->mFx != Ifx_None)
   5592             d2->mAddr = atbSubst_Expr(env, d2->mAddr);
   5593          d2->guard = atbSubst_Expr(env, d2->guard);
   5594          for (i = 0; d2->args[i]; i++) {
   5595             IRExpr* arg = d2->args[i];
   5596             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   5597                d2->args[i] = atbSubst_Expr(env, arg);
   5598          }
   5599          return IRStmt_Dirty(d2);
   5600       default:
   5601          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   5602          vpanic("atbSubst_Stmt");
   5603    }
   5604 }
   5605 
   5606 inline
   5607 static Bool dirty_helper_stores ( const IRDirty *d )
   5608 {
   5609    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
   5610 }
   5611 
   5612 inline
   5613 static Interval dirty_helper_puts (
   5614                    const IRDirty *d,
   5615                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5616                    VexRegisterUpdates pxControl,
   5617                    /*OUT*/Bool *requiresPreciseMemExns
   5618                 )
   5619 {
   5620    Int i;
   5621    Interval interval;
   5622 
   5623    /* Passing the guest state pointer opens the door to modifying the
   5624       guest state under the covers.  It's not allowed, but let's be
   5625       extra conservative and assume the worst. */
   5626    for (i = 0; d->args[i]; i++) {
   5627       if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
   5628          *requiresPreciseMemExns = True;
   5629          /* Assume all guest state is written. */
   5630          interval.present = True;
   5631          interval.low  = 0;
   5632          interval.high = 0x7FFFFFFF;
   5633          return interval;
   5634       }
   5635    }
   5636 
   5637    /* Check the side effects on the guest state */
   5638    interval.present = False;
   5639    interval.low = interval.high = -1;
   5640    *requiresPreciseMemExns = False;
   5641 
   5642    for (i = 0; i < d->nFxState; ++i) {
   5643       if (d->fxState[i].fx != Ifx_Read) {
   5644          Int offset = d->fxState[i].offset;
   5645          Int size = d->fxState[i].size;
   5646          Int nRepeats = d->fxState[i].nRepeats;
   5647          Int repeatLen = d->fxState[i].repeatLen;
   5648 
   5649          if (preciseMemExnsFn(offset,
   5650                               offset + nRepeats * repeatLen + size - 1,
   5651                               pxControl)) {
   5652             *requiresPreciseMemExns = True;
   5653          }
   5654          update_interval(&interval, offset,
   5655                          offset + nRepeats * repeatLen + size - 1);
   5656       }
   5657    }
   5658 
   5659    return interval;
   5660 }
   5661 
   5662 /* Return an interval if st modifies the guest state.  Via
   5663    requiresPreciseMemExns return whether or not that modification
   5664    requires precise exceptions. */
   5665 static Interval stmt_modifies_guest_state (
   5666                    IRSB *bb, const IRStmt *st,
   5667                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5668                    VexRegisterUpdates pxControl,
   5669                    /*OUT*/Bool *requiresPreciseMemExns
   5670                 )
   5671 {
   5672    Interval interval;
   5673 
   5674    switch (st->tag) {
   5675    case Ist_Put: {
   5676       Int offset = st->Ist.Put.offset;
   5677       Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
   5678 
   5679       *requiresPreciseMemExns
   5680          = preciseMemExnsFn(offset, offset + size - 1, pxControl);
   5681       interval.present = True;
   5682       interval.low  = offset;
   5683       interval.high = offset + size - 1;
   5684       return interval;
   5685    }
   5686 
   5687    case Ist_PutI: {
   5688       IRRegArray *descr = st->Ist.PutI.details->descr;
   5689       Int offset = descr->base;
   5690       Int size = sizeofIRType(descr->elemTy);
   5691 
   5692       /* We quietly assume here that all segments are contiguous and there
   5693          are no holes. This is to avoid a loop. The assumption is conservative
   5694          in the sense that we might report that precise memory exceptions are
   5695          needed when in fact they are not. */
   5696       *requiresPreciseMemExns
   5697          = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
   5698                             pxControl);
   5699       interval.present = True;
   5700       interval.low  = offset;
   5701       interval.high = offset + descr->nElems * size - 1;
   5702       return interval;
   5703    }
   5704 
   5705    case Ist_Dirty:
   5706       return dirty_helper_puts(st->Ist.Dirty.details,
   5707                                preciseMemExnsFn, pxControl,
   5708                                requiresPreciseMemExns);
   5709 
   5710    default:
   5711       *requiresPreciseMemExns = False;
   5712       interval.present = False;
   5713       interval.low  = -1;
   5714       interval.high = -1;
   5715       return interval;
   5716    }
   5717 }
   5718 
   5719 /* notstatic */ Addr ado_treebuild_BB (
   5720                         IRSB* bb,
   5721                         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5722                         VexRegisterUpdates pxControl
   5723                      )
   5724 {
   5725    Int      i, j, k, m;
   5726    Bool     stmtStores, invalidateMe;
   5727    Interval putInterval;
   5728    IRStmt*  st;
   5729    IRStmt*  st2;
   5730    ATmpInfo env[A_NENV];
   5731 
   5732    Bool   max_ga_known = False;
   5733    Addr   max_ga       = 0;
   5734 
   5735    Int       n_tmps = bb->tyenv->types_used;
   5736    UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
   5737 
   5738    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
   5739       temp.  Also count occurrences in the bb->next field.  Take the
   5740       opportunity to also find the maximum guest address in the block,
   5741       since that will be needed later for deciding when we can safely
   5742       elide event checks. */
   5743 
   5744    for (i = 0; i < n_tmps; i++)
   5745       uses[i] = 0;
   5746 
   5747    for (i = 0; i < bb->stmts_used; i++) {
   5748       st = bb->stmts[i];
   5749       switch (st->tag) {
   5750          case Ist_NoOp:
   5751             continue;
   5752          case Ist_IMark: {
   5753             UInt len = st->Ist.IMark.len;
   5754             Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
   5755             max_ga_known = True;
   5756             if (mga > max_ga)
   5757                max_ga = mga;
   5758             break;
   5759          }
   5760          default:
   5761             break;
   5762       }
   5763       aoccCount_Stmt( uses, st );
   5764    }
   5765    aoccCount_Expr(uses, bb->next );
   5766 
   5767 #  if 0
   5768    for (i = 0; i < n_tmps; i++) {
   5769       if (uses[i] == 0)
   5770         continue;
   5771       ppIRTemp( (IRTemp)i );
   5772       vex_printf("  used %d\n", (Int)uses[i] );
   5773    }
   5774 #  endif
   5775 
   5776    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
   5777 
   5778          If the env is full, emit the end element.  This guarantees
   5779          there is at least one free slot in the following.
   5780 
   5781          On seeing 't = E', occ(t)==1,
   5782             let E'=env(E)
   5783             delete this stmt
   5784             add t -> E' to the front of the env
   5785             Examine E' and set the hints for E' appropriately
   5786               (doesLoad? doesGet?)
   5787 
   5788          On seeing any other stmt,
   5789             let stmt' = env(stmt)
   5790             remove from env any 't=E' binds invalidated by stmt
   5791                 emit the invalidated stmts
   5792             emit stmt'
   5793             compact any holes in env
   5794               by sliding entries towards the front
   5795 
   5796       Finally, apply env to bb->next.
   5797    */
   5798 
   5799    for (i = 0; i < A_NENV; i++) {
   5800       env[i].bindee = NULL;
   5801       env[i].binder = IRTemp_INVALID;
   5802    }
   5803 
   5804    /* The stmts in bb are being reordered, and we are guaranteed to
   5805       end up with no more than the number we started with.  Use i to
   5806       be the cursor of the current stmt examined and j <= i to be that
   5807       for the current stmt being written.
   5808    */
   5809    j = 0;
   5810    for (i = 0; i < bb->stmts_used; i++) {
   5811 
   5812       st = bb->stmts[i];
   5813       if (st->tag == Ist_NoOp)
   5814          continue;
   5815 
   5816       /* Ensure there's at least one space in the env, by emitting
   5817          the oldest binding if necessary. */
   5818       if (env[A_NENV-1].bindee != NULL) {
   5819          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
   5820                                       env[A_NENV-1].bindee );
   5821          j++;
   5822          vassert(j <= i);
   5823          env[A_NENV-1].bindee = NULL;
   5824       }
   5825 
   5826       /* Consider current stmt. */
   5827       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
   5828          IRExpr *e, *e2;
   5829 
   5830          /* optional extra: dump dead bindings as we find them.
   5831             Removes the need for a prior dead-code removal pass. */
   5832          if (uses[st->Ist.WrTmp.tmp] == 0) {
   5833 	    if (0) vex_printf("DEAD binding\n");
   5834             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5835          }
   5836          vassert(uses[st->Ist.WrTmp.tmp] == 1);
   5837 
   5838          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
   5839             actions. */
   5840          e  = st->Ist.WrTmp.data;
   5841          e2 = atbSubst_Expr(env, e);
   5842          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
   5843          setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
   5844          /* don't advance j, as we are deleting this stmt and instead
   5845             holding it temporarily in the env. */
   5846          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5847       }
   5848 
   5849       /* we get here for any other kind of statement. */
   5850       /* 'use up' any bindings required by the current statement. */
   5851       st2 = atbSubst_Stmt(env, st);
   5852 
   5853       /* Now, before this stmt, dump any bindings in env that it
   5854          invalidates.  These need to be dumped in the order in which
   5855          they originally entered env -- that means from oldest to
   5856          youngest. */
   5857 
   5858       /* putInterval/stmtStores characterise what the stmt under
   5859          consideration does, or might do (sidely safe @ True). */
   5860 
   5861       Bool putRequiresPreciseMemExns;
   5862       putInterval = stmt_modifies_guest_state(
   5863                        bb, st, preciseMemExnsFn, pxControl,
   5864                        &putRequiresPreciseMemExns
   5865                     );
   5866 
   5867       /* be True if this stmt writes memory or might do (==> we don't
   5868          want to reorder other loads or stores relative to it).  Also,
   5869          both LL and SC fall under this classification, since we
   5870          really ought to be conservative and not reorder any other
   5871          memory transactions relative to them. */
   5872       stmtStores
   5873          = toBool( st->tag == Ist_Store
   5874                    || (st->tag == Ist_Dirty
   5875                        && dirty_helper_stores(st->Ist.Dirty.details))
   5876                    || st->tag == Ist_LLSC
   5877                    || st->tag == Ist_CAS );
   5878 
   5879       for (k = A_NENV-1; k >= 0; k--) {
   5880          if (env[k].bindee == NULL)
   5881             continue;
   5882          /* Compare the actions of this stmt with the actions of
   5883             binding 'k', to see if they invalidate the binding. */
   5884          invalidateMe
   5885             = toBool(
   5886               /* a store invalidates loaded data */
   5887               (env[k].doesLoad && stmtStores)
   5888               /* a put invalidates get'd data, if they overlap */
   5889               || ((env[k].getInterval.present && putInterval.present) &&
   5890                   intervals_overlap(env[k].getInterval, putInterval))
   5891               /* a put invalidates loaded data. That means, in essense, that
   5892                  a load expression cannot be substituted into a statement
   5893                  that follows the put. But there is nothing wrong doing so
   5894                  except when the put statement requries precise exceptions.
   5895                  Think of a load that is moved past a put where the put
   5896                  updates the IP in the guest state. If the load generates
   5897                  a segfault, the wrong address (line number) would be
   5898                  reported. */
   5899               || (env[k].doesLoad && putInterval.present &&
   5900                   putRequiresPreciseMemExns)
   5901               /* probably overly conservative: a memory bus event
   5902                  invalidates absolutely everything, so that all
   5903                  computation prior to it is forced to complete before
   5904                  proceeding with the event (fence,lock,unlock). */
   5905               || st->tag == Ist_MBE
   5906               /* also be (probably overly) paranoid re AbiHints */
   5907               || st->tag == Ist_AbiHint
   5908               );
   5909          if (invalidateMe) {
   5910             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
   5911             j++;
   5912             vassert(j <= i);
   5913             env[k].bindee = NULL;
   5914          }
   5915       }
   5916 
   5917       /* Slide in-use entries in env up to the front */
   5918       m = 0;
   5919       for (k = 0; k < A_NENV; k++) {
   5920          if (env[k].bindee != NULL) {
   5921             env[m] = env[k];
   5922             m++;
   5923 	 }
   5924       }
   5925       for (m = m; m < A_NENV; m++) {
   5926          env[m].bindee = NULL;
   5927       }
   5928 
   5929       /* finally, emit the substituted statement */
   5930       bb->stmts[j] = st2;
   5931       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
   5932       j++;
   5933 
   5934       vassert(j <= i+1);
   5935    } /* for each stmt in the original bb ... */
   5936 
   5937    /* Finally ... substitute the ->next field as much as possible, and
   5938       dump any left-over bindings.  Hmm.  Perhaps there should be no
   5939       left over bindings?  Or any left-over bindings are
   5940       by definition dead? */
   5941    bb->next = atbSubst_Expr(env, bb->next);
   5942    bb->stmts_used = j;
   5943 
   5944    return max_ga_known ? max_ga : ~(Addr)0;
   5945 }
   5946 
   5947 
   5948 /*---------------------------------------------------------------*/
   5949 /*--- MSVC specific transformation hacks                      ---*/
   5950 /*---------------------------------------------------------------*/
   5951 
   5952 /* The purpose of all this is to find MSVC's idiom for non-constant
   5953    bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   5954    gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck has
   5955    generates a lot of false positives from the MSVC version because it
   5956    doesn't understand that XORing an undefined bit with itself gives a
   5957    defined result.
   5958 
   5959    This isn't a problem for the simple case "x ^ x", because iropt
   5960    folds it to a constant zero before Memcheck ever sees it.  But in
   5961    this case we have an intervening "& c" which defeats the simple
   5962    case.  So we have to carefully inspect all expressions rooted at an
   5963    XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   5964    7 other variants resulting from swapping the order of arguments to
   5965    the three binary operations.  If we get a match, we then replace
   5966    the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
   5967 
   5968    The key difficulty is to spot the two uses of "a".  To normalise
   5969    the IR to maximise the chances of success, we first do a CSE pass,
   5970    with CSEing of loads enabled, since the two "a" expressions may be
   5971    loads, which need to be commoned up.  Then we do a constant folding
   5972    pass, so as to remove any tmp-to-tmp assignment chains that would
   5973    make matching the original expression more difficult.
   5974 */
   5975 
   5976 
   5977 /* Helper function for debug printing */
   5978 __attribute__((unused))
   5979 static void print_flat_expr ( IRExpr** env, IRExpr* e )
   5980 {
   5981    if (e == NULL) {
   5982       vex_printf("?");
   5983       return;
   5984    }
   5985    switch (e->tag) {
   5986       case Iex_Binop: {
   5987          ppIROp(e->Iex.Binop.op);
   5988          vex_printf("(");
   5989          print_flat_expr(env, e->Iex.Binop.arg1);
   5990          vex_printf(",");
   5991          print_flat_expr(env, e->Iex.Binop.arg2);
   5992          vex_printf(")");
   5993          break;
   5994       }
   5995       case Iex_Unop: {
   5996          ppIROp(e->Iex.Unop.op);
   5997          vex_printf("(");
   5998          print_flat_expr(env, e->Iex.Unop.arg);
   5999          vex_printf(")");
   6000          break;
   6001       }
   6002       case Iex_RdTmp:
   6003          ppIRTemp(e->Iex.RdTmp.tmp);
   6004          vex_printf("=");
   6005          print_flat_expr(env, chase(env, e));
   6006          break;
   6007       case Iex_Const:
   6008       case Iex_CCall:
   6009       case Iex_Load:
   6010       case Iex_ITE:
   6011       case Iex_Get:
   6012          ppIRExpr(e);
   6013          break;
   6014       default:
   6015          vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
   6016          vassert(0);
   6017    }
   6018 }
   6019 
   6020 /* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
   6021    or any of the other 7 variants generated by switching the order
   6022    of arguments to the outer ^, the inner ^ and the &.
   6023 */
   6024 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
   6025                                      /*OUT*/IRExpr** cc,
   6026                                      IRExpr** env, IRExpr* e,
   6027                                      IROp opAND, IROp opXOR)
   6028 {
   6029 #  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
   6030                               && (_e)->Iex.Binop.op == (_op))
   6031 #  define ISATOM(_e)    isIRAtom(_e)
   6032 #  define STEP(_e)      chase1(env, (_e))
   6033 #  define LL(_e)        ((_e)->Iex.Binop.arg1)
   6034 #  define RR(_e)        ((_e)->Iex.Binop.arg2)
   6035 
   6036    IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
   6037 
   6038    /* This is common to all 8 cases */
   6039    if (!ISBIN(e, opXOR)) goto fail;
   6040 
   6041    /*                        -----and------ */
   6042    /*                         --xor---      */
   6043    /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   6044    /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   6045    a1 = and = xor = c = a2bL = a2bR = NULL;
   6046 
   6047    a1   = LL(e);
   6048    and  = STEP(RR(e));
   6049    if (!ISBIN(and, opAND)) goto v34;
   6050    xor  = STEP(LL(and));
   6051    c    = RR(and);
   6052    if (!ISBIN(xor, opXOR)) goto v34;
   6053    a2bL = LL(xor);
   6054    a2bR = RR(xor);
   6055 
   6056    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6057       *aa = a1;
   6058       *bb = a2bR;
   6059       *cc = c;
   6060       return 1;
   6061    }
   6062    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6063       *aa = a1;
   6064       *bb = a2bL;
   6065       *cc = c;
   6066       return 2;
   6067    }
   6068 
   6069   v34:
   6070    /*                   -----and------      */
   6071    /*                    --xor---           */
   6072    /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   6073    /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   6074    a1 = and = xor = c = a2bL = a2bR = NULL;
   6075 
   6076    a1   = RR(e);
   6077    and  = STEP(LL(e));
   6078    if (!ISBIN(and, opAND)) goto v56;
   6079    xor  = STEP(LL(and));
   6080    c    = RR(and);
   6081    if (!ISBIN(xor, opXOR)) goto v56;
   6082    a2bL = LL(xor);
   6083    a2bR = RR(xor);
   6084 
   6085    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6086       *aa = a1;
   6087       *bb = a2bR;
   6088       *cc = c;
   6089       return 3;
   6090    }
   6091    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6092       *aa = a1;
   6093       *bb = a2bL;
   6094       *cc = c;
   6095       return 4;
   6096    }
   6097 
   6098   v56:
   6099    /*                        -----and------ */
   6100    /*                         --xor---      */
   6101    /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   6102    /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   6103    a1 = and = xor = c = a2bL = a2bR = NULL;
   6104 
   6105    a1   = LL(e);
   6106    and  = STEP(RR(e));
   6107    if (!ISBIN(and, opAND)) goto v78;
   6108    xor  = STEP(RR(and));
   6109    c    = LL(and);
   6110    if (!ISBIN(xor, opXOR)) goto v78;
   6111    a2bL = LL(xor);
   6112    a2bR = RR(xor);
   6113 
   6114    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6115       *aa = a1;
   6116       *bb = a2bR;
   6117       *cc = c;
   6118       vassert(5-5); // ATC
   6119       return 5;
   6120    }
   6121    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6122       *aa = a1;
   6123       *bb = a2bL;
   6124       *cc = c;
   6125       vassert(6-6); // ATC
   6126       return 6;
   6127    }
   6128 
   6129  v78:
   6130    /*                   -----and------      */
   6131    /*                    --xor---           */
   6132    /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   6133    /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   6134    a1 = and = xor = c = a2bL = a2bR = NULL;
   6135 
   6136    a1   = RR(e);
   6137    and  = STEP(LL(e));
   6138    if (!ISBIN(and, opAND)) goto fail;
   6139    xor  = STEP(RR(and));
   6140    c    = LL(and);
   6141    if (!ISBIN(xor, opXOR)) goto fail;
   6142    a2bL = LL(xor);
   6143    a2bR = RR(xor);
   6144 
   6145    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6146       *aa = a1;
   6147       *bb = a2bR;
   6148       *cc = c;
   6149       return 7;
   6150    }
   6151    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6152       *aa = a1;
   6153       *bb = a2bL;
   6154       *cc = c;
   6155       return 8;
   6156    }
   6157 
   6158  fail:
   6159    return 0;
   6160 
   6161 #  undef ISBIN
   6162 #  undef ISATOM
   6163 #  undef STEP
   6164 #  undef LL
   6165 #  undef RR
   6166 }
   6167 
   6168 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   6169    variants thereof generated by switching arguments around), return
   6170    the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
   6171 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
   6172 {
   6173    if (e->tag != Iex_Binop)
   6174       return NULL;
   6175 
   6176    const HChar* tyNm = NULL;
   6177    IROp   opOR  = Iop_INVALID;
   6178    IROp   opAND = Iop_INVALID;
   6179    IROp   opNOT = Iop_INVALID;
   6180    IROp   opXOR = Iop_INVALID;
   6181    switch (e->Iex.Binop.op) {
   6182       case Iop_Xor32:
   6183          tyNm  = "I32";
   6184          opOR  = Iop_Or32;  opAND = Iop_And32;
   6185          opNOT = Iop_Not32; opXOR = Iop_Xor32;
   6186          break;
   6187       case Iop_Xor16:
   6188          tyNm  = "I16";
   6189          opOR  = Iop_Or16;  opAND = Iop_And16;
   6190          opNOT = Iop_Not16; opXOR = Iop_Xor16;
   6191          break;
   6192       case Iop_Xor8:
   6193          tyNm  = "I8";
   6194          opOR  = Iop_Or8;  opAND = Iop_And8;
   6195          opNOT = Iop_Not8; opXOR = Iop_Xor8;
   6196          break;
   6197       default:
   6198          return NULL;
   6199    }
   6200 
   6201    IRExpr* aa = NULL;
   6202    IRExpr* bb = NULL;
   6203    IRExpr* cc = NULL;
   6204    UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   6205    if (variant > 0) {
   6206       static UInt ctr = 0;
   6207       if (0)
   6208          vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
   6209                     "type %s, variant %u\n",
   6210                     ++ctr, tyNm, variant);
   6211       /* It's vitally important that the returned aa, bb and cc are
   6212          atoms -- either constants or tmps.  If it's anything else
   6213          (eg, a GET) then incorporating them in a tree at this point
   6214          in the SB may erroneously pull them forwards (eg of a PUT
   6215          that originally was after the GET) and so transform the IR
   6216          wrongly.  spotBitfieldAssignment should guarantee only to
   6217          give us atoms, but we check here anyway. */
   6218       vassert(aa && isIRAtom(aa));
   6219       vassert(bb && isIRAtom(bb));
   6220       vassert(cc && isIRAtom(cc));
   6221       return IRExpr_Binop(
   6222                 opOR,
   6223                 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
   6224                 IRExpr_Binop(opAND, bb, cc)
   6225              );
   6226    }
   6227    return NULL;
   6228 }
   6229 
   6230 
   6231 /* SB is modified in-place.  Visit all the IRExprs and, for those
   6232    which are allowed to be non-atomic, perform the XOR transform if
   6233    possible.  This makes |sb| be non-flat, but that's ok, the caller
   6234    can re-flatten it.  Returns True iff any changes were made. */
   6235 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
   6236 {
   6237    Int  i;
   6238    Bool changed = False;
   6239 
   6240    /* Make the tmp->expr environment, so we can use it for
   6241       chasing expressions. */
   6242    Int      n_tmps = sb->tyenv->types_used;
   6243    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   6244    for (i = 0; i < n_tmps; i++)
   6245       env[i] = NULL;
   6246 
   6247    for (i = 0; i < sb->stmts_used; i++) {
   6248       IRStmt* st = sb->stmts[i];
   6249       if (st->tag != Ist_WrTmp)
   6250          continue;
   6251       IRTemp t = st->Ist.WrTmp.tmp;
   6252       vassert(t >= 0 && t < n_tmps);
   6253       env[t] = st->Ist.WrTmp.data;
   6254    }
   6255 
   6256    for (i = 0; i < sb->stmts_used; i++) {
   6257       IRStmt* st = sb->stmts[i];
   6258 
   6259       switch (st->tag) {
   6260          case Ist_AbiHint:
   6261             vassert(isIRAtom(st->Ist.AbiHint.base));
   6262             vassert(isIRAtom(st->Ist.AbiHint.nia));
   6263             break;
   6264          case Ist_Put:
   6265             vassert(isIRAtom(st->Ist.Put.data));
   6266             break;
   6267          case Ist_PutI: {
   6268             IRPutI* puti = st->Ist.PutI.details;
   6269             vassert(isIRAtom(puti->ix));
   6270             vassert(isIRAtom(puti->data));
   6271             break;
   6272          }
   6273          case Ist_WrTmp: {
   6274             /* This is the one place where an expr (st->Ist.WrTmp.data) is
   6275                allowed to be more than just a constant or a tmp. */
   6276             IRExpr* mb_new_data
   6277                = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
   6278             if (mb_new_data) {
   6279                //ppIRSB(sb);
   6280                st->Ist.WrTmp.data = mb_new_data;
   6281                //ppIRSB(sb);
   6282                changed = True;
   6283             }
   6284             break;
   6285          }
   6286          case Ist_Store:
   6287             vassert(isIRAtom(st->Ist.Store.addr));
   6288             vassert(isIRAtom(st->Ist.Store.data));
   6289             break;
   6290          case Ist_StoreG: {
   6291             IRStoreG* sg = st->Ist.StoreG.details;
   6292             vassert(isIRAtom(sg->addr));
   6293             vassert(isIRAtom(sg->data));
   6294             vassert(isIRAtom(sg->guard));
   6295             break;
   6296          }
   6297          case Ist_LoadG: {
   6298             IRLoadG* lg = st->Ist.LoadG.details;
   6299             vassert(isIRAtom(lg->addr));
   6300             vassert(isIRAtom(lg->alt));
   6301             vassert(isIRAtom(lg->guard));
   6302             break;
   6303          }
   6304          case Ist_CAS: {
   6305             IRCAS* cas = st->Ist.CAS.details;
   6306             vassert(isIRAtom(cas->addr));
   6307             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   6308             vassert(isIRAtom(cas->expdLo));
   6309             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   6310             vassert(isIRAtom(cas->dataLo));
   6311             break;
   6312          }
   6313          case Ist_LLSC:
   6314             vassert(isIRAtom(st->Ist.LLSC.addr));
   6315             if (st->Ist.LLSC.storedata)
   6316                vassert(isIRAtom(st->Ist.LLSC.storedata));
   6317             break;
   6318          case Ist_Dirty: {
   6319             IRDirty* d = st->Ist.Dirty.details;
   6320             if (d->mFx != Ifx_None) {
   6321                vassert(isIRAtom(d->mAddr));
   6322             }
   6323             vassert(isIRAtom(d->guard));
   6324             for (Int j = 0; d->args[j]; j++) {
   6325                IRExpr* arg = d->args[j];
   6326                if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
   6327                   vassert(isIRAtom(arg));
   6328                }
   6329             }
   6330             break;
   6331          }
   6332          case Ist_IMark:
   6333          case Ist_NoOp:
   6334          case Ist_MBE:
   6335             break;
   6336          case Ist_Exit:
   6337             vassert(isIRAtom(st->Ist.Exit.guard));
   6338             break;
   6339          default:
   6340             vex_printf("\n"); ppIRStmt(st);
   6341             vpanic("do_XOR_TRANSFORMS_IRSB");
   6342       }
   6343    }
   6344 
   6345    vassert(isIRAtom(sb->next));
   6346    return changed;
   6347 }
   6348 
   6349 
   6350 static IRSB* do_MSVC_HACKS ( IRSB* sb )
   6351 {
   6352    // Normalise as much as we can.  This is the one-and-only place
   6353    // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   6354    Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   6355    if (any_cse_changes) {
   6356       // CSEing might have created dead code.  Remove it.
   6357       sb = cprop_BB ( sb );
   6358       do_deadcode_BB(sb);
   6359    }
   6360 
   6361    // Visit all atoms, do the transformation proper.  bb is modified
   6362    // in-place.
   6363    Bool changed = do_XOR_TRANSFORM_IRSB(sb);
   6364 
   6365    if (changed) {
   6366       // The transformation generates non-flat expressions, so we now
   6367       // need to re-flatten the block.
   6368       sb = flatten_BB(sb);
   6369    }
   6370 
   6371    return sb;
   6372 }
   6373 
   6374 
   6375 /*---------------------------------------------------------------*/
   6376 /*--- iropt main                                              ---*/
   6377 /*---------------------------------------------------------------*/
   6378 
   6379 static Bool iropt_verbose = False; /* True; */
   6380 
   6381 
   6382 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   6383    redundant Put removal, constant propagation, dead code removal,
   6384    clean helper specialisation, and dead code removal (again).
   6385 */
   6386 
   6387 
   6388 static
   6389 IRSB* cheap_transformations (
   6390          IRSB* bb,
   6391          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   6392          Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   6393          VexRegisterUpdates pxControl
   6394       )
   6395 {
   6396    redundant_get_removal_BB ( bb );
   6397    if (iropt_verbose) {
   6398       vex_printf("\n========= REDUNDANT GET\n\n" );
   6399       ppIRSB(bb);
   6400    }
   6401 
   6402    if (pxControl < VexRegUpdAllregsAtEachInsn) {
   6403       redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   6404    }
   6405    if (iropt_verbose) {
   6406       vex_printf("\n========= REDUNDANT PUT\n\n" );
   6407       ppIRSB(bb);
   6408    }
   6409 
   6410    bb = cprop_BB ( bb );
   6411    if (iropt_verbose) {
   6412       vex_printf("\n========= CPROPD\n\n" );
   6413       ppIRSB(bb);
   6414    }
   6415 
   6416    do_deadcode_BB ( bb );
   6417    if (iropt_verbose) {
   6418       vex_printf("\n========= DEAD\n\n" );
   6419       ppIRSB(bb);
   6420    }
   6421 
   6422    bb = spec_helpers_BB ( bb, specHelper );
   6423    do_deadcode_BB ( bb );
   6424    if (iropt_verbose) {
   6425       vex_printf("\n========= SPECd \n\n" );
   6426       ppIRSB(bb);
   6427    }
   6428 
   6429    return bb;
   6430 }
   6431 
   6432 
   6433 /* Do some more expensive transformations on bb, which are aimed at
   6434    optimising as much as possible in the presence of GetI and PutI.  */
   6435 
   6436 static
   6437 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
   6438 {
   6439    (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   6440    collapse_AddSub_chains_BB( bb );
   6441    do_redundant_GetI_elimination( bb );
   6442    if (pxControl < VexRegUpdAllregsAtEachInsn) {
   6443       do_redundant_PutI_elimination( bb, pxControl );
   6444    }
   6445    do_deadcode_BB( bb );
   6446    return bb;
   6447 }
   6448 
   6449 
   6450 /* Scan a flattened BB to look for signs that more expensive
   6451    optimisations might be useful:
   6452    - find out if there are any GetIs and PutIs
   6453    - find out if there are any floating or vector-typed temporaries
   6454 */
   6455 
   6456 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
   6457                                  /*OUT*/Bool* hasVorFtemps,
   6458                                  IRSB* bb )
   6459 {
   6460    Int      i, j;
   6461    IRStmt*  st;
   6462    IRDirty* d;
   6463    IRCAS*   cas;
   6464 
   6465    *hasGetIorPutI = False;
   6466    *hasVorFtemps  = False;
   6467 
   6468    for (i = 0; i < bb->stmts_used; i++) {
   6469       st = bb->stmts[i];
   6470       switch (st->tag) {
   6471          case Ist_AbiHint:
   6472             vassert(isIRAtom(st->Ist.AbiHint.base));
   6473             vassert(isIRAtom(st->Ist.AbiHint.nia));
   6474             break;
   6475          case Ist_PutI:
   6476             *hasGetIorPutI = True;
   6477             break;
   6478          case Ist_WrTmp:
   6479             if (st->Ist.WrTmp.data->tag == Iex_GetI)
   6480                *hasGetIorPutI = True;
   6481             switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
   6482                case Ity_I1: case Ity_I8: case Ity_I16:
   6483                case Ity_I32: case Ity_I64: case Ity_I128:
   6484                   break;
   6485                case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
   6486                case Ity_V128: case Ity_V256:
   6487                   *hasVorFtemps = True;
   6488                   break;
   6489                case Ity_D32: case Ity_D64: case Ity_D128:
   6490                   *hasVorFtemps = True;
   6491                   break;
   6492                default:
   6493                   goto bad;
   6494             }
   6495             break;
   6496          case Ist_Put:
   6497             vassert(isIRAtom(st->Ist.Put.data));
   6498             break;
   6499          case Ist_Store:
   6500             vassert(isIRAtom(st->Ist.Store.addr));
   6501             vassert(isIRAtom(st->Ist.Store.data));
   6502             break;
   6503          case Ist_StoreG: {
   6504             IRStoreG* sg = st->Ist.StoreG.details;
   6505             vassert(isIRAtom(sg->addr));
   6506             vassert(isIRAtom(sg->data));
   6507             vassert(isIRAtom(sg->guard));
   6508             break;
   6509          }
   6510          case Ist_LoadG: {
   6511             IRLoadG* lg = st->Ist.LoadG.details;
   6512             vassert(isIRAtom(lg->addr));
   6513             vassert(isIRAtom(lg->alt));
   6514             vassert(isIRAtom(lg->guard));
   6515             break;
   6516          }
   6517          case Ist_CAS:
   6518             cas = st->Ist.CAS.details;
   6519             vassert(isIRAtom(cas->addr));
   6520             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   6521             vassert(isIRAtom(cas->expdLo));
   6522             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   6523             vassert(isIRAtom(cas->dataLo));
   6524             break;
   6525          case Ist_LLSC:
   6526             vassert(isIRAtom(st->Ist.LLSC.addr));
   6527             if (st->Ist.LLSC.storedata)
   6528                vassert(isIRAtom(st->Ist.LLSC.storedata));
   6529             break;
   6530          case Ist_Dirty:
   6531             d = st->Ist.Dirty.details;
   6532             vassert(isIRAtom(d->guard));
   6533             for (j = 0; d->args[j]; j++) {
   6534                IRExpr* arg = d->args[j];
   6535                if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   6536                   vassert(isIRAtom(arg));
   6537             }
   6538             if (d->mFx != Ifx_None)
   6539                vassert(isIRAtom(d->mAddr));
   6540             break;
   6541          case Ist_NoOp:
   6542          case Ist_IMark:
   6543          case Ist_MBE:
   6544             break;
   6545          case Ist_Exit:
   6546             vassert(isIRAtom(st->Ist.Exit.guard));
   6547             break;
   6548          default:
   6549          bad:
   6550             ppIRStmt(st);
   6551             vpanic("considerExpensives");
   6552       }
   6553    }
   6554 }
   6555 
   6556 
   6557 /* ---------------- The main iropt entry point. ---------------- */
   6558 
   6559 /* exported from this file */
   6560 /* Rules of the game:
   6561 
   6562    - IRExpr/IRStmt trees should be treated as immutable, as they
   6563      may get shared.  So never change a field of such a tree node;
   6564      instead construct and return a new one if needed.
   6565 */
   6566 
   6567 
   6568 IRSB* do_iropt_BB(
   6569          IRSB* bb0,
   6570          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   6571          Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   6572          VexRegisterUpdates pxControl,
   6573          Addr    guest_addr,
   6574          VexArch guest_arch
   6575       )
   6576 {
   6577    static Int n_total     = 0;
   6578    static Int n_expensive = 0;
   6579 
   6580    Bool hasGetIorPutI, hasVorFtemps;
   6581    IRSB *bb, *bb2;
   6582 
   6583    n_total++;
   6584 
   6585    /* First flatten the block out, since all other
   6586       phases assume flat code. */
   6587 
   6588    bb = flatten_BB ( bb0 );
   6589 
   6590    if (iropt_verbose) {
   6591       vex_printf("\n========= FLAT\n\n" );
   6592       ppIRSB(bb);
   6593    }
   6594 
   6595    /* If at level 0, stop now. */
   6596    if (vex_control.iropt_level <= 0) return bb;
   6597 
   6598    /* Now do a preliminary cleanup pass, and figure out if we also
   6599       need to do 'expensive' optimisations.  Expensive optimisations
   6600       are deemed necessary if the block contains any GetIs or PutIs.
   6601       If needed, do expensive transformations and then another cheap
   6602       cleanup pass. */
   6603 
   6604    bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );
   6605 
   6606    if (guest_arch == VexArchARM) {
   6607       /* Translating Thumb2 code produces a lot of chaff.  We have to
   6608          work extra hard to get rid of it. */
   6609       bb = cprop_BB(bb);
   6610       bb = spec_helpers_BB ( bb, specHelper );
   6611       if (pxControl < VexRegUpdAllregsAtEachInsn) {
   6612          redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   6613       }
   6614       do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   6615       do_deadcode_BB( bb );
   6616    }
   6617 
   6618    if (vex_control.iropt_level > 1) {
   6619 
   6620       /* Peer at what we have, to decide how much more effort to throw
   6621          at it. */
   6622       considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
   6623 
   6624       if (hasVorFtemps && !hasGetIorPutI) {
   6625          /* If any evidence of FP or Vector activity, CSE, as that
   6626             tends to mop up all manner of lardy code to do with
   6627             rounding modes.  Don't bother if hasGetIorPutI since that
   6628             case leads into the expensive transformations, which do
   6629             CSE anyway. */
   6630          (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   6631          do_deadcode_BB( bb );
   6632       }
   6633 
   6634       if (hasGetIorPutI) {
   6635          Bool cses;
   6636          n_expensive++;
   6637          if (DEBUG_IROPT)
   6638             vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
   6639          bb = expensive_transformations( bb, pxControl );
   6640          bb = cheap_transformations( bb, specHelper,
   6641                                      preciseMemExnsFn, pxControl );
   6642          /* Potentially common up GetIs */
   6643          cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   6644          if (cses)
   6645             bb = cheap_transformations( bb, specHelper,
   6646                                         preciseMemExnsFn, pxControl );
   6647       }
   6648 
   6649       ///////////////////////////////////////////////////////////
   6650       // BEGIN MSVC optimised code transformation hacks
   6651       if (0)
   6652          bb = do_MSVC_HACKS(bb);
   6653       // END   MSVC optimised code transformation hacks
   6654       ///////////////////////////////////////////////////////////
   6655 
   6656       /* Now have a go at unrolling simple (single-BB) loops.  If
   6657          successful, clean up the results as much as possible. */
   6658 
   6659       bb2 = maybe_loop_unroll_BB( bb, guest_addr );
   6660       if (bb2) {
   6661          bb = cheap_transformations( bb2, specHelper,
   6662                                      preciseMemExnsFn, pxControl );
   6663          if (hasGetIorPutI) {
   6664             bb = expensive_transformations( bb, pxControl );
   6665             bb = cheap_transformations( bb, specHelper,
   6666                                         preciseMemExnsFn, pxControl );
   6667          } else {
   6668             /* at least do CSE and dead code removal */
   6669             do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   6670             do_deadcode_BB( bb );
   6671          }
   6672          if (0) vex_printf("vex iropt: unrolled a loop\n");
   6673       }
   6674 
   6675    }
   6676 
   6677    return bb;
   6678 }
   6679 
   6680 
   6681 /*---------------------------------------------------------------*/
   6682 /*--- end                                            ir_opt.c ---*/
   6683 /*---------------------------------------------------------------*/
   6684