Home | History | Annotate | Download | only in priv
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*---------------------------------------------------------------*/
      4 /*--- begin                                          ir_opt.c ---*/
      5 /*---------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2004-2013 OpenWorks LLP
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #include "libvex_basictypes.h"
     38 #include "libvex_ir.h"
     39 #include "libvex.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "ir_opt.h"
     44 
     45 
     46 /* Set to 1 for lots of debugging output. */
     47 #define DEBUG_IROPT 0
     48 
     49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
     50 #define STATS_IROPT 0
     51 
     52 
     53 /* What iropt does, 29 Dec 04.
     54 
     55    It takes an IRSB and produces a new one with the same meaning,
     56    defined thus:
     57 
     58    After execution of the new BB, all guest state and guest memory is
     59    the same as after execution of the original.  This is true
     60    regardless of how the block was exited (at the end vs side exit).
     61 
     62    In addition, parts of the guest state will be identical to that
     63    created by execution of the original at the following observation
     64    points:
     65 
     66    * In a dirty helper call, any parts of the guest state that the
     67      helper states that it reads or modifies will be up to date.
     68      Also, guest memory will be up to date.  Parts of the guest state
     69      not marked as being read or modified by the helper cannot be
     70      assumed to be up-to-date at the point where the helper is called.
     71 
     72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     73      The guest state is only up to date only as explained above
     74      (i.e. at SB exits and as specified by dirty helper call).
     75      Also, the stack pointer register is up to date at memory
     76      exception points (as this is needed for the stack extension
     77      logic in m_signals.c).
     78 
     79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     80      Immediately prior to any load or store, those parts of the guest
     81      state marked as requiring precise exceptions will be up to date.
     82      Also, guest memory will be up to date.  Parts of the guest state
     83      not marked as requiring precise exceptions cannot be assumed to
     84      be up-to-date at the point of the load/store.
     85 
     86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     87      Same as minimal, but all the guest state is up to date at memory
     88      exception points.
     89 
     90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     91      Guest state is up to date at each instruction.
     92 
     93    The relative order of loads and stores (including loads/stores of
     94    guest memory done by dirty helpers annotated as such) is not
     95    changed.  However, the relative order of loads with no intervening
     96    stores/modifies may be changed.
     97 
     98    Transformation order
     99    ~~~~~~~~~~~~~~~~~~~~
    100 
    101    There are three levels of optimisation, controlled by
    102    vex_control.iropt_level.  Define first:
    103 
    104    "Cheap transformations" are the following sequence:
    105       * Redundant-Get removal
    106       * Redundant-Put removal
    107       * Constant propagation/folding
    108       * Dead code removal
    109       * Specialisation of clean helper functions
    110       * Dead code removal
    111 
    112    "Expensive transformations" are the following sequence:
    113       * CSE
    114       * Folding of add/sub chains
    115       * Redundant-GetI removal
    116       * Redundant-PutI removal
    117       * Dead code removal
    118 
    119    Then the transformations are as follows, as defined by
    120    vex_control.iropt_level:
    121 
    122    Level 0:
    123       * Flatten into atomic form.
    124 
    125    Level 1: the following sequence:
    126       * Flatten into atomic form.
    127       * Cheap transformations.
    128 
    129    Level 2: the following sequence
    130       * Flatten into atomic form.
    131       * Cheap transformations.
    132       * If block contains any floating or vector types, CSE.
    133       * If block contains GetI or PutI, Expensive transformations.
    134       * Try unrolling loops.  Three possible outcomes:
    135         - No effect: do nothing more.
    136         - Unrolled a loop, and block does not contain GetI or PutI:
    137           Do: * CSE
    138               * Dead code removal
    139         - Unrolled a loop, and block contains GetI or PutI:
    140           Do: * Expensive transformations
    141               * Cheap transformations
    142 */
    143 
    144 /* Implementation notes, 29 Dec 04.
    145 
    146    TODO (important): I think rPutI removal ignores precise exceptions
    147    and is therefore in a sense, wrong.  In the sense that PutIs are
    148    assumed not to write parts of the guest state that we need to have
    149    up-to-date at loads/stores.  So far on x86 guest that has not
    150    mattered since indeed only the x87 FP registers and tags are
    151    accessed using GetI/PutI, and there is no need so far for them to
    152    be up to date at mem exception points.  The rPutI pass should be
    153    fixed.
    154 
    155    TODO: improve pessimistic handling of precise exceptions
    156      in the tree builder.
    157 
    158    TODO: check interaction of rGetI and dirty helpers.
    159 
    160    F64i constants are treated differently from other constants.
    161    They are not regarded as atoms, and instead lifted off and
    162    bound to temps.  This allows them to participate in CSE, which
    163    is important for getting good performance for x86 guest code.
    164 
    165    CSE up F64 literals (already doing F64is)
    166 
    167    CSE: consider carefully the requirement for precise exns
    168         prior to making CSE any more aggressive.  */
    169 
    170 
    171 /*---------------------------------------------------------------*/
    172 /*--- Finite mappery, of a sort                               ---*/
    173 /*---------------------------------------------------------------*/
    174 
    175 /* General map from HWord-sized thing HWord-sized thing.  Could be by
    176    hashing, but it's not clear whether or not this would really be any
    177    faster. */
    178 
    179 typedef
    180    struct {
    181       Bool*  inuse;
    182       HWord* key;
    183       HWord* val;
    184       Int    size;
    185       Int    used;
    186    }
    187    HashHW;
    188 
    189 static HashHW* newHHW ( void )
    190 {
    191    HashHW* h = LibVEX_Alloc(sizeof(HashHW));
    192    h->size   = 8;
    193    h->used   = 0;
    194    h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
    195    h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
    196    h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
    197    return h;
    198 }
    199 
    200 
    201 /* Look up key in the map. */
    202 
    203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    204 {
    205    Int i;
    206    /* vex_printf("lookupHHW(%llx)\n", key ); */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          if (val)
    210             *val = h->val[i];
    211          return True;
    212       }
    213    }
    214    return False;
    215 }
    216 
    217 
    218 /* Add key->val to the map.  Replaces any existing binding for key. */
    219 
    220 static void addToHHW ( HashHW* h, HWord key, HWord val )
    221 {
    222    Int i, j;
    223    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    224 
    225    /* Find and replace existing binding, if any. */
    226    for (i = 0; i < h->used; i++) {
    227       if (h->inuse[i] && h->key[i] == key) {
    228          h->val[i] = val;
    229          return;
    230       }
    231    }
    232 
    233    /* Ensure a space is available. */
    234    if (h->used == h->size) {
    235       /* Copy into arrays twice the size. */
    236       Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
    237       HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    238       HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    239       for (i = j = 0; i < h->size; i++) {
    240          if (!h->inuse[i]) continue;
    241          inuse2[j] = True;
    242          key2[j] = h->key[i];
    243          val2[j] = h->val[i];
    244          j++;
    245       }
    246       h->used = j;
    247       h->size *= 2;
    248       h->inuse = inuse2;
    249       h->key = key2;
    250       h->val = val2;
    251    }
    252 
    253    /* Finally, add it. */
    254    vassert(h->used < h->size);
    255    h->inuse[h->used] = True;
    256    h->key[h->used] = key;
    257    h->val[h->used] = val;
    258    h->used++;
    259 }
    260 
    261 
    262 /*---------------------------------------------------------------*/
    263 /*--- Flattening out a BB into atomic SSA form                ---*/
    264 /*---------------------------------------------------------------*/
    265 
    266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    267    copies made by flattening.  If in doubt return False. */
    268 
    269 static Bool isFlat ( IRExpr* e )
    270 {
    271    if (e->tag == Iex_Get)
    272       return True;
    273    if (e->tag == Iex_Binop)
    274       return toBool( isIRAtom(e->Iex.Binop.arg1)
    275                      && isIRAtom(e->Iex.Binop.arg2) );
    276    if (e->tag == Iex_Load)
    277       return isIRAtom(e->Iex.Load.addr);
    278    return False;
    279 }
    280 
    281 /* Flatten out 'ex' so it is atomic, returning a new expression with
    282    the same value, after having appended extra IRTemp assignments to
    283    the end of 'bb'. */
    284 
/* Flatten 'ex' to an atom.  For every non-atomic subtree the rebuilt
   expression is bound to a fresh temp (the binding statement is
   appended to 'bb'), and a read of that temp is returned.  Hence the
   result is always either an IRExpr_RdTmp or an unmodified constant.
   Note: flattening is depth-first, so inner bindings are appended to
   'bb' before the binding that uses them. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Flatten the index; descr and bias are not expressions. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* Get has no subexpressions; just lift it onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         /* Flatten all four operands. */
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         /* Flatten the address; endianness and type are kept. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Shallow-copy the NULL-terminated arg vector, then flatten
            each argument in place in the copy. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later.  (See the implementation notes at the top of the
            file: F64i constants are deliberately not atoms.) */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already atomic. */
         return ex;

      default:
         /* Unhandled expression tag: print it and give up. */
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
    396 
    397 
    398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    399 
/* Append a completely flattened form of 'st' to the end of 'bb'.
   Each case flattens the statement's subexpressions (appending any
   needed temp bindings first, via flatten_Expr) and then appends a
   rebuilt statement whose operands are all atoms. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data, then rebuild. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         /* Guarded store: addr, data and guard all get flattened. */
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         /* Guarded load: addr, alternative value and guard. */
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* The Hi halves are optional (NULL for single CAS). */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata == NULL means this is a Load-Linked. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Copy the dirty call, then flatten mAddr (only present when
            the helper touches memory), the guard, and every arg that
            is not a VECRET/BBPTR marker. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         /* Unhandled statement tag: print it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
    519 
    520 
    521 static IRSB* flatten_BB ( IRSB* in )
    522 {
    523    Int   i;
    524    IRSB* out;
    525    out = emptyIRSB();
    526    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    527    for (i = 0; i < in->stmts_used; i++)
    528       if (in->stmts[i])
    529          flatten_Stmt( out, in->stmts[i] );
    530    out->next     = flatten_Expr( out, in->next );
    531    out->jumpkind = in->jumpkind;
    532    out->offsIP   = in->offsIP;
    533    return out;
    534 }
    535 
    536 
    537 /*---------------------------------------------------------------*/
    538 /*--- In-place removal of redundant GETs                      ---*/
    539 /*---------------------------------------------------------------*/
    540 
    541 /* Scan forwards, building up an environment binding (min offset, max
    542    offset) pairs to values, which will either be temps or constants.
    543 
    544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    545    env and if it matches, replace the Get with the stored value.  If
    546    there is no match, add a (minoff,maxoff) :-> t binding.
    547 
    548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    549    any binding which fully or partially overlaps with (minoff,maxoff).
    550    Then add a new (minoff,maxoff) :-> t or c binding.  */
    551 
    552 /* Extract the min/max offsets from a guest state array descriptor. */
    553 
    554 inline
    555 static void getArrayBounds ( IRRegArray* descr,
    556                              UInt* minoff, UInt* maxoff )
    557 {
    558    *minoff = descr->base;
    559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    560    vassert((*minoff & ~0xFFFF) == 0);
    561    vassert((*maxoff & ~0xFFFF) == 0);
    562    vassert(*minoff <= *maxoff);
    563 }
    564 
    565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    566 
    567 static UInt mk_key_GetPut ( Int offset, IRType ty )
    568 {
    569    /* offset should fit in 16 bits. */
    570    UInt minoff = offset;
    571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    572    vassert((minoff & ~0xFFFF) == 0);
    573    vassert((maxoff & ~0xFFFF) == 0);
    574    return (minoff << 16) | maxoff;
    575 }
    576 
    577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    578 {
    579    UInt minoff, maxoff;
    580    getArrayBounds( descr, &minoff, &maxoff );
    581    vassert((minoff & ~0xFFFF) == 0);
    582    vassert((maxoff & ~0xFFFF) == 0);
    583    return (minoff << 16) | maxoff;
    584 }
    585 
    586 /* Supposing h has keys of the form generated by mk_key_GetPut and
    587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    588    .. k_hi).
    589 */
    590 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    591 {
    592    Int  j;
    593    UInt e_lo, e_hi;
    594    vassert(k_lo <= k_hi);
    595    /* invalidate any env entries which in any way overlap (k_lo
    596       .. k_hi) */
    597    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    598 
    599    for (j = 0; j < h->used; j++) {
    600       if (!h->inuse[j])
    601          continue;
    602       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    603       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    604       vassert(e_lo <= e_hi);
    605       if (e_hi < k_lo || k_hi < e_lo)
    606          continue; /* no overlap possible */
    607       else
    608          /* overlap; invalidate */
    609          h->inuse[j] = False;
    610    }
    611 }
    612 
    613 
/* In-place redundant-Get removal over one (flattened) IRSB.  Walks
   forwards keeping an env mapping guest-state byte ranges to the
   atom (temp or constant) last known to hold that range's value:
   a Get whose exact range+type is in the env is replaced by the
   stored atom; Puts/PutIs invalidate overlapping entries; dirty
   helpers that write guest state trash the whole env. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         /* Unpack the (min, max) range from the key just built. */
         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate: after the
         invalidation above, the Put's range is known to hold exactly
         the atom just written.  Note 'key' was computed in the Put
         branch above, so it is valid here. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
    706 
    707 
    708 /*---------------------------------------------------------------*/
    709 /*--- In-place removal of redundant PUTs                      ---*/
    710 /*---------------------------------------------------------------*/
    711 
    712 /* Find any Get uses in st and invalidate any partially or fully
    713    overlapping ranges listed in env.  Due to the flattening phase, the
    714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    715 
    716 static void handle_gets_Stmt (
    717                HashHW* env,
    718                IRStmt* st,
    719                Bool (*preciseMemExnsFn)(Int,Int)
    720             )
    721 {
    722    Int     j;
    723    UInt    key = 0; /* keep gcc -O happy */
    724    Bool    isGet;
    725    Bool    memRW = False;
    726    IRExpr* e;
    727 
    728    switch (st->tag) {
    729 
    730       /* This is the only interesting case.  Deal with Gets in the RHS
    731          expression. */
    732       case Ist_WrTmp:
    733          e = st->Ist.WrTmp.data;
    734          switch (e->tag) {
    735             case Iex_Get:
    736                isGet = True;
    737                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    738                break;
    739             case Iex_GetI:
    740                isGet = True;
    741                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    742                break;
    743             case Iex_Load:
    744                isGet = False;
    745                memRW = True;
    746                break;
    747             default:
    748                isGet = False;
    749          }
    750          if (isGet) {
    751             UInt k_lo, k_hi;
    752             k_lo = (key >> 16) & 0xFFFF;
    753             k_hi = key & 0xFFFF;
    754             invalidateOverlaps(env, k_lo, k_hi);
    755          }
    756          break;
    757 
    758       /* Be very conservative for dirty helper calls; dump the entire
    759          environment.  The helper might read guest state, in which
    760          case it needs to be flushed first.  Also, the helper might
    761          access guest memory, in which case all parts of the guest
    762          state requiring precise exceptions needs to be flushed.  The
    763          crude solution is just to flush everything; we could easily
    764          enough do a lot better if needed. */
    765       /* Probably also overly-conservative, but also dump everything
    766          if we hit a memory bus event (fence, lock, unlock).  Ditto
    767          AbiHints, CASs, LLs and SCs. */
    768       case Ist_AbiHint:
    769          vassert(isIRAtom(st->Ist.AbiHint.base));
    770          vassert(isIRAtom(st->Ist.AbiHint.nia));
    771          /* fall through */
    772       case Ist_MBE:
    773       case Ist_Dirty:
    774       case Ist_CAS:
    775       case Ist_LLSC:
    776          for (j = 0; j < env->used; j++)
    777             env->inuse[j] = False;
    778          break;
    779 
    780       /* all other cases are boring. */
    781       case Ist_Store:
    782          vassert(isIRAtom(st->Ist.Store.addr));
    783          vassert(isIRAtom(st->Ist.Store.data));
    784          memRW = True;
    785          break;
    786       case Ist_StoreG: {
    787          IRStoreG* sg = st->Ist.StoreG.details;
    788          vassert(isIRAtom(sg->addr));
    789          vassert(isIRAtom(sg->data));
    790          vassert(isIRAtom(sg->guard));
    791          memRW = True;
    792          break;
    793       }
    794       case Ist_LoadG: {
    795          IRLoadG* lg = st->Ist.LoadG.details;
    796          vassert(isIRAtom(lg->addr));
    797          vassert(isIRAtom(lg->alt));
    798          vassert(isIRAtom(lg->guard));
    799          memRW = True;
    800          break;
    801       }
    802       case Ist_Exit:
    803          vassert(isIRAtom(st->Ist.Exit.guard));
    804          break;
    805 
    806       case Ist_PutI:
    807          vassert(isIRAtom(st->Ist.PutI.details->ix));
    808          vassert(isIRAtom(st->Ist.PutI.details->data));
    809          break;
    810 
    811       case Ist_NoOp:
    812       case Ist_IMark:
    813          break;
    814 
    815       default:
    816          vex_printf("\n");
    817          ppIRStmt(st);
    818          vex_printf("\n");
    819          vpanic("handle_gets_Stmt");
    820    }
    821 
    822    if (memRW) {
    823       /* This statement accesses memory.  So we might need to dump all parts
    824          of the environment corresponding to guest state that may not
    825          be reordered with respect to memory references.  That means
    826          at least the stack pointer. */
    827       switch (vex_control.iropt_register_updates) {
    828          case VexRegUpdAllregsAtMemAccess:
    829             /* Precise exceptions required at mem access.
    830                Flush all guest state. */
    831             for (j = 0; j < env->used; j++)
    832                env->inuse[j] = False;
    833             break;
    834          case VexRegUpdSpAtMemAccess:
    835             /* We need to dump the stack pointer
    836                (needed for stack extension in m_signals.c).
    837                preciseMemExnsFn will use vex_control.iropt_register_updates
    838                to verify only the sp is to be checked. */
    839             /* fallthrough */
    840          case VexRegUpdUnwindregsAtMemAccess:
    841             for (j = 0; j < env->used; j++) {
    842                if (!env->inuse[j])
    843                   continue;
    844                /* Just flush the minimal amount required, as computed by
    845                   preciseMemExnsFn. */
    846                HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    847                HWord k_hi = env->key[j] & 0xFFFF;
    848                if (preciseMemExnsFn( k_lo, k_hi ))
    849                   env->inuse[j] = False;
    850             }
    851             break;
    852          default:
    853             // VexRegUpdAllregsAtEachInsn cannot happen here.
    854             // Neither any rubbish other value.
    855             vassert(0);
    856       }
    857    } /* if (memRW) */
    858 
    859 }
    860 
    861 
    862 /* Scan backwards, building up a set of (min offset, max
    863    offset) pairs, indicating those parts of the guest state
    864    for which the next event is a write.
    865 
    866    On seeing a conditional exit, empty the set.
    867 
    868    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    869    completely within the set, remove the Put.  Otherwise, add
    870    (minoff,maxoff) to the set.
    871 
    872    On seeing 'Get (minoff,maxoff)', remove any part of the set
    873    overlapping (minoff,maxoff).  The same has to happen for any events
    874    which implicitly read parts of the guest state: dirty helper calls
    875    and loads/stores.
    876 */
    877 
/* Remove redundant Puts from 'bb'.  Scans the block backwards,
   tracking in 'env' the set of guest-state slices whose next event is
   a write; a Put to a slice already in that set can never be observed
   and is replaced by a NoOp.  See the block comment above for the
   full algorithm. */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* Precondition: this transformation is invalid when all registers
      must be up to date at every instruction boundary. */
   vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Only plain Puts are candidates for removal; a PutI writes an
         unknown element of a region and is never removed here. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
    983 
    984 
    985 /*---------------------------------------------------------------*/
    986 /*--- Constant propagation and folding                        ---*/
    987 /*---------------------------------------------------------------*/
    988 
/* Instrumentation counters for sameIRExprs; compiled in only when
   STATS_IROPT is set. */
#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs
   invocation.  Reset to 0 by sameIRExprs; incremented on each call to
   sameIRExprs_aux2. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30
   1016 
   1017 
   1018 /* The env in this section is a map from IRTemp to IRExpr*,
   1019    that is, an array indexed by IRTemp. */
   1020 
   1021 /* Do both expressions compute the same value? The answer is generally
   1022    conservative, i.e. it will report that the expressions do not compute
   1023    the same value when in fact they do. The reason is that we do not
   1024    keep track of changes in the guest state and memory. Thusly, two
   1025    Get's, GetI's or Load's, even when accessing the same location, will be
   1026    assumed to compute different values. After all the accesses may happen
   1027    at different times and the guest state / memory can have changed in
   1028    the meantime.
   1029 
   1030    XXX IMPORTANT XXX the two expressions must have the same IR type.
   1031    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
   1032 
   1033 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   1034    wrapper that deals with the common tags-don't-match case, and a
   1035    slower out of line general case.  Saves a few insns. */
   1036 
   1037 __attribute__((noinline))
   1038 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
   1039 
   1040 inline
   1041 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1042 {
   1043    if (e1->tag != e2->tag) return False;
   1044    return sameIRExprs_aux2(env, e1, e2);
   1045 }
   1046 
/* General (out-of-line) case of the equality test.  The caller
   (sameIRExprs_aux) guarantees e1->tag == e2->tag on entry.  Gives up
   (returns False) once more than NODE_LIMIT nodes have been visited
   within the current sameIRExprs invocation. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Budget check: bail out conservatively on large trees. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different tmps: compare their bindings, provided both are
            bound in env (a NULL binding means the tmp is defined by a
            statement we cannot see through). */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* sameIRExprs requires same-typed arguments, so the constant
            tags must agree here. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
   1124 
   1125 inline
   1126 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1127 {
   1128    Bool same;
   1129 
   1130    num_nodes_visited = 0;
   1131    same = sameIRExprs_aux(env, e1, e2);
   1132 
   1133 #if STATS_IROPT
   1134    ++invocation_count;
   1135    if (recursed) ++recursion_count;
   1136    success_count += same;
   1137    if (same && recursion_helped)
   1138       ++recursion_success_count;
   1139    if (num_nodes_visited > max_nodes_visited)
   1140       max_nodes_visited = num_nodes_visited;
   1141    recursed = False; /* reset */
   1142    recursion_helped = False;  /* reset */
   1143 #endif /* STATS_IROPT */
   1144 
   1145    return same;
   1146 }
   1147 
   1148 
   1149 /* Debugging-only hack (not used in production runs): make a guess
   1150    whether sameIRExprs might assert due to the two args being of
   1151    different types.  If in doubt return False.  Is only used when
   1152    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   1153    Bad because it duplicates functionality from typeOfIRExpr.  See
   1154    comment on the single use point below for rationale. */
   1155 static
   1156 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   1157 {
   1158    if (e1->tag != e2->tag) return False;
   1159    switch (e1->tag) {
   1160       case Iex_Const: {
   1161          /* The only interesting case */
   1162          IRConst *c1 = e1->Iex.Const.con;
   1163          IRConst *c2 = e2->Iex.Const.con;
   1164          return c1->tag != c2->tag;
   1165       }
   1166       default:
   1167          break;
   1168    }
   1169    return False;
   1170 }
   1171 
   1172 
   1173 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
   1174 static Bool isZeroU32 ( IRExpr* e )
   1175 {
   1176    return toBool( e->tag == Iex_Const
   1177                   && e->Iex.Const.con->tag == Ico_U32
   1178                   && e->Iex.Const.con->Ico.U32 == 0);
   1179 }
   1180 
   1181 /* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
   1182 static Bool isZeroU64 ( IRExpr* e )
   1183 {
   1184    return toBool( e->tag == Iex_Const
   1185                   && e->Iex.Const.con->tag == Ico_U64
   1186                   && e->Iex.Const.con->Ico.U64 == 0);
   1187 }
   1188 
   1189 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
   1190 static Bool isZeroV128 ( IRExpr* e )
   1191 {
   1192    return toBool( e->tag == Iex_Const
   1193                   && e->Iex.Const.con->tag == Ico_V128
   1194                   && e->Iex.Const.con->Ico.V128 == 0x0000);
   1195 }
   1196 
   1197 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
   1198 static Bool isZeroV256 ( IRExpr* e )
   1199 {
   1200    return toBool( e->tag == Iex_Const
   1201                   && e->Iex.Const.con->tag == Ico_V256
   1202                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
   1203 }
   1204 
   1205 /* Is this an integer constant with value 0 ? */
   1206 static Bool isZeroU ( IRExpr* e )
   1207 {
   1208    if (e->tag != Iex_Const) return False;
   1209    switch (e->Iex.Const.con->tag) {
   1210       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
   1211       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
   1212       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
   1213       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
   1214       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
   1215       default: vpanic("isZeroU");
   1216    }
   1217 }
   1218 
   1219 /* Is this an integer constant with value 1---1b ? */
   1220 static Bool isOnesU ( IRExpr* e )
   1221 {
   1222    if (e->tag != Iex_Const) return False;
   1223    switch (e->Iex.Const.con->tag) {
   1224       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
   1225       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
   1226       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
   1227                                      == 0xFFFFFFFF);
   1228       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
   1229                                      == 0xFFFFFFFFFFFFFFFFULL);
   1230       default: ppIRExpr(e); vpanic("isOnesU");
   1231    }
   1232 }
   1233 
   1234 static Bool notBool ( Bool b )
   1235 {
   1236    if (b == True) return False;
   1237    if (b == False) return True;
   1238    vpanic("notBool");
   1239 }
   1240 
   1241 /* Make a zero which has the same type as the result of the given
   1242    primop. */
   1243 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
   1244 {
   1245    switch (op) {
   1246       case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
   1247       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
   1248       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
   1249       case Iop_Sub32:
   1250       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
   1251       case Iop_And64:
   1252       case Iop_Sub64:
   1253       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
   1254       case Iop_XorV128:
   1255       case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
   1256       default: vpanic("mkZeroOfPrimopResultType: bad primop");
   1257    }
   1258 }
   1259 
   1260 /* Make a value containing all 1-bits, which has the same type as the
   1261    result of the given primop. */
   1262 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
   1263 {
   1264    switch (op) {
   1265       case Iop_CmpEQ32:
   1266       case Iop_CmpEQ64:
   1267          return IRExpr_Const(IRConst_U1(toBool(1)));
   1268       case Iop_Or8:
   1269          return IRExpr_Const(IRConst_U8(0xFF));
   1270       case Iop_Or16:
   1271          return IRExpr_Const(IRConst_U16(0xFFFF));
   1272       case Iop_Or32:
   1273          return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
   1274       case Iop_CmpEQ8x8:
   1275       case Iop_Or64:
   1276          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
   1277       case Iop_CmpEQ8x16:
   1278       case Iop_CmpEQ16x8:
   1279       case Iop_CmpEQ32x4:
   1280          return IRExpr_Const(IRConst_V128(0xFFFF));
   1281       default:
   1282          ppIROp(op);
   1283          vpanic("mkOnesOfPrimopResultType: bad primop");
   1284    }
   1285 }
   1286 
   1287 /* Helpers for folding Clz32/64. */
   1288 static UInt fold_Clz64 ( ULong value )
   1289 {
   1290    UInt i;
   1291    vassert(value != 0ULL); /* no defined semantics for arg==0 */
   1292    for (i = 0; i < 64; ++i) {
   1293       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   1294    }
   1295    vassert(0);
   1296    /*NOTREACHED*/
   1297    return 0;
   1298 }
   1299 
   1300 static UInt fold_Clz32 ( UInt value )
   1301 {
   1302    UInt i;
   1303    vassert(value != 0); /* no defined semantics for arg==0 */
   1304    for (i = 0; i < 32; ++i) {
   1305       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   1306    }
   1307    vassert(0);
   1308    /*NOTREACHED*/
   1309    return 0;
   1310 }
   1311 
   1312 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   1313    the corresponding real constant. */
   1314 //XXX re-check this before use
   1315 //static ULong de_summarise_V64 ( UChar v64 )
   1316 //{
   1317 //   ULong r = 0;
   1318 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
   1319 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
   1320 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
   1321 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
   1322 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
   1323 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
   1324 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
   1325 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
   1326 //   return r;
   1327 //}
   1328 
   1329 /* Helper for arbitrary expression pattern matching in flat IR.  If
   1330    'e' is a reference to a tmp, look it up in env -- repeatedly, if
   1331    necessary -- until it resolves to a non-tmp.  Note that this can
   1332    return NULL if it can't resolve 'e' to a new expression, which will
   1333    be the case if 'e' is instead defined by an IRStmt (IRDirty or
   1334    LLSC). */
   1335 static IRExpr* chase ( IRExpr** env, IRExpr* e )
   1336 {
   1337    /* Why is this loop guaranteed to terminate?  Because all tmps must
   1338       have definitions before use, hence a tmp cannot be bound
   1339       (directly or indirectly) to itself. */
   1340    while (e->tag == Iex_RdTmp) {
   1341       if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
   1342       e = env[(Int)e->Iex.RdTmp.tmp];
   1343       if (e == NULL) break;
   1344    }
   1345    return e;
   1346 }
   1347 
   1348 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
   1349 {
   1350    Int     shift;
   1351    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1352 
   1353    switch (e->tag) {
   1354    case Iex_Unop:
   1355       /* UNARY ops */
   1356       if (e->Iex.Unop.arg->tag == Iex_Const) {
   1357          switch (e->Iex.Unop.op) {
   1358          case Iop_1Uto8:
   1359             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1360                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1361                     ? 1 : 0)));
   1362             break;
   1363          case Iop_1Uto32:
   1364             e2 = IRExpr_Const(IRConst_U32(
   1365                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1366                     ? 1 : 0));
   1367             break;
   1368          case Iop_1Uto64:
   1369             e2 = IRExpr_Const(IRConst_U64(
   1370                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1371                     ? 1 : 0));
   1372             break;
   1373 
   1374          case Iop_1Sto8:
   1375             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1376                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1377                     ? 0xFF : 0)));
   1378             break;
   1379          case Iop_1Sto16:
   1380             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1381                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1382                     ? 0xFFFF : 0)));
   1383             break;
   1384          case Iop_1Sto32:
   1385             e2 = IRExpr_Const(IRConst_U32(
   1386                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1387                     ? 0xFFFFFFFF : 0));
   1388             break;
   1389          case Iop_1Sto64:
   1390             e2 = IRExpr_Const(IRConst_U64(
   1391                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1392                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1393             break;
   1394 
   1395          case Iop_8Sto32: {
   1396             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1397             s32 <<= 24;
   1398             s32 >>= 24;
   1399             e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1400             break;
   1401          }
   1402          case Iop_16Sto32: {
   1403             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1404             s32 <<= 16;
   1405             s32 >>= 16;
   1406             e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
   1407             break;
   1408          }
   1409          case Iop_8Uto64:
   1410             e2 = IRExpr_Const(IRConst_U64(
   1411                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1412             break;
   1413          case Iop_16Uto64:
   1414             e2 = IRExpr_Const(IRConst_U64(
   1415                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1416             break;
   1417          case Iop_8Uto32:
   1418             e2 = IRExpr_Const(IRConst_U32(
   1419                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1420             break;
   1421          case Iop_8Sto16: {
   1422             /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1423             s16 <<= 8;
   1424             s16 >>= 8;
   1425             e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
   1426             break;
   1427          }
   1428          case Iop_8Uto16:
   1429             e2 = IRExpr_Const(IRConst_U16(
   1430                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1431             break;
   1432          case Iop_16Uto32:
   1433             e2 = IRExpr_Const(IRConst_U32(
   1434                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1435             break;
   1436          case Iop_32to16:
   1437             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1438                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1439             break;
   1440          case Iop_32to8:
   1441             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1442                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1443             break;
   1444          case Iop_32to1:
   1445             e2 = IRExpr_Const(IRConst_U1(toBool(
   1446                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1447                  )));
   1448             break;
   1449          case Iop_64to1:
   1450             e2 = IRExpr_Const(IRConst_U1(toBool(
   1451                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1452                  )));
   1453             break;
   1454 
   1455          case Iop_NotV128:
   1456             e2 = IRExpr_Const(IRConst_V128(
   1457                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
   1458             break;
   1459          case Iop_Not64:
   1460             e2 = IRExpr_Const(IRConst_U64(
   1461                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1462             break;
   1463          case Iop_Not32:
   1464             e2 = IRExpr_Const(IRConst_U32(
   1465                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1466             break;
   1467          case Iop_Not16:
   1468             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1469                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1470             break;
   1471          case Iop_Not8:
   1472             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1473                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1474             break;
   1475 
   1476          case Iop_Not1:
   1477             e2 = IRExpr_Const(IRConst_U1(
   1478                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1479             break;
   1480 
   1481          case Iop_64to8: {
   1482             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1483             w64 &= 0xFFULL;
   1484             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1485             break;
   1486          }
   1487          case Iop_64to16: {
   1488             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1489             w64 &= 0xFFFFULL;
   1490             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1491             break;
   1492          }
   1493          case Iop_64to32: {
   1494             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1495             w64 &= 0x00000000FFFFFFFFULL;
   1496             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1497             break;
   1498          }
   1499          case Iop_64HIto32: {
   1500             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1501             w64 >>= 32;
   1502             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1503             break;
   1504          }
   1505          case Iop_32Uto64:
   1506             e2 = IRExpr_Const(IRConst_U64(
   1507                     0xFFFFFFFFULL
   1508                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1509             break;
   1510          case Iop_16Sto64: {
   1511             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1512             s64 <<= 48;
   1513             s64 >>= 48;
   1514             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1515             break;
   1516          }
   1517          case Iop_32Sto64: {
   1518             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1519             s64 <<= 32;
   1520             s64 >>= 32;
   1521             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1522             break;
   1523          }
   1524 
   1525          case Iop_16to8: {
   1526             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1527             w16 &= 0xFF;
   1528             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1529             break;
   1530          }
   1531          case Iop_16HIto8: {
   1532             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1533             w16 >>= 8;
   1534             w16 &= 0xFF;
   1535             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1536             break;
   1537          }
   1538 
   1539          case Iop_CmpNEZ8:
   1540             e2 = IRExpr_Const(IRConst_U1(toBool(
   1541                     0 !=
   1542                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1543                  )));
   1544             break;
   1545          case Iop_CmpNEZ32:
   1546             e2 = IRExpr_Const(IRConst_U1(toBool(
   1547                     0 !=
   1548                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1549                  )));
   1550             break;
   1551          case Iop_CmpNEZ64:
   1552             e2 = IRExpr_Const(IRConst_U1(toBool(
   1553                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1554                  )));
   1555             break;
   1556 
   1557          case Iop_CmpwNEZ32: {
   1558             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1559             if (w32 == 0)
   1560                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1561             else
   1562                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1563             break;
   1564          }
   1565          case Iop_CmpwNEZ64: {
   1566             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1567             if (w64 == 0)
   1568                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1569             else
   1570                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1571             break;
   1572          }
   1573 
   1574          case Iop_Left32: {
   1575             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1576             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1577             s32 = (s32 | (-s32));
   1578             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1579             break;
   1580          }
   1581 
   1582          case Iop_Left64: {
   1583             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1584             Long  s64 = (Long)u64;
   1585             s64 = (s64 | (-s64));
   1586             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1587             break;
   1588          }
   1589 
   1590          case Iop_Clz32: {
   1591             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1592             if (u32 != 0)
   1593                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
   1594             break;
   1595          }
   1596          case Iop_Clz64: {
   1597             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1598             if (u64 != 0ULL)
   1599                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
   1600             break;
   1601          }
   1602 
   1603          /* For these vector ones, can't fold all cases, but at least
   1604             do the most obvious one.  Could do better here using
   1605             summarise/desummarise of vector constants, but too
   1606             difficult to verify; hence just handle the zero cases. */
   1607          case Iop_32UtoV128: {
   1608             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1609             if (0 == u32) {
   1610                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1611             } else {
   1612                goto unhandled;
   1613             }
   1614             break;
   1615          }
   1616          case Iop_V128to64: {
   1617             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1618             if (0 == ((v128 >> 0) & 0xFF)) {
   1619                e2 = IRExpr_Const(IRConst_U64(0));
   1620             } else {
   1621                goto unhandled;
   1622             }
   1623             break;
   1624          }
   1625          case Iop_V128HIto64: {
   1626             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1627             if (0 == ((v128 >> 8) & 0xFF)) {
   1628                e2 = IRExpr_Const(IRConst_U64(0));
   1629             } else {
   1630                goto unhandled;
   1631             }
   1632             break;
   1633          }
   1634          case Iop_64UtoV128: {
   1635             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1636             if (0 == u64) {
   1637                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1638             } else {
   1639                goto unhandled;
   1640             }
   1641             break;
   1642          }
   1643 
   1644          /* Even stupider (although still correct ..) */
   1645          case Iop_V256to64_0: case Iop_V256to64_1:
   1646          case Iop_V256to64_2: case Iop_V256to64_3: {
   1647             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
   1648             if (v256 == 0x00000000) {
   1649                e2 = IRExpr_Const(IRConst_U64(0));
   1650             } else {
   1651                goto unhandled;
   1652             }
   1653             break;
   1654          }
   1655 
   1656          default:
   1657             goto unhandled;
   1658       }
   1659       }
   1660       break;
   1661 
   1662    case Iex_Binop:
   1663       /* BINARY ops */
   1664       if (e->Iex.Binop.arg1->tag == Iex_Const
   1665           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1666          /* cases where both args are consts */
   1667          switch (e->Iex.Binop.op) {
   1668 
   1669             /* -- Or -- */
   1670             case Iop_Or8:
   1671                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1672                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1673                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1674                break;
   1675             case Iop_Or16:
   1676                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1677                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1678                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1679                break;
   1680             case Iop_Or32:
   1681                e2 = IRExpr_Const(IRConst_U32(
   1682                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1683                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1684                break;
   1685             case Iop_Or64:
   1686                e2 = IRExpr_Const(IRConst_U64(
   1687                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1688                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1689                break;
   1690             case Iop_OrV128:
   1691                e2 = IRExpr_Const(IRConst_V128(
   1692                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1693                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1694                break;
   1695 
   1696             /* -- Xor -- */
   1697             case Iop_Xor8:
   1698                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1699                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1700                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1701                break;
   1702             case Iop_Xor16:
   1703                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1704                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1705                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1706                break;
   1707             case Iop_Xor32:
   1708                e2 = IRExpr_Const(IRConst_U32(
   1709                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1710                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1711                break;
   1712             case Iop_Xor64:
   1713                e2 = IRExpr_Const(IRConst_U64(
   1714                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1715                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1716                break;
   1717             case Iop_XorV128:
   1718                e2 = IRExpr_Const(IRConst_V128(
   1719                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1720                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1721                break;
   1722 
   1723             /* -- And -- */
   1724             case Iop_And8:
   1725                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1726                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1727                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1728                break;
   1729             case Iop_And16:
   1730                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1731                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1732                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1733                break;
   1734             case Iop_And32:
   1735                e2 = IRExpr_Const(IRConst_U32(
   1736                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1737                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1738                break;
   1739             case Iop_And64:
   1740                e2 = IRExpr_Const(IRConst_U64(
   1741                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1742                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1743                break;
   1744             case Iop_AndV128:
   1745                e2 = IRExpr_Const(IRConst_V128(
   1746                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1747                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1748                break;
   1749 
   1750             /* -- Add -- */
   1751             case Iop_Add8:
   1752                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1753                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1754                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1755                break;
   1756             case Iop_Add32:
   1757                e2 = IRExpr_Const(IRConst_U32(
   1758                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1759                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1760                break;
   1761             case Iop_Add64:
   1762                e2 = IRExpr_Const(IRConst_U64(
   1763                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1764                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1765                break;
   1766 
   1767             /* -- Sub -- */
   1768             case Iop_Sub8:
   1769                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1770                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1771                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1772                break;
   1773             case Iop_Sub32:
   1774                e2 = IRExpr_Const(IRConst_U32(
   1775                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1776                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1777                break;
   1778             case Iop_Sub64:
   1779                e2 = IRExpr_Const(IRConst_U64(
   1780                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1781                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1782                break;
   1783 
   1784             /* -- Max32U -- */
   1785             case Iop_Max32U: {
   1786                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1787                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1788                UInt res  = u32a > u32b ? u32a : u32b;
   1789                e2 = IRExpr_Const(IRConst_U32(res));
   1790                break;
   1791             }
   1792 
   1793             /* -- Mul -- */
   1794             case Iop_Mul32:
   1795                e2 = IRExpr_Const(IRConst_U32(
   1796                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1797                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1798                break;
   1799             case Iop_Mul64:
   1800                e2 = IRExpr_Const(IRConst_U64(
   1801                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1802                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1803                break;
   1804 
   1805             case Iop_MullS32: {
   1806                /* very paranoid */
   1807                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1808                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1809                Int   s32a = (Int)u32a;
   1810                Int   s32b = (Int)u32b;
   1811                Long  s64a = (Long)s32a;
   1812                Long  s64b = (Long)s32b;
   1813                Long  sres = s64a * s64b;
   1814                ULong ures = (ULong)sres;
   1815                e2 = IRExpr_Const(IRConst_U64(ures));
   1816                break;
   1817             }
   1818 
   1819             /* -- Shl -- */
   1820             case Iop_Shl32:
   1821                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1822                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1823                if (shift >= 0 && shift <= 31)
   1824                   e2 = IRExpr_Const(IRConst_U32(
   1825                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1826                            << shift)));
   1827                break;
   1828             case Iop_Shl64:
   1829                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1830                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1831                if (shift >= 0 && shift <= 63)
   1832                   e2 = IRExpr_Const(IRConst_U64(
   1833                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1834                            << shift)));
   1835                break;
   1836 
   1837             /* -- Sar -- */
   1838             case Iop_Sar32: {
   1839                /* paranoid ... */
   1840                /*signed*/ Int s32;
   1841                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1842                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1843                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1844                if (shift >= 0 && shift <= 31) {
   1845                   s32 >>=/*signed*/ shift;
   1846                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1847                }
   1848                break;
   1849             }
   1850             case Iop_Sar64: {
   1851                /* paranoid ... */
   1852                /*signed*/ Long s64;
   1853                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1854                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1855                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1856                if (shift >= 0 && shift <= 63) {
   1857                   s64 >>=/*signed*/ shift;
   1858                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1859                }
   1860                break;
   1861             }
   1862 
   1863             /* -- Shr -- */
   1864             case Iop_Shr32: {
   1865                /* paranoid ... */
   1866                /*unsigned*/ UInt u32;
   1867                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1868                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1869                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1870                if (shift >= 0 && shift <= 31) {
   1871                   u32 >>=/*unsigned*/ shift;
   1872                   e2 = IRExpr_Const(IRConst_U32(u32));
   1873                }
   1874                break;
   1875             }
   1876             case Iop_Shr64: {
   1877                /* paranoid ... */
   1878                /*unsigned*/ ULong u64;
   1879                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1880                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1881                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1882                if (shift >= 0 && shift <= 63) {
   1883                   u64 >>=/*unsigned*/ shift;
   1884                   e2 = IRExpr_Const(IRConst_U64(u64));
   1885                }
   1886                break;
   1887             }
   1888 
   1889             /* -- CmpEQ -- */
   1890             case Iop_CmpEQ32:
   1891                e2 = IRExpr_Const(IRConst_U1(toBool(
   1892                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1893                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1894                break;
   1895             case Iop_CmpEQ64:
   1896                e2 = IRExpr_Const(IRConst_U1(toBool(
   1897                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1898                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1899                break;
   1900 
   1901             /* -- CmpNE -- */
   1902             case Iop_CmpNE8:
   1903             case Iop_CasCmpNE8:
   1904             case Iop_ExpCmpNE8:
   1905                e2 = IRExpr_Const(IRConst_U1(toBool(
   1906                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1907                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1908                break;
   1909             case Iop_CmpNE32:
   1910             case Iop_CasCmpNE32:
   1911             case Iop_ExpCmpNE32:
   1912                e2 = IRExpr_Const(IRConst_U1(toBool(
   1913                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1914                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1915                break;
   1916             case Iop_CmpNE64:
   1917             case Iop_CasCmpNE64:
   1918             case Iop_ExpCmpNE64:
   1919                e2 = IRExpr_Const(IRConst_U1(toBool(
   1920                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1921                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1922                break;
   1923 
   1924             /* -- CmpLEU -- */
   1925             case Iop_CmpLE32U:
   1926                e2 = IRExpr_Const(IRConst_U1(toBool(
   1927                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1928                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1929                break;
   1930             case Iop_CmpLE64U:
   1931                e2 = IRExpr_Const(IRConst_U1(toBool(
   1932                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1933                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1934                break;
   1935 
   1936             /* -- CmpLES -- */
   1937             case Iop_CmpLE32S:
   1938                e2 = IRExpr_Const(IRConst_U1(toBool(
   1939                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1940                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1941                break;
   1942             case Iop_CmpLE64S:
   1943                e2 = IRExpr_Const(IRConst_U1(toBool(
   1944                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1945                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1946                break;
   1947 
   1948             /* -- CmpLTS -- */
   1949             case Iop_CmpLT32S:
   1950                e2 = IRExpr_Const(IRConst_U1(toBool(
   1951                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1952                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1953                break;
   1954             case Iop_CmpLT64S:
   1955                e2 = IRExpr_Const(IRConst_U1(toBool(
   1956                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1957                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1958                break;
   1959 
   1960             /* -- CmpLTU -- */
   1961             case Iop_CmpLT32U:
   1962                e2 = IRExpr_Const(IRConst_U1(toBool(
   1963                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1964                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1965                break;
   1966             case Iop_CmpLT64U:
   1967                e2 = IRExpr_Const(IRConst_U1(toBool(
   1968                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1969                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1970                break;
   1971 
   1972             /* -- CmpORD -- */
   1973             case Iop_CmpORD32S: {
   1974                /* very paranoid */
   1975                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1976                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1977                Int   s32a = (Int)u32a;
   1978                Int   s32b = (Int)u32b;
   1979                Int   r = 0x2; /* EQ */
   1980                if (s32a < s32b) {
   1981                   r = 0x8; /* LT */
   1982                }
   1983                else if (s32a > s32b) {
   1984                   r = 0x4; /* GT */
   1985                }
   1986                e2 = IRExpr_Const(IRConst_U32(r));
   1987                break;
   1988             }
   1989 
   1990             /* -- nHLto2n -- */
   1991             case Iop_32HLto64:
   1992                e2 = IRExpr_Const(IRConst_U64(
   1993                        (((ULong)(e->Iex.Binop.arg1
   1994                                   ->Iex.Const.con->Ico.U32)) << 32)
   1995                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   1996                     ));
   1997                break;
   1998             case Iop_64HLto128:
   1999                /* We can't fold this, because there is no way to
    2000                   express the result in IR, but at least pretend to
   2001                   handle it, so as to stop getting blasted with
   2002                   no-rule-for-this-primop messages. */
   2003                break;
   2004             /* For this vector one, can't fold all cases, but at
   2005                least do the most obvious one.  Could do better here
   2006                using summarise/desummarise of vector constants, but
   2007                too difficult to verify; hence just handle the zero
   2008                cases. */
   2009             case Iop_64HLtoV128: {
   2010                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
   2011                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   2012                if (0 == argHi && 0 == argLo) {
   2013                   e2 = IRExpr_Const(IRConst_V128(0));
   2014                } else {
   2015                   goto unhandled;
   2016                }
   2017                break;
   2018             }
   2019             /* Same reasoning for the 256-bit version. */
   2020             case Iop_V128HLtoV256: {
   2021                IRExpr* argHi = e->Iex.Binop.arg1;
   2022                IRExpr* argLo = e->Iex.Binop.arg2;
   2023                if (isZeroV128(argHi) && isZeroV128(argLo)) {
   2024                   e2 = IRExpr_Const(IRConst_V256(0));
   2025                } else {
   2026                   goto unhandled;
   2027                }
   2028                break;
   2029             }
   2030 
   2031             /* -- V128 stuff -- */
   2032             case Iop_InterleaveLO8x16: {
   2033                /* This turns up a lot in Memcheck instrumentation of
   2034                   Icc generated code.  I don't know why. */
   2035                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
   2036                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
   2037                if (0 == arg1 && 0 == arg2) {
   2038                   e2 = IRExpr_Const(IRConst_V128(0));
   2039                } else {
   2040                   goto unhandled;
   2041                }
   2042                break;
   2043             }
   2044 
   2045             default:
   2046                goto unhandled;
   2047          }
   2048 
   2049       } else {
   2050 
   2051          /* other cases (identities, etc) */
   2052          switch (e->Iex.Binop.op) {
   2053 
   2054             case Iop_Shl32:
   2055             case Iop_Shl64:
   2056             case Iop_Shr64:
   2057                /* Shl32/Shl64/Shr64(x,0) ==> x */
   2058                if (isZeroU(e->Iex.Binop.arg2)) {
   2059                   e2 = e->Iex.Binop.arg1;
   2060                   break;
   2061                }
   2062                /* Shl32/Shl64/Shr64(0,x) ==> 0 */
   2063                if (isZeroU(e->Iex.Binop.arg1)) {
   2064                   e2 = e->Iex.Binop.arg1;
   2065                   break;
   2066                }
   2067                break;
   2068 
   2069             case Iop_Shr32:
   2070                /* Shr32(x,0) ==> x */
   2071                if (isZeroU(e->Iex.Binop.arg2)) {
   2072                   e2 = e->Iex.Binop.arg1;
   2073                   break;
   2074                }
   2075                break;
   2076 
   2077             case Iop_Or8:
   2078             case Iop_Or16:
   2079             case Iop_Or32:
   2080             case Iop_Or64:
   2081             case Iop_Max32U:
   2082                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
   2083                if (isZeroU(e->Iex.Binop.arg2)) {
   2084                   e2 = e->Iex.Binop.arg1;
   2085                   break;
   2086                }
   2087                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
   2088                if (isZeroU(e->Iex.Binop.arg1)) {
   2089                   e2 = e->Iex.Binop.arg2;
   2090                   break;
   2091                }
   2092                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
   2093                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
   2094                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
   2095                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2096                   break;
   2097                }
   2098                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
   2099                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2100                   e2 = e->Iex.Binop.arg1;
   2101                   break;
   2102                }
   2103                break;
   2104 
   2105             case Iop_Add8:
   2106                /* Add8(t,t) ==> t << 1.
   2107                   Memcheck doesn't understand that
   2108                   x+x produces a defined least significant bit, and it seems
   2109                   simplest just to get rid of the problem by rewriting it
   2110                   out, since the opportunity to do so exists. */
   2111                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2112                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
   2113                                     IRExpr_Const(IRConst_U8(1)));
   2114                   break;
   2115                }
   2116                break;
   2117 
   2118                /* NB no Add16(t,t) case yet as no known test case exists */
   2119 
   2120             case Iop_Add32:
   2121             case Iop_Add64:
   2122                /* Add32/Add64(x,0) ==> x */
   2123                if (isZeroU(e->Iex.Binop.arg2)) {
   2124                   e2 = e->Iex.Binop.arg1;
   2125                   break;
   2126                }
   2127                /* Add32/Add64(0,x) ==> x */
   2128                if (isZeroU(e->Iex.Binop.arg1)) {
   2129                   e2 = e->Iex.Binop.arg2;
   2130                   break;
   2131                }
   2132                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
   2133                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2134                   e2 = IRExpr_Binop(
   2135                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
   2136                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
   2137                   break;
   2138                }
   2139                break;
   2140 
   2141             case Iop_Sub32:
   2142             case Iop_Sub64:
   2143                /* Sub32/Sub64(x,0) ==> x */
   2144                if (isZeroU(e->Iex.Binop.arg2)) {
   2145                   e2 = e->Iex.Binop.arg1;
   2146                   break;
   2147                }
   2148                /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
   2149                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2150                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2151                   break;
   2152                }
   2153                break;
   2154             case Iop_Sub8x16:
   2155                /* Sub8x16(x,0) ==> x */
   2156                if (isZeroV128(e->Iex.Binop.arg2)) {
   2157                   e2 = e->Iex.Binop.arg1;
   2158                   break;
   2159                }
   2160                break;
   2161 
   2162             case Iop_And64:
   2163             case Iop_And32:
   2164                /* And32/And64(x,1---1b) ==> x */
   2165                if (isOnesU(e->Iex.Binop.arg2)) {
   2166                   e2 = e->Iex.Binop.arg1;
   2167                   break;
   2168                }
   2169                /* And32/And64(x,0) ==> 0 */
   2170                if (isZeroU(e->Iex.Binop.arg2)) {
   2171                   e2 = e->Iex.Binop.arg2;
   2172                   break;
   2173                }
   2174                /* And32/And64(0,x) ==> 0 */
   2175                if (isZeroU(e->Iex.Binop.arg1)) {
   2176                   e2 = e->Iex.Binop.arg1;
   2177                   break;
   2178                }
   2179                /* And32/And64(t,t) ==> t, for some IRTemp t */
   2180                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2181                   e2 = e->Iex.Binop.arg1;
   2182                   break;
   2183                }
   2184                break;
   2185 
   2186             case Iop_And8:
   2187             case Iop_And16:
   2188             case Iop_AndV128:
   2189             case Iop_AndV256:
   2190                /* And8/And16/AndV128/AndV256(t,t)
   2191                   ==> t, for some IRTemp t */
   2192                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2193                   e2 = e->Iex.Binop.arg1;
   2194                   break;
   2195                }
   2196                /* Deal with either arg zero.  Could handle other And
   2197                   cases here too. */
   2198                if (e->Iex.Binop.op == Iop_And64
   2199                    && (isZeroU64(e->Iex.Binop.arg1)
   2200                        || isZeroU64(e->Iex.Binop.arg2))) {
   2201                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2202                   break;
   2203                } else if (e->Iex.Binop.op == Iop_AndV128
   2204                           && (isZeroV128(e->Iex.Binop.arg1)
   2205                               || isZeroV128(e->Iex.Binop.arg2))) {
   2206                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2207                   break;
   2208                }
   2209                break;
   2210 
   2211             case Iop_OrV128:
   2212             case Iop_OrV256:
   2213                /* V128/V256(t,t) ==> t, for some IRTemp t */
   2214                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2215                   e2 = e->Iex.Binop.arg1;
   2216                   break;
   2217                }
   2218                /* OrV128(t,0) ==> t */
   2219                if (e->Iex.Binop.op == Iop_OrV128) {
   2220                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2221                      e2 = e->Iex.Binop.arg1;
   2222                      break;
   2223                   }
   2224                   if (isZeroV128(e->Iex.Binop.arg1)) {
   2225                      e2 = e->Iex.Binop.arg2;
   2226                      break;
   2227                   }
   2228                }
   2229                /* OrV256(t,0) ==> t */
   2230                if (e->Iex.Binop.op == Iop_OrV256) {
   2231                   if (isZeroV256(e->Iex.Binop.arg2)) {
   2232                      e2 = e->Iex.Binop.arg1;
   2233                      break;
   2234                   }
   2235                   //Disabled because there's no known test case right now.
   2236                   //if (isZeroV256(e->Iex.Binop.arg1)) {
   2237                   //   e2 = e->Iex.Binop.arg2;
   2238                   //   break;
   2239                   //}
   2240                }
   2241                break;
   2242 
   2243             case Iop_Xor8:
   2244             case Iop_Xor16:
   2245             case Iop_Xor32:
   2246             case Iop_Xor64:
   2247             case Iop_XorV128:
   2248                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   2249                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2250                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2251                   break;
   2252                }
   2253                break;
   2254 
   2255             case Iop_CmpNE32:
   2256                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
   2257                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2258                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2259                   break;
   2260                }
   2261                /* CmpNE32(1Uto32(b), 0) ==> b */
   2262                if (isZeroU32(e->Iex.Binop.arg2)) {
   2263                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
   2264                   if (a1 && a1->tag == Iex_Unop
   2265                          && a1->Iex.Unop.op == Iop_1Uto32) {
   2266                      e2 = a1->Iex.Unop.arg;
   2267                      break;
   2268                   }
   2269                }
   2270                break;
   2271 
   2272             case Iop_CmpEQ32:
   2273             case Iop_CmpEQ64:
   2274             case Iop_CmpEQ8x8:
   2275             case Iop_CmpEQ8x16:
   2276             case Iop_CmpEQ16x8:
   2277             case Iop_CmpEQ32x4:
   2278                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2279                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2280                   break;
   2281                }
   2282                break;
   2283 
   2284             default:
   2285                break;
   2286          }
   2287       }
   2288       break;
   2289 
   2290    case Iex_ITE:
   2291       /* ITE */
   2292       /* is the discriminant a constant? */
   2293       if (e->Iex.ITE.cond->tag == Iex_Const) {
   2294          /* assured us by the IR type rules */
   2295          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
   2296          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
   2297                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
   2298       }
   2299       else
   2300       /* are the arms identical? (pretty weedy test) */
   2301       if (sameIRExprs(env, e->Iex.ITE.iftrue,
   2302                            e->Iex.ITE.iffalse)) {
   2303          e2 = e->Iex.ITE.iffalse;
   2304       }
   2305       break;
   2306 
   2307    default:
   2308       /* not considered */
   2309       break;
   2310    }
   2311 
   2312    /* Show cases where we've found but not folded 'op(t,t)'.  Be
   2313       careful not to call sameIRExprs with values of different types,
   2314       though, else it will assert (and so it should!).  We can't
   2315       conveniently call typeOfIRExpr on the two args without a whole
   2316       bunch of extra plumbing to pass in a type env, so just use a
   2317       hacky test to check the arguments are not anything that might
   2318       sameIRExprs to assert.  This is only OK because this kludge is
   2319       only used for debug printing, not for "real" operation.  For
   2320       "real" operation (ie, all other calls to sameIRExprs), it is
   2321       essential that the two args have the same type.
   2322 
   2323       The "right" solution is to plumb the containing block's
   2324       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
   2325       that's a bunch of extra parameter passing which will just slow
   2326       down the normal case, for no purpose. */
   2327    if (vex_control.iropt_verbosity > 0
   2328        && e == e2
   2329        && e->tag == Iex_Binop
   2330        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
   2331                                                     e->Iex.Binop.arg2)
   2332        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2333       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
   2334       ppIRExpr(e);
   2335       vex_printf("\n");
   2336    }
   2337 
   2338    /* Show the overall results of folding. */
   2339    if (DEBUG_IROPT && e2 != e) {
   2340       vex_printf("FOLD: ");
   2341       ppIRExpr(e); vex_printf("  ->  ");
   2342       ppIRExpr(e2); vex_printf("\n");
   2343    }
   2344 
   2345    return e2;
   2346 
   2347  unhandled:
   2348 #  if 0
   2349    vex_printf("\n\n");
   2350    ppIRExpr(e);
   2351    vpanic("fold_Expr: no rule for the above");
   2352 #  else
   2353    if (vex_control.iropt_verbosity > 0) {
   2354       vex_printf("vex iropt: fold_Expr: no const rule for: ");
   2355       ppIRExpr(e);
   2356       vex_printf("\n");
   2357    }
   2358    return e2;
   2359 #  endif
   2360 }
   2361 
   2362 
   2363 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   2364    1-level due to previous flattening pass. */
   2365 
   2366 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   2367 {
   2368    switch (ex->tag) {
   2369       case Iex_RdTmp:
   2370          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   2371             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
   2372             if (rhs->tag == Iex_RdTmp)
   2373                return rhs;
   2374             if (rhs->tag == Iex_Const
   2375                 && rhs->Iex.Const.con->tag != Ico_F64i)
   2376                return rhs;
   2377          }
   2378          /* not bound in env */
   2379          return ex;
   2380 
   2381       case Iex_Const:
   2382       case Iex_Get:
   2383          return ex;
   2384 
   2385       case Iex_GetI:
   2386          vassert(isIRAtom(ex->Iex.GetI.ix));
   2387          return IRExpr_GetI(
   2388             ex->Iex.GetI.descr,
   2389             subst_Expr(env, ex->Iex.GetI.ix),
   2390             ex->Iex.GetI.bias
   2391          );
   2392 
   2393       case Iex_Qop: {
   2394          IRQop* qop = ex->Iex.Qop.details;
   2395          vassert(isIRAtom(qop->arg1));
   2396          vassert(isIRAtom(qop->arg2));
   2397          vassert(isIRAtom(qop->arg3));
   2398          vassert(isIRAtom(qop->arg4));
   2399          return IRExpr_Qop(
   2400                    qop->op,
   2401                    subst_Expr(env, qop->arg1),
   2402                    subst_Expr(env, qop->arg2),
   2403                    subst_Expr(env, qop->arg3),
   2404                    subst_Expr(env, qop->arg4)
   2405                 );
   2406       }
   2407 
   2408       case Iex_Triop: {
   2409          IRTriop* triop = ex->Iex.Triop.details;
   2410          vassert(isIRAtom(triop->arg1));
   2411          vassert(isIRAtom(triop->arg2));
   2412          vassert(isIRAtom(triop->arg3));
   2413          return IRExpr_Triop(
   2414                    triop->op,
   2415                    subst_Expr(env, triop->arg1),
   2416                    subst_Expr(env, triop->arg2),
   2417                    subst_Expr(env, triop->arg3)
   2418                 );
   2419       }
   2420 
   2421       case Iex_Binop:
   2422          vassert(isIRAtom(ex->Iex.Binop.arg1));
   2423          vassert(isIRAtom(ex->Iex.Binop.arg2));
   2424          return IRExpr_Binop(
   2425                    ex->Iex.Binop.op,
   2426                    subst_Expr(env, ex->Iex.Binop.arg1),
   2427                    subst_Expr(env, ex->Iex.Binop.arg2)
   2428                 );
   2429 
   2430       case Iex_Unop:
   2431          vassert(isIRAtom(ex->Iex.Unop.arg));
   2432          return IRExpr_Unop(
   2433                    ex->Iex.Unop.op,
   2434                    subst_Expr(env, ex->Iex.Unop.arg)
   2435                 );
   2436 
   2437       case Iex_Load:
   2438          vassert(isIRAtom(ex->Iex.Load.addr));
   2439          return IRExpr_Load(
   2440                    ex->Iex.Load.end,
   2441                    ex->Iex.Load.ty,
   2442                    subst_Expr(env, ex->Iex.Load.addr)
   2443                 );
   2444 
   2445       case Iex_CCall: {
   2446          Int      i;
   2447          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   2448          for (i = 0; args2[i]; i++) {
   2449             vassert(isIRAtom(args2[i]));
   2450             args2[i] = subst_Expr(env, args2[i]);
   2451          }
   2452          return IRExpr_CCall(
   2453                    ex->Iex.CCall.cee,
   2454                    ex->Iex.CCall.retty,
   2455                    args2
   2456                 );
   2457       }
   2458 
   2459       case Iex_ITE:
   2460          vassert(isIRAtom(ex->Iex.ITE.cond));
   2461          vassert(isIRAtom(ex->Iex.ITE.iftrue));
   2462          vassert(isIRAtom(ex->Iex.ITE.iffalse));
   2463          return IRExpr_ITE(
   2464                    subst_Expr(env, ex->Iex.ITE.cond),
   2465                    subst_Expr(env, ex->Iex.ITE.iftrue),
   2466                    subst_Expr(env, ex->Iex.ITE.iffalse)
   2467                 );
   2468 
   2469       default:
   2470          vex_printf("\n\n"); ppIRExpr(ex);
   2471          vpanic("subst_Expr");
   2472 
   2473    }
   2474 }
   2475 
   2476 
/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.

   Every expression inside 'st' is run through
   fold_Expr(env, subst_Expr(env, ..)), so constants and tmp copies
   recorded in 'env' are propagated in and then folded.  Guarded
   statements (StoreG, LoadG, Exit) get extra treatment when their
   guard folds to a constant -- see the per-case comments below. */
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         /* Substitute/fold both address expressions; 'len' is a
            plain integer and passes through unchanged. */
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         /* Indexed Put: substitute/fold both the index and the data. */
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         /* Guarded store: if the guard folds to 0:I1 the store never
            happens (becomes a NoOp); if it folds to 1:I1 it becomes an
            unconditional Store. */
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         /* Substitute/fold all operands; the Hi halves are optional
            (NULL for single-width CAS) and must be preserved as NULL. */
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* Load-linked/store-conditional: 'storedata' is NULL for the
            LL form and must stay NULL. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         /* Dirty helper call: clone the descriptor, then substitute/
            fold mAddr (if memory effects are declared), the guard, and
            every ordinary argument.  VECRET/BBPTR marker args are left
            untouched. */
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         /* Nothing to substitute in these; rebuild as-is. */
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
   2690 
   2691 
/* Perform constant/copy propagation and folding over the flat IRSB
   'in', producing a new IRSB.  Each statement is run through
   subst_and_fold_Stmt with a forward-travelling substitution 'env'
   (IRTemp -> IRExpr*).  WrTmps whose RHS is a tmp or a (non-F64i)
   constant are recorded in 'env' and dropped from the output.
   Unconditional LoadGs (guard folded to 1:I1) are fixed up in a
   second pass at the end. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env with which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form guarantees each tmp is assigned only once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
               /* NB: if the fixup table is full, the LoadG is simply
                  emitted as-is (still correct, just unoptimized). */
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* The block's 'next' expression is substituted too (it is an atom,
      so no folding is needed). */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
   2851 
   2852 
   2853 /*---------------------------------------------------------------*/
   2854 /*--- Dead code (t = E) removal                               ---*/
   2855 /*---------------------------------------------------------------*/
   2856 
   2857 /* As a side effect, also removes all code following an unconditional
   2858    side exit. */
   2859 
   2860 /* The type of the HashHW map is: a map from IRTemp to nothing
   2861    -- really just representing a set of IRTemps.
   2862 */
   2863 
/* Mark temporary 'tmp' as used, in 'set' -- a Bool array indexed by
   IRTemp number, acting as a set of IRTemps. */
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}
   2869 
   2870 static void addUses_Expr ( Bool* set, IRExpr* e )
   2871 {
   2872    Int i;
   2873    switch (e->tag) {
   2874       case Iex_GetI:
   2875          addUses_Expr(set, e->Iex.GetI.ix);
   2876          return;
   2877       case Iex_ITE:
   2878          addUses_Expr(set, e->Iex.ITE.cond);
   2879          addUses_Expr(set, e->Iex.ITE.iftrue);
   2880          addUses_Expr(set, e->Iex.ITE.iffalse);
   2881          return;
   2882       case Iex_CCall:
   2883          for (i = 0; e->Iex.CCall.args[i]; i++)
   2884             addUses_Expr(set, e->Iex.CCall.args[i]);
   2885          return;
   2886       case Iex_Load:
   2887          addUses_Expr(set, e->Iex.Load.addr);
   2888          return;
   2889       case Iex_Qop:
   2890          addUses_Expr(set, e->Iex.Qop.details->arg1);
   2891          addUses_Expr(set, e->Iex.Qop.details->arg2);
   2892          addUses_Expr(set, e->Iex.Qop.details->arg3);
   2893          addUses_Expr(set, e->Iex.Qop.details->arg4);
   2894          return;
   2895       case Iex_Triop:
   2896          addUses_Expr(set, e->Iex.Triop.details->arg1);
   2897          addUses_Expr(set, e->Iex.Triop.details->arg2);
   2898          addUses_Expr(set, e->Iex.Triop.details->arg3);
   2899          return;
   2900       case Iex_Binop:
   2901          addUses_Expr(set, e->Iex.Binop.arg1);
   2902          addUses_Expr(set, e->Iex.Binop.arg2);
   2903          return;
   2904       case Iex_Unop:
   2905          addUses_Expr(set, e->Iex.Unop.arg);
   2906          return;
   2907       case Iex_RdTmp:
   2908          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2909          return;
   2910       case Iex_Const:
   2911       case Iex_Get:
   2912          return;
   2913       default:
   2914          vex_printf("\n");
   2915          ppIRExpr(e);
   2916          vpanic("addUses_Expr");
   2917    }
   2918 }
   2919 
/* Record in 'set' every IRTemp read by statement 'st': dispatch on
   the statement kind and walk each contained expression with
   addUses_Expr.  Temps merely written by the statement (e.g. the
   LHS of Ist_WrTmp) are not recorded. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.details->ix);
         addUses_Expr(set, st->Ist.PutI.details->data);
         return;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         return;
      }
      case Ist_CAS:
         /* The Hi halves are optional (NULL when absent). */
         cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata is NULL for the load-linked case. */
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         /* mAddr is only meaningful when a memory effect is declared. */
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR markers are not ordinary expressions;
               skip them. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               addUses_Expr(set, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* contain no expressions, hence no temp uses */
         return;
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
   2997 
   2998 
   2999 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   3000 static Bool isZeroU1 ( IRExpr* e )
   3001 {
   3002    return toBool( e->tag == Iex_Const
   3003                   && e->Iex.Const.con->tag == Ico_U1
   3004                   && e->Iex.Const.con->Ico.U1 == False );
   3005 }
   3006 
   3007 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   3008 static Bool isOneU1 ( IRExpr* e )
   3009 {
   3010    return toBool( e->tag == Iex_Const
   3011                   && e->Iex.Const.con->tag == Ico_U1
   3012                   && e->Iex.Const.con->Ico.U1 == True );
   3013 }
   3014 
   3015 
   3016 /* Note, this destructively modifies the given IRSB. */
   3017 
   3018 /* Scan backwards through statements, carrying a set of IRTemps which
   3019    are known to be used after the current point.  On encountering 't =
   3020    E', delete the binding if it is not used.  Otherwise, add any temp
   3021    uses to the set and keep on moving backwards.
   3022 
   3023    As an enhancement, the first (backwards) pass searches for IR exits
   3024    with always-taken conditions and notes the location of the earliest
   3025    one in the block.  If any such are found, a second pass copies the
   3026    exit destination and jump kind to the bb-end.  Then, the exit and
   3027    all statements following it are turned into no-ops.
   3028 */
   3029 
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff temp t is used at or after the current scan
      point (or by bb->next). */
   Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; since we scan
         backwards, the earliest such exit in the block is the one
         left in i_unconditional_exit */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Copy the exit's destination, jump kind and IP offset to the
         block end ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      /* ... and no-op the exit itself plus everything after it. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
   3097 
   3098 
   3099 /*---------------------------------------------------------------*/
   3100 /*--- Specialisation of helper function calls, in             ---*/
   3101 /*--- collaboration with the front end                        ---*/
   3102 /*---------------------------------------------------------------*/
   3103 
/* Offer each clean-helper call (an Iex_CCall bound to a temp) to the
   front end's specialiser 'specHelper', which may return a cheaper
   equivalent expression or NULL to decline.  Works backwards; the
   specialiser is also given &bb->stmts[0] and the current index so
   it can inspect preceding statements.  If anything was replaced the
   block is re-flattened before being returned. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      /* Only "t = CCall(...)" statements are candidates. */
      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   if (any)
      bb = flatten_BB(bb);
   return bb;
}
   3147 
   3148 
   3149 /*---------------------------------------------------------------*/
   3150 /*--- Determination of guest state aliasing relationships     ---*/
   3151 /*---------------------------------------------------------------*/
   3152 
   3153 /* These are helper functions for CSE and GetI/PutI transformations.
   3154 
   3155    Determine, to the extent possible, the relationship between two
   3156    guest state accesses.  The possible outcomes are:
   3157 
   3158    * Exact alias.  These two accesses denote precisely the same
   3159      piece of the guest state.
   3160 
   3161    * Definitely no alias.  These two accesses are guaranteed not to
   3162      overlap any part of the guest state.
   3163 
   3164    * Unknown -- if neither of the above can be established.
   3165 
   3166    If in doubt, return Unknown.  */
   3167 
/* Outcome of a guest-state aliasing query; see the discussion
   above.  UnknownAlias is the safe fallback. */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
   3171 
   3172 
   3173 /* Produces the alias relation between an indexed guest
   3174    state access and a non-indexed access. */
   3175 
   3176 static
   3177 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   3178                                     Int offset2, IRType ty2 )
   3179 {
   3180    UInt minoff1, maxoff1, minoff2, maxoff2;
   3181 
   3182    getArrayBounds( descr1, &minoff1, &maxoff1 );
   3183    minoff2 = offset2;
   3184    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   3185 
   3186    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   3187       return NoAlias;
   3188 
   3189    /* Could probably do better here if required.  For the moment
   3190       however just claim not to know anything more. */
   3191    return UnknownAlias;
   3192 }
   3193 
   3194 
   3195 /* Produces the alias relation between two indexed guest state
   3196    accesses. */
   3197 
/* Alias relation between two indexed guest state accesses
   (descr,ix,bias triples).  Returns ExactAlias only when both the
   descriptors and the (atomic) index expressions are identical and
   the biases are congruent modulo the array size. */
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases into non-negative territory in lockstep, so
      the subsequent % produces a true modulus.  The iteration cap is
      just a sanity limit. */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
   3260 
   3261 
   3262 /*---------------------------------------------------------------*/
   3263 /*--- Common Subexpression Elimination                        ---*/
   3264 /*---------------------------------------------------------------*/
   3265 
   3266 /* Expensive in time and space. */
   3267 
   3268 /* Uses two environments:
   3269    a IRTemp -> IRTemp mapping
   3270    a mapping from AvailExpr* to IRTemp
   3271 */
   3272 
/* An IR atom: either a temporary (tag TCt, u.tmp) or a constant
   (tag TCc, u.con). */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
   3279 
   3280 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
   3281 {
   3282    if (tc1->tag != tc2->tag)
   3283       return False;
   3284    switch (tc1->tag) {
   3285       case TCc:
   3286          return eqIRConst(tc1->u.con, tc2->u.con);
   3287       case TCt:
   3288          return tc1->u.tmp == tc2->u.tmp;
   3289       default:
   3290          vpanic("eqTmpOrConst");
   3291    }
   3292 }
   3293 
   3294 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
   3295 {
   3296    Bool eq = cee1->addr == cee2->addr;
   3297    if (eq) {
   3298       vassert(cee1->regparms == cee2->regparms);
   3299       vassert(cee1->mcx_mask == cee2->mcx_mask);
   3300       /* Names should be the same too, but we don't bother to
   3301          check. */
   3302    }
   3303    return eq;
   3304 }
   3305 
   3306 /* Convert a NULL terminated IRExpr* vector to an array of
   3307    TmpOrConsts, and a length. */
   3308 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
   3309                                        /*OUT*/Int* nOuts,
   3310                                        IRExpr** ins )
   3311 {
   3312    Int i, n;
   3313    /* We have to make two passes, one to count, one to copy. */
   3314    for (n = 0; ins[n]; n++)
   3315       ;
   3316    *outs  = LibVEX_Alloc(n * sizeof(TmpOrConst));
   3317    *nOuts = n;
   3318    /* and now copy .. */
   3319    for (i = 0; i < n; i++) {
   3320       IRExpr*       arg = ins[i];
   3321       TmpOrConst* dst = &(*outs)[i];
   3322       if (arg->tag == Iex_RdTmp) {
   3323          dst->tag   = TCt;
   3324          dst->u.tmp = arg->Iex.RdTmp.tmp;
   3325       }
   3326       else if (arg->tag == Iex_Const) {
   3327          dst->tag   = TCc;
   3328          dst->u.con = arg->Iex.Const.con;
   3329       }
   3330       else {
   3331          /* Failure of this is serious; it means that the presented arg
   3332             isn't an IR atom, as it should be. */
   3333          vpanic("irExprVec_to_TmpOrConsts");
   3334       }
   3335    }
   3336 }
   3337 
/* Shape-classified representation of an expression that the CSE
   pass tracks.  The tag names encode the operand kinds: t = tmp,
   c = const; e.g. Btc is binop(tmp, const), Itct is
   ITE(tmp, const, tmp).  Constants are stored by value (not by
   pointer) so that AvailExprs can be compared structurally. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
      } u;
   }
   AvailExpr;
   3411 
/* Structural equality on AvailExprs.  Different tags never compare
   equal, so e.g. Btc and Bct forms of the same value are treated as
   distinct.  (The LIKELY reflects that most pairs compared differ
   in tag.) */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Same callee, same arity, and pairwise-equal args. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      default: vpanic("eq_AvailExpr");
   }
}
   3478 
/* Reconstruct a (flat) IRExpr from an AvailExpr.  Constants stored
   by value in the AvailExpr are re-boxed into freshly allocated
   IRConsts. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc(sizeof(IRConst));
         con1 = LibVEX_Alloc(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector from the
            TmpOrConst array. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            TmpOrConst* tc = &ae->u.CCall.args[i];
            if (tc->tag == TCc) {
               vec[i] = IRExpr_Const(tc->u.con);
            }
            else if (tc->tag == TCt) {
               vec[i] = IRExpr_RdTmp(tc->u.tmp);
            }
            else vpanic("availExpr_to_IRExpr:CCall-arg");
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
   3555 
   3556 inline
   3557 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   3558 {
   3559    HWord res;
   3560    /* env :: IRTemp -> IRTemp */
   3561    if (lookupHHW( env, &res, (HWord)tmp ))
   3562       return (IRTemp)res;
   3563    else
   3564       return tmp;
   3565 }
   3566 
   3567 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   3568 {
   3569    /* env :: IRTemp -> IRTemp */
   3570    switch (ae->tag) {
   3571       case Ut:
   3572          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   3573          break;
   3574       case Btt:
   3575          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   3576          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   3577          break;
   3578       case Btc:
   3579          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   3580          break;
   3581       case Bct:
   3582          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   3583          break;
   3584       case Cf64i:
   3585          break;
   3586       case Ittt:
   3587          ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
   3588          ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
   3589          ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
   3590          break;
   3591       case Ittc:
   3592          ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
   3593          ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
   3594          break;
   3595       case Itct:
   3596          ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
   3597          ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
   3598          break;
   3599       case Itcc:
   3600          ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
   3601          break;
   3602       case GetIt:
   3603          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   3604          break;
   3605       case CCall: {
   3606          Int i, n = ae->u.CCall.nArgs;;
   3607          for (i = 0; i < n; i++) {
   3608             TmpOrConst* tc = &ae->u.CCall.args[i];
   3609             if (tc->tag == TCt) {
   3610                tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   3611             }
   3612          }
   3613          break;
   3614       }
   3615       default:
   3616          vpanic("subst_AvailExpr");
   3617    }
   3618 }
   3619 
/* Classify a flat IRExpr into one of the AvailExpr shapes tracked
   by the CSE pass, copying the relevant operands out of it.
   Returns NULL if 'e' has no tracked shape (in which case it is
   simply not a CSE candidate). */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         /* only unop(tmp) is tracked */
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         /* tracked shapes: binop(tmp,tmp), binop(tmp,const),
            binop(const,tmp) */
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* only F64i constants are tracked */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* cond must be a tmp; the arms may each be tmp or const,
            giving the four shapes Ittt/Itct/Ittc/Itcc */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         /* only GetI with a tmp index is tracked */
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      default:
         break;
   }

   return NULL;
}
   3745 
   3746 
   3747 /* The BB is modified in-place.  Returns True if any changes were
   3748    made. */
   3749 
static Bool do_cse_BB ( IRSB* bb )
{
   /* Common subexpression elimination over one superblock: a single
      forward pass which replaces "t = E" by "t = q" whenever an
      identical available expression E has already been computed into
      some earlier temp q. */
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   /* tenv: temp-to-temp renamings established by earlier CSE hits.
      aenv: the currently-available expressions, each mapped to the
      temp that holds its value. */
   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored directly in the HWord-sized hash slots. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI kind.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      /* Linear scan of aenv, dropping any GetI binding this stmt
         might invalidate. */
      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            if (ae->tag != GetIt)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               /* Stores/dirty calls etc: dump unconditionally. */
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
   3900 
   3901 
   3902 /*---------------------------------------------------------------*/
   3903 /*--- Add32/Sub32 chain collapsing                            ---*/
   3904 /*---------------------------------------------------------------*/
   3905 
   3906 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   3907 
   3908 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   3909    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   3910    root node is Add32, not Sub32. */
   3911 
   3912 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   3913 {
   3914    if (e->tag != Iex_Binop)
   3915       return False;
   3916    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   3917       return False;
   3918    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   3919       return False;
   3920    if (e->Iex.Binop.arg2->tag != Iex_Const)
   3921       return False;
   3922    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3923    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   3924    if (e->Iex.Binop.op == Iop_Sub32)
   3925       *i32 = -*i32;
   3926    return True;
   3927 }
   3928 
   3929 
   3930 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   3931    other tmp2.  Scan backwards from the specified start point -- an
   3932    optimisation. */
   3933 
   3934 static Bool collapseChain ( IRSB* bb, Int startHere,
   3935                             IRTemp tmp,
   3936                             IRTemp* tmp2, Int* i32 )
   3937 {
   3938    Int     j, ii;
   3939    IRTemp  vv;
   3940    IRStmt* st;
   3941    IRExpr* e;
   3942 
   3943    /* the (var, con) pair contain the current 'representation' for
   3944       'tmp'.  We start with 'tmp + 0'.  */
   3945    IRTemp var = tmp;
   3946    Int    con = 0;
   3947 
   3948    /* Scan backwards to see if tmp can be replaced by some other tmp
   3949      +/- a constant. */
   3950    for (j = startHere; j >= 0; j--) {
   3951       st = bb->stmts[j];
   3952       if (st->tag != Ist_WrTmp)
   3953          continue;
   3954       if (st->Ist.WrTmp.tmp != var)
   3955          continue;
   3956       e = st->Ist.WrTmp.data;
   3957       if (!isAdd32OrSub32(e, &vv, &ii))
   3958          break;
   3959       var = vv;
   3960       con += ii;
   3961    }
   3962    if (j == -1)
   3963       /* no earlier binding for var .. ill-formed IR */
   3964       vpanic("collapseChain");
   3965 
   3966    /* so, did we find anything interesting? */
   3967    if (var == tmp)
   3968       return False; /* no .. */
   3969 
   3970    *tmp2 = var;
   3971    *i32  = con;
   3972    return True;
   3973 }
   3974 
   3975 
   3976 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   3977 
   3978 static void collapse_AddSub_chains_BB ( IRSB* bb )
   3979 {
   3980    IRStmt *st;
   3981    IRTemp var, var2;
   3982    Int    i, con, con2;
   3983 
   3984    for (i = bb->stmts_used-1; i >= 0; i--) {
   3985       st = bb->stmts[i];
   3986       if (st->tag == Ist_NoOp)
   3987          continue;
   3988 
   3989       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   3990 
   3991       if (st->tag == Ist_WrTmp
   3992           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   3993 
   3994          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   3995             Find out if var can be expressed as var2 + con2. */
   3996          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   3997             if (DEBUG_IROPT) {
   3998                vex_printf("replacing1 ");
   3999                ppIRStmt(st);
   4000                vex_printf(" with ");
   4001             }
   4002             con2 += con;
   4003             bb->stmts[i]
   4004                = IRStmt_WrTmp(
   4005                     st->Ist.WrTmp.tmp,
   4006                     (con2 >= 0)
   4007                       ? IRExpr_Binop(Iop_Add32,
   4008                                      IRExpr_RdTmp(var2),
   4009                                      IRExpr_Const(IRConst_U32(con2)))
   4010                       : IRExpr_Binop(Iop_Sub32,
   4011                                      IRExpr_RdTmp(var2),
   4012                                      IRExpr_Const(IRConst_U32(-con2)))
   4013                  );
   4014             if (DEBUG_IROPT) {
   4015                ppIRStmt(bb->stmts[i]);
   4016                vex_printf("\n");
   4017             }
   4018          }
   4019 
   4020          continue;
   4021       }
   4022 
   4023       /* Try to collapse 't1 = GetI[t2, con]'. */
   4024 
   4025       if (st->tag == Ist_WrTmp
   4026           && st->Ist.WrTmp.data->tag == Iex_GetI
   4027           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   4028           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   4029                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   4030          if (DEBUG_IROPT) {
   4031             vex_printf("replacing3 ");
   4032             ppIRStmt(st);
   4033             vex_printf(" with ");
   4034          }
   4035          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   4036          bb->stmts[i]
   4037             = IRStmt_WrTmp(
   4038                  st->Ist.WrTmp.tmp,
   4039                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   4040                              IRExpr_RdTmp(var2),
   4041                              con2));
   4042          if (DEBUG_IROPT) {
   4043             ppIRStmt(bb->stmts[i]);
   4044             vex_printf("\n");
   4045          }
   4046          continue;
   4047       }
   4048 
   4049       /* Perhaps st is PutI[t, con] ? */
   4050       IRPutI *puti = st->Ist.PutI.details;
   4051       if (st->tag == Ist_PutI
   4052           && puti->ix->tag == Iex_RdTmp
   4053           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
   4054                                &var2, &con2)) {
   4055          if (DEBUG_IROPT) {
   4056             vex_printf("replacing2 ");
   4057             ppIRStmt(st);
   4058             vex_printf(" with ");
   4059          }
   4060          con2 += puti->bias;
   4061          bb->stmts[i]
   4062             = IRStmt_PutI(mkIRPutI(puti->descr,
   4063                                    IRExpr_RdTmp(var2),
   4064                                    con2,
   4065                                    puti->data));
   4066          if (DEBUG_IROPT) {
   4067             ppIRStmt(bb->stmts[i]);
   4068             vex_printf("\n");
   4069          }
   4070          continue;
   4071       }
   4072 
   4073    } /* for */
   4074 }
   4075 
   4076 
   4077 /*---------------------------------------------------------------*/
   4078 /*--- PutI/GetI transformations                               ---*/
   4079 /*---------------------------------------------------------------*/
   4080 
   4081 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   4082    the given starting point to find, if any, a PutI which writes
   4083    exactly the same piece of guest state, and so return the expression
   4084    that the PutI writes.  This is the core of PutI-GetI forwarding. */
   4085 
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   /* Core of PutI->GetI forwarding: scan backwards from 'startHere'
      looking for a PutI which provably writes exactly the guest
      state slice (descrG, ixG, biasG) that the GetI reads.  Returns
      the PutI's data expression on success, NULL if no provably-safe
      binding exists. */
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   /* Debug printing, normally disabled. */
   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
   4177 
   4178 
   4179 
   4180 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   4181    that it writes exactly the same piece of guest state) ?  Safe
   4182    answer: False. */
   4183 
   4184 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   4185 {
   4186    vassert(pi->tag == Ist_PutI);
   4187    if (s2->tag != Ist_PutI)
   4188       return False;
   4189 
   4190    IRPutI *p1 = pi->Ist.PutI.details;
   4191    IRPutI *p2 = s2->Ist.PutI.details;
   4192 
   4193    return toBool(
   4194           getAliasingRelation_II(
   4195              p1->descr, p1->ix, p1->bias,
   4196              p2->descr, p2->ix, p2->bias
   4197           )
   4198           == ExactAlias
   4199           );
   4200 }
   4201 
   4202 
   4203 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   4204    overlap it?  Safe answer: True.  Note, we could do a lot better
   4205    than this if needed. */
   4206 
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   /* Could the guest-state access made by s2 overlap the region
      written by the PutI 'pi'?  Safe (conservative) answer: True. */
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are filled in here but never read
      in this function -- looks like a leftover; verify before
      removing. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* touch no guest state at all */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the RHS can touch guest state. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* memory, not guest state: cannot overlap a PutI */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
   4306 
   4307 
   4308 
   4309 /* ---------- PutI/GetI transformations main functions --------- */
   4310 
   4311 /* Remove redundant GetIs, to the extent that they can be detected.
   4312    bb is modified in-place. */
   4313 
   4314 static
   4315 void do_redundant_GetI_elimination ( IRSB* bb )
   4316 {
   4317    Int     i;
   4318    IRStmt* st;
   4319 
   4320    for (i = bb->stmts_used-1; i >= 0; i--) {
   4321       st = bb->stmts[i];
   4322       if (st->tag == Ist_NoOp)
   4323          continue;
   4324 
   4325       if (st->tag == Ist_WrTmp
   4326           && st->Ist.WrTmp.data->tag == Iex_GetI
   4327           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   4328          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   4329          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   4330          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   4331          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   4332          if (replacement
   4333              && isIRAtom(replacement)
   4334              /* Make sure we're doing a type-safe transformation! */
   4335              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   4336             if (DEBUG_IROPT) {
   4337                vex_printf("rGI:  ");
   4338                ppIRExpr(st->Ist.WrTmp.data);
   4339                vex_printf(" -> ");
   4340                ppIRExpr(replacement);
   4341                vex_printf("\n");
   4342             }
   4343             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   4344          }
   4345       }
   4346    }
   4347 
   4348 }
   4349 
   4350 
   4351 /* Remove redundant PutIs, to the extent which they can be detected.
   4352    bb is modified in-place. */
   4353 
   4354 static
   4355 void do_redundant_PutI_elimination ( IRSB* bb )
   4356 {
   4357    Int    i, j;
   4358    Bool   delete;
   4359    IRStmt *st, *stj;
   4360 
   4361    vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);
   4362 
   4363    for (i = 0; i < bb->stmts_used; i++) {
   4364       st = bb->stmts[i];
   4365       if (st->tag != Ist_PutI)
   4366          continue;
   4367       /* Ok, search forwards from here to see if we can find another
   4368          PutI which makes this one redundant, and dodging various
   4369          hazards.  Search forwards:
   4370          * If conditional exit, give up (because anything after that
   4371            does not postdominate this put).
   4372          * If a Get which might overlap, give up (because this PutI
   4373            not necessarily dead).
   4374          * If a Put which is identical, stop with success.
   4375          * If a Put which might overlap, but is not identical, give up.
   4376          * If a dirty helper call which might write guest state, give up.
   4377          * If a Put which definitely doesn't overlap, or any other
   4378            kind of stmt, continue.
   4379       */
   4380       delete = False;
   4381       for (j = i+1; j < bb->stmts_used; j++) {
   4382          stj = bb->stmts[j];
   4383          if (stj->tag == Ist_NoOp)
   4384             continue;
   4385          if (identicalPutIs(st, stj)) {
   4386             /* success! */
   4387             delete = True;
   4388             break;
   4389          }
   4390          if (stj->tag == Ist_Exit)
   4391             /* give up */
   4392             break;
   4393          if (st->tag == Ist_Dirty)
   4394             /* give up; could do better here */
   4395             break;
   4396          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   4397             /* give up */
   4398            break;
   4399       }
   4400 
   4401       if (delete) {
   4402          if (DEBUG_IROPT) {
   4403             vex_printf("rPI:  ");
   4404             ppIRStmt(st);
   4405             vex_printf("\n");
   4406          }
   4407          bb->stmts[i] = IRStmt_NoOp();
   4408       }
   4409 
   4410    }
   4411 }
   4412 
   4413 
   4414 /*---------------------------------------------------------------*/
   4415 /*--- Loop unrolling                                          ---*/
   4416 /*---------------------------------------------------------------*/
   4417 
   4418 /* Adjust all tmp values (names) in e by delta.  e is destructively
   4419    modified. */
   4420 
   4421 static void deltaIRExpr ( IRExpr* e, Int delta )
   4422 {
   4423    Int i;
   4424    switch (e->tag) {
   4425       case Iex_RdTmp:
   4426          e->Iex.RdTmp.tmp += delta;
   4427          break;
   4428       case Iex_Get:
   4429       case Iex_Const:
   4430          break;
   4431       case Iex_GetI:
   4432          deltaIRExpr(e->Iex.GetI.ix, delta);
   4433          break;
   4434       case Iex_Qop:
   4435          deltaIRExpr(e->Iex.Qop.details->arg1, delta);
   4436          deltaIRExpr(e->Iex.Qop.details->arg2, delta);
   4437          deltaIRExpr(e->Iex.Qop.details->arg3, delta);
   4438          deltaIRExpr(e->Iex.Qop.details->arg4, delta);
   4439          break;
   4440       case Iex_Triop:
   4441          deltaIRExpr(e->Iex.Triop.details->arg1, delta);
   4442          deltaIRExpr(e->Iex.Triop.details->arg2, delta);
   4443          deltaIRExpr(e->Iex.Triop.details->arg3, delta);
   4444          break;
   4445       case Iex_Binop:
   4446          deltaIRExpr(e->Iex.Binop.arg1, delta);
   4447          deltaIRExpr(e->Iex.Binop.arg2, delta);
   4448          break;
   4449       case Iex_Unop:
   4450          deltaIRExpr(e->Iex.Unop.arg, delta);
   4451          break;
   4452       case Iex_Load:
   4453          deltaIRExpr(e->Iex.Load.addr, delta);
   4454          break;
   4455       case Iex_CCall:
   4456          for (i = 0; e->Iex.CCall.args[i]; i++)
   4457             deltaIRExpr(e->Iex.CCall.args[i], delta);
   4458          break;
   4459       case Iex_ITE:
   4460          deltaIRExpr(e->Iex.ITE.cond, delta);
   4461          deltaIRExpr(e->Iex.ITE.iftrue, delta);
   4462          deltaIRExpr(e->Iex.ITE.iffalse, delta);
   4463          break;
   4464       default:
   4465          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4466          vpanic("deltaIRExpr");
   4467    }
   4468 }
   4469 
   4470 /* Adjust all tmp values (names) in st by delta.  st is destructively
   4471    modified. */
   4472 
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   /* Shift every temp name mentioned anywhere in st by 'delta', both
      temps that are written (WrTmp.tmp, LoadG.dst, CAS old*, LLSC
      result, Dirty.tmp) and temps read via embedded expressions.
      st is destructively modified. */
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* mention no temps */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* both the defined temp and any temps in the RHS */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* lg->dst is a written temp, not an expression */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi/expdHi/dataHi are only present for double CAS */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR markers are not real expressions */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
   4555 
   4556 
   4557 /* If possible, return a loop-unrolled version of bb0.  The original
   4558    is changed.  If not possible, return NULL.  */
   4559 
   4560 /* The two schemas considered are:
   4561 
   4562      X: BODY; goto X
   4563 
   4564      which unrolls to (eg)  X: BODY;BODY; goto X
   4565 
   4566    and
   4567 
   4568        X: BODY; if (c) goto X; goto Y
   4569    which trivially transforms to
   4570        X: BODY; if (!c) goto Y; goto X;
   4571    so it falls in the scope of the first case.
   4572 
   4573    X and Y must be literal (guest) addresses.
   4574 */
   4575 
   4576 static Int calc_unroll_factor( IRSB* bb )
   4577 {
   4578    Int n_stmts, i;
   4579 
   4580    n_stmts = 0;
   4581    for (i = 0; i < bb->stmts_used; i++) {
   4582       if (bb->stmts[i]->tag != Ist_NoOp)
   4583          n_stmts++;
   4584    }
   4585 
   4586    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   4587       if (vex_control.iropt_verbosity > 0)
   4588          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   4589                     n_stmts, 8* n_stmts);
   4590       return 8;
   4591    }
   4592    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   4593       if (vex_control.iropt_verbosity > 0)
   4594          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   4595                     n_stmts, 4* n_stmts);
   4596       return 4;
   4597    }
   4598 
   4599    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   4600       if (vex_control.iropt_verbosity > 0)
   4601          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   4602                     n_stmts, 2* n_stmts);
   4603       return 2;
   4604    }
   4605 
   4606    if (vex_control.iropt_verbosity > 0)
   4607       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   4608 
   4609    return 1;
   4610 }
   4611 
   4612 
/* Try to unroll bb0, whose guest address is my_addr.  On success,
   returns a new (flattened) superblock; on failure returns NULL.
   Per the header comment above, bb0 itself may be changed en route. */
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Unrolling disabled altogether? */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-NULL statement. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   /* The side-exit target; for idiom #2 it must be the block itself. */
   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass of this loop doubles bb1: copy it (bb2), shift bb2's
      temps up past bb1's, and append bb2's stmts onto bb1. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      /* Duplicate bb1's type env so bb2's temps, once shifted by
         n_vars, are valid IRTemps of bb1. */
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%llx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
   4783 
   4784 
   4785 /*---------------------------------------------------------------*/
   4786 /*--- The tree builder                                        ---*/
   4787 /*---------------------------------------------------------------*/
   4788 
   4789 /* This isn't part of IR optimisation.  Really it's a pass done prior
   4790    to instruction selection, which improves the code that the
   4791    instruction selector can produce. */
   4792 
   4793 /* --- The 'tmp' environment is the central data structure here --- */
   4794 
   4795 /* The number of outstanding bindings we're prepared to track.
   4796    The number of times the env becomes full and we have to dump
   4797    the oldest binding (hence reducing code quality) falls very
   4798    rapidly as the env size increases.  8 gives reasonable performance
   4799    under most circumstances. */
   4800 #define A_NENV 10
   4801 
   4802 /* An interval. Used to record the bytes in the guest state accessed
   4803    by a Put[I] statement or by (one or more) Get[I] expression(s). In
   4804    case of several Get[I] expressions, the lower/upper bounds are recorded.
   4805    This is conservative but cheap.
   4806    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   4807    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   4808    offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;   /* False => interval is empty / not yet set */
      Int  low;       /* lowest guest-state byte offset (inclusive) */
      Int  high;      /* highest guest-state byte offset (inclusive) */
   }
   Interval;
   4816 
   4817 static inline Bool
   4818 intervals_overlap(Interval i1, Interval i2)
   4819 {
   4820    return (i1.low >= i2.low && i1.low <= i2.high) ||
   4821           (i2.low >= i1.low && i2.low <= i1.high);
   4822 }
   4823 
   4824 static inline void
   4825 update_interval(Interval *i, Int low, Int high)
   4826 {
   4827    vassert(low <= high);
   4828 
   4829    if (i->present) {
   4830       if (low  < i->low)  i->low  = low;
   4831       if (high > i->high) i->high = high;
   4832    } else {
   4833       i->present = True;
   4834       i->low  = low;
   4835       i->high = high;
   4836    }
   4837 }
   4838 
   4839 
   4840 /* bindee == NULL   ===  slot is not in use
   4841    bindee != NULL   ===  slot is in use
   4842 */
typedef
   struct {
      IRTemp  binder;   /* the temp this slot binds */
      IRExpr* bindee;   /* the bound expression, or NULL if slot unused */
      Bool    doesLoad; /* True iff BINDEE reads memory */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
   4852 
   4853 __attribute__((unused))
   4854 static void ppAEnv ( ATmpInfo* env )
   4855 {
   4856    Int i;
   4857    for (i = 0; i < A_NENV; i++) {
   4858       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   4859       if (env[i].bindee)
   4860          ppIRExpr(env[i].bindee);
   4861       else
   4862          vex_printf("(null)");
   4863       vex_printf("\n");
   4864    }
   4865 }
   4866 
   4867 /* --- Tree-traversal fns --- */
   4868 
   4869 /* Traverse an expr, and detect if any part of it reads memory or does
   4870    a Get.  Be careful ... this really controls how much the
   4871    tree-builder can reorder the code, so getting it right is critical.
   4872 */
   4873 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
   4874 {
   4875    Int i;
   4876    switch (e->tag) {
   4877       case Iex_CCall:
   4878          for (i = 0; e->Iex.CCall.args[i]; i++)
   4879             setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
   4880          return;
   4881       case Iex_ITE:
   4882          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
   4883          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
   4884          setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
   4885          return;
   4886       case Iex_Qop:
   4887          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
   4888          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
   4889          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
   4890          setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
   4891          return;
   4892       case Iex_Triop:
   4893          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
   4894          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
   4895          setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
   4896          return;
   4897       case Iex_Binop:
   4898          setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
   4899          setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
   4900          return;
   4901       case Iex_Unop:
   4902          setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
   4903          return;
   4904       case Iex_Load:
   4905          *doesLoad = True;
   4906          setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
   4907          return;
   4908       case Iex_Get: {
   4909          Int low = e->Iex.Get.offset;
   4910          Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
   4911          update_interval(getInterval, low, high);
   4912          return;
   4913       }
   4914       case Iex_GetI: {
   4915          IRRegArray *descr = e->Iex.GetI.descr;
   4916          Int size = sizeofIRType(descr->elemTy);
   4917          Int low  = descr->base;
   4918          Int high = low + descr->nElems * size - 1;
   4919          update_interval(getInterval, low, high);
   4920          setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
   4921          return;
   4922       }
   4923       case Iex_RdTmp:
   4924       case Iex_Const:
   4925          return;
   4926       default:
   4927          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4928          vpanic("setHints_Expr");
   4929    }
   4930 }
   4931 
   4932 
   4933 /* Add a binding to the front of the env and slide all the rest
   4934    backwards.  It should be the case that the last slot is free. */
   4935 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   4936 {
   4937    Int i;
   4938    vassert(env[A_NENV-1].bindee == NULL);
   4939    for (i = A_NENV-1; i >= 1; i--)
   4940       env[i] = env[i-1];
   4941    env[0].binder   = binder;
   4942    env[0].bindee   = bindee;
   4943    env[0].doesLoad = False; /* filled in later */
   4944    env[0].getInterval.present  = False; /* filled in later */
   4945    env[0].getInterval.low  = -1; /* filled in later */
   4946    env[0].getInterval.high = -1; /* filled in later */
   4947 }
   4948 
   4949 /* Given uses :: array of UShort, indexed by IRTemp
   4950    Add the use-occurrences of temps in this expression
   4951    to the env.
   4952 */
   4953 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   4954 {
   4955    Int i;
   4956 
   4957    switch (e->tag) {
   4958 
   4959       case Iex_RdTmp: /* the only interesting case */
   4960          uses[e->Iex.RdTmp.tmp]++;
   4961          return;
   4962 
   4963       case Iex_ITE:
   4964          aoccCount_Expr(uses, e->Iex.ITE.cond);
   4965          aoccCount_Expr(uses, e->Iex.ITE.iftrue);
   4966          aoccCount_Expr(uses, e->Iex.ITE.iffalse);
   4967          return;
   4968 
   4969       case Iex_Qop:
   4970          aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
   4971          aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
   4972          aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
   4973          aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
   4974          return;
   4975 
   4976       case Iex_Triop:
   4977          aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
   4978          aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
   4979          aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
   4980          return;
   4981 
   4982       case Iex_Binop:
   4983          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   4984          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   4985          return;
   4986 
   4987       case Iex_Unop:
   4988          aoccCount_Expr(uses, e->Iex.Unop.arg);
   4989          return;
   4990 
   4991       case Iex_Load:
   4992          aoccCount_Expr(uses, e->Iex.Load.addr);
   4993          return;
   4994 
   4995       case Iex_CCall:
   4996          for (i = 0; e->Iex.CCall.args[i]; i++)
   4997             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   4998          return;
   4999 
   5000       case Iex_GetI:
   5001          aoccCount_Expr(uses, e->Iex.GetI.ix);
   5002          return;
   5003 
   5004       case Iex_Const:
   5005       case Iex_Get:
   5006          return;
   5007 
   5008       default:
   5009          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5010          vpanic("aoccCount_Expr");
   5011     }
   5012 }
   5013 
   5014 
   5015 /* Given uses :: array of UShort, indexed by IRTemp
   5016    Add the use-occurrences of temps in this statement
   5017    to the env.
   5018 */
   5019 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   5020 {
   5021    Int      i;
   5022    IRDirty* d;
   5023    IRCAS*   cas;
   5024    switch (st->tag) {
   5025       case Ist_AbiHint:
   5026          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   5027          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   5028          return;
   5029       case Ist_WrTmp:
   5030          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   5031          return;
   5032       case Ist_Put:
   5033          aoccCount_Expr(uses, st->Ist.Put.data);
   5034          return;
   5035       case Ist_PutI:
   5036          aoccCount_Expr(uses, st->Ist.PutI.details->ix);
   5037          aoccCount_Expr(uses, st->Ist.PutI.details->data);
   5038          return;
   5039       case Ist_Store:
   5040          aoccCount_Expr(uses, st->Ist.Store.addr);
   5041          aoccCount_Expr(uses, st->Ist.Store.data);
   5042          return;
   5043       case Ist_StoreG: {
   5044          IRStoreG* sg = st->Ist.StoreG.details;
   5045          aoccCount_Expr(uses, sg->addr);
   5046          aoccCount_Expr(uses, sg->data);
   5047          aoccCount_Expr(uses, sg->guard);
   5048          return;
   5049       }
   5050       case Ist_LoadG: {
   5051          IRLoadG* lg = st->Ist.LoadG.details;
   5052          aoccCount_Expr(uses, lg->addr);
   5053          aoccCount_Expr(uses, lg->alt);
   5054          aoccCount_Expr(uses, lg->guard);
   5055          return;
   5056       }
   5057       case Ist_CAS:
   5058          cas = st->Ist.CAS.details;
   5059          aoccCount_Expr(uses, cas->addr);
   5060          if (cas->expdHi)
   5061             aoccCount_Expr(uses, cas->expdHi);
   5062          aoccCount_Expr(uses, cas->expdLo);
   5063          if (cas->dataHi)
   5064             aoccCount_Expr(uses, cas->dataHi);
   5065          aoccCount_Expr(uses, cas->dataLo);
   5066          return;
   5067       case Ist_LLSC:
   5068          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   5069          if (st->Ist.LLSC.storedata)
   5070             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   5071          return;
   5072       case Ist_Dirty:
   5073          d = st->Ist.Dirty.details;
   5074          if (d->mFx != Ifx_None)
   5075             aoccCount_Expr(uses, d->mAddr);
   5076          aoccCount_Expr(uses, d->guard);
   5077          for (i = 0; d->args[i]; i++) {
   5078             IRExpr* arg = d->args[i];
   5079             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
   5080                aoccCount_Expr(uses, arg);
   5081          }
   5082          return;
   5083       case Ist_NoOp:
   5084       case Ist_IMark:
   5085       case Ist_MBE:
   5086          return;
   5087       case Ist_Exit:
   5088          aoccCount_Expr(uses, st->Ist.Exit.guard);
   5089          return;
   5090       default:
   5091          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   5092          vpanic("aoccCount_Stmt");
   5093    }
   5094 }
   5095 
   5096 /* Look up a binding for tmp in the env.  If found, return the bound
   5097    expression, and set the env's binding to NULL so it is marked as
   5098    used.  If not found, return NULL. */
   5099 
   5100 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   5101 {
   5102    Int i;
   5103    for (i = 0; i < A_NENV; i++) {
   5104       if (env[i].binder == tmp && env[i].bindee != NULL) {
   5105          IRExpr* bindee = env[i].bindee;
   5106          env[i].bindee = NULL;
   5107          return bindee;
   5108       }
   5109    }
   5110    return NULL;
   5111 }
   5112 
   5113 /* Traverse e, looking for temps.  For each observed temp, see if env
   5114    contains a binding for the temp, and if so return the bound value.
   5115    The env has the property that any binding it holds is
   5116    'single-shot', so once a binding is used, it is marked as no longer
   5117    available, by setting its .bindee field to NULL. */
   5118 
   5119 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   5120    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   5121 }
   5122 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   5123    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   5124 }
   5125 
   5126 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   5127 {
   5128    switch (op) {
   5129    case Iop_Or32:
   5130       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   5131       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   5132          return IRExpr_Unop( Iop_CmpwNEZ32,
   5133                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   5134                                                      a2->Iex.Unop.arg ) );
   5135       break;
   5136 
   5137    case Iop_CmpNE32:
   5138       /* Since X has type Ity_I1 we can simplify:
   5139          CmpNE32(1Uto32(X),0)) ==> X */
   5140       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   5141          return a1->Iex.Unop.arg;
   5142       break;
   5143 
   5144    default:
   5145       break;
   5146    }
   5147    /* no reduction rule applies */
   5148    return IRExpr_Binop( op, a1, a2 );
   5149 }
   5150 
/* Build Unop(op, aa), first applying local rewrite rules that
   simplify common idioms (mostly around the CmpNEZ/CmpwNEZ and
   widening/narrowing families).  Each rule is stated in the comment
   immediately above its test.  If no rule matches, the Unop is built
   unchanged. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
   5294 
   5295 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   5296 {
   5297    IRExpr*  e2;
   5298    IRExpr** args2;
   5299    Int      i;
   5300 
   5301    switch (e->tag) {
   5302 
   5303       case Iex_CCall:
   5304          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   5305          for (i = 0; args2[i]; i++)
   5306             args2[i] = atbSubst_Expr(env,args2[i]);
   5307          return IRExpr_CCall(
   5308                    e->Iex.CCall.cee,
   5309                    e->Iex.CCall.retty,
   5310                    args2
   5311                 );
   5312       case Iex_RdTmp:
   5313          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   5314          return e2 ? e2 : e;
   5315       case Iex_ITE:
   5316          return IRExpr_ITE(
   5317                    atbSubst_Expr(env, e->Iex.ITE.cond),
   5318                    atbSubst_Expr(env, e->Iex.ITE.iftrue),
   5319                    atbSubst_Expr(env, e->Iex.ITE.iffalse)
   5320                 );
   5321       case Iex_Qop:
   5322          return IRExpr_Qop(
   5323                    e->Iex.Qop.details->op,
   5324                    atbSubst_Expr(env, e->Iex.Qop.details->arg1),
   5325                    atbSubst_Expr(env, e->Iex.Qop.details->arg2),
   5326                    atbSubst_Expr(env, e->Iex.Qop.details->arg3),
   5327                    atbSubst_Expr(env, e->Iex.Qop.details->arg4)
   5328                 );
   5329       case Iex_Triop:
   5330          return IRExpr_Triop(
   5331                    e->Iex.Triop.details->op,
   5332                    atbSubst_Expr(env, e->Iex.Triop.details->arg1),
   5333                    atbSubst_Expr(env, e->Iex.Triop.details->arg2),
   5334                    atbSubst_Expr(env, e->Iex.Triop.details->arg3)
   5335                 );
   5336       case Iex_Binop:
   5337          return fold_IRExpr_Binop(
   5338                    e->Iex.Binop.op,
   5339                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   5340                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   5341                 );
   5342       case Iex_Unop:
   5343          return fold_IRExpr_Unop(
   5344                    e->Iex.Unop.op,
   5345                    atbSubst_Expr(env, e->Iex.Unop.arg)
   5346                 );
   5347       case Iex_Load:
   5348          return IRExpr_Load(
   5349                    e->Iex.Load.end,
   5350                    e->Iex.Load.ty,
   5351                    atbSubst_Expr(env, e->Iex.Load.addr)
   5352                 );
   5353       case Iex_GetI:
   5354          return IRExpr_GetI(
   5355                    e->Iex.GetI.descr,
   5356                    atbSubst_Expr(env, e->Iex.GetI.ix),
   5357                    e->Iex.GetI.bias
   5358                 );
   5359       case Iex_Const:
   5360       case Iex_Get:
   5361          return e;
   5362       default:
   5363          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5364          vpanic("atbSubst_Expr");
   5365    }
   5366 }
   5367 
/* Same deal as atbSubst_Expr, except for stmts: rebuild 'st' with
   every contained expression passed through atbSubst_Expr.  The
   input statement is treated as immutable; a new statement is
   returned. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* expdHi/dataHi are NULL for a single-element CAS, so only
            substitute into them when present. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Shallow-copy the descriptor, then substitute into the
            expression-bearing fields.  VECRET/BBPTR marker args hold
            no temporaries and are left untouched. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
   5474 
   5475 inline
   5476 static Bool dirty_helper_stores ( const IRDirty *d )
   5477 {
   5478    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
   5479 }
   5480 
/* Compute the interval of guest state offsets that the dirty call
   'd' may write, based on its declared fxState effects.  Also sets
   *requiresPreciseMemExns to whether any written range needs precise
   memory exceptions according to preciseMemExnsFn. */
inline
static Interval dirty_helper_puts ( const IRDirty *d,
                                    Bool (*preciseMemExnsFn)(Int, Int),
                                    Bool *requiresPreciseMemExns )
{
   Int i;
   Interval interval;

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.low  = 0;
         interval.high = 0x7FFFFFFF;
         return interval;
      }
   }

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         /* This effect writes (or modifies) guest state.  The bytes
            touched run from 'offset' to the last byte of the final
            repeat, i.e. offset + nRepeats*repeatLen + size - 1. */
         Int offset = d->fxState[i].offset;
         Int size = d->fxState[i].size;
         Int nRepeats = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1)) {
            *requiresPreciseMemExns = True;
         }
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
      }
   }

   return interval;
}
   5526 
   5527 /* Return an interval if st modifies the guest state. Via requiresPreciseMemExns
   5528    return whether or not that modification requires precise exceptions. */
static Interval stmt_modifies_guest_state ( IRSB *bb, const IRStmt *st,
                                            Bool (*preciseMemExnsFn)(Int,Int),
                                            Bool *requiresPreciseMemExns )
{
   Interval interval;

   switch (st->tag) {
   case Ist_Put: {
      /* Fixed-offset write: the interval covers exactly the bytes of
         the stored value. */
      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns = preciseMemExnsFn(offset, offset + size - 1);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;
      return interval;
   }

   case Ist_PutI: {
      /* Indexed write: the exact element is unknown, so cover the
         whole array described by 'descr'. */
      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and there
         are no holes. This is to avoid a loop. The assumption is conservative
         in the sense that we might report that precise memory exceptions are
         needed when in fact they are not. */
      *requiresPreciseMemExns =
         preciseMemExnsFn(offset, offset + descr->nElems * size - 1);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;
      return interval;
   }

   case Ist_Dirty:
      /* Delegate to the fxState-based analysis of the dirty call. */
      return dirty_helper_puts(st->Ist.Dirty.details, preciseMemExnsFn,
                               requiresPreciseMemExns);

   default:
      /* All other statements do not write the guest state. */
      *requiresPreciseMemExns = False;
      interval.present = False;
      interval.low  = -1;
      interval.high = -1;
      return interval;
   }
}
   5576 
/* Tree builder: fold bindings of single-use temporaries back into the
   statements that use them, so later stages see expression trees
   rather than purely flat code.  Works in place on 'bb'.  Returns the
   maximum guest address covered by any IMark in the block, or
   ~(Addr64)0 if the block contains no IMarks. */
/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb,
                                          Bool (*preciseMemExnsFn)(Int,Int) )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr64 max_ga       = 0;

   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* Track the last guest byte covered by this insn; clamp
               len to at least 1 so a zero-length IMark still counts. */
            Int    len = st->Ist.IMark.len;
            Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
	    if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state( bb, st, preciseMemExnsFn,
                                               &putRequiresPreciseMemExns);

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essense, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requries precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
	 }
      }
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr64)0;
}
   5799 
   5800 
   5801 /*---------------------------------------------------------------*/
   5802 /*--- iropt main                                              ---*/
   5803 /*---------------------------------------------------------------*/
   5804 
   5805 static Bool iropt_verbose = False; /* True; */
   5806 
   5807 
   5808 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   5809    redundant Put removal, constant propagation, dead code removal,
   5810    clean helper specialisation, and dead code removal (again).
   5811 */
   5812 
   5813 
   5814 static
   5815 IRSB* cheap_transformations (
   5816          IRSB* bb,
   5817          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   5818          Bool (*preciseMemExnsFn)(Int,Int)
   5819       )
   5820 {
   5821    redundant_get_removal_BB ( bb );
   5822    if (iropt_verbose) {
   5823       vex_printf("\n========= REDUNDANT GET\n\n" );
   5824       ppIRSB(bb);
   5825    }
   5826 
   5827    if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
   5828       redundant_put_removal_BB ( bb, preciseMemExnsFn );
   5829    }
   5830    if (iropt_verbose) {
   5831       vex_printf("\n========= REDUNDANT PUT\n\n" );
   5832       ppIRSB(bb);
   5833    }
   5834 
   5835    bb = cprop_BB ( bb );
   5836    if (iropt_verbose) {
   5837       vex_printf("\n========= CPROPD\n\n" );
   5838       ppIRSB(bb);
   5839    }
   5840 
   5841    do_deadcode_BB ( bb );
   5842    if (iropt_verbose) {
   5843       vex_printf("\n========= DEAD\n\n" );
   5844       ppIRSB(bb);
   5845    }
   5846 
   5847    bb = spec_helpers_BB ( bb, specHelper );
   5848    do_deadcode_BB ( bb );
   5849    if (iropt_verbose) {
   5850       vex_printf("\n========= SPECd \n\n" );
   5851       ppIRSB(bb);
   5852    }
   5853 
   5854    return bb;
   5855 }
   5856 
   5857 
   5858 /* Do some more expensive transformations on bb, which are aimed at
   5859    optimising as much as possible in the presence of GetI and PutI.  */
   5860 
static
IRSB* expensive_transformations( IRSB* bb )
{
   /* CSE first; its Bool result (whether anything was commoned up)
      is deliberately ignored here. */
   (void)do_cse_BB( bb );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   /* Only remove redundant PutIs when we are not required to keep
      all registers up to date at each instruction. */
   if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb );
   }
   do_deadcode_BB( bb );
   return bb;
}
   5873 
   5874 
   5875 /* Scan a flattened BB to look for signs that more expensive
   5876    optimisations might be useful:
   5877    - find out if there are any GetIs and PutIs
   5878    - find out if there are any floating or vector-typed temporaries
   5879 */
   5880 
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* While scanning, also vassert that the block really is flat:
      every expression position must hold an atom. */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the temp's type: integer types are cheap;
               float, decimal-float and vector types trigger the
               FP/vector CSE pass in the caller. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            /* expdHi/dataHi may be NULL for single-element CAS. */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            /* VECRET/BBPTR marker args are not atoms; skip them. */
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
   5980 
   5981 
   5982 /* ---------------- The main iropt entry point. ---------------- */
   5983 
   5984 /* exported from this file */
   5985 /* Rules of the game:
   5986 
   5987    - IRExpr/IRStmt trees should be treated as immutable, as they
   5988      may get shared.  So never change a field of such a tree node;
   5989      instead construct and return a new one if needed.
   5990 */
   5991 
   5992 
/* Top-level IR optimiser.  Flattens bb0, then applies cheap (and,
   where warranted, expensive) transformation passes, and finally
   attempts single-block loop unrolling.  Returns the optimised
   block, which may or may not be the same object as bb0. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   /* Counters for the DEBUG_IROPT diagnostic below; static, so they
      accumulate across calls. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn );
      }
      do_cse_BB( bb );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb );
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
   6092 
   6093 
   6094 /*---------------------------------------------------------------*/
   6095 /*--- end                                            ir_opt.c ---*/
   6096 /*---------------------------------------------------------------*/
   6097