      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                          ir_opt.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 
     40 #include "main_util.h"
     41 #include "main_globals.h"
     42 #include "ir_opt.h"
     43 
     44 
     45 /* Set to 1 for lots of debugging output. */
     46 #define DEBUG_IROPT 0
     47 
     48 
     49 /* What iropt does, 29 Dec 04.
     50 
     51    It takes an IRSB and produces a new one with the same meaning,
     52    defined thus:
     53 
     54    After execution of the new BB, all guest state and guest memory is
     55    the same as after execution of the original.  This is true
     56    regardless of how the block was exited (at the end vs side exit).
     57 
     58    In addition, parts of the guest state will be identical to that
     59    created by execution of the original at the following observation
     60    points:
     61 
     62    * In a dirty helper call, any parts of the guest state that the
     63      helper states that it reads or modifies will be up to date.
     64      Also, guest memory will be up to date.  Parts of the guest state
     65      not marked as being read or modified by the helper cannot be
     66      assumed to be up-to-date at the point where the helper is called.
     67 
     68    * Immediately prior to any load or store, those parts of the guest
     69      state marked as requiring precise exceptions will be up to date.
     70      Also, guest memory will be up to date.  Parts of the guest state
     71      not marked as requiring precise exceptions cannot be assumed to
     72      be up-to-date at the point of the load/store.
     73 
     74    The relative order of loads and stores (including loads/stores of
     75    guest memory done by dirty helpers annotated as such) is not
     76    changed.  However, the relative order of loads with no intervening
     77    stores/modifies may be changed.
     78 
     79    Transformation order
     80    ~~~~~~~~~~~~~~~~~~~~
     81 
     82    There are three levels of optimisation, controlled by
     83    vex_control.iropt_level.  Define first:
     84 
     85    "Cheap transformations" are the following sequence:
     86       * Redundant-Get removal
     87       * Redundant-Put removal
     88       * Constant propagation/folding
     89       * Dead code removal
     90       * Specialisation of clean helper functions
     91       * Dead code removal
     92 
     93    "Expensive transformations" are the following sequence:
     94       * CSE
     95       * Folding of add/sub chains
     96       * Redundant-GetI removal
     97       * Redundant-PutI removal
     98       * Dead code removal
     99 
    100    Then the transformations are as follows, as defined by
    101    vex_control.iropt_level:
    102 
    103    Level 0:
    104       * Flatten into atomic form.
    105 
    106    Level 1: the following sequence:
    107       * Flatten into atomic form.
    108       * Cheap transformations.
    109 
     110    Level 2: the following sequence:
    111       * Flatten into atomic form.
    112       * Cheap transformations.
    113       * If block contains any floating or vector types, CSE.
    114       * If block contains GetI or PutI, Expensive transformations.
    115       * Try unrolling loops.  Three possible outcomes:
    116         - No effect: do nothing more.
    117         - Unrolled a loop, and block does not contain GetI or PutI:
    118           Do: * CSE
    119               * Dead code removal
    120         - Unrolled a loop, and block contains GetI or PutI:
    121           Do: * Expensive transformations
    122               * Cheap transformations
    123 */
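
/* Illustrative note (not part of the original comments): "atomic
   form" means that every operand of every expression in the block is
   an IRTemp or an IRConst, e.g. (IR sketched informally)

      t3 = Add32(t1,t2)            -- flat: both operands are atoms
      t3 = Add32(t1,GET:I32(8))    -- not flat: the Get must first be
                                      lifted onto a temp of its own

   The flattening pass below performs exactly this lifting. */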
    124 
    125 /* Implementation notes, 29 Dec 04.
    126 
    127    TODO (important): I think rPutI removal ignores precise exceptions
     128    and is therefore, in a sense, wrong: it assumes that PutIs do not
     129    write parts of the guest state that we need to have up-to-date
     130    at loads/stores.  So far on the x86 guest that has not mattered,
     131    since indeed only the x87 FP registers and tags are
    132    accessed using GetI/PutI, and there is no need so far for them to
    133    be up to date at mem exception points.  The rPutI pass should be
    134    fixed.
    135 
    136    TODO: improve pessimistic handling of precise exceptions
    137      in the tree builder.
    138 
    139    TODO: check interaction of rGetI and dirty helpers.
    140 
    141    F64i constants are treated differently from other constants.
    142    They are not regarded as atoms, and instead lifted off and
    143    bound to temps.  This allows them to participate in CSE, which
    144    is important for getting good performance for x86 guest code.
    145 
     146    TODO: CSE up F64 literals (already doing F64is)
    147 
    148    CSE: consider carefully the requirement for precise exns
    149         prior to making CSE any more aggressive.  */
    150 
    151 
    152 /*---------------------------------------------------------------*/
    153 /*--- Finite mappery, of a sort                               ---*/
    154 /*---------------------------------------------------------------*/
    155 
     156 /* General map from HWord-sized thing to HWord-sized thing.  Could be
     157    done by hashing, but it's not clear whether or not this would
     158    really be any faster. */
    159 
    160 typedef
    161    struct {
    162       Bool*  inuse;
    163       HWord* key;
    164       HWord* val;
    165       Int    size;
    166       Int    used;
    167    }
    168    HashHW;
    169 
    170 static HashHW* newHHW ( void )
    171 {
    172    HashHW* h = LibVEX_Alloc(sizeof(HashHW));
    173    h->size   = 8;
    174    h->used   = 0;
    175    h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
    176    h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
    177    h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
    178    return h;
    179 }
    180 
    181 
    182 /* Look up key in the map. */
    183 
    184 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    185 {
    186    Int i;
    187    /* vex_printf("lookupHHW(%llx)\n", key ); */
    188    for (i = 0; i < h->used; i++) {
    189       if (h->inuse[i] && h->key[i] == key) {
    190          if (val)
    191             *val = h->val[i];
    192          return True;
    193       }
    194    }
    195    return False;
    196 }
    197 
    198 
    199 /* Add key->val to the map.  Replaces any existing binding for key. */
    200 
    201 static void addToHHW ( HashHW* h, HWord key, HWord val )
    202 {
    203    Int i, j;
    204    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    205 
    206    /* Find and replace existing binding, if any. */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          h->val[i] = val;
    210          return;
    211       }
    212    }
    213 
    214    /* Ensure a space is available. */
    215    if (h->used == h->size) {
    216       /* Copy into arrays twice the size. */
    217       Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
    218       HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    219       HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    220       for (i = j = 0; i < h->size; i++) {
    221          if (!h->inuse[i]) continue;
    222          inuse2[j] = True;
    223          key2[j] = h->key[i];
    224          val2[j] = h->val[i];
    225          j++;
    226       }
    227       h->used = j;
    228       h->size *= 2;
    229       h->inuse = inuse2;
    230       h->key = key2;
    231       h->val = val2;
    232    }
    233 
    234    /* Finally, add it. */
    235    vassert(h->used < h->size);
    236    h->inuse[h->used] = True;
    237    h->key[h->used] = key;
    238    h->val[h->used] = val;
    239    h->used++;
    240 }
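
/* Usage sketch (illustrative only, not part of the original code):

      HashHW* h = newHHW();
      HWord   v;
      addToHHW(h, (HWord)0x10, (HWord)0x99);
      if (lookupHHW(h, &v, (HWord)0x10)) {
         ... here v == 0x99; calling addToHHW again with key 0x10
             would overwrite this binding rather than add a second ...
      }

   Both lookup and insertion are linear scans over the 'used' slots,
   so this is only suitable for the small maps needed in this file. */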
    241 
    242 
    243 /*---------------------------------------------------------------*/
    244 /*--- Flattening out a BB into atomic SSA form                ---*/
    245 /*---------------------------------------------------------------*/
    246 
    247 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    248    copies made by flattening.  If in doubt return False. */
    249 
    250 static Bool isFlat ( IRExpr* e )
    251 {
    252    if (e->tag == Iex_Get)
    253       return True;
    254    if (e->tag == Iex_Binop)
    255       return toBool( isIRAtom(e->Iex.Binop.arg1)
    256                      && isIRAtom(e->Iex.Binop.arg2) );
    257    if (e->tag == Iex_Load)
    258       return isIRAtom(e->Iex.Load.addr);
    259    return False;
    260 }
    261 
    262 /* Flatten out 'ex' so it is atomic, returning a new expression with
    263    the same value, after having appended extra IRTemp assignments to
    264    the end of 'bb'. */
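
/* For example (illustrative; temp names are made up), flattening the
   non-atomic expression

      Add32(GET:I32(8),0x4:I32)

   appends statements of the form

      tA = GET:I32(8)
      tB = Add32(tA,0x4:I32)

   to 'bb' and returns RdTmp(tB).  Constants other than F64i are left
   in place, since they are already atoms. */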
    265 
    266 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
    267 {
    268    Int i;
    269    IRExpr** newargs;
    270    IRType ty = typeOfIRExpr(bb->tyenv, ex);
    271    IRTemp t1;
    272 
    273    switch (ex->tag) {
    274 
    275       case Iex_GetI:
    276          t1 = newIRTemp(bb->tyenv, ty);
    277          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    278             IRExpr_GetI(ex->Iex.GetI.descr,
    279                         flatten_Expr(bb, ex->Iex.GetI.ix),
    280                         ex->Iex.GetI.bias)));
    281          return IRExpr_RdTmp(t1);
    282 
    283       case Iex_Get:
    284          t1 = newIRTemp(bb->tyenv, ty);
    285          addStmtToIRSB(bb,
    286             IRStmt_WrTmp(t1, ex));
    287          return IRExpr_RdTmp(t1);
    288 
    289       case Iex_Qop:
    290          t1 = newIRTemp(bb->tyenv, ty);
    291          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    292             IRExpr_Qop(ex->Iex.Qop.op,
    293                          flatten_Expr(bb, ex->Iex.Qop.arg1),
    294                          flatten_Expr(bb, ex->Iex.Qop.arg2),
    295                          flatten_Expr(bb, ex->Iex.Qop.arg3),
    296                          flatten_Expr(bb, ex->Iex.Qop.arg4))));
    297          return IRExpr_RdTmp(t1);
    298 
    299       case Iex_Triop:
    300          t1 = newIRTemp(bb->tyenv, ty);
    301          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    302             IRExpr_Triop(ex->Iex.Triop.op,
    303                          flatten_Expr(bb, ex->Iex.Triop.arg1),
    304                          flatten_Expr(bb, ex->Iex.Triop.arg2),
    305                          flatten_Expr(bb, ex->Iex.Triop.arg3))));
    306          return IRExpr_RdTmp(t1);
    307 
    308       case Iex_Binop:
    309          t1 = newIRTemp(bb->tyenv, ty);
    310          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    311             IRExpr_Binop(ex->Iex.Binop.op,
    312                          flatten_Expr(bb, ex->Iex.Binop.arg1),
    313                          flatten_Expr(bb, ex->Iex.Binop.arg2))));
    314          return IRExpr_RdTmp(t1);
    315 
    316       case Iex_Unop:
    317          t1 = newIRTemp(bb->tyenv, ty);
    318          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    319             IRExpr_Unop(ex->Iex.Unop.op,
    320                         flatten_Expr(bb, ex->Iex.Unop.arg))));
    321          return IRExpr_RdTmp(t1);
    322 
    323       case Iex_Load:
    324          t1 = newIRTemp(bb->tyenv, ty);
    325          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    326             IRExpr_Load(ex->Iex.Load.end,
    327                         ex->Iex.Load.ty,
    328                         flatten_Expr(bb, ex->Iex.Load.addr))));
    329          return IRExpr_RdTmp(t1);
    330 
    331       case Iex_CCall:
    332          newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
    333          for (i = 0; newargs[i]; i++)
    334             newargs[i] = flatten_Expr(bb, newargs[i]);
    335          t1 = newIRTemp(bb->tyenv, ty);
    336          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    337             IRExpr_CCall(ex->Iex.CCall.cee,
    338                          ex->Iex.CCall.retty,
    339                          newargs)));
    340          return IRExpr_RdTmp(t1);
    341 
    342       case Iex_Mux0X:
    343          t1 = newIRTemp(bb->tyenv, ty);
    344          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    345             IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
    346                          flatten_Expr(bb, ex->Iex.Mux0X.expr0),
    347                          flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
    348          return IRExpr_RdTmp(t1);
    349 
    350       case Iex_Const:
    351          /* Lift F64i constants out onto temps so they can be CSEd
    352             later. */
    353          if (ex->Iex.Const.con->tag == Ico_F64i) {
    354             t1 = newIRTemp(bb->tyenv, ty);
    355             addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    356                IRExpr_Const(ex->Iex.Const.con)));
    357             return IRExpr_RdTmp(t1);
    358          } else {
    359             /* Leave all other constants alone. */
    360             return ex;
    361          }
    362 
    363       case Iex_RdTmp:
    364          return ex;
    365 
    366       default:
    367          vex_printf("\n");
    368          ppIRExpr(ex);
    369          vex_printf("\n");
    370          vpanic("flatten_Expr");
    371    }
    372 }
    373 
    374 
    375 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    376 
    377 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
    378 {
    379    Int i;
    380    IRExpr  *e1, *e2, *e3, *e4, *e5;
    381    IRDirty *d,  *d2;
    382    IRCAS   *cas, *cas2;
    383    switch (st->tag) {
    384       case Ist_Put:
    385          if (isIRAtom(st->Ist.Put.data)) {
    386             /* optimisation to reduce the amount of heap wasted
    387                by the flattener */
    388             addStmtToIRSB(bb, st);
    389          } else {
    390             /* general case, always correct */
    391             e1 = flatten_Expr(bb, st->Ist.Put.data);
    392             addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
    393          }
    394          break;
    395       case Ist_PutI:
    396          e1 = flatten_Expr(bb, st->Ist.PutI.ix);
    397          e2 = flatten_Expr(bb, st->Ist.PutI.data);
    398          addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
    399                                        e1,
    400                                        st->Ist.PutI.bias,
    401                                        e2));
    402          break;
    403       case Ist_WrTmp:
    404          if (isFlat(st->Ist.WrTmp.data)) {
    405             /* optimisation, to reduce the number of tmp-tmp
    406                copies generated */
    407             addStmtToIRSB(bb, st);
    408          } else {
    409             /* general case, always correct */
    410             e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
    411             addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
    412          }
    413          break;
    414       case Ist_Store:
    415          e1 = flatten_Expr(bb, st->Ist.Store.addr);
    416          e2 = flatten_Expr(bb, st->Ist.Store.data);
    417          addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
    418          break;
    419       case Ist_CAS:
    420          cas  = st->Ist.CAS.details;
    421          e1   = flatten_Expr(bb, cas->addr);
    422          e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
    423          e3   = flatten_Expr(bb, cas->expdLo);
    424          e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
    425          e5   = flatten_Expr(bb, cas->dataLo);
    426          cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
    427                          e1, e2, e3, e4, e5 );
    428          addStmtToIRSB(bb, IRStmt_CAS(cas2));
    429          break;
    430       case Ist_LLSC:
    431          e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
    432          e2 = st->Ist.LLSC.storedata
    433                  ? flatten_Expr(bb, st->Ist.LLSC.storedata)
    434                  : NULL;
    435          addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
    436                                        st->Ist.LLSC.result, e1, e2));
    437          break;
    438       case Ist_Dirty:
    439          d = st->Ist.Dirty.details;
    440          d2 = emptyIRDirty();
    441          *d2 = *d;
    442          d2->args = shallowCopyIRExprVec(d2->args);
    443          if (d2->mFx != Ifx_None) {
    444             d2->mAddr = flatten_Expr(bb, d2->mAddr);
    445          } else {
    446             vassert(d2->mAddr == NULL);
    447          }
    448          d2->guard = flatten_Expr(bb, d2->guard);
    449          for (i = 0; d2->args[i]; i++)
    450             d2->args[i] = flatten_Expr(bb, d2->args[i]);
    451          addStmtToIRSB(bb, IRStmt_Dirty(d2));
    452          break;
    453       case Ist_NoOp:
    454       case Ist_MBE:
    455       case Ist_IMark:
    456          addStmtToIRSB(bb, st);
    457          break;
    458       case Ist_AbiHint:
    459          e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
    460          e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
    461          addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
    462          break;
    463       case Ist_Exit:
    464          e1 = flatten_Expr(bb, st->Ist.Exit.guard);
    465          addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
    466                                            st->Ist.Exit.dst));
    467          break;
    468       default:
    469          vex_printf("\n");
    470          ppIRStmt(st);
    471          vex_printf("\n");
    472          vpanic("flatten_Stmt");
    473    }
    474 }
    475 
    476 
    477 static IRSB* flatten_BB ( IRSB* in )
    478 {
    479    Int   i;
    480    IRSB* out;
    481    out = emptyIRSB();
    482    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    483    for (i = 0; i < in->stmts_used; i++)
    484       if (in->stmts[i])
    485          flatten_Stmt( out, in->stmts[i] );
    486    out->next     = flatten_Expr( out, in->next );
    487    out->jumpkind = in->jumpkind;
    488    return out;
    489 }
    490 
    491 
    492 /*---------------------------------------------------------------*/
    493 /*--- In-place removal of redundant GETs                      ---*/
    494 /*---------------------------------------------------------------*/
    495 
    496 /* Scan forwards, building up an environment binding (min offset, max
    497    offset) pairs to values, which will either be temps or constants.
    498 
    499    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    500    env and if it matches, replace the Get with the stored value.  If
    501    there is no match, add a (minoff,maxoff) :-> t binding.
    502 
    503    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    504    any binding which fully or partially overlaps with (minoff,maxoff).
    505    Then add a new (minoff,maxoff) :-> t or c binding.  */
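
/* For example (illustrative IR):

      t1 = GET:I32(0)              -- binds (0,3) :-> t1
      t2 = GET:I32(0)              -- becomes t2 = t1
      PUT(0) = 0x7:I32             -- kills (0,3), then binds it to 0x7
      t3 = GET:I32(0)              -- becomes t3 = 0x7:I32

   Gets whose type differs from the bound value are left alone, as
   noted in the code below. */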
    506 
    507 /* Extract the min/max offsets from a guest state array descriptor. */
    508 
    509 inline
    510 static void getArrayBounds ( IRRegArray* descr,
    511                              UInt* minoff, UInt* maxoff )
    512 {
    513    *minoff = descr->base;
    514    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    515    vassert((*minoff & ~0xFFFF) == 0);
    516    vassert((*maxoff & ~0xFFFF) == 0);
    517    vassert(*minoff <= *maxoff);
    518 }
    519 
    520 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    521 
    522 static UInt mk_key_GetPut ( Int offset, IRType ty )
    523 {
    524    /* offset should fit in 16 bits. */
    525    UInt minoff = offset;
    526    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    527    vassert((minoff & ~0xFFFF) == 0);
    528    vassert((maxoff & ~0xFFFF) == 0);
    529    return (minoff << 16) | maxoff;
    530 }
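
/* For example, a 4-byte access at offset 24 covers bytes 24 .. 27,
   so mk_key_GetPut(24, Ity_I32) is (24 << 16) | 27 = 0x0018001B. */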
    531 
    532 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    533 {
    534    UInt minoff, maxoff;
    535    getArrayBounds( descr, &minoff, &maxoff );
    536    vassert((minoff & ~0xFFFF) == 0);
    537    vassert((maxoff & ~0xFFFF) == 0);
    538    return (minoff << 16) | maxoff;
    539 }
    540 
    541 /* Supposing h has keys of the form generated by mk_key_GetPut and
    542    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    543    .. k_hi).
    544 */
    545 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    546 {
    547    Int  j;
    548    UInt e_lo, e_hi;
    549    vassert(k_lo <= k_hi);
    550    /* invalidate any env entries which in any way overlap (k_lo
    551       .. k_hi) */
    552    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    553 
    554    for (j = 0; j < h->used; j++) {
    555       if (!h->inuse[j])
    556          continue;
    557       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    558       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    559       vassert(e_lo <= e_hi);
    560       if (e_hi < k_lo || k_hi < e_lo)
    561          continue; /* no overlap possible */
    562       else
    563          /* overlap; invalidate */
    564          h->inuse[j] = False;
    565    }
    566 }
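
/* For example (illustrative), given the key encoding above,
   invalidateOverlaps(h, 4, 7) knocks out entries keyed (2,5), (4,7)
   or (6,9), but leaves (0,3) and (8,11) untouched. */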
    567 
    568 
    569 static void redundant_get_removal_BB ( IRSB* bb )
    570 {
    571    HashHW* env = newHHW();
    572    UInt    key = 0; /* keep gcc -O happy */
    573    Int     i, j;
    574    HWord   val;
    575 
    576    for (i = 0; i < bb->stmts_used; i++) {
    577       IRStmt* st = bb->stmts[i];
    578 
    579       if (st->tag == Ist_NoOp)
    580          continue;
    581 
    582       /* Deal with Gets */
    583       if (st->tag == Ist_WrTmp
    584           && st->Ist.WrTmp.data->tag == Iex_Get) {
    585          /* st is 't = Get(...)'.  Look up in the environment and see
    586             if the Get can be replaced. */
    587          IRExpr* get = st->Ist.WrTmp.data;
    588          key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
    589                                      get->Iex.Get.ty );
    590          if (lookupHHW(env, &val, (HWord)key)) {
    591             /* found it */
    592             /* Note, we could do better here.  If the types are
    593                different we don't do the substitution, since doing so
    594                could lead to invalidly-typed IR.  An improvement would
    595                be to stick in a reinterpret-style cast, although that
    596                would make maintaining flatness more difficult. */
    597             IRExpr* valE    = (IRExpr*)val;
    598             Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
    599                                       == st->Ist.WrTmp.data->Iex.Get.ty );
    600             if (typesOK && DEBUG_IROPT) {
    601                vex_printf("rGET: "); ppIRExpr(get);
    602                vex_printf("  ->  "); ppIRExpr(valE);
    603                vex_printf("\n");
    604             }
    605             if (typesOK)
    606                bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
    607          } else {
    608             /* Not found, but at least we know that t and the Get(...)
    609                are now associated.  So add a binding to reflect that
    610                fact. */
    611             addToHHW( env, (HWord)key,
    612                            (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
    613          }
    614       }
    615 
    616       /* Deal with Puts: invalidate any env entries overlapped by this
    617          Put */
    618       if (st->tag == Ist_Put || st->tag == Ist_PutI) {
    619          UInt k_lo, k_hi;
    620          if (st->tag == Ist_Put) {
    621             key = mk_key_GetPut( st->Ist.Put.offset,
    622                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    623          } else {
    624             vassert(st->tag == Ist_PutI);
    625             key = mk_key_GetIPutI( st->Ist.PutI.descr );
    626          }
    627 
    628          k_lo = (key >> 16) & 0xFFFF;
    629          k_hi = key & 0xFFFF;
    630          invalidateOverlaps(env, k_lo, k_hi);
    631       }
    632       else
    633       if (st->tag == Ist_Dirty) {
    634          /* Deal with dirty helpers which write or modify guest state.
    635             Invalidate the entire env.  We could do a lot better
    636             here. */
    637          IRDirty* d      = st->Ist.Dirty.details;
    638          Bool     writes = False;
    639          for (j = 0; j < d->nFxState; j++) {
    640             if (d->fxState[j].fx == Ifx_Modify
    641                 || d->fxState[j].fx == Ifx_Write)
     642                writes = True;
    643          }
    644          if (writes) {
    645             /* dump the entire env (not clever, but correct ...) */
    646             for (j = 0; j < env->used; j++)
    647                env->inuse[j] = False;
    648             if (0) vex_printf("rGET: trash env due to dirty helper\n");
    649          }
    650       }
    651 
    652       /* add this one to the env, if appropriate */
    653       if (st->tag == Ist_Put) {
    654          vassert(isIRAtom(st->Ist.Put.data));
    655          addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
    656       }
    657 
    658    } /* for (i = 0; i < bb->stmts_used; i++) */
    659 
    660 }
    661 
    662 
    663 /*---------------------------------------------------------------*/
    664 /*--- In-place removal of redundant PUTs                      ---*/
    665 /*---------------------------------------------------------------*/
    666 
    667 /* Find any Get uses in st and invalidate any partially or fully
    668    overlapping ranges listed in env.  Due to the flattening phase, the
    669    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
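
/* For instance (illustrative), if env records (0,3) -- i.e. the next
   event for bytes 0..3 is believed to be a write -- then seeing
   "t7 = GET:I32(2)" must invalidate that entry, since bytes 2 and 3
   are in fact read before they are next written. */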
    670 
    671 static void handle_gets_Stmt (
    672                HashHW* env,
    673                IRStmt* st,
    674                Bool (*preciseMemExnsFn)(Int,Int)
    675             )
    676 {
    677    Int     j;
    678    UInt    key = 0; /* keep gcc -O happy */
    679    Bool    isGet;
    680    Bool    memRW = False;
    681    IRExpr* e;
    682 
    683    switch (st->tag) {
    684 
    685       /* This is the only interesting case.  Deal with Gets in the RHS
    686          expression. */
    687       case Ist_WrTmp:
    688          e = st->Ist.WrTmp.data;
    689          switch (e->tag) {
    690             case Iex_Get:
    691                isGet = True;
    692                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    693                break;
    694             case Iex_GetI:
    695                isGet = True;
    696                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    697                break;
    698             case Iex_Load:
    699                isGet = False;
    700                memRW = True;
    701                break;
    702             default:
    703                isGet = False;
    704          }
    705          if (isGet) {
    706             UInt k_lo, k_hi;
    707             k_lo = (key >> 16) & 0xFFFF;
    708             k_hi = key & 0xFFFF;
    709             invalidateOverlaps(env, k_lo, k_hi);
    710          }
    711          break;
    712 
    713       /* Be very conservative for dirty helper calls; dump the entire
    714          environment.  The helper might read guest state, in which
    715          case it needs to be flushed first.  Also, the helper might
    716          access guest memory, in which case all parts of the guest
     717          state requiring precise exceptions need to be flushed.  The
    718          crude solution is just to flush everything; we could easily
    719          enough do a lot better if needed. */
    720       /* Probably also overly-conservative, but also dump everything
    721          if we hit a memory bus event (fence, lock, unlock).  Ditto
    722          AbiHints, CASs, LLs and SCs. */
    723       case Ist_AbiHint:
    724          vassert(isIRAtom(st->Ist.AbiHint.base));
    725          vassert(isIRAtom(st->Ist.AbiHint.nia));
    726          /* fall through */
    727       case Ist_MBE:
    728       case Ist_Dirty:
    729       case Ist_CAS:
    730       case Ist_LLSC:
    731          for (j = 0; j < env->used; j++)
    732             env->inuse[j] = False;
    733          break;
    734 
    735       /* all other cases are boring. */
    736       case Ist_Store:
    737          vassert(isIRAtom(st->Ist.Store.addr));
    738          vassert(isIRAtom(st->Ist.Store.data));
    739          memRW = True;
    740          break;
    741 
    742       case Ist_Exit:
    743          vassert(isIRAtom(st->Ist.Exit.guard));
    744          break;
    745 
    746       case Ist_PutI:
    747          vassert(isIRAtom(st->Ist.PutI.ix));
    748          vassert(isIRAtom(st->Ist.PutI.data));
    749          break;
    750 
    751       case Ist_NoOp:
    752       case Ist_IMark:
    753          break;
    754 
    755       default:
    756          vex_printf("\n");
    757          ppIRStmt(st);
    758          vex_printf("\n");
    759          vpanic("handle_gets_Stmt");
    760    }
    761 
    762    if (memRW) {
    763       /* This statement accesses memory.  So we need to dump all parts
    764          of the environment corresponding to guest state that may not
    765          be reordered with respect to memory references.  That means
    766          at least the stack pointer. */
    767       for (j = 0; j < env->used; j++) {
    768          if (!env->inuse[j])
    769             continue;
    770          if (vex_control.iropt_precise_memory_exns) {
    771             /* Precise exceptions required.  Flush all guest state. */
    772             env->inuse[j] = False;
    773          } else {
    774             /* Just flush the minimal amount required, as computed by
    775                preciseMemExnsFn. */
    776             HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    777             HWord k_hi = env->key[j] & 0xFFFF;
    778             if (preciseMemExnsFn( k_lo, k_hi ))
    779                env->inuse[j] = False;
    780          }
    781       }
    782    } /* if (memRW) */
    783 
    784 }
    785 
    786 
    787 /* Scan backwards, building up a set of (min offset, max
    788    offset) pairs, indicating those parts of the guest state
    789    for which the next event is a write.
    790 
    791    On seeing a conditional exit, empty the set.
    792 
    793    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    794    completely within the set, remove the Put.  Otherwise, add
    795    (minoff,maxoff) to the set.
    796 
    797    On seeing 'Get (minoff,maxoff)', remove any part of the set
    798    overlapping (minoff,maxoff).  The same has to happen for any events
    799    which implicitly read parts of the guest state: dirty helper calls
    800    and loads/stores.
    801 */
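
/* For example (illustrative), scanning this fragment backwards:

      PUT(0) = t1     -- (0,3) is already in the set, so this Put
                         is turned into a NoOp
      PUT(0) = t2     -- seen first; adds (0,3) to the set

   An intervening Exit, or a "t9 = GET:I32(0)", between the two Puts
   would cause the first Put to be kept. */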
    802 
    803 static void redundant_put_removal_BB (
    804                IRSB* bb,
    805                Bool (*preciseMemExnsFn)(Int,Int)
    806             )
    807 {
    808    Int     i, j;
    809    Bool    isPut;
    810    IRStmt* st;
    811    UInt    key = 0; /* keep gcc -O happy */
    812 
    813    HashHW* env = newHHW();
    814    for (i = bb->stmts_used-1; i >= 0; i--) {
    815       st = bb->stmts[i];
    816 
    817       if (st->tag == Ist_NoOp)
    818          continue;
    819 
    820       /* Deal with conditional exits. */
    821       if (st->tag == Ist_Exit) {
    822          /* Since control may not get beyond this point, we must empty
    823             out the set, since we can no longer claim that the next
    824             event for any part of the guest state is definitely a
    825             write. */
    826          vassert(isIRAtom(st->Ist.Exit.guard));
    827          for (j = 0; j < env->used; j++)
    828             env->inuse[j] = False;
    829          continue;
    830       }
    831 
    832       /* Deal with Puts */
    833       switch (st->tag) {
    834          case Ist_Put:
    835             isPut = True;
    836             key = mk_key_GetPut( st->Ist.Put.offset,
    837                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    838             vassert(isIRAtom(st->Ist.Put.data));
    839             break;
    840          case Ist_PutI:
    841             isPut = True;
    842             key = mk_key_GetIPutI( st->Ist.PutI.descr );
    843             vassert(isIRAtom(st->Ist.PutI.ix));
    844             vassert(isIRAtom(st->Ist.PutI.data));
    845             break;
    846          default:
    847             isPut = False;
    848       }
    849       if (isPut && st->tag != Ist_PutI) {
    850          /* See if any single entry in env overlaps this Put.  This is
    851             simplistic in that the transformation is valid if, say, two
    852             or more entries in the env overlap this Put, but the use of
    853             lookupHHW will only find a single entry which exactly
    854             overlaps this Put.  This is suboptimal but safe. */
    855          if (lookupHHW(env, NULL, (HWord)key)) {
    856             /* This Put is redundant because a later one will overwrite
    857                it.  So NULL (nop) it out. */
    858             if (DEBUG_IROPT) {
    859                vex_printf("rPUT: "); ppIRStmt(st);
    860                vex_printf("\n");
    861             }
    862             bb->stmts[i] = IRStmt_NoOp();
    863          } else {
    864             /* We can't demonstrate that this Put is redundant, so add it
    865                to the running collection. */
    866             addToHHW(env, (HWord)key, 0);
    867          }
    868          continue;
    869       }
    870 
    871       /* Deal with Gets.  These remove bits of the environment since
    872          appearance of a Get means that the next event for that slice
    873          of the guest state is no longer a write, but a read.  Also
    874          deals with implicit reads of guest state needed to maintain
    875          precise exceptions. */
    876       handle_gets_Stmt( env, st, preciseMemExnsFn );
    877    }
    878 }
    879 
    880 
    881 /*---------------------------------------------------------------*/
    882 /*--- Constant propagation and folding                        ---*/
    883 /*---------------------------------------------------------------*/
    884 
    885 /* The env in this section is a map from IRTemp to IRExpr*,
    886    that is, an array indexed by IRTemp. */
    887 
    888 /* Are both expressions simply the same IRTemp ? */
    889 static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
    890 {
    891    return toBool( e1->tag == Iex_RdTmp
    892                   && e2->tag == Iex_RdTmp
    893                   && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
    894 }
    895 
    896 static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
    897 {
    898    return toBool( e1->tag == Iex_Const
    899                   && e2->tag == Iex_Const
    900                   && e1->Iex.Const.con->tag == Ico_U32
    901                   && e2->Iex.Const.con->tag == Ico_U32
    902                   && e1->Iex.Const.con->Ico.U32
    903                      == e2->Iex.Const.con->Ico.U32 );
    904 }
    905 
    906 /* Are both expressions either the same IRTemp or IRConst-U32s ?  If
    907    in doubt, say No. */
    908 static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
    909 {
    910    switch (e1->tag) {
    911       case Iex_RdTmp:
    912          return sameIRTemps(e1, e2);
    913       case Iex_Const:
    914          return sameIcoU32s(e1, e2);
    915       default:
    916          return False;
    917    }
    918 }
    919 
    920 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
    921 static Bool isZeroU32 ( IRExpr* e )
    922 {
    923    return toBool( e->tag == Iex_Const
    924                   && e->Iex.Const.con->tag == Ico_U32
    925                   && e->Iex.Const.con->Ico.U32 == 0);
    926 }
    927 
    928 /* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
    929 static Bool isZeroU64 ( IRExpr* e )
    930 {
    931    return toBool( e->tag == Iex_Const
    932                   && e->Iex.Const.con->tag == Ico_U64
    933                   && e->Iex.Const.con->Ico.U64 == 0);
    934 }
    935 
    936 static Bool notBool ( Bool b )
    937 {
    938    if (b == True) return False;
    939    if (b == False) return True;
    940    vpanic("notBool");
    941 }
    942 
    943 /* Make a zero which has the same type as the result of the given
    944    primop. */
    945 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
    946 {
    947    switch (op) {
    948       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
    949       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
    950       case Iop_Sub32:
    951       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
    952       case Iop_Sub64:
    953       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
    954       case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
    955       default: vpanic("mkZeroOfPrimopResultType: bad primop");
    956    }
    957 }
    958 
    959 /* Make a value containing all 1-bits, which has the same type as the
    960    result of the given primop. */
    961 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
    962 {
    963    switch (op) {
    964       case Iop_CmpEQ64:
    965          return IRExpr_Const(IRConst_U1(toBool(1)));
    966       case Iop_CmpEQ8x8:
    967          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
    968       case Iop_CmpEQ8x16:
    969          return IRExpr_Const(IRConst_V128(0xFFFF));
    970       default:
    971          vpanic("mkOnesOfPrimopResultType: bad primop");
    972    }
    973 }
    974 
    975 /* Helpers for folding Clz32/64. */
    976 static UInt fold_Clz64 ( ULong value )
    977 {
    978    UInt i;
    979    vassert(value != 0ULL); /* no defined semantics for arg==0 */
    980    for (i = 0; i < 64; ++i) {
    981       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
    982    }
    983    vassert(0);
    984    /*NOTREACHED*/
    985    return 0;
    986 }
    987 
    988 static UInt fold_Clz32 ( UInt value )
    989 {
    990    UInt i;
    991    vassert(value != 0); /* no defined semantics for arg==0 */
    992    for (i = 0; i < 32; ++i) {
    993       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
    994    }
    995    vassert(0);
    996    /*NOTREACHED*/
    997    return 0;
    998 }
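
/* Worked example: fold_Clz32(0x00800000) returns 8, since bit 23 is
   the most significant set bit and there are 31 - 23 = 8 leading
   zeroes above it. */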
    999 
   1000 
   1001 static IRExpr* fold_Expr ( IRExpr* e )
   1002 {
   1003    Int     shift;
   1004    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1005 
   1006    /* UNARY ops */
   1007    if (e->tag == Iex_Unop
   1008        && e->Iex.Unop.arg->tag == Iex_Const) {
   1009       switch (e->Iex.Unop.op) {
   1010          case Iop_1Uto8:
   1011             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1012                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1013                     ? 1 : 0)));
   1014             break;
   1015          case Iop_1Uto32:
   1016             e2 = IRExpr_Const(IRConst_U32(
   1017                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1018                     ? 1 : 0));
   1019             break;
   1020          case Iop_1Uto64:
   1021             e2 = IRExpr_Const(IRConst_U64(
   1022                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1023                     ? 1 : 0));
   1024             break;
   1025 
   1026          case Iop_1Sto8:
   1027             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1028                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1029                     ? 0xFF : 0)));
   1030             break;
   1031          case Iop_1Sto16:
   1032             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1033                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1034                     ? 0xFFFF : 0)));
   1035             break;
   1036          case Iop_1Sto32:
   1037             e2 = IRExpr_Const(IRConst_U32(
   1038                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1039                     ? 0xFFFFFFFF : 0));
   1040             break;
   1041          case Iop_1Sto64:
   1042             e2 = IRExpr_Const(IRConst_U64(
   1043                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1044                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1045             break;
   1046 
   1047          case Iop_8Sto32: {
   1048             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1049             s32 <<= 24;
   1050             s32 >>= 24;
   1051             e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1052             break;
   1053          }
   1054          case Iop_16Sto32: {
   1055             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1056             s32 <<= 16;
   1057             s32 >>= 16;
   1058             e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
   1059             break;
   1060          }
   1061          case Iop_8Uto64:
   1062             e2 = IRExpr_Const(IRConst_U64(
   1063                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1064             break;
   1065          case Iop_16Uto64:
   1066             e2 = IRExpr_Const(IRConst_U64(
   1067                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1068             break;
   1069          case Iop_8Uto32:
   1070             e2 = IRExpr_Const(IRConst_U32(
   1071                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1072             break;
   1073          case Iop_8Sto16: {
   1074             /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1075             s16 <<= 8;
   1076             s16 >>= 8;
   1077             e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
   1078             break;
   1079          }
   1080          case Iop_8Uto16:
   1081             e2 = IRExpr_Const(IRConst_U16(
   1082                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1083             break;
   1084          case Iop_16Uto32:
   1085             e2 = IRExpr_Const(IRConst_U32(
   1086                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1087             break;
   1088          case Iop_32to16:
   1089             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1090                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1091             break;
   1092          case Iop_32to8:
   1093             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1094                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1095             break;
   1096          case Iop_32to1:
   1097             e2 = IRExpr_Const(IRConst_U1(toBool(
   1098                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1099                  )));
   1100             break;
   1101          case Iop_64to1:
   1102             e2 = IRExpr_Const(IRConst_U1(toBool(
   1103                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1104                  )));
   1105             break;
   1106 
   1107          case Iop_Not64:
   1108             e2 = IRExpr_Const(IRConst_U64(
   1109                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1110             break;
   1111          case Iop_Not32:
   1112             e2 = IRExpr_Const(IRConst_U32(
   1113                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1114             break;
   1115          case Iop_Not16:
   1116             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1117                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1118             break;
   1119          case Iop_Not8:
   1120             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1121                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1122             break;
   1123 
   1124          case Iop_Not1:
   1125             e2 = IRExpr_Const(IRConst_U1(
   1126                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1127             break;
   1128 
   1129          case Iop_64to8: {
   1130             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1131             w64 &= 0xFFULL;
   1132             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1133             break;
   1134          }
   1135          case Iop_64to16: {
   1136             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1137             w64 &= 0xFFFFULL;
   1138             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1139             break;
   1140          }
   1141          case Iop_64to32: {
   1142             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1143             w64 &= 0x00000000FFFFFFFFULL;
   1144             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1145             break;
   1146          }
   1147          case Iop_64HIto32: {
   1148             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1149             w64 >>= 32;
   1150             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1151             break;
   1152          }
   1153          case Iop_32Uto64:
   1154             e2 = IRExpr_Const(IRConst_U64(
   1155                     0xFFFFFFFFULL
   1156                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1157             break;
   1158          case Iop_16Sto64: {
   1159             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1160             s64 <<= 48;
   1161             s64 >>= 48;
   1162             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1163             break;
   1164          }
   1165          case Iop_32Sto64: {
   1166             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1167             s64 <<= 32;
   1168             s64 >>= 32;
   1169             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1170             break;
   1171          }
   1172 
   1173          case Iop_16to8: {
   1174             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1175             w16 &= 0xFF;
   1176             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1177             break;
   1178          }
   1179          case Iop_16HIto8: {
   1180             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1181             w16 >>= 8;
   1182             w16 &= 0xFF;
   1183             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1184             break;
   1185          }
   1186 
   1187          case Iop_CmpNEZ8:
   1188             e2 = IRExpr_Const(IRConst_U1(toBool(
   1189                     0 !=
   1190                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1191                  )));
   1192             break;
   1193          case Iop_CmpNEZ32:
   1194             e2 = IRExpr_Const(IRConst_U1(toBool(
   1195                     0 !=
   1196                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1197                  )));
   1198             break;
   1199          case Iop_CmpNEZ64:
   1200             e2 = IRExpr_Const(IRConst_U1(toBool(
   1201                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1202                  )));
   1203             break;
   1204 
   1205          case Iop_CmpwNEZ32: {
   1206             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1207             if (w32 == 0)
   1208                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1209             else
   1210                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1211             break;
   1212          }
   1213          case Iop_CmpwNEZ64: {
   1214             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1215             if (w64 == 0)
   1216                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1217             else
   1218                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1219             break;
   1220          }
   1221 
   1222          case Iop_Left32: {
   1223             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1224             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1225             s32 = (s32 | (-s32));
   1226             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1227             break;
   1228          }
   1229 
   1230          case Iop_Left64: {
   1231             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1232             Long  s64 = (Long)u64;
   1233             s64 = (s64 | (-s64));
   1234             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1235             break;
   1236          }
   1237 
   1238          case Iop_Clz32: {
   1239             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1240             if (u32 != 0)
   1241                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
   1242             break;
   1243          }
   1244          case Iop_Clz64: {
   1245             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1246             if (u64 != 0ULL)
   1247                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
   1248             break;
   1249          }
   1250 
   1251          default:
   1252             goto unhandled;
   1253       }
   1254    }
   1255 
   1256    /* BINARY ops */
   1257    if (e->tag == Iex_Binop) {
   1258       if (e->Iex.Binop.arg1->tag == Iex_Const
   1259           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1260          /* cases where both args are consts */
   1261          switch (e->Iex.Binop.op) {
   1262 
   1263             /* -- Or -- */
   1264             case Iop_Or8:
   1265                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1266                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1267                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1268                break;
   1269             case Iop_Or16:
   1270                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1271                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1272                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1273                break;
   1274             case Iop_Or32:
   1275                e2 = IRExpr_Const(IRConst_U32(
   1276                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1277                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1278                break;
   1279             case Iop_Or64:
   1280                e2 = IRExpr_Const(IRConst_U64(
   1281                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1282                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1283                break;
   1284 
   1285             /* -- Xor -- */
   1286             case Iop_Xor8:
   1287                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1288                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1289                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1290                break;
   1291             case Iop_Xor16:
   1292                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1293                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1294                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1295                break;
   1296             case Iop_Xor32:
   1297                e2 = IRExpr_Const(IRConst_U32(
   1298                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1299                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1300                break;
   1301             case Iop_Xor64:
   1302                e2 = IRExpr_Const(IRConst_U64(
   1303                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1304                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1305                break;
   1306 
   1307             /* -- And -- */
   1308             case Iop_And8:
   1309                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1310                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1311                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1312                break;
   1313             case Iop_And16:
   1314                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1315                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1316                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1317                break;
   1318             case Iop_And32:
   1319                e2 = IRExpr_Const(IRConst_U32(
   1320                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1321                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1322                break;
   1323             case Iop_And64:
   1324                e2 = IRExpr_Const(IRConst_U64(
   1325                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1326                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1327                break;
   1328 
   1329             /* -- Add -- */
   1330             case Iop_Add8:
   1331                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1332                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1333                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1334                break;
   1335             case Iop_Add32:
   1336                e2 = IRExpr_Const(IRConst_U32(
   1337                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1338                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1339                break;
   1340             case Iop_Add64:
   1341                e2 = IRExpr_Const(IRConst_U64(
   1342                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1343                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1344                break;
   1345 
   1346             /* -- Sub -- */
   1347             case Iop_Sub8:
   1348                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1349                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1350                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1351                break;
   1352             case Iop_Sub32:
   1353                e2 = IRExpr_Const(IRConst_U32(
   1354                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1355                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1356                break;
   1357             case Iop_Sub64:
   1358                e2 = IRExpr_Const(IRConst_U64(
   1359                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1360                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1361                break;
   1362 
   1363             /* -- Max32U -- */
   1364             case Iop_Max32U: {
   1365                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1366                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1367                UInt res  = u32a > u32b ? u32a : u32b;
   1368                e2 = IRExpr_Const(IRConst_U32(res));
   1369                break;
   1370             }
   1371 
   1372             /* -- Mul -- */
   1373             case Iop_Mul32:
   1374                e2 = IRExpr_Const(IRConst_U32(
   1375                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1376                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1377                break;
   1378             case Iop_Mul64:
   1379                e2 = IRExpr_Const(IRConst_U64(
   1380                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1381                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1382                break;
   1383 
   1384             case Iop_MullS32: {
   1385                /* very paranoid */
   1386                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1387                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1388                Int   s32a = (Int)u32a;
   1389                Int   s32b = (Int)u32b;
   1390                Long  s64a = (Long)s32a;
   1391                Long  s64b = (Long)s32b;
   1392                Long  sres = s64a * s64b;
   1393                ULong ures = (ULong)sres;
   1394                e2 = IRExpr_Const(IRConst_U64(ures));
   1395                break;
   1396             }
   1397 
   1398             /* -- Shl -- */
   1399             case Iop_Shl32:
   1400                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1401                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1402                if (shift >= 0 && shift <= 31)
   1403                   e2 = IRExpr_Const(IRConst_U32(
   1404                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1405                            << shift)));
   1406                break;
   1407             case Iop_Shl64:
   1408                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1409                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1410                if (shift >= 0 && shift <= 63)
   1411                   e2 = IRExpr_Const(IRConst_U64(
   1412                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1413                            << shift)));
   1414                break;
   1415 
   1416             /* -- Sar -- */
   1417             case Iop_Sar32: {
   1418                /* paranoid ... */
   1419                /*signed*/ Int s32;
   1420                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1421                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1422                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1423                if (shift >= 0 && shift <= 31) {
   1424                   s32 >>=/*signed*/ shift;
   1425                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1426                }
   1427                break;
   1428             }
   1429             case Iop_Sar64: {
   1430                /* paranoid ... */
   1431                /*signed*/ Long s64;
   1432                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1433                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1434                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1435                if (shift >= 0 && shift <= 63) {
   1436                   s64 >>=/*signed*/ shift;
   1437                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1438                }
   1439                break;
   1440             }
   1441 
   1442             /* -- Shr -- */
   1443             case Iop_Shr32: {
   1444                /* paranoid ... */
   1445                /*unsigned*/ UInt u32;
   1446                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1447                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1448                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1449                if (shift >= 0 && shift <= 31) {
   1450                   u32 >>=/*unsigned*/ shift;
   1451                   e2 = IRExpr_Const(IRConst_U32(u32));
   1452                }
   1453                break;
   1454             }
   1455             case Iop_Shr64: {
   1456                /* paranoid ... */
   1457                /*unsigned*/ ULong u64;
   1458                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1459                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1460                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1461                if (shift >= 0 && shift <= 63) {
   1462                   u64 >>=/*unsigned*/ shift;
   1463                   e2 = IRExpr_Const(IRConst_U64(u64));
   1464                }
   1465                break;
   1466             }
   1467 
   1468             /* -- CmpEQ -- */
   1469             case Iop_CmpEQ32:
   1470                e2 = IRExpr_Const(IRConst_U1(toBool(
   1471                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1472                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1473                break;
   1474             case Iop_CmpEQ64:
   1475                e2 = IRExpr_Const(IRConst_U1(toBool(
   1476                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1477                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1478                break;
   1479 
   1480             /* -- CmpNE -- */
   1481             case Iop_CmpNE8:
   1482                e2 = IRExpr_Const(IRConst_U1(toBool(
   1483                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1484                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1485                break;
   1486             case Iop_CmpNE32:
   1487                e2 = IRExpr_Const(IRConst_U1(toBool(
   1488                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1489                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1490                break;
   1491             case Iop_CmpNE64:
   1492                e2 = IRExpr_Const(IRConst_U1(toBool(
   1493                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1494                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1495                break;
   1496 
   1497             /* -- CmpLEU -- */
   1498             case Iop_CmpLE32U:
   1499                e2 = IRExpr_Const(IRConst_U1(toBool(
   1500                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1501                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1502                break;
   1503             case Iop_CmpLE64U:
   1504                e2 = IRExpr_Const(IRConst_U1(toBool(
   1505                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1506                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1507                break;
   1508 
   1509             /* -- CmpLES -- */
   1510             case Iop_CmpLE32S:
   1511                e2 = IRExpr_Const(IRConst_U1(toBool(
   1512                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1513                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1514                break;
   1515             case Iop_CmpLE64S:
   1516                e2 = IRExpr_Const(IRConst_U1(toBool(
   1517                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1518                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1519                break;
   1520 
   1521             /* -- CmpLTS -- */
   1522             case Iop_CmpLT32S:
   1523                e2 = IRExpr_Const(IRConst_U1(toBool(
   1524                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1525                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1526                break;
   1527             case Iop_CmpLT64S:
   1528                e2 = IRExpr_Const(IRConst_U1(toBool(
   1529                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1530                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1531                break;
   1532 
   1533             /* -- CmpLTU -- */
   1534             case Iop_CmpLT32U:
   1535                e2 = IRExpr_Const(IRConst_U1(toBool(
   1536                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1537                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1538                break;
   1539             case Iop_CmpLT64U:
   1540                e2 = IRExpr_Const(IRConst_U1(toBool(
   1541                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1542                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1543                break;
   1544 
   1545             /* -- CmpORD -- */
   1546             case Iop_CmpORD32S: {
   1547                /* very paranoid */
   1548                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1549                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1550                Int   s32a = (Int)u32a;
   1551                Int   s32b = (Int)u32b;
   1552                Int   r = 0x2; /* EQ */
   1553                if (s32a < s32b) {
   1554                   r = 0x8; /* LT */
   1555                }
   1556                else if (s32a > s32b) {
   1557                   r = 0x4; /* GT */
   1558                }
   1559                e2 = IRExpr_Const(IRConst_U32(r));
   1560                break;
   1561             }
   1562 
   1563             /* -- nHLto2n -- */
   1564             case Iop_32HLto64:
   1565                e2 = IRExpr_Const(IRConst_U64(
   1566                        (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
   1567                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   1568                     ));
   1569                break;
   1570             case Iop_64HLto128:
   1571                /* We can't fold this, because there is no way to
    1572                   express the result in IR, but at least pretend to
   1573                   handle it, so as to stop getting blasted with
   1574                   no-rule-for-this-primop messages. */
   1575                break;
   1576 
   1577             default:
   1578                goto unhandled;
   1579          }
   1580 
   1581       } else {
   1582 
   1583          /* other cases (identities, etc) */
   1584 
   1585          /* Shl64/Shr64(x,0) ==> x */
   1586          if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
   1587              && e->Iex.Binop.arg2->tag == Iex_Const
   1588              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1589             e2 = e->Iex.Binop.arg1;
   1590          } else
   1591 
   1592          /* Shl32/Shr32(x,0) ==> x */
   1593          if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
   1594              && e->Iex.Binop.arg2->tag == Iex_Const
   1595              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1596             e2 = e->Iex.Binop.arg1;
   1597          } else
   1598 
   1599          /* Or8(x,0) ==> x */
   1600          if ((e->Iex.Binop.op == Iop_Or8)
   1601              && e->Iex.Binop.arg2->tag == Iex_Const
   1602              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1603             e2 = e->Iex.Binop.arg1;
   1604          } else
   1605 
   1606          /* Or16(x,0) ==> x */
   1607          if ((e->Iex.Binop.op == Iop_Or16)
   1608              && e->Iex.Binop.arg2->tag == Iex_Const
   1609              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
   1610             e2 = e->Iex.Binop.arg1;
   1611          } else
   1612 
   1613          /* Or32/Add32/Max32U(x,0) ==> x
   1614             Or32/Add32/Max32U(0,x) ==> x */
   1615          if (e->Iex.Binop.op == Iop_Add32
   1616              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U) {
   1617             if (isZeroU32(e->Iex.Binop.arg2))
   1618                e2 = e->Iex.Binop.arg1;
   1619             else if (isZeroU32(e->Iex.Binop.arg1))
   1620                e2 = e->Iex.Binop.arg2;
   1621          } else
   1622 
   1623          /* Sub64(x,0) ==> x */
   1624          if (e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg2)) {
   1625             e2 = e->Iex.Binop.arg1;
   1626          } else
   1627 
   1628          /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
   1629             x+x produces a defined least significant bit, and it seems
   1630             simplest just to get rid of the problem by rewriting it
   1631             out, since the opportunity to do so exists. */
   1632          if (e->Iex.Binop.op == Iop_Add32
   1633              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1634              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1635              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1636                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1637             e2 = IRExpr_Binop(Iop_Shl32,
   1638                               e->Iex.Binop.arg1,
   1639                               IRExpr_Const(IRConst_U8(1)));
   1640          } else
   1641 
   1642          /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
   1643          if (e->Iex.Binop.op == Iop_Add64
   1644              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1645              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1646              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1647                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1648             e2 = IRExpr_Binop(Iop_Shl64,
   1649                               e->Iex.Binop.arg1,
   1650                               IRExpr_Const(IRConst_U8(1)));
   1651          } else
   1652 
   1653          /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
   1654          if (e->Iex.Binop.op == Iop_Add8
   1655              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1656              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1657              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1658                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1659             e2 = IRExpr_Binop(Iop_Shl8,
   1660                               e->Iex.Binop.arg1,
   1661                               IRExpr_Const(IRConst_U8(1)));
   1662          } else
   1663          /* NB no Add16(t,t) case yet as no known test case exists */
   1664 
   1665          /* Or64/Add64(x,0) ==> x
   1666             Or64/Add64(0,x) ==> x */
   1667          if (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64) {
   1668             if (isZeroU64(e->Iex.Binop.arg2))
   1669                e2 = e->Iex.Binop.arg1;
   1670             else if (isZeroU64(e->Iex.Binop.arg1))
   1671                e2 = e->Iex.Binop.arg2;
   1672          } else
   1673 
   1674          /* And32(x,0xFFFFFFFF) ==> x */
   1675          if (e->Iex.Binop.op == Iop_And32
   1676              && e->Iex.Binop.arg2->tag == Iex_Const
   1677              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
   1678             e2 = e->Iex.Binop.arg1;
   1679          } else
   1680 
   1681          /* And32(x,0) ==> 0 */
   1682          if (e->Iex.Binop.op == Iop_And32
   1683              && e->Iex.Binop.arg2->tag == Iex_Const
   1684              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
   1685             e2 = IRExpr_Const(IRConst_U32(0));
   1686          } else
   1687 
   1688          /* And32/Shl32(0,x) ==> 0 */
   1689          if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
   1690              && e->Iex.Binop.arg1->tag == Iex_Const
   1691              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
   1692             e2 = IRExpr_Const(IRConst_U32(0));
   1693          } else
   1694 
   1695          /* Or8(0,x) ==> x */
   1696          if (e->Iex.Binop.op == Iop_Or8
   1697              && e->Iex.Binop.arg1->tag == Iex_Const
   1698              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
   1699             e2 = e->Iex.Binop.arg2;
   1700          } else
   1701 
   1702          /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
   1703          /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
   1704          /* Max32U(t,t) ==> t, for some IRTemp t */
   1705          switch (e->Iex.Binop.op) {
   1706             case Iop_And64: case Iop_And32:
   1707             case Iop_And16: case Iop_And8:
   1708             case Iop_Or64: case Iop_Or32:
   1709             case Iop_Or16: case Iop_Or8: case Iop_OrV128:
   1710             case Iop_Max32U:
   1711                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1712                   e2 = e->Iex.Binop.arg1;
   1713                break;
   1714             default:
   1715                break;
   1716          }
   1717 
   1718          /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   1719          /* Sub32/64(t,t) ==> 0, for some IRTemp t */
   1720          switch (e->Iex.Binop.op) {
   1721             case Iop_Xor64: case Iop_Xor32:
   1722             case Iop_Xor16: case Iop_Xor8:
   1723             case Iop_XorV128:
   1724             case Iop_Sub64: case Iop_Sub32:
   1725                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1726                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   1727                break;
   1728             default:
   1729                break;
   1730          }
   1731 
   1732          switch (e->Iex.Binop.op) {
   1733             case Iop_CmpEQ64:
   1734             case Iop_CmpEQ8x8:
   1735             case Iop_CmpEQ8x16:
   1736                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1737                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   1738                break;
   1739             default:
   1740                break;
   1741          }
   1742 
   1743       }
   1744    }
   1745 
   1746    /* Mux0X */
   1747    if (e->tag == Iex_Mux0X) {
    1748       /* is the discriminant a constant? */
   1749       if (e->Iex.Mux0X.cond->tag == Iex_Const) {
   1750          Bool zero;
    1751          /* assured to us by the IR type rules */
   1752          vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
   1753          zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
   1754                                      ->Iex.Const.con->Ico.U8));
   1755          e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
   1756       }
   1757       else
   1758       /* are the arms identical? (pretty weedy test) */
   1759       if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
   1760                                e->Iex.Mux0X.exprX)) {
   1761          e2 = e->Iex.Mux0X.expr0;
   1762       }
   1763    }
   1764 
   1765    /* Show cases where we've found but not folded 'op(t,t)'. */
   1766    if (0 && e == e2 && e->tag == Iex_Binop
   1767        && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   1768       vex_printf("IDENT: ");
   1769       ppIRExpr(e); vex_printf("\n");
   1770    }
   1771 
   1772    /* Show the overall results of folding. */
   1773    if (DEBUG_IROPT && e2 != e) {
   1774       vex_printf("FOLD: ");
   1775       ppIRExpr(e); vex_printf("  ->  ");
   1776       ppIRExpr(e2); vex_printf("\n");
   1777    }
   1778 
   1779    return e2;
   1780 
   1781  unhandled:
   1782 #  if 0
   1783    vex_printf("\n\n");
   1784    ppIRExpr(e);
   1785    vpanic("fold_Expr: no rule for the above");
   1786 #  else
   1787    if (vex_control.iropt_verbosity > 0) {
   1788       vex_printf("vex iropt: fold_Expr: no rule for: ");
   1789       ppIRExpr(e);
   1790       vex_printf("\n");
   1791    }
   1792    return e2;
   1793 #  endif
   1794 }
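
/* Illustrative sketch, not part of the original file: a minimal,
   hypothetical check of two of the foldings handled above -- a
   both-operands-constant case and an identity case.  It assumes the
   usual LibVEX allocation context has been set up; the temp number 5
   is arbitrary. */
static void example_fold_Expr ( void )
{
   IRExpr* sum;
   IRExpr* sh;
   /* Add32(0x10:I32, 0x4:I32) folds to 0x14:I32. */
   sum = fold_Expr(
            IRExpr_Binop(Iop_Add32,
                         IRExpr_Const(IRConst_U32(0x10)),
                         IRExpr_Const(IRConst_U32(0x4))));
   vassert(sum->tag == Iex_Const
           && sum->Iex.Const.con->Ico.U32 == 0x14);
   /* Shl32(t5, 0) is rewritten to just t5. */
   sh = fold_Expr(
           IRExpr_Binop(Iop_Shl32,
                        IRExpr_RdTmp(5),
                        IRExpr_Const(IRConst_U8(0))));
   vassert(sh->tag == Iex_RdTmp && sh->Iex.RdTmp.tmp == 5);
}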
   1795 
   1796 
   1797 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   1798    1-level due to previous flattening pass. */
   1799 
   1800 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   1801 {
   1802    switch (ex->tag) {
   1803       case Iex_RdTmp:
   1804          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   1805             return env[(Int)ex->Iex.RdTmp.tmp];
   1806          } else {
   1807             /* not bound in env */
   1808             return ex;
   1809          }
   1810 
   1811       case Iex_Const:
   1812       case Iex_Get:
   1813          return ex;
   1814 
   1815       case Iex_GetI:
   1816          vassert(isIRAtom(ex->Iex.GetI.ix));
   1817          return IRExpr_GetI(
   1818             ex->Iex.GetI.descr,
   1819             subst_Expr(env, ex->Iex.GetI.ix),
   1820             ex->Iex.GetI.bias
   1821          );
   1822 
   1823       case Iex_Qop:
   1824          vassert(isIRAtom(ex->Iex.Qop.arg1));
   1825          vassert(isIRAtom(ex->Iex.Qop.arg2));
   1826          vassert(isIRAtom(ex->Iex.Qop.arg3));
   1827          vassert(isIRAtom(ex->Iex.Qop.arg4));
   1828          return IRExpr_Qop(
   1829                    ex->Iex.Qop.op,
   1830                    subst_Expr(env, ex->Iex.Qop.arg1),
   1831                    subst_Expr(env, ex->Iex.Qop.arg2),
   1832                    subst_Expr(env, ex->Iex.Qop.arg3),
   1833                    subst_Expr(env, ex->Iex.Qop.arg4)
   1834                 );
   1835 
   1836       case Iex_Triop:
   1837          vassert(isIRAtom(ex->Iex.Triop.arg1));
   1838          vassert(isIRAtom(ex->Iex.Triop.arg2));
   1839          vassert(isIRAtom(ex->Iex.Triop.arg3));
   1840          return IRExpr_Triop(
   1841                    ex->Iex.Triop.op,
   1842                    subst_Expr(env, ex->Iex.Triop.arg1),
   1843                    subst_Expr(env, ex->Iex.Triop.arg2),
   1844                    subst_Expr(env, ex->Iex.Triop.arg3)
   1845                 );
   1846 
   1847       case Iex_Binop:
   1848          vassert(isIRAtom(ex->Iex.Binop.arg1));
   1849          vassert(isIRAtom(ex->Iex.Binop.arg2));
   1850          return IRExpr_Binop(
   1851                    ex->Iex.Binop.op,
   1852                    subst_Expr(env, ex->Iex.Binop.arg1),
   1853                    subst_Expr(env, ex->Iex.Binop.arg2)
   1854                 );
   1855 
   1856       case Iex_Unop:
   1857          vassert(isIRAtom(ex->Iex.Unop.arg));
   1858          return IRExpr_Unop(
   1859                    ex->Iex.Unop.op,
   1860                    subst_Expr(env, ex->Iex.Unop.arg)
   1861                 );
   1862 
   1863       case Iex_Load:
   1864          vassert(isIRAtom(ex->Iex.Load.addr));
   1865          return IRExpr_Load(
   1866                    ex->Iex.Load.end,
   1867                    ex->Iex.Load.ty,
   1868                    subst_Expr(env, ex->Iex.Load.addr)
   1869                 );
   1870 
   1871       case Iex_CCall: {
   1872          Int      i;
   1873          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   1874          for (i = 0; args2[i]; i++) {
   1875             vassert(isIRAtom(args2[i]));
   1876             args2[i] = subst_Expr(env, args2[i]);
   1877          }
   1878          return IRExpr_CCall(
   1879                    ex->Iex.CCall.cee,
   1880                    ex->Iex.CCall.retty,
   1881                    args2
   1882                 );
   1883       }
   1884 
   1885       case Iex_Mux0X:
   1886          vassert(isIRAtom(ex->Iex.Mux0X.cond));
   1887          vassert(isIRAtom(ex->Iex.Mux0X.expr0));
   1888          vassert(isIRAtom(ex->Iex.Mux0X.exprX));
   1889          return IRExpr_Mux0X(
   1890                    subst_Expr(env, ex->Iex.Mux0X.cond),
   1891                    subst_Expr(env, ex->Iex.Mux0X.expr0),
   1892                    subst_Expr(env, ex->Iex.Mux0X.exprX)
   1893                 );
   1894 
   1895       default:
   1896          vex_printf("\n\n"); ppIRExpr(ex);
   1897          vpanic("subst_Expr");
   1898 
   1899    }
   1900 }
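
/* Illustrative sketch, not part of the original file: applying
   subst_Expr with a tiny hand-built environment of the kind cprop_BB
   constructs below -- an array indexed by IRTemp holding either NULL
   or the atom to substitute.  The temp numbers are arbitrary. */
static IRExpr* example_subst_Expr ( void )
{
   IRExpr** env = LibVEX_Alloc(2 * sizeof(IRExpr*));
   env[0] = IRExpr_Const(IRConst_U32(5));   /* t0 |-> 0x5:I32 */
   env[1] = NULL;                           /* t1 is unbound  */
   /* Add32(t0,t1) comes back as Add32(0x5:I32,t1). */
   return subst_Expr(env, IRExpr_Binop(Iop_Add32,
                                       IRExpr_RdTmp(0),
                                       IRExpr_RdTmp(1)));
}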
   1901 
   1902 
   1903 /* Apply the subst to stmt, then fold the result as much as possible.
   1904    Much simplified due to stmt being previously flattened.  As a
   1905    result of this, the stmt may wind up being turned into a no-op.
   1906 */
   1907 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
   1908 {
   1909 #  if 0
   1910    vex_printf("\nsubst and fold stmt\n");
   1911    ppIRStmt(st);
   1912    vex_printf("\n");
   1913 #  endif
   1914 
   1915    switch (st->tag) {
   1916       case Ist_AbiHint:
   1917          vassert(isIRAtom(st->Ist.AbiHint.base));
   1918          vassert(isIRAtom(st->Ist.AbiHint.nia));
   1919          return IRStmt_AbiHint(
   1920                    fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
   1921                    st->Ist.AbiHint.len,
   1922                    fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
   1923                 );
   1924       case Ist_Put:
   1925          vassert(isIRAtom(st->Ist.Put.data));
   1926          return IRStmt_Put(
   1927                    st->Ist.Put.offset,
   1928                    fold_Expr(subst_Expr(env, st->Ist.Put.data))
   1929                 );
   1930 
   1931       case Ist_PutI:
   1932          vassert(isIRAtom(st->Ist.PutI.ix));
   1933          vassert(isIRAtom(st->Ist.PutI.data));
   1934          return IRStmt_PutI(
   1935                    st->Ist.PutI.descr,
   1936                    fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
   1937                    st->Ist.PutI.bias,
   1938                    fold_Expr(subst_Expr(env, st->Ist.PutI.data))
   1939                 );
   1940 
   1941       case Ist_WrTmp:
   1942          /* This is the one place where an expr (st->Ist.WrTmp.data) is
   1943             allowed to be more than just a constant or a tmp. */
   1944          return IRStmt_WrTmp(
   1945                    st->Ist.WrTmp.tmp,
   1946                    fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
   1947                 );
   1948 
   1949       case Ist_Store:
   1950          vassert(isIRAtom(st->Ist.Store.addr));
   1951          vassert(isIRAtom(st->Ist.Store.data));
   1952          return IRStmt_Store(
   1953                    st->Ist.Store.end,
   1954                    fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
   1955                    fold_Expr(subst_Expr(env, st->Ist.Store.data))
   1956                 );
   1957 
   1958       case Ist_CAS: {
   1959          IRCAS *cas, *cas2;
   1960          cas = st->Ist.CAS.details;
   1961          vassert(isIRAtom(cas->addr));
   1962          vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   1963          vassert(isIRAtom(cas->expdLo));
   1964          vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   1965          vassert(isIRAtom(cas->dataLo));
   1966          cas2 = mkIRCAS(
   1967                    cas->oldHi, cas->oldLo, cas->end,
   1968                    fold_Expr(subst_Expr(env, cas->addr)),
   1969                    cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
   1970                    fold_Expr(subst_Expr(env, cas->expdLo)),
   1971                    cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
   1972                    fold_Expr(subst_Expr(env, cas->dataLo))
   1973                 );
   1974          return IRStmt_CAS(cas2);
   1975       }
   1976 
   1977       case Ist_LLSC:
   1978          vassert(isIRAtom(st->Ist.LLSC.addr));
   1979          if (st->Ist.LLSC.storedata)
   1980             vassert(isIRAtom(st->Ist.LLSC.storedata));
   1981          return IRStmt_LLSC(
   1982                    st->Ist.LLSC.end,
   1983                    st->Ist.LLSC.result,
   1984                    fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
   1985                    st->Ist.LLSC.storedata
   1986                       ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
   1987                       : NULL
   1988                 );
   1989 
   1990       case Ist_Dirty: {
   1991          Int     i;
   1992          IRDirty *d, *d2;
   1993          d = st->Ist.Dirty.details;
   1994          d2 = emptyIRDirty();
   1995          *d2 = *d;
   1996          d2->args = shallowCopyIRExprVec(d2->args);
   1997          if (d2->mFx != Ifx_None) {
   1998             vassert(isIRAtom(d2->mAddr));
   1999             d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
   2000          }
   2001          vassert(isIRAtom(d2->guard));
   2002          d2->guard = fold_Expr(subst_Expr(env, d2->guard));
   2003          for (i = 0; d2->args[i]; i++) {
   2004             vassert(isIRAtom(d2->args[i]));
   2005             d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
   2006          }
   2007          return IRStmt_Dirty(d2);
   2008       }
   2009 
   2010       case Ist_IMark:
   2011          return IRStmt_IMark(st->Ist.IMark.addr,
   2012                              st->Ist.IMark.len,
   2013                              st->Ist.IMark.delta);
   2014 
   2015       case Ist_NoOp:
   2016          return IRStmt_NoOp();
   2017 
   2018       case Ist_MBE:
   2019          return IRStmt_MBE(st->Ist.MBE.event);
   2020 
   2021       case Ist_Exit: {
   2022          IRExpr* fcond;
   2023          vassert(isIRAtom(st->Ist.Exit.guard));
   2024          fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
   2025          if (fcond->tag == Iex_Const) {
   2026             /* Interesting.  The condition on this exit has folded down to
   2027                a constant. */
   2028             vassert(fcond->Iex.Const.con->tag == Ico_U1);
   2029             vassert(fcond->Iex.Const.con->Ico.U1 == False
   2030                     || fcond->Iex.Const.con->Ico.U1 == True);
   2031             if (fcond->Iex.Const.con->Ico.U1 == False) {
   2032                /* exit is never going to happen, so dump the statement. */
   2033                return IRStmt_NoOp();
   2034             } else {
   2035                vassert(fcond->Iex.Const.con->Ico.U1 == True);
   2036                /* Hmmm.  The exit has become unconditional.  Leave it
   2037                   as it is for now, since we'd have to truncate the BB
   2038                   at this point, which is tricky.  Such truncation is
   2039                   done later by the dead-code elimination pass. */
   2040                /* fall out into the reconstruct-the-exit code. */
   2041                if (vex_control.iropt_verbosity > 0)
   2042                   /* really a misuse of vex_control.iropt_verbosity */
   2043                   vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
   2044             }
   2045          }
   2046          return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
   2047       }
   2048 
   2049    default:
   2050       vex_printf("\n"); ppIRStmt(st);
   2051       vpanic("subst_and_fold_Stmt");
   2052    }
   2053 }
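
/* Illustrative sketch, not part of the original file: a side exit
   whose guard temp is bound in 'env' to the constant False folds away
   to a no-op, as described in the Ist_Exit case above.  The guest
   address and temp number are arbitrary. */
static IRStmt* example_fold_exit ( void )
{
   IRExpr** env = LibVEX_Alloc(1 * sizeof(IRExpr*));
   IRStmt*  ex;
   env[0] = IRExpr_Const(IRConst_U1(False));
   ex = IRStmt_Exit(IRExpr_RdTmp(0), Ijk_Boring,
                    IRConst_U32(0x8048000));
   return subst_and_fold_Stmt(env, ex);   /* comes back as an Ist_NoOp */
}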
   2054 
   2055 
   2056 IRSB* cprop_BB ( IRSB* in )
   2057 {
   2058    Int      i;
   2059    IRSB*    out;
   2060    IRStmt*  st2;
   2061    Int      n_tmps = in->tyenv->types_used;
   2062    IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
   2063 
   2064    out = emptyIRSB();
   2065    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   2066 
    2067    /* Set up the env which travels forward.  This holds a
   2068       substitution, mapping IRTemps to atoms, that is, IRExprs which
   2069       are either IRTemps or IRConsts.  Thus, copy and constant
   2070       propagation is done.  The environment is to be applied as we
   2071       move along.  Keys are IRTemps.  Values are IRExpr*s.
   2072    */
   2073    for (i = 0; i < n_tmps; i++)
   2074       env[i] = NULL;
   2075 
   2076    /* For each original SSA-form stmt ... */
   2077    for (i = 0; i < in->stmts_used; i++) {
   2078 
   2079       /* First apply the substitution to the current stmt.  This
   2080          propagates in any constants and tmp-tmp assignments
    2081          accumulated prior to this point.  The subst_and_fold_Stmt
    2082          call also folds any constant expressions that result. */
   2083 
   2084       st2 = in->stmts[i];
   2085 
   2086       /* perhaps st2 is already a no-op? */
   2087       if (st2->tag == Ist_NoOp) continue;
   2088 
   2089       st2 = subst_and_fold_Stmt( env, st2 );
   2090 
   2091       /* If the statement has been folded into a no-op, forget it. */
   2092       if (st2->tag == Ist_NoOp) continue;
   2093 
   2094       /* Now consider what the stmt looks like.  If it's of the form
   2095          't = const' or 't1 = t2', add it to the running environment
   2096          and not to the output BB.  Otherwise, add it to the output
   2097          BB.  Note, we choose not to propagate const when const is an
   2098          F64i, so that F64i literals can be CSE'd later.  This helps
   2099          x86 floating point code generation. */
   2100 
   2101       if (st2->tag == Ist_WrTmp
   2102           && st2->Ist.WrTmp.data->tag == Iex_Const
   2103           && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
   2104          /* 't = const' -- add to env.
   2105              The pair (IRTemp, IRExpr*) is added. */
   2106          env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   2107       }
   2108       else
   2109       if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
   2110          /* 't1 = t2' -- add to env.
   2111              The pair (IRTemp, IRExpr*) is added. */
   2112          env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   2113       }
   2114       else {
   2115          /* Not interesting, copy st2 into the output block. */
   2116          addStmtToIRSB( out, st2 );
   2117       }
   2118    }
   2119 
   2120    out->next     = subst_Expr( env, in->next );
   2121    out->jumpkind = in->jumpkind;
   2122    return out;
   2123 }
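
/* Illustrative sketch, not part of the original file: a minimal use of
   cprop_BB on a hand-built flat block.  The bindings 't0 = 7' and
   't1 = Add32(t0,1)' end up only in the environment, so the output
   block has no statements and its 'next' field is the folded constant
   8.  Types and jump kind are arbitrary. */
static void example_cprop_BB ( void )
{
   IRSB*  bb = emptyIRSB();
   IRTemp t0 = newIRTemp(bb->tyenv, Ity_I32);
   IRTemp t1 = newIRTemp(bb->tyenv, Ity_I32);
   addStmtToIRSB(bb, IRStmt_WrTmp(t0, IRExpr_Const(IRConst_U32(7))));
   addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Binop(Iop_Add32,
                                         IRExpr_RdTmp(t0),
                                         IRExpr_Const(IRConst_U32(1)))));
   bb->next     = IRExpr_RdTmp(t1);
   bb->jumpkind = Ijk_Boring;
   bb = cprop_BB(bb);
   vassert(bb->next->tag == Iex_Const
           && bb->next->Iex.Const.con->Ico.U32 == 8);
}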
   2124 
   2125 
   2126 /*---------------------------------------------------------------*/
   2127 /*--- Dead code (t = E) removal                               ---*/
   2128 /*---------------------------------------------------------------*/
   2129 
   2130 /* As a side effect, also removes all code following an unconditional
   2131    side exit. */
   2132 
    2133 /* The 'set' carried around below is conceptually a map from IRTemp
    2134    to nothing -- really just operating as a set of IRTemps.
   2135 */
   2136 
   2137 inline
   2138 static void addUses_Temp ( Bool* set, IRTemp tmp )
   2139 {
   2140    set[(Int)tmp] = True;
   2141 }
   2142 
   2143 static void addUses_Expr ( Bool* set, IRExpr* e )
   2144 {
   2145    Int i;
   2146    switch (e->tag) {
   2147       case Iex_GetI:
   2148          addUses_Expr(set, e->Iex.GetI.ix);
   2149          return;
   2150       case Iex_Mux0X:
   2151          addUses_Expr(set, e->Iex.Mux0X.cond);
   2152          addUses_Expr(set, e->Iex.Mux0X.expr0);
   2153          addUses_Expr(set, e->Iex.Mux0X.exprX);
   2154          return;
   2155       case Iex_CCall:
   2156          for (i = 0; e->Iex.CCall.args[i]; i++)
   2157             addUses_Expr(set, e->Iex.CCall.args[i]);
   2158          return;
   2159       case Iex_Load:
   2160          addUses_Expr(set, e->Iex.Load.addr);
   2161          return;
   2162       case Iex_Qop:
   2163          addUses_Expr(set, e->Iex.Qop.arg1);
   2164          addUses_Expr(set, e->Iex.Qop.arg2);
   2165          addUses_Expr(set, e->Iex.Qop.arg3);
   2166          addUses_Expr(set, e->Iex.Qop.arg4);
   2167          return;
   2168       case Iex_Triop:
   2169          addUses_Expr(set, e->Iex.Triop.arg1);
   2170          addUses_Expr(set, e->Iex.Triop.arg2);
   2171          addUses_Expr(set, e->Iex.Triop.arg3);
   2172          return;
   2173       case Iex_Binop:
   2174          addUses_Expr(set, e->Iex.Binop.arg1);
   2175          addUses_Expr(set, e->Iex.Binop.arg2);
   2176          return;
   2177       case Iex_Unop:
   2178          addUses_Expr(set, e->Iex.Unop.arg);
   2179          return;
   2180       case Iex_RdTmp:
   2181          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2182          return;
   2183       case Iex_Const:
   2184       case Iex_Get:
   2185          return;
   2186       default:
   2187          vex_printf("\n");
   2188          ppIRExpr(e);
   2189          vpanic("addUses_Expr");
   2190    }
   2191 }
   2192 
   2193 static void addUses_Stmt ( Bool* set, IRStmt* st )
   2194 {
   2195    Int      i;
   2196    IRDirty* d;
   2197    IRCAS*   cas;
   2198    switch (st->tag) {
   2199       case Ist_AbiHint:
   2200          addUses_Expr(set, st->Ist.AbiHint.base);
   2201          addUses_Expr(set, st->Ist.AbiHint.nia);
   2202          return;
   2203       case Ist_PutI:
   2204          addUses_Expr(set, st->Ist.PutI.ix);
   2205          addUses_Expr(set, st->Ist.PutI.data);
   2206          return;
   2207       case Ist_WrTmp:
   2208          addUses_Expr(set, st->Ist.WrTmp.data);
   2209          return;
   2210       case Ist_Put:
   2211          addUses_Expr(set, st->Ist.Put.data);
   2212          return;
   2213       case Ist_Store:
   2214          addUses_Expr(set, st->Ist.Store.addr);
   2215          addUses_Expr(set, st->Ist.Store.data);
   2216          return;
   2217       case Ist_CAS:
   2218          cas = st->Ist.CAS.details;
   2219          addUses_Expr(set, cas->addr);
   2220          if (cas->expdHi)
   2221             addUses_Expr(set, cas->expdHi);
   2222          addUses_Expr(set, cas->expdLo);
   2223          if (cas->dataHi)
   2224             addUses_Expr(set, cas->dataHi);
   2225          addUses_Expr(set, cas->dataLo);
   2226          return;
   2227       case Ist_LLSC:
   2228          addUses_Expr(set, st->Ist.LLSC.addr);
   2229          if (st->Ist.LLSC.storedata)
   2230             addUses_Expr(set, st->Ist.LLSC.storedata);
   2231          return;
   2232       case Ist_Dirty:
   2233          d = st->Ist.Dirty.details;
   2234          if (d->mFx != Ifx_None)
   2235             addUses_Expr(set, d->mAddr);
   2236          addUses_Expr(set, d->guard);
   2237          for (i = 0; d->args[i] != NULL; i++)
   2238             addUses_Expr(set, d->args[i]);
   2239          return;
   2240       case Ist_NoOp:
   2241       case Ist_IMark:
   2242       case Ist_MBE:
   2243          return;
   2244       case Ist_Exit:
   2245          addUses_Expr(set, st->Ist.Exit.guard);
   2246          return;
   2247       default:
   2248          vex_printf("\n");
   2249          ppIRStmt(st);
   2250          vpanic("addUses_Stmt");
   2251    }
   2252 }
   2253 
   2254 
   2255 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   2256 static Bool isZeroU1 ( IRExpr* e )
   2257 {
   2258    return toBool( e->tag == Iex_Const
   2259                   && e->Iex.Const.con->tag == Ico_U1
   2260                   && e->Iex.Const.con->Ico.U1 == False );
   2261 }
   2262 
   2263 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   2264 static Bool isOneU1 ( IRExpr* e )
   2265 {
   2266    return toBool( e->tag == Iex_Const
   2267                   && e->Iex.Const.con->tag == Ico_U1
   2268                   && e->Iex.Const.con->Ico.U1 == True );
   2269 }
   2270 
   2271 
   2272 /* Note, this destructively modifies the given IRSB. */
   2273 
   2274 /* Scan backwards through statements, carrying a set of IRTemps which
   2275    are known to be used after the current point.  On encountering 't =
   2276    E', delete the binding if it is not used.  Otherwise, add any temp
   2277    uses to the set and keep on moving backwards.
   2278 
   2279    As an enhancement, the first (backwards) pass searches for IR exits
   2280    with always-taken conditions and notes the location of the earliest
   2281    one in the block.  If any such are found, a second pass copies the
   2282    exit destination and jump kind to the bb-end.  Then, the exit and
   2283    all statements following it are turned into no-ops.
   2284 */
   2285 
   2286 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
   2287 {
   2288    Int     i, i_unconditional_exit;
   2289    Int     n_tmps = bb->tyenv->types_used;
   2290    Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   2291    IRStmt* st;
   2292 
   2293    for (i = 0; i < n_tmps; i++)
   2294       set[i] = False;
   2295 
   2296    /* start off by recording IRTemp uses in the next field. */
   2297    addUses_Expr(set, bb->next);
   2298 
   2299    /* First pass */
   2300 
   2301    /* Work backwards through the stmts */
   2302    i_unconditional_exit = -1;
   2303    for (i = bb->stmts_used-1; i >= 0; i--) {
   2304       st = bb->stmts[i];
   2305       if (st->tag == Ist_NoOp)
   2306          continue;
   2307       /* take note of any unconditional exits */
   2308       if (st->tag == Ist_Exit
   2309           && isOneU1(st->Ist.Exit.guard))
   2310          i_unconditional_exit = i;
   2311       if (st->tag == Ist_WrTmp
   2312           && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
    2313          /* it's an IRTemp which never got used.  Delete it. */
   2314          if (DEBUG_IROPT) {
   2315             vex_printf("DEAD: ");
   2316             ppIRStmt(st);
   2317             vex_printf("\n");
   2318          }
   2319          bb->stmts[i] = IRStmt_NoOp();
   2320       }
   2321       else
   2322       if (st->tag == Ist_Dirty
   2323           && st->Ist.Dirty.details->guard
   2324           && isZeroU1(st->Ist.Dirty.details->guard)) {
   2325          /* This is a dirty helper which will never get called.
   2326             Delete it. */
   2327          bb->stmts[i] = IRStmt_NoOp();
   2328        }
    2329       }
    2330       else {
   2331          addUses_Stmt(set, st);
   2332       }
   2333    }
   2334 
   2335    /* Optional second pass: if any unconditional exits were found,
   2336       delete them and all following statements. */
   2337 
   2338    if (i_unconditional_exit != -1) {
   2339       if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
   2340                         i_unconditional_exit);
   2341       vassert(i_unconditional_exit >= 0
   2342               && i_unconditional_exit < bb->stmts_used);
   2343       bb->next
   2344          = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
   2345       bb->jumpkind
   2346          = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
   2347       for (i = i_unconditional_exit; i < bb->stmts_used; i++)
   2348          bb->stmts[i] = IRStmt_NoOp();
   2349    }
   2350 }
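
/* Illustrative sketch, not part of the original file: a hand-built
   block in which one binding is dead.  do_deadcode_BB turns the unused
   't_dead' binding into a no-op and keeps 't_used', since the block's
   'next' field reads it. */
static void example_deadcode_BB ( void )
{
   IRSB*  bb     = emptyIRSB();
   IRTemp t_dead = newIRTemp(bb->tyenv, Ity_I32);
   IRTemp t_used = newIRTemp(bb->tyenv, Ity_I32);
   addStmtToIRSB(bb, IRStmt_WrTmp(t_dead, IRExpr_Const(IRConst_U32(1))));
   addStmtToIRSB(bb, IRStmt_WrTmp(t_used, IRExpr_Const(IRConst_U32(2))));
   bb->next     = IRExpr_RdTmp(t_used);
   bb->jumpkind = Ijk_Boring;
   do_deadcode_BB(bb);
   vassert(bb->stmts[0]->tag == Ist_NoOp);
   vassert(bb->stmts[1]->tag == Ist_WrTmp);
}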
   2351 
   2352 
   2353 /*---------------------------------------------------------------*/
   2354 /*--- Specialisation of helper function calls, in             ---*/
   2355 /*--- collaboration with the front end                        ---*/
   2356 /*---------------------------------------------------------------*/
   2357 
   2358 static
   2359 IRSB* spec_helpers_BB(
   2360          IRSB* bb,
   2361          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
   2362       )
   2363 {
   2364    Int     i;
   2365    IRStmt* st;
   2366    IRExpr* ex;
   2367    Bool    any = False;
   2368 
   2369    for (i = bb->stmts_used-1; i >= 0; i--) {
   2370       st = bb->stmts[i];
   2371 
   2372       if (st->tag != Ist_WrTmp
   2373           || st->Ist.WrTmp.data->tag != Iex_CCall)
   2374          continue;
   2375 
   2376       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
   2377                           st->Ist.WrTmp.data->Iex.CCall.args,
   2378                           &bb->stmts[0], i );
   2379       if (!ex)
   2380         /* the front end can't think of a suitable replacement */
   2381         continue;
   2382 
   2383       /* We got something better.  Install it in the bb. */
   2384       any = True;
   2385       bb->stmts[i]
   2386          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
   2387 
   2388       if (0) {
   2389          vex_printf("SPEC: ");
   2390          ppIRExpr(st->Ist.WrTmp.data);
   2391          vex_printf("  -->  ");
   2392          ppIRExpr(ex);
   2393          vex_printf("\n");
   2394       }
   2395    }
   2396 
   2397    if (any)
   2398       bb = flatten_BB(bb);
   2399    return bb;
   2400 }
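
/* Illustrative sketch, not part of the original file: the shape of a
   front-end specialisation callback of the kind spec_helpers_BB above
   consumes.  The helper name "guest_always_zero" is purely
   hypothetical; real callbacks live in the guest front ends and
   pattern-match on their own helper names and argument forms. */
static IRExpr* example_specHelper ( HChar* function_name,
                                    IRExpr** args,
                                    IRStmt** precedingStmts,
                                    Int      n_precedingStmts )
{
   if (vex_streq(function_name, "guest_always_zero"))
      return IRExpr_Const(IRConst_U32(0));
   return NULL;   /* no replacement available */
}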
   2401 
   2402 
   2403 /*---------------------------------------------------------------*/
   2404 /*--- Determination of guest state aliasing relationships     ---*/
   2405 /*---------------------------------------------------------------*/
   2406 
   2407 /* These are helper functions for CSE and GetI/PutI transformations.
   2408 
   2409    Determine, to the extent possible, the relationship between two
   2410    guest state accesses.  The possible outcomes are:
   2411 
   2412    * Exact alias.  These two accesses denote precisely the same
   2413      piece of the guest state.
   2414 
   2415    * Definitely no alias.  These two accesses are guaranteed not to
   2416      overlap any part of the guest state.
   2417 
   2418    * Unknown -- if neither of the above can be established.
   2419 
   2420    If in doubt, return Unknown.  */
   2421 
   2422 typedef
   2423    enum { ExactAlias, NoAlias, UnknownAlias }
   2424    GSAliasing;
   2425 
   2426 
   2427 /* Produces the alias relation between an indexed guest
   2428    state access and a non-indexed access. */
   2429 
   2430 static
   2431 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   2432                                     Int offset2, IRType ty2 )
   2433 {
   2434    UInt minoff1, maxoff1, minoff2, maxoff2;
   2435 
   2436    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2437    minoff2 = offset2;
   2438    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   2439 
   2440    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2441       return NoAlias;
   2442 
   2443    /* Could probably do better here if required.  For the moment
   2444       however just claim not to know anything more. */
   2445    return UnknownAlias;
   2446 }
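
/* Illustrative sketch, not part of the original file: a hypothetical
   8-element I64 array at guest offset 400 spans offsets 400 .. 463, so
   it cannot overlap a 4-byte access at offset 500, and the relation is
   NoAlias.  The offsets and temp number are arbitrary. */
static GSAliasing example_aliasing_IC ( void )
{
   IRRegArray* descr = mkIRRegArray(400, Ity_I64, 8);
   return getAliasingRelation_IC(descr, IRExpr_RdTmp(1),
                                 500, Ity_I32);
}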
   2447 
   2448 
   2449 /* Produces the alias relation between two indexed guest state
   2450    accesses. */
   2451 
   2452 static
   2453 GSAliasing getAliasingRelation_II (
   2454               IRRegArray* descr1, IRExpr* ix1, Int bias1,
   2455               IRRegArray* descr2, IRExpr* ix2, Int bias2
   2456            )
   2457 {
   2458    UInt minoff1, maxoff1, minoff2, maxoff2;
   2459    Int  iters;
   2460 
   2461    /* First try hard to show they don't alias. */
   2462    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2463    getArrayBounds( descr2, &minoff2, &maxoff2 );
   2464    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2465       return NoAlias;
   2466 
   2467    /* So the two arrays at least partially overlap.  To get any
   2468       further we'll have to be sure that the descriptors are
   2469       identical. */
   2470    if (!eqIRRegArray(descr1, descr2))
   2471       return UnknownAlias;
   2472 
   2473    /* The descriptors are identical.  Now the only difference can be
   2474       in the index expressions.  If they cannot be shown to be
   2475       identical, we have to say we don't know what the aliasing
   2476       relation will be.  Now, since the IR is flattened, the index
   2477       expressions should be atoms -- either consts or tmps.  So that
   2478       makes the comparison simple. */
   2479    vassert(isIRAtom(ix1));
   2480    vassert(isIRAtom(ix2));
   2481    if (!eqIRAtom(ix1,ix2))
   2482       return UnknownAlias;
   2483 
   2484    /* Ok, the index expressions are identical.  So now the only way
   2485       they can be different is in the bias.  Normalise this
   2486       paranoidly, to reliably establish equality/non-equality. */
   2487 
    2488    /* So now we know that the two accesses index the same array
   2489       with the same base.  Are the offsets the same, modulo the
   2490       array size?  Do this paranoidly. */
   2491    vassert(descr1->nElems == descr2->nElems);
   2492    vassert(descr1->elemTy == descr2->elemTy);
   2493    vassert(descr1->base   == descr2->base);
   2494    iters = 0;
   2495    while (bias1 < 0 || bias2 < 0) {
   2496       bias1 += descr1->nElems;
   2497       bias2 += descr1->nElems;
   2498       iters++;
   2499       if (iters > 10)
   2500          vpanic("getAliasingRelation: iters");
   2501    }
   2502    vassert(bias1 >= 0 && bias2 >= 0);
   2503    bias1 %= descr1->nElems;
   2504    bias2 %= descr1->nElems;
   2505    vassert(bias1 >= 0 && bias1 < descr1->nElems);
   2506    vassert(bias2 >= 0 && bias2 < descr1->nElems);
   2507 
    2508    /* Finally, bias1 and bias2 are normalised into the range
    2509       0 .. descr1->nElems - 1.  And so we can establish
   2510       equality/non-equality. */
   2511 
   2512    return bias1==bias2 ? ExactAlias : NoAlias;
   2513 }
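
/* Illustrative sketch, not part of the original file: two indexed
   accesses to the same hypothetical 8-element I64 array, indexed by
   the same temp, with biases 3 and 11.  Since 11 mod 8 == 3 they name
   the same element, so the relation is ExactAlias. */
static GSAliasing example_aliasing_II ( void )
{
   IRRegArray* descr = mkIRRegArray(400, Ity_I64, 8);
   IRExpr*     ix    = IRExpr_RdTmp(7);
   return getAliasingRelation_II(descr, ix, 3,
                                 descr, ix, 11);
}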
   2514 
   2515 
   2516 /*---------------------------------------------------------------*/
   2517 /*--- Common Subexpression Elimination                        ---*/
   2518 /*---------------------------------------------------------------*/
   2519 
   2520 /* Expensive in time and space. */
   2521 
   2522 /* Uses two environments:
    2523    an IRTemp -> IRTemp mapping
   2524    a mapping from AvailExpr* to IRTemp
   2525 */
   2526 
   2527 typedef
   2528    struct {
   2529       enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
   2530       union {
   2531          /* unop(tmp) */
   2532          struct {
   2533             IROp   op;
   2534             IRTemp arg;
   2535          } Ut;
   2536          /* binop(tmp,tmp) */
   2537          struct {
   2538             IROp   op;
   2539             IRTemp arg1;
   2540             IRTemp arg2;
   2541          } Btt;
   2542          /* binop(tmp,const) */
   2543          struct {
   2544             IROp    op;
   2545             IRTemp  arg1;
   2546             IRConst con2;
   2547          } Btc;
   2548          /* binop(const,tmp) */
   2549          struct {
   2550             IROp    op;
   2551             IRConst con1;
   2552             IRTemp  arg2;
   2553          } Bct;
   2554          /* F64i-style const */
   2555          struct {
   2556             ULong f64i;
   2557          } Cf64i;
   2558          /* Mux0X(tmp,tmp,tmp) */
   2559          struct {
   2560             IRTemp co;
   2561             IRTemp e0;
   2562             IRTemp eX;
   2563          } Mttt;
   2564          /* GetI(descr,tmp,bias)*/
   2565          struct {
   2566             IRRegArray* descr;
   2567             IRTemp      ix;
   2568             Int         bias;
   2569          } GetIt;
   2570       } u;
   2571    }
   2572    AvailExpr;
   2573 
   2574 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
   2575 {
   2576    if (a1->tag != a2->tag)
   2577       return False;
   2578    switch (a1->tag) {
   2579       case Ut:
   2580          return toBool(
   2581                 a1->u.Ut.op == a2->u.Ut.op
   2582                 && a1->u.Ut.arg == a2->u.Ut.arg);
   2583       case Btt:
   2584          return toBool(
   2585                 a1->u.Btt.op == a2->u.Btt.op
   2586                 && a1->u.Btt.arg1 == a2->u.Btt.arg1
   2587                 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
   2588       case Btc:
   2589          return toBool(
   2590                 a1->u.Btc.op == a2->u.Btc.op
   2591                 && a1->u.Btc.arg1 == a2->u.Btc.arg1
   2592                 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
   2593       case Bct:
   2594          return toBool(
   2595                 a1->u.Bct.op == a2->u.Bct.op
   2596                 && a1->u.Bct.arg2 == a2->u.Bct.arg2
   2597                 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
   2598       case Cf64i:
   2599          return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
   2600       case Mttt:
   2601          return toBool(a1->u.Mttt.co == a2->u.Mttt.co
   2602                        && a1->u.Mttt.e0 == a2->u.Mttt.e0
   2603                        && a1->u.Mttt.eX == a2->u.Mttt.eX);
   2604       case GetIt:
   2605          return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
   2606                        && a1->u.GetIt.ix == a2->u.GetIt.ix
   2607                        && a1->u.GetIt.bias == a2->u.GetIt.bias);
   2608       default: vpanic("eq_AvailExpr");
   2609    }
   2610 }
   2611 
   2612 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
   2613 {
   2614    IRConst* con;
   2615    switch (ae->tag) {
   2616       case Ut:
   2617          return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
   2618       case Btt:
   2619          return IRExpr_Binop( ae->u.Btt.op,
   2620                               IRExpr_RdTmp(ae->u.Btt.arg1),
   2621                               IRExpr_RdTmp(ae->u.Btt.arg2) );
   2622       case Btc:
   2623          con = LibVEX_Alloc(sizeof(IRConst));
   2624          *con = ae->u.Btc.con2;
   2625          return IRExpr_Binop( ae->u.Btc.op,
   2626                               IRExpr_RdTmp(ae->u.Btc.arg1),
   2627                               IRExpr_Const(con) );
   2628       case Bct:
   2629          con = LibVEX_Alloc(sizeof(IRConst));
   2630          *con = ae->u.Bct.con1;
   2631          return IRExpr_Binop( ae->u.Bct.op,
   2632                               IRExpr_Const(con),
   2633                               IRExpr_RdTmp(ae->u.Bct.arg2) );
   2634       case Cf64i:
   2635          return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
   2636       case Mttt:
   2637          return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
   2638                              IRExpr_RdTmp(ae->u.Mttt.e0),
   2639                              IRExpr_RdTmp(ae->u.Mttt.eX));
   2640       case GetIt:
   2641          return IRExpr_GetI(ae->u.GetIt.descr,
   2642                             IRExpr_RdTmp(ae->u.GetIt.ix),
   2643                             ae->u.GetIt.bias);
   2644       default:
   2645          vpanic("availExpr_to_IRExpr");
   2646    }
   2647 }
   2648 
   2649 inline
   2650 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   2651 {
   2652    HWord res;
   2653    /* env :: IRTemp -> IRTemp */
   2654    if (lookupHHW( env, &res, (HWord)tmp ))
   2655       return (IRTemp)res;
   2656    else
   2657       return tmp;
   2658 }
   2659 
   2660 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   2661 {
   2662    /* env :: IRTemp -> IRTemp */
   2663    switch (ae->tag) {
   2664       case Ut:
   2665          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   2666          break;
   2667       case Btt:
   2668          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   2669          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   2670          break;
   2671       case Btc:
   2672          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   2673          break;
   2674       case Bct:
   2675          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   2676          break;
   2677       case Cf64i:
   2678          break;
   2679       case Mttt:
   2680          ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
   2681          ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
   2682          ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
   2683          break;
   2684       case GetIt:
   2685          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   2686          break;
   2687       default:
   2688          vpanic("subst_AvailExpr");
   2689    }
   2690 }
   2691 
   2692 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
   2693 {
   2694    AvailExpr* ae;
   2695 
   2696    if (e->tag == Iex_Unop
   2697        && e->Iex.Unop.arg->tag == Iex_RdTmp) {
   2698       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2699       ae->tag      = Ut;
   2700       ae->u.Ut.op  = e->Iex.Unop.op;
   2701       ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
   2702       return ae;
   2703    }
   2704 
   2705    if (e->tag == Iex_Binop
   2706        && e->Iex.Binop.arg1->tag == Iex_RdTmp
   2707        && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   2708       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2709       ae->tag        = Btt;
   2710       ae->u.Btt.op   = e->Iex.Binop.op;
   2711       ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2712       ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   2713       return ae;
   2714    }
   2715 
   2716    if (e->tag == Iex_Binop
   2717        && e->Iex.Binop.arg1->tag == Iex_RdTmp
   2718        && e->Iex.Binop.arg2->tag == Iex_Const) {
   2719       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2720       ae->tag        = Btc;
   2721       ae->u.Btc.op   = e->Iex.Binop.op;
   2722       ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2723       ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
   2724       return ae;
   2725    }
   2726 
   2727    if (e->tag == Iex_Binop
   2728        && e->Iex.Binop.arg1->tag == Iex_Const
   2729        && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   2730       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2731       ae->tag        = Bct;
   2732       ae->u.Bct.op   = e->Iex.Binop.op;
   2733       ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   2734       ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
   2735       return ae;
   2736    }
   2737 
   2738    if (e->tag == Iex_Const
   2739        && e->Iex.Const.con->tag == Ico_F64i) {
   2740       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2741       ae->tag          = Cf64i;
   2742       ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
   2743       return ae;
   2744    }
   2745 
   2746    if (e->tag == Iex_Mux0X
   2747        && e->Iex.Mux0X.cond->tag == Iex_RdTmp
   2748        && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
   2749        && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
   2750       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2751       ae->tag       = Mttt;
   2752       ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
   2753       ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
   2754       ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
   2755       return ae;
   2756    }
   2757 
   2758    if (e->tag == Iex_GetI
   2759        && e->Iex.GetI.ix->tag == Iex_RdTmp) {
   2760       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2761       ae->tag           = GetIt;
   2762       ae->u.GetIt.descr = e->Iex.GetI.descr;
   2763       ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
   2764       ae->u.GetIt.bias  = e->Iex.GetI.bias;
   2765       return ae;
   2766    }
   2767 
   2768    return NULL;
   2769 }
   2770 
   2771 
   2772 /* The BB is modified in-place.  Returns True if any changes were
   2773    made. */
   2774 
   2775 static Bool do_cse_BB ( IRSB* bb )
   2776 {
   2777    Int        i, j, paranoia;
   2778    IRTemp     t, q;
   2779    IRStmt*    st;
   2780    AvailExpr* eprime;
   2781    AvailExpr* ae;
   2782    Bool       invalidate;
   2783    Bool       anyDone = False;
   2784 
   2785    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   2786    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
   2787 
   2788    vassert(sizeof(IRTemp) <= sizeof(HWord));
   2789 
   2790    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
   2791 
   2792    /* Iterate forwards over the stmts.
   2793       On seeing "t = E", where E is one of the AvailExpr forms:
   2794          let E' = apply tenv substitution to E
   2795          search aenv for E'
   2796             if a mapping E' -> q is found,
   2797                replace this stmt by "t = q"
   2798                and add binding t -> q to tenv
   2799             else
   2800                add binding E' -> t to aenv
   2801                replace this stmt by "t = E'"
   2802 
   2803       Other statements are only interesting to the extent that they
   2804       might invalidate some of the expressions in aenv.  So there is
   2805       an invalidate-bindings check for each statement seen.
   2806    */
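           /* A small illustrative example (not from any particular
              guest front end).  Given the flat input

                 t5 = Add32(t1,t2)
                 t6 = Add32(t1,t2)
                 t7 = Add32(t6,0x4:I32)

              the first stmt installs the binding Btt(Add32,t1,t2) -> t5
              in aenv.  The second stmt matches that binding, so it is
              rewritten as "t6 = t5" and t6 -> t5 is added to tenv.  For
              the third stmt, subst_AvailExpr first maps t6 to t5, giving
              Btc(Add32,t5,0x4); no aenv binding exists for that, so the
              stmt becomes "t7 = Add32(t5,0x4:I32)" and a new binding is
              added. */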
   2807    for (i = 0; i < bb->stmts_used; i++) {
   2808       st = bb->stmts[i];
   2809 
   2810       /* ------ BEGIN invalidate aenv bindings ------ */
   2811       /* This is critical: remove from aenv any E' -> .. bindings
   2812          which might be invalidated by this statement.  The only
   2813          vulnerable kind of binding is the GetI kind.
   2814             Dirty call - dump (paranoia level -> 2)
   2815             Store      - dump (ditto)
   2816             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
   2817          Uses getAliasingRelation_IC and getAliasingRelation_II
   2818          to do the no-overlap assessments needed for Put/PutI.
   2819       */
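              /* For instance (illustrative only): suppose aenv holds a
                 binding for a GetI over some guest-state array.  A
                 following Store, Dirty, CAS, LLSC or MBE stmt dumps that
                 binding unconditionally (paranoia 2).  A Put or PutI
                 dumps it only if getAliasingRelation_IC/_II cannot show
                 that the written guest-state range and the array's range
                 are disjoint (paranoia 1).  WrTmp, NoOp, IMark, AbiHint
                 and Exit leave aenv untouched (paranoia 0). */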
   2820       switch (st->tag) {
   2821          case Ist_Dirty: case Ist_Store: case Ist_MBE:
   2822          case Ist_CAS: case Ist_LLSC:
   2823             paranoia = 2; break;
   2824          case Ist_Put: case Ist_PutI:
   2825             paranoia = 1; break;
   2826          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
   2827          case Ist_WrTmp: case Ist_Exit:
   2828             paranoia = 0; break;
   2829          default:
   2830             vpanic("do_cse_BB(1)");
   2831       }
   2832 
   2833       if (paranoia > 0) {
   2834          for (j = 0; j < aenv->used; j++) {
   2835             if (!aenv->inuse[j])
   2836                continue;
   2837             ae = (AvailExpr*)aenv->key[j];
   2838             if (ae->tag != GetIt)
   2839                continue;
   2840             invalidate = False;
   2841             if (paranoia >= 2) {
   2842                invalidate = True;
   2843             } else {
   2844                vassert(paranoia == 1);
   2845                if (st->tag == Ist_Put) {
   2846                   if (getAliasingRelation_IC(
   2847                          ae->u.GetIt.descr,
   2848                          IRExpr_RdTmp(ae->u.GetIt.ix),
   2849                          st->Ist.Put.offset,
   2850                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
   2851                       ) != NoAlias)
   2852                      invalidate = True;
   2853                }
   2854                else
   2855                if (st->tag == Ist_PutI) {
   2856                   if (getAliasingRelation_II(
   2857                          ae->u.GetIt.descr,
   2858                          IRExpr_RdTmp(ae->u.GetIt.ix),
   2859                          ae->u.GetIt.bias,
   2860                          st->Ist.PutI.descr,
   2861                          st->Ist.PutI.ix,
   2862                          st->Ist.PutI.bias
   2863                       ) != NoAlias)
   2864                      invalidate = True;
   2865                }
   2866                else
   2867                   vpanic("do_cse_BB(2)");
   2868             }
   2869 
   2870             if (invalidate) {
   2871                aenv->inuse[j] = False;
   2872                aenv->key[j]   = (HWord)NULL;  /* be sure */
   2873             }
   2874          } /* for j */
   2875       } /* paranoia > 0 */
   2876 
   2877       /* ------ END invalidate aenv bindings ------ */
   2878 
   2879       /* ignore not-interestings */
   2880       if (st->tag != Ist_WrTmp)
   2881          continue;
   2882 
   2883       t = st->Ist.WrTmp.tmp;
   2884       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
   2885       /* ignore if not of AvailExpr form */
   2886       if (!eprime)
   2887          continue;
   2888 
   2889       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
   2890 
   2891       /* apply tenv */
   2892       subst_AvailExpr( tenv, eprime );
   2893 
   2894       /* search aenv for eprime, unfortunately the hard way */
   2895       for (j = 0; j < aenv->used; j++)
   2896          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
   2897             break;
   2898 
   2899       if (j < aenv->used) {
   2900          /* A binding E' -> q was found.  Replace stmt by "t = q" and
   2901             note the t->q binding in tenv. */
   2902          /* (this is the core of the CSE action) */
   2903          q = (IRTemp)aenv->val[j];
   2904          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
   2905          addToHHW( tenv, (HWord)t, (HWord)q );
   2906          anyDone = True;
   2907       } else {
   2908          /* No binding was found, so instead we add E' -> t to our
   2909             collection of available expressions, replace this stmt
   2910             with "t = E'", and move on. */
   2911          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
   2912          addToHHW( aenv, (HWord)eprime, (HWord)t );
   2913       }
   2914    }
   2915 
   2916    /*
   2917    ppIRSB(bb);
   2918    sanityCheckIRSB(bb, Ity_I32);
   2919    vex_printf("\n\n");
   2920    */
   2921    return anyDone;
   2922 }
   2923 
   2924 
   2925 /*---------------------------------------------------------------*/
   2926 /*--- Add32/Sub32 chain collapsing                            ---*/
   2927 /*---------------------------------------------------------------*/
   2928 
   2929 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   2930 
   2931 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   2932    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   2933    root node is Add32, not Sub32. */
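        /* For example (illustrative): for "Sub32(t9,0x10:I32)" this sets
           *tmp = t9 and *i32 = -16; for "Add32(t9,0x10:I32)" it sets
           *i32 = 16. */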
   2934 
   2935 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   2936 {
   2937    if (e->tag != Iex_Binop)
   2938       return False;
   2939    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   2940       return False;
   2941    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   2942       return False;
   2943    if (e->Iex.Binop.arg2->tag != Iex_Const)
   2944       return False;
   2945    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2946    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   2947    if (e->Iex.Binop.op == Iop_Sub32)
   2948       *i32 = -*i32;
   2949    return True;
   2950 }
   2951 
   2952 
   2953 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   2954    other tmp2.  Scan backwards from the specified start point -- an
   2955    optimisation. */
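        /* A sketch of the intended effect (illustrative IR only; t1 is
           assumed to be defined earlier, e.g. by a GET):

              t2 = Add32(t1,0x4:I32)
              t3 = Add32(t2,0x8:I32)
              t4 = Sub32(t3,0x4:I32)

           Scanning backwards from t4's definition, t3 resolves to t2 + 8
           and then to t1 + 12, so collapseChain reports (tmp2,i32) =
           (t1,12) for t3.  collapse_AddSub_chains_BB below then folds in
           t4's own -4 and rewrites the last stmt as
           "t4 = Add32(t1,0x8:I32)". */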
   2956 
   2957 static Bool collapseChain ( IRSB* bb, Int startHere,
   2958                             IRTemp tmp,
   2959                             IRTemp* tmp2, Int* i32 )
   2960 {
   2961    Int     j, ii;
   2962    IRTemp  vv;
   2963    IRStmt* st;
   2964    IRExpr* e;
   2965 
   2966    /* the (var, con) pair contains the current 'representation' for
   2967       'tmp'.  We start with 'tmp + 0'.  */
   2968    IRTemp var = tmp;
   2969    Int    con = 0;
   2970 
   2971    /* Scan backwards to see if tmp can be replaced by some other tmp
   2972      +/- a constant. */
   2973    for (j = startHere; j >= 0; j--) {
   2974       st = bb->stmts[j];
   2975       if (st->tag != Ist_WrTmp)
   2976          continue;
   2977       if (st->Ist.WrTmp.tmp != var)
   2978          continue;
   2979       e = st->Ist.WrTmp.data;
   2980       if (!isAdd32OrSub32(e, &vv, &ii))
   2981          break;
   2982       var = vv;
   2983       con += ii;
   2984    }
   2985    if (j == -1)
   2986       /* no earlier binding for var .. ill-formed IR */
   2987       vpanic("collapseChain");
   2988 
   2989    /* so, did we find anything interesting? */
   2990    if (var == tmp)
   2991       return False; /* no .. */
   2992 
   2993    *tmp2 = var;
   2994    *i32  = con;
   2995    return True;
   2996 }
   2997 
   2998 
   2999 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   3000 
   3001 static void collapse_AddSub_chains_BB ( IRSB* bb )
   3002 {
   3003    IRStmt *st;
   3004    IRTemp var, var2;
   3005    Int    i, con, con2;
   3006 
   3007    for (i = bb->stmts_used-1; i >= 0; i--) {
   3008       st = bb->stmts[i];
   3009       if (st->tag == Ist_NoOp)
   3010          continue;
   3011 
   3012       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   3013 
   3014       if (st->tag == Ist_WrTmp
   3015           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   3016 
   3017          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   3018             Find out if var can be expressed as var2 + con2. */
   3019          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   3020             if (DEBUG_IROPT) {
   3021                vex_printf("replacing1 ");
   3022                ppIRStmt(st);
   3023                vex_printf(" with ");
   3024             }
   3025             con2 += con;
   3026             bb->stmts[i]
   3027                = IRStmt_WrTmp(
   3028                     st->Ist.WrTmp.tmp,
   3029                     (con2 >= 0)
   3030                       ? IRExpr_Binop(Iop_Add32,
   3031                                      IRExpr_RdTmp(var2),
   3032                                      IRExpr_Const(IRConst_U32(con2)))
   3033                       : IRExpr_Binop(Iop_Sub32,
   3034                                      IRExpr_RdTmp(var2),
   3035                                      IRExpr_Const(IRConst_U32(-con2)))
   3036                  );
   3037             if (DEBUG_IROPT) {
   3038                ppIRStmt(bb->stmts[i]);
   3039                vex_printf("\n");
   3040             }
   3041          }
   3042 
   3043          continue;
   3044       }
   3045 
   3046       /* Try to collapse 't1 = GetI[t2, con]'. */
   3047 
   3048       if (st->tag == Ist_WrTmp
   3049           && st->Ist.WrTmp.data->tag == Iex_GetI
   3050           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   3051           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   3052                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   3053          if (DEBUG_IROPT) {
   3054             vex_printf("replacing3 ");
   3055             ppIRStmt(st);
   3056             vex_printf(" with ");
   3057          }
   3058          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   3059          bb->stmts[i]
   3060             = IRStmt_WrTmp(
   3061                  st->Ist.WrTmp.tmp,
   3062                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   3063                              IRExpr_RdTmp(var2),
   3064                              con2));
   3065          if (DEBUG_IROPT) {
   3066             ppIRStmt(bb->stmts[i]);
   3067             vex_printf("\n");
   3068          }
   3069          continue;
   3070       }
   3071 
   3072       /* Perhaps st is PutI[t, con] ? */
   3073 
   3074       if (st->tag == Ist_PutI
   3075           && st->Ist.PutI.ix->tag == Iex_RdTmp
   3076           && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
   3077                                &var2, &con2)) {
   3078          if (DEBUG_IROPT) {
   3079             vex_printf("replacing2 ");
   3080             ppIRStmt(st);
   3081             vex_printf(" with ");
   3082          }
   3083          con2 += st->Ist.PutI.bias;
   3084          bb->stmts[i]
   3085            = IRStmt_PutI(st->Ist.PutI.descr,
   3086                          IRExpr_RdTmp(var2),
   3087                          con2,
   3088                          st->Ist.PutI.data);
   3089          if (DEBUG_IROPT) {
   3090             ppIRStmt(bb->stmts[i]);
   3091             vex_printf("\n");
   3092          }
   3093          continue;
   3094       }
   3095 
   3096    } /* for */
   3097 }
   3098 
   3099 
   3100 /*---------------------------------------------------------------*/
   3101 /*--- PutI/GetI transformations                               ---*/
   3102 /*---------------------------------------------------------------*/
   3103 
   3104 /* Given the parts (descr, ix, bias) for a GetI, scan backwards from
   3105    the given starting point to find, if any, a PutI which writes
   3106    exactly the same piece of guest state, and so return the expression
   3107    that the PutI writes.  This is the core of PutI-GetI forwarding. */
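        /* A sketch of the transformation this enables (the descriptor,
           types and temp names are illustrative only):

              PUTI(136:8xF64)[t3,0] = t11
              ... stmts which findPutI can prove don't interfere ...
              t17 = GETI(136:8xF64)[t3,0]

           The PutI and the GetI use the same descriptor, the same index
           tmp and the same bias, so getAliasingRelation_II reports
           ExactAlias and findPutI returns t11; the caller
           (do_redundant_GetI_elimination) then rewrites the last stmt
           as "t17 = t11". */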
   3108 
   3109 static
   3110 IRExpr* findPutI ( IRSB* bb, Int startHere,
   3111                    IRRegArray* descrG, IRExpr* ixG, Int biasG )
   3112 {
   3113    Int        j;
   3114    IRStmt*    st;
   3115    GSAliasing relation;
   3116 
   3117    if (0) {
   3118       vex_printf("\nfindPutI ");
   3119       ppIRRegArray(descrG);
   3120       vex_printf(" ");
   3121       ppIRExpr(ixG);
   3122       vex_printf(" %d\n", biasG);
   3123    }
   3124 
   3125    /* Scan backwards in bb from startHere to find a suitable PutI
   3126       binding for (descrG, ixG, biasG), if any. */
   3127 
   3128    for (j = startHere; j >= 0; j--) {
   3129       st = bb->stmts[j];
   3130       if (st->tag == Ist_NoOp)
   3131          continue;
   3132 
   3133       if (st->tag == Ist_Put) {
   3134          /* Non-indexed Put.  This can't give a binding, but we do
   3135             need to check it doesn't invalidate the search by
   3136             overlapping any part of the indexed guest state. */
   3137 
   3138          relation
   3139             = getAliasingRelation_IC(
   3140                  descrG, ixG,
   3141                  st->Ist.Put.offset,
   3142                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
   3143 
   3144          if (relation == NoAlias) {
   3145             /* we're OK; keep going */
   3146             continue;
   3147          } else {
   3148             /* relation == UnknownAlias || relation == ExactAlias */
   3149             /* If this assertion fails, we've found a Put which writes
   3150                an area of guest state which is read by a GetI.  That is
   3151                unlikely (although not per se wrong). */
   3152             vassert(relation != ExactAlias);
   3153             /* This Put potentially writes guest state that the GetI
   3154                reads; we must fail. */
   3155             return NULL;
   3156          }
   3157       }
   3158 
   3159       if (st->tag == Ist_PutI) {
   3160 
   3161          relation = getAliasingRelation_II(
   3162                        descrG, ixG, biasG,
   3163                        st->Ist.PutI.descr,
   3164                        st->Ist.PutI.ix,
   3165                        st->Ist.PutI.bias
   3166                     );
   3167 
   3168          if (relation == NoAlias) {
   3169             /* This PutI definitely doesn't overlap.  Ignore it and
   3170                keep going. */
   3171             continue; /* the for j loop */
   3172          }
   3173 
   3174          if (relation == UnknownAlias) {
   3175             /* We don't know whether this PutI writes to the same guest
   3176                state as the GetI reads, so we have to give up. */
   3177             return NULL;
   3178          }
   3179 
   3180          /* Otherwise, we've found what we're looking for.  */
   3181          vassert(relation == ExactAlias);
   3182          return st->Ist.PutI.data;
   3183 
   3184       } /* if (st->tag == Ist_PutI) */
   3185 
   3186       if (st->tag == Ist_Dirty) {
   3187          /* Be conservative.  If the dirty call has any guest effects at
   3188             all, give up.  We could do better -- only give up if there
   3189             are any guest writes/modifies. */
   3190          if (st->Ist.Dirty.details->nFxState > 0)
   3191             return NULL;
   3192       }
   3193 
   3194    } /* for */
   3195 
   3196    /* No valid replacement was found. */
   3197    return NULL;
   3198 }
   3199 
   3200 
   3201 
   3202 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   3203    that it writes exactly the same piece of guest state) ?  Safe
   3204    answer: False. */
   3205 
   3206 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   3207 {
   3208    vassert(pi->tag == Ist_PutI);
   3209    if (s2->tag != Ist_PutI)
   3210       return False;
   3211 
   3212    return toBool(
   3213           getAliasingRelation_II(
   3214              pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
   3215              s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
   3216           )
   3217           == ExactAlias
   3218           );
   3219 }
   3220 
   3221 
   3222 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   3223    overlap it?  Safe answer: True.  Note, we could do a lot better
   3224    than this if needed. */
   3225 
   3226 static
   3227 Bool guestAccessWhichMightOverlapPutI (
   3228         IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
   3229      )
   3230 {
   3231    GSAliasing relation;
   3232    UInt       minoffP, maxoffP;
   3233 
   3234    vassert(pi->tag == Ist_PutI);
   3235    getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
   3236    switch (s2->tag) {
   3237 
   3238       case Ist_NoOp:
   3239       case Ist_IMark:
   3240          return False;
   3241 
   3242       case Ist_MBE:
   3243       case Ist_AbiHint:
   3244          /* just be paranoid ... these should be rare. */
   3245          return True;
   3246 
   3247       case Ist_CAS:
   3248          /* This is unbelievably lame, but it's probably not
   3249             significant from a performance point of view.  Really, a
   3250             CAS is a load-store op, so it should be safe to say False.
   3251             However .. */
   3252          return True;
   3253 
   3254       case Ist_Dirty:
   3255          /* If the dirty call has any guest effects at all, give up.
   3256             Probably could do better. */
   3257          if (s2->Ist.Dirty.details->nFxState > 0)
   3258             return True;
   3259          return False;
   3260 
   3261       case Ist_Put:
   3262          vassert(isIRAtom(s2->Ist.Put.data));
   3263          relation
   3264             = getAliasingRelation_IC(
   3265                  pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3266                  s2->Ist.Put.offset,
   3267                  typeOfIRExpr(tyenv,s2->Ist.Put.data)
   3268               );
   3269          goto have_relation;
   3270 
   3271       case Ist_PutI:
   3272          vassert(isIRAtom(s2->Ist.PutI.ix));
   3273          vassert(isIRAtom(s2->Ist.PutI.data));
   3274          relation
   3275             = getAliasingRelation_II(
   3276                  pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
   3277                  s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
   3278               );
   3279          goto have_relation;
   3280 
   3281       case Ist_WrTmp:
   3282          if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
   3283             relation
   3284                = getAliasingRelation_II(
   3285                     pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3286                                         pi->Ist.PutI.bias,
   3287                     s2->Ist.WrTmp.data->Iex.GetI.descr,
   3288                     s2->Ist.WrTmp.data->Iex.GetI.ix,
   3289                     s2->Ist.WrTmp.data->Iex.GetI.bias
   3290                  );
   3291             goto have_relation;
   3292          }
   3293          if (s2->Ist.WrTmp.data->tag == Iex_Get) {
   3294             relation
   3295                = getAliasingRelation_IC(
   3296                     pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3297                     s2->Ist.WrTmp.data->Iex.Get.offset,
   3298                     s2->Ist.WrTmp.data->Iex.Get.ty
   3299                  );
   3300             goto have_relation;
   3301          }
   3302          return False;
   3303 
   3304       case Ist_Store:
   3305          vassert(isIRAtom(s2->Ist.Store.addr));
   3306          vassert(isIRAtom(s2->Ist.Store.data));
   3307          return False;
   3308 
   3309       default:
   3310          vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
   3311          vpanic("guestAccessWhichMightOverlapPutI");
   3312    }
   3313 
   3314   have_relation:
   3315    if (relation == NoAlias)
   3316       return False;
   3317    else
   3318       return True; /* ExactAlias or UnknownAlias */
   3319 }
   3320 
   3321 
   3322 
   3323 /* ---------- PutI/GetI transformations main functions --------- */
   3324 
   3325 /* Remove redundant GetIs, to the extent that they can be detected.
   3326    bb is modified in-place. */
   3327 
   3328 static
   3329 void do_redundant_GetI_elimination ( IRSB* bb )
   3330 {
   3331    Int     i;
   3332    IRStmt* st;
   3333 
   3334    for (i = bb->stmts_used-1; i >= 0; i--) {
   3335       st = bb->stmts[i];
   3336       if (st->tag == Ist_NoOp)
   3337          continue;
   3338 
   3339       if (st->tag == Ist_WrTmp
   3340           && st->Ist.WrTmp.data->tag == Iex_GetI
   3341           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   3342          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   3343          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   3344          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   3345          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   3346          if (replacement
   3347              && isIRAtom(replacement)
   3348              /* Make sure we're doing a type-safe transformation! */
   3349              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   3350             if (DEBUG_IROPT) {
   3351                vex_printf("rGI:  ");
   3352                ppIRExpr(st->Ist.WrTmp.data);
   3353                vex_printf(" -> ");
   3354                ppIRExpr(replacement);
   3355                vex_printf("\n");
   3356             }
   3357             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   3358          }
   3359       }
   3360    }
   3361 
   3362 }
   3363 
   3364 
   3365 /* Remove redundant PutIs, to the extent that they can be detected.
   3366    bb is modified in-place. */
   3367 
   3368 static
   3369 void do_redundant_PutI_elimination ( IRSB* bb )
   3370 {
   3371    Int    i, j;
   3372    Bool   delete;
   3373    IRStmt *st, *stj;
   3374 
   3375    for (i = 0; i < bb->stmts_used; i++) {
   3376       st = bb->stmts[i];
   3377       if (st->tag != Ist_PutI)
   3378          continue;
   3379       /* Ok, search forwards from here to see if we can find another
   3380          PutI which makes this one redundant, while dodging various
   3381          hazards.  Search forwards:
   3382          * If conditional exit, give up (because anything after that
   3383            does not postdominate this put).
   3384          * If a Get which might overlap, give up (because this PutI is
   3385            not necessarily dead).
   3386          * If a Put which is identical, stop with success.
   3387          * If a Put which might overlap, but is not identical, give up.
   3388          * If a dirty helper call which might write guest state, give up.
   3389          * If a Put which definitely doesn't overlap, or any other
   3390            kind of stmt, continue.
   3391       */
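              /* Illustrative sketch of a removable PutI (the descriptor
                 and temp names are made up):

                    PUTI(136:8xF64)[t3,0] = t7
                    t8 = Add32(t1,t2)
                    PUTI(136:8xF64)[t3,0] = t9

                 Nothing between the two PutIs reads the slot or exits the
                 block, and the second PutI writes exactly the same
                 element, so the first one is replaced by an
                 IRStmt_NoOp. */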
   3392       delete = False;
   3393       for (j = i+1; j < bb->stmts_used; j++) {
   3394          stj = bb->stmts[j];
   3395          if (stj->tag == Ist_NoOp)
   3396             continue;
   3397          if (identicalPutIs(st, stj)) {
   3398             /* success! */
   3399             delete = True;
   3400             break;
   3401          }
   3402          if (stj->tag == Ist_Exit)
   3403             /* give up */
   3404             break;
   3405          if (stj->tag == Ist_Dirty)
   3406             /* give up; could do better here */
   3407             break;
   3408          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   3409             /* give up */
   3410             break;
   3411       }
   3412 
   3413       if (delete) {
   3414          if (DEBUG_IROPT) {
   3415             vex_printf("rPI:  ");
   3416             ppIRStmt(st);
   3417             vex_printf("\n");
   3418          }
   3419          bb->stmts[i] = IRStmt_NoOp();
   3420       }
   3421 
   3422    }
   3423 }
   3424 
   3425 
   3426 /*---------------------------------------------------------------*/
   3427 /*--- Loop unrolling                                          ---*/
   3428 /*---------------------------------------------------------------*/
   3429 
   3430 /* Adjust all tmp values (names) in e by delta.  e is destructively
   3431    modified. */
   3432 
   3433 static void deltaIRExpr ( IRExpr* e, Int delta )
   3434 {
   3435    Int i;
   3436    switch (e->tag) {
   3437       case Iex_RdTmp:
   3438          e->Iex.RdTmp.tmp += delta;
   3439          break;
   3440       case Iex_Get:
   3441       case Iex_Const:
   3442          break;
   3443       case Iex_GetI:
   3444          deltaIRExpr(e->Iex.GetI.ix, delta);
   3445          break;
   3446       case Iex_Qop:
   3447          deltaIRExpr(e->Iex.Qop.arg1, delta);
   3448          deltaIRExpr(e->Iex.Qop.arg2, delta);
   3449          deltaIRExpr(e->Iex.Qop.arg3, delta);
   3450          deltaIRExpr(e->Iex.Qop.arg4, delta);
   3451          break;
   3452       case Iex_Triop:
   3453          deltaIRExpr(e->Iex.Triop.arg1, delta);
   3454          deltaIRExpr(e->Iex.Triop.arg2, delta);
   3455          deltaIRExpr(e->Iex.Triop.arg3, delta);
   3456          break;
   3457       case Iex_Binop:
   3458          deltaIRExpr(e->Iex.Binop.arg1, delta);
   3459          deltaIRExpr(e->Iex.Binop.arg2, delta);
   3460          break;
   3461       case Iex_Unop:
   3462          deltaIRExpr(e->Iex.Unop.arg, delta);
   3463          break;
   3464       case Iex_Load:
   3465          deltaIRExpr(e->Iex.Load.addr, delta);
   3466          break;
   3467       case Iex_CCall:
   3468          for (i = 0; e->Iex.CCall.args[i]; i++)
   3469             deltaIRExpr(e->Iex.CCall.args[i], delta);
   3470          break;
   3471       case Iex_Mux0X:
   3472          deltaIRExpr(e->Iex.Mux0X.cond, delta);
   3473          deltaIRExpr(e->Iex.Mux0X.expr0, delta);
   3474          deltaIRExpr(e->Iex.Mux0X.exprX, delta);
   3475          break;
   3476       default:
   3477          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3478          vpanic("deltaIRExpr");
   3479    }
   3480 }
   3481 
   3482 /* Adjust all tmp values (names) in st by delta.  st is destructively
   3483    modified. */
   3484 
   3485 static void deltaIRStmt ( IRStmt* st, Int delta )
   3486 {
   3487    Int      i;
   3488    IRDirty* d;
   3489    switch (st->tag) {
   3490       case Ist_NoOp:
   3491       case Ist_IMark:
   3492       case Ist_MBE:
   3493          break;
   3494       case Ist_AbiHint:
   3495          deltaIRExpr(st->Ist.AbiHint.base, delta);
   3496          deltaIRExpr(st->Ist.AbiHint.nia, delta);
   3497          break;
   3498       case Ist_Put:
   3499          deltaIRExpr(st->Ist.Put.data, delta);
   3500          break;
   3501       case Ist_PutI:
   3502          deltaIRExpr(st->Ist.PutI.ix, delta);
   3503          deltaIRExpr(st->Ist.PutI.data, delta);
   3504          break;
   3505       case Ist_WrTmp:
   3506          st->Ist.WrTmp.tmp += delta;
   3507          deltaIRExpr(st->Ist.WrTmp.data, delta);
   3508          break;
   3509       case Ist_Exit:
   3510          deltaIRExpr(st->Ist.Exit.guard, delta);
   3511          break;
   3512       case Ist_Store:
   3513          deltaIRExpr(st->Ist.Store.addr, delta);
   3514          deltaIRExpr(st->Ist.Store.data, delta);
   3515          break;
   3516       case Ist_CAS:
   3517          if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
   3518             st->Ist.CAS.details->oldHi += delta;
   3519          st->Ist.CAS.details->oldLo += delta;
   3520          deltaIRExpr(st->Ist.CAS.details->addr, delta);
   3521          if (st->Ist.CAS.details->expdHi)
   3522             deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
   3523          deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
   3524          if (st->Ist.CAS.details->dataHi)
   3525             deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
   3526          deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
   3527          break;
   3528       case Ist_LLSC:
   3529          st->Ist.LLSC.result += delta;
   3530          deltaIRExpr(st->Ist.LLSC.addr, delta);
   3531          if (st->Ist.LLSC.storedata)
   3532             deltaIRExpr(st->Ist.LLSC.storedata, delta);
   3533          break;
   3534       case Ist_Dirty:
   3535          d = st->Ist.Dirty.details;
   3536          deltaIRExpr(d->guard, delta);
   3537          for (i = 0; d->args[i]; i++)
   3538             deltaIRExpr(d->args[i], delta);
   3539          if (d->tmp != IRTemp_INVALID)
   3540             d->tmp += delta;
   3541          if (d->mAddr)
   3542             deltaIRExpr(d->mAddr, delta);
   3543          break;
   3544       default:
   3545          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   3546          vpanic("deltaIRStmt");
   3547    }
   3548 }
   3549 
   3550 
   3551 /* If possible, return a loop-unrolled version of bb0.  The original
   3552    is not modified (a copy is unrolled).  If not possible, return NULL.  */
   3553 
   3554 /* The two schemas considered are:
   3555 
   3556      X: BODY; goto X
   3557 
   3558      which unrolls to (eg)  X: BODY;BODY; goto X
   3559 
   3560    and
   3561 
   3562        X: BODY; if (c) goto X; goto Y
   3563    which trivially transforms to
   3564        X: BODY; if (!c) goto Y; goto X;
   3565    so it falls in the scope of the first case.
   3566 
   3567    X and Y must be literal (guest) addresses.
   3568 */
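        /* A sketch of how idiom #2 is handled (guest addresses X and Y
           and the guard temp are illustrative): a block at guest address
           X whose last stmt is "if (t13) goto X" and whose fall-through
           target is Y is first deep-copied; on the copy the guard is
           wrapped in Not1 and the two destinations are swapped, giving
           "if (Not1(t13)) goto Y" with fall-through X, i.e. the
           self-loop of idiom #1.  The whole body is then repeatedly
           doubled (once, twice or three times, for unroll factors 2, 4
           and 8), with the IRTemps in each appended copy renamed upwards
           by deltaIRStmt so they don't clash with the existing ones. */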
   3569 
   3570 static Int calc_unroll_factor( IRSB* bb )
   3571 {
   3572    Int n_stmts, i;
   3573 
   3574    n_stmts = 0;
   3575    for (i = 0; i < bb->stmts_used; i++) {
   3576       if (bb->stmts[i]->tag != Ist_NoOp)
   3577          n_stmts++;
   3578    }
   3579 
   3580    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   3581       if (vex_control.iropt_verbosity > 0)
   3582          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   3583                     n_stmts, 8* n_stmts);
   3584       return 8;
   3585    }
   3586    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   3587       if (vex_control.iropt_verbosity > 0)
   3588          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   3589                     n_stmts, 4* n_stmts);
   3590       return 4;
   3591    }
   3592 
   3593    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   3594       if (vex_control.iropt_verbosity > 0)
   3595          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   3596                     n_stmts, 2* n_stmts);
   3597       return 2;
   3598    }
   3599 
   3600    if (vex_control.iropt_verbosity > 0)
   3601       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   3602 
   3603    return 1;
   3604 }
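
        /* For example, assuming iropt_unroll_thresh is, say, 120 (the
           real value comes from the client-supplied VexControl): a loop
           body of 14 non-NoOp stmts would be unrolled 8x (14 <= 120/8),
           a body of 25 stmts 4x, a body of 55 stmts 2x, and a body of 70
           stmts not at all. */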
   3605 
   3606 
   3607 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
   3608 {
   3609    Int      i, j, jmax, n_vars;
   3610    Bool     xxx_known;
   3611    Addr64   xxx_value, yyy_value;
   3612    IRExpr*  udst;
   3613    IRStmt*  st;
   3614    IRConst* con;
   3615    IRSB     *bb1, *bb2;
   3616    Int      unroll_factor;
   3617 
   3618    if (vex_control.iropt_unroll_thresh <= 0)
   3619       return NULL;
   3620 
   3621    /* First off, figure out if we can unroll this loop.  Do this
   3622       without modifying bb0. */
   3623 
   3624    if (bb0->jumpkind != Ijk_Boring)
   3625       return NULL;
   3626 
   3627    xxx_known = False;
   3628    xxx_value = 0;
   3629 
   3630    /* Extract the next-guest address.  If it isn't a literal, we
   3631       have to give up. */
   3632 
   3633    udst = bb0->next;
   3634    if (udst->tag == Iex_Const
   3635        && (udst->Iex.Const.con->tag == Ico_U32
   3636            || udst->Iex.Const.con->tag == Ico_U64)) {
   3637       /* The BB ends in a jump to a literal location. */
   3638       xxx_known = True;
   3639       xxx_value = udst->Iex.Const.con->tag == Ico_U64
   3640                     ?  udst->Iex.Const.con->Ico.U64
   3641                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
   3642    }
   3643 
   3644    if (!xxx_known)
   3645       return NULL;
   3646 
   3647    /* Now we know the BB ends in a jump to a literal location.  If
   3648       it's a jump to itself (viz, idiom #1), move directly to the
   3649       unrolling stage, first cloning the bb so the original isn't
   3650       modified. */
   3651    if (xxx_value == my_addr) {
   3652       unroll_factor = calc_unroll_factor( bb0 );
   3653       if (unroll_factor < 2)
   3654          return NULL;
   3655       bb1 = deepCopyIRSB( bb0 );
   3656       bb0 = NULL;
   3657       udst = NULL; /* is now invalid */
   3658       goto do_unroll;
   3659    }
   3660 
   3661    /* Search for the second idiomatic form:
   3662         X: BODY; if (c) goto X; goto Y
   3663       We know Y, but need to establish that the last stmt
   3664       is 'if (c) goto X'.
   3665    */
   3666    yyy_value = xxx_value;
   3667    for (i = bb0->stmts_used-1; i >= 0; i--)
   3668       if (bb0->stmts[i])
   3669          break;
   3670 
   3671    if (i < 0)
   3672       return NULL; /* block with no stmts.  Strange. */
   3673 
   3674    st = bb0->stmts[i];
   3675    if (st->tag != Ist_Exit)
   3676       return NULL;
   3677    if (st->Ist.Exit.jk != Ijk_Boring)
   3678       return NULL;
   3679 
   3680    con = st->Ist.Exit.dst;
   3681    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   3682 
   3683    xxx_value = con->tag == Ico_U64
   3684                   ? st->Ist.Exit.dst->Ico.U64
   3685                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
   3686 
   3687    /* If this assertion fails, we have some kind of type error. */
   3688    vassert(con->tag == udst->Iex.Const.con->tag);
   3689 
   3690    if (xxx_value != my_addr)
   3691       /* We didn't find either idiom.  Give up. */
   3692       return NULL;
   3693 
   3694    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
   3695       yyy values (which makes it look like idiom #1), and go into
   3696       unrolling proper.  This means finding (again) the last stmt, in
   3697       the copied BB. */
   3698 
   3699    unroll_factor = calc_unroll_factor( bb0 );
   3700    if (unroll_factor < 2)
   3701       return NULL;
   3702 
   3703    bb1 = deepCopyIRSB( bb0 );
   3704    bb0 = NULL;
   3705    udst = NULL; /* is now invalid */
   3706    for (i = bb1->stmts_used-1; i >= 0; i--)
   3707       if (bb1->stmts[i])
   3708          break;
   3709 
   3710    /* The next bunch of assertions should be true since we already
   3711       found and checked the last stmt in the original bb. */
   3712 
   3713    vassert(i >= 0);
   3714 
   3715    st = bb1->stmts[i];
   3716    vassert(st->tag == Ist_Exit);
   3717 
   3718    con = st->Ist.Exit.dst;
   3719    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   3720 
   3721    udst = bb1->next;
   3722    vassert(udst->tag == Iex_Const);
   3723    vassert(udst->Iex.Const.con->tag == Ico_U32
   3724           || udst->Iex.Const.con->tag == Ico_U64);
   3725    vassert(con->tag == udst->Iex.Const.con->tag);
   3726 
   3727    /* switch the xxx and yyy fields around */
   3728    if (con->tag == Ico_U64) {
   3729       udst->Iex.Const.con->Ico.U64 = xxx_value;
   3730       con->Ico.U64 = yyy_value;
   3731    } else {
   3732       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
   3733       con->Ico.U32 = (UInt)yyy_value;
   3734    }
   3735 
   3736    /* negate the test condition */
   3737    st->Ist.Exit.guard
   3738       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
   3739 
   3740    /* --- The unroller proper.  Both idioms have by now --- */
   3741    /* --- been converted to idiom 1. --- */
   3742 
   3743   do_unroll:
   3744 
   3745    vassert(unroll_factor == 2
   3746            || unroll_factor == 4
   3747            || unroll_factor == 8);
   3748 
   3749    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   3750    for (j = 1; j <= jmax; j++) {
   3751 
   3752       n_vars = bb1->tyenv->types_used;
   3753 
   3754       bb2 = deepCopyIRSB(bb1);
   3755       for (i = 0; i < n_vars; i++)
   3756          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
   3757 
   3758       for (i = 0; i < bb2->stmts_used; i++) {
   3759          /* deltaIRStmt destructively modifies the stmt, but
   3760             that's OK since bb2 is a complete fresh copy of bb1. */
   3761          deltaIRStmt(bb2->stmts[i], n_vars);
   3762          addStmtToIRSB(bb1, bb2->stmts[i]);
   3763       }
   3764    }
   3765 
   3766    if (DEBUG_IROPT) {
   3767       vex_printf("\nUNROLLED (%llx)\n", my_addr);
   3768       ppIRSB(bb1);
   3769       vex_printf("\n");
   3770    }
   3771 
   3772    /* Flattening; sigh.  The unroller succeeds in breaking flatness
   3773       by negating the test condition.  This should be fixed properly.
   3774       For the moment use this shotgun approach.  */
   3775    return flatten_BB(bb1);
   3776 }
   3777 
   3778 
   3779 /*---------------------------------------------------------------*/
   3780 /*--- The tree builder                                        ---*/
   3781 /*---------------------------------------------------------------*/
   3782 
   3783 /* This isn't part of IR optimisation.  Really it's a pass done prior
   3784    to instruction selection, which improves the code that the
   3785    instruction selector can produce. */
   3786 
   3787 /* --- The 'tmp' environment is the central data structure here --- */
   3788 
   3789 /* The number of outstanding bindings we're prepared to track.
   3790    The number of times the env becomes full and we have to dump
   3791    the oldest binding (hence reducing code quality) falls very
   3792    rapidly as the env size increases.  8 gives reasonable performance
   3793    under most circumstances. */
   3794 #define A_NENV 10
   3795 
   3796 /* bindee == NULL   ===  slot is not in use
   3797    bindee != NULL   ===  slot is in use
   3798 */
   3799 typedef
   3800    struct {
   3801       IRTemp  binder;
   3802       IRExpr* bindee;
   3803       Bool    doesLoad;
   3804       Bool    doesGet;
   3805    }
   3806    ATmpInfo;
   3807 
   3808 __attribute__((unused))
   3809 static void ppAEnv ( ATmpInfo* env )
   3810 {
   3811    Int i;
   3812    for (i = 0; i < A_NENV; i++) {
   3813       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   3814       if (env[i].bindee)
   3815          ppIRExpr(env[i].bindee);
   3816       else
   3817          vex_printf("(null)");
   3818       vex_printf("\n");
   3819    }
   3820 }
   3821 
   3822 /* --- Tree-traversal fns --- */
   3823 
   3824 /* Traverse an expr, and detect if any part of it reads memory or does
   3825    a Get.  Be careful ... this really controls how much the
   3826    tree-builder can reorder the code, so getting it right is critical.
   3827 */
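        /* For example (illustrative): for the expression
           "Add32(LDle:I32(t1),GET:I32(16))" this sets both *doesLoad
           (because of the load) and *doesGet (because of the Get); for
           "Add32(t1,0x4:I32)" it sets neither. */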
   3828 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
   3829 {
   3830    Int i;
   3831    switch (e->tag) {
   3832       case Iex_CCall:
   3833          for (i = 0; e->Iex.CCall.args[i]; i++)
   3834             setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
   3835          return;
   3836       case Iex_Mux0X:
   3837          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
   3838          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
   3839          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
   3840          return;
   3841       case Iex_Qop:
   3842          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
   3843          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
   3844          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
   3845          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
   3846          return;
   3847       case Iex_Triop:
   3848          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
   3849          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
   3850          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
   3851          return;
   3852       case Iex_Binop:
   3853          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
   3854          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
   3855          return;
   3856       case Iex_Unop:
   3857          setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
   3858          return;
   3859       case Iex_Load:
   3860          *doesLoad = True;
   3861          setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
   3862          return;
   3863       case Iex_Get:
   3864          *doesGet = True;
   3865          return;
   3866       case Iex_GetI:
   3867          *doesGet = True;
   3868          setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
   3869          return;
   3870       case Iex_RdTmp:
   3871       case Iex_Const:
   3872          return;
   3873       default:
   3874          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3875          vpanic("setHints_Expr");
   3876    }
   3877 }
   3878 
   3879 
   3880 /* Add a binding to the front of the env and slide all the rest
   3881    backwards.  It should be the case that the last slot is free. */
   3882 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   3883 {
   3884    Int i;
   3885    vassert(env[A_NENV-1].bindee == NULL);
   3886    for (i = A_NENV-1; i >= 1; i--)
   3887       env[i] = env[i-1];
   3888    env[0].binder   = binder;
   3889    env[0].bindee   = bindee;
   3890    env[0].doesLoad = False; /* filled in later */
   3891    env[0].doesGet  = False; /* filled in later */
   3892 }
   3893 
   3894 /* Given uses :: array of UShort, indexed by IRTemp
   3895    Add the use-occurrences of temps in this expression
   3896    to the counts in 'uses'.
   3897 */
   3898 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   3899 {
   3900    Int i;
   3901 
   3902    switch (e->tag) {
   3903 
   3904       case Iex_RdTmp: /* the only interesting case */
   3905          uses[e->Iex.RdTmp.tmp]++;
   3906          return;
   3907 
   3908       case Iex_Mux0X:
   3909          aoccCount_Expr(uses, e->Iex.Mux0X.cond);
   3910          aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
   3911          aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
   3912          return;
   3913 
   3914       case Iex_Qop:
   3915          aoccCount_Expr(uses, e->Iex.Qop.arg1);
   3916          aoccCount_Expr(uses, e->Iex.Qop.arg2);
   3917          aoccCount_Expr(uses, e->Iex.Qop.arg3);
   3918          aoccCount_Expr(uses, e->Iex.Qop.arg4);
   3919          return;
   3920 
   3921       case Iex_Triop:
   3922          aoccCount_Expr(uses, e->Iex.Triop.arg1);
   3923          aoccCount_Expr(uses, e->Iex.Triop.arg2);
   3924          aoccCount_Expr(uses, e->Iex.Triop.arg3);
   3925          return;
   3926 
   3927       case Iex_Binop:
   3928          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   3929          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   3930          return;
   3931 
   3932       case Iex_Unop:
   3933          aoccCount_Expr(uses, e->Iex.Unop.arg);
   3934          return;
   3935 
   3936       case Iex_Load:
   3937          aoccCount_Expr(uses, e->Iex.Load.addr);
   3938          return;
   3939 
   3940       case Iex_CCall:
   3941          for (i = 0; e->Iex.CCall.args[i]; i++)
   3942             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   3943          return;
   3944 
   3945       case Iex_GetI:
   3946          aoccCount_Expr(uses, e->Iex.GetI.ix);
   3947          return;
   3948 
   3949       case Iex_Const:
   3950       case Iex_Get:
   3951          return;
   3952 
   3953       default:
   3954          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3955          vpanic("aoccCount_Expr");
   3956    }
   3957 }
   3958 
   3959 
   3960 /* Given uses :: array of UShort, indexed by IRTemp
   3961    Add the use-occurrences of temps in this statement
   3962    to the counts in 'uses'.
   3963 */
   3964 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   3965 {
   3966    Int      i;
   3967    IRDirty* d;
   3968    IRCAS*   cas;
   3969    switch (st->tag) {
   3970       case Ist_AbiHint:
   3971          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   3972          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   3973          return;
   3974       case Ist_WrTmp:
   3975          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   3976          return;
   3977       case Ist_Put:
   3978          aoccCount_Expr(uses, st->Ist.Put.data);
   3979          return;
   3980       case Ist_PutI:
   3981          aoccCount_Expr(uses, st->Ist.PutI.ix);
   3982          aoccCount_Expr(uses, st->Ist.PutI.data);
   3983          return;
   3984       case Ist_Store:
   3985          aoccCount_Expr(uses, st->Ist.Store.addr);
   3986          aoccCount_Expr(uses, st->Ist.Store.data);
   3987          return;
   3988       case Ist_CAS:
   3989          cas = st->Ist.CAS.details;
   3990          aoccCount_Expr(uses, cas->addr);
   3991          if (cas->expdHi)
   3992             aoccCount_Expr(uses, cas->expdHi);
   3993          aoccCount_Expr(uses, cas->expdLo);
   3994          if (cas->dataHi)
   3995             aoccCount_Expr(uses, cas->dataHi);
   3996          aoccCount_Expr(uses, cas->dataLo);
   3997          return;
   3998       case Ist_LLSC:
   3999          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   4000          if (st->Ist.LLSC.storedata)
   4001             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   4002          return;
   4003       case Ist_Dirty:
   4004          d = st->Ist.Dirty.details;
   4005          if (d->mFx != Ifx_None)
   4006             aoccCount_Expr(uses, d->mAddr);
   4007          aoccCount_Expr(uses, d->guard);
   4008          for (i = 0; d->args[i]; i++)
   4009             aoccCount_Expr(uses, d->args[i]);
   4010          return;
   4011       case Ist_NoOp:
   4012       case Ist_IMark:
   4013       case Ist_MBE:
   4014          return;
   4015       case Ist_Exit:
   4016          aoccCount_Expr(uses, st->Ist.Exit.guard);
   4017          return;
   4018       default:
   4019          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4020          vpanic("aoccCount_Stmt");
   4021    }
   4022 }
   4023 
   4024 /* Look up a binding for tmp in the env.  If found, return the bound
   4025    expression, and set the env's binding to NULL so it is marked as
   4026    used.  If not found, return NULL. */
   4027 
   4028 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   4029 {
   4030    Int i;
   4031    for (i = 0; i < A_NENV; i++) {
   4032       if (env[i].binder == tmp && env[i].bindee != NULL) {
   4033          IRExpr* bindee = env[i].bindee;
   4034          env[i].bindee = NULL;
   4035          return bindee;
   4036       }
   4037    }
   4038    return NULL;
   4039 }
   4040 
   4041 /* Traverse e, looking for temps.  For each observed temp, see if env
   4042    contains a binding for the temp, and if so return the bound value.
   4043    The env has the property that any binding it holds is
   4044    'single-shot', so once a binding is used, it is marked as no longer
   4045    available, by setting its .bindee field to NULL. */
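        /* For instance (illustrative): if the env binds
           t3 -> LDle:I32(t2), the first RdTmp(t3) met during
           substitution is replaced by the load expression and the slot
           is cleared, so the memory access can never be duplicated by
           being substituted into two different use sites. */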
   4046 
   4047 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   4048    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   4049 }
   4050 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   4051    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   4052 }
   4053 
   4054 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   4055 {
   4056    switch (op) {
   4057    case Iop_Or32:
   4058       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   4059       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   4060          return IRExpr_Unop( Iop_CmpwNEZ32,
   4061                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   4062                                                      a2->Iex.Unop.arg ) );
   4063       break;
   4064 
   4065    case Iop_CmpNE32:
   4066       /* Since X has type Ity_I1 we can simplify:
   4067          CmpNE32(1Uto32(X),0) ==> X */
   4068       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   4069          return a1->Iex.Unop.arg;
   4070       break;
   4071 
   4072    default:
   4073       break;
   4074    }
   4075    /* no reduction rule applies */
   4076    return IRExpr_Binop( op, a1, a2 );
   4077 }
   4078 
   4079 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   4080 {
   4081    switch (op) {
   4082    case Iop_CmpwNEZ64:
   4083       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   4084       if (is_Binop(aa, Iop_Or64)
   4085           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   4086          return fold_IRExpr_Unop(
   4087                    Iop_CmpwNEZ64,
   4088                    IRExpr_Binop(Iop_Or64,
   4089                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   4090                                 aa->Iex.Binop.arg2));
   4091       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   4092       if (is_Binop(aa, Iop_Or64)
   4093           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   4094          return fold_IRExpr_Unop(
   4095                    Iop_CmpwNEZ64,
   4096                    IRExpr_Binop(Iop_Or64,
   4097                                 aa->Iex.Binop.arg1,
   4098                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   4099       break;
   4100    case Iop_CmpNEZ64:
   4101       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   4102       if (is_Unop(aa, Iop_Left64))
   4103          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   4104       break;
   4105    case Iop_CmpwNEZ32:
   4106       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   4107       if (is_Unop(aa, Iop_CmpwNEZ32))
   4108          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   4109       break;
   4110    case Iop_CmpNEZ32:
   4111       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   4112       if (is_Unop(aa, Iop_Left32))
   4113          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   4114       /* CmpNEZ32( 1Uto32(X) ) --> X */
   4115       if (is_Unop(aa, Iop_1Uto32))
   4116          return aa->Iex.Unop.arg;
   4117       break;
   4118    case Iop_CmpNEZ8:
   4119       /* CmpNEZ8( 1Uto8(X) ) --> X */
   4120       if (is_Unop(aa, Iop_1Uto8))
   4121          return aa->Iex.Unop.arg;
   4122       break;
   4123    case Iop_Left32:
   4124       /* Left32( Left32(x) ) --> Left32(x) */
   4125       if (is_Unop(aa, Iop_Left32))
   4126          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   4127       break;
   4128    case Iop_32to1:
   4129       /* 32to1( 1Uto32 ( x ) ) --> x */
   4130       if (is_Unop(aa, Iop_1Uto32))
   4131          return aa->Iex.Unop.arg;
   4132       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   4133       if (is_Unop(aa, Iop_CmpwNEZ32))
   4134          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   4135       break;
   4136    case Iop_64to1:
   4137       /* 64to1( 1Uto64 ( x ) ) --> x */
   4138       if (is_Unop(aa, Iop_1Uto64))
   4139          return aa->Iex.Unop.arg;
   4140       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   4141       if (is_Unop(aa, Iop_CmpwNEZ64))
   4142          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   4143       break;
   4144    case Iop_64to32:
   4145       /* 64to32( 32Uto64 ( x )) --> x */
   4146       if (is_Unop(aa, Iop_32Uto64))
   4147          return aa->Iex.Unop.arg;
   4148       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   4149       if (is_Unop(aa, Iop_8Uto64))
   4150          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   4151       break;
   4152 
   4153    case Iop_32Uto64:
   4154       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   4155       if (is_Unop(aa, Iop_8Uto32))
   4156          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   4157       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   4158       if (is_Unop(aa, Iop_16Uto32))
   4159          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   4160       break;
   4161 
   4162    case Iop_1Sto32:
   4163       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   4164       if (is_Unop(aa, Iop_CmpNEZ8)
   4165           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   4166           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   4167           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   4168                      Iop_CmpNEZ32)) {
   4169          return IRExpr_Unop( Iop_CmpwNEZ32,
   4170                              aa->Iex.Unop.arg->Iex.Unop.arg
   4171                                ->Iex.Unop.arg->Iex.Unop.arg);
   4172       }
   4173       break;
   4174 
   4175 
   4176    default:
   4177       break;
   4178    }
   4179    /* no reduction rule applies */
   4180    return IRExpr_Unop( op, aa );
   4181 }
   4182 
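/* The cases above are structural peepholes on unary-op trees: match a
   fixed shape, rebuild a smaller tree, and fall back to plain
   reconstruction when nothing matches.  Below is a compiled-out,
   minimal sketch of the same pattern using toy types invented purely
   for illustration (this is not VEX IR). */
#if 0
typedef enum { DemoOp_Not, DemoOp_Var } DemoTag;

typedef struct DemoExpr_ {
   DemoTag            tag;
   struct DemoExpr_*  arg;   /* valid when tag == DemoOp_Not */
   int                var;   /* valid when tag == DemoOp_Var */
} DemoExpr;

/* Structural fold: Not(Not(x)) --> x.  If no rule applies, return
   the tree unchanged, mirroring the "no reduction rule applies"
   fall-through above. */
static DemoExpr* demo_fold_Not ( DemoExpr* e )
{
   if (e->tag == DemoOp_Not && e->arg->tag == DemoOp_Not)
      return e->arg->arg;
   return e;
}
#endif
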
   4183 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   4184 {
   4185    IRExpr*  e2;
   4186    IRExpr** args2;
   4187    Int      i;
   4188 
   4189    switch (e->tag) {
   4190 
   4191       case Iex_CCall:
   4192          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   4193          for (i = 0; args2[i]; i++)
   4194             args2[i] = atbSubst_Expr(env,args2[i]);
   4195          return IRExpr_CCall(
   4196                    e->Iex.CCall.cee,
   4197                    e->Iex.CCall.retty,
   4198                    args2
   4199                 );
   4200       case Iex_RdTmp:
   4201          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   4202          return e2 ? e2 : e;
   4203       case Iex_Mux0X:
   4204          return IRExpr_Mux0X(
   4205                    atbSubst_Expr(env, e->Iex.Mux0X.cond),
   4206                    atbSubst_Expr(env, e->Iex.Mux0X.expr0),
   4207                    atbSubst_Expr(env, e->Iex.Mux0X.exprX)
   4208                 );
   4209       case Iex_Qop:
   4210          return IRExpr_Qop(
   4211                    e->Iex.Qop.op,
   4212                    atbSubst_Expr(env, e->Iex.Qop.arg1),
   4213                    atbSubst_Expr(env, e->Iex.Qop.arg2),
   4214                    atbSubst_Expr(env, e->Iex.Qop.arg3),
   4215                    atbSubst_Expr(env, e->Iex.Qop.arg4)
   4216                 );
   4217       case Iex_Triop:
   4218          return IRExpr_Triop(
   4219                    e->Iex.Triop.op,
   4220                    atbSubst_Expr(env, e->Iex.Triop.arg1),
   4221                    atbSubst_Expr(env, e->Iex.Triop.arg2),
   4222                    atbSubst_Expr(env, e->Iex.Triop.arg3)
   4223                 );
   4224       case Iex_Binop:
   4225          return fold_IRExpr_Binop(
   4226                    e->Iex.Binop.op,
   4227                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   4228                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   4229                 );
   4230       case Iex_Unop:
   4231          return fold_IRExpr_Unop(
   4232                    e->Iex.Unop.op,
   4233                    atbSubst_Expr(env, e->Iex.Unop.arg)
   4234                 );
   4235       case Iex_Load:
   4236          return IRExpr_Load(
   4237                    e->Iex.Load.end,
   4238                    e->Iex.Load.ty,
   4239                    atbSubst_Expr(env, e->Iex.Load.addr)
   4240                 );
   4241       case Iex_GetI:
   4242          return IRExpr_GetI(
   4243                    e->Iex.GetI.descr,
   4244                    atbSubst_Expr(env, e->Iex.GetI.ix),
   4245                    e->Iex.GetI.bias
   4246                 );
   4247       case Iex_Const:
   4248       case Iex_Get:
   4249          return e;
   4250       default:
   4251          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4252          vpanic("atbSubst_Expr");
   4253    }
   4254 }
   4255 
   4256 /* Same deal as atbSubst_Expr, except for stmts. */
   4257 
   4258 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
   4259 {
   4260    Int     i;
   4261    IRDirty *d, *d2;
   4262    IRCAS   *cas, *cas2;
   4263    switch (st->tag) {
   4264       case Ist_AbiHint:
   4265          return IRStmt_AbiHint(
   4266                    atbSubst_Expr(env, st->Ist.AbiHint.base),
   4267                    st->Ist.AbiHint.len,
   4268                    atbSubst_Expr(env, st->Ist.AbiHint.nia)
   4269                 );
   4270       case Ist_Store:
   4271          return IRStmt_Store(
   4272                    st->Ist.Store.end,
   4273                    atbSubst_Expr(env, st->Ist.Store.addr),
   4274                    atbSubst_Expr(env, st->Ist.Store.data)
   4275                 );
   4276       case Ist_WrTmp:
   4277          return IRStmt_WrTmp(
   4278                    st->Ist.WrTmp.tmp,
   4279                    atbSubst_Expr(env, st->Ist.WrTmp.data)
   4280                 );
   4281       case Ist_Put:
   4282          return IRStmt_Put(
   4283                    st->Ist.Put.offset,
   4284                    atbSubst_Expr(env, st->Ist.Put.data)
   4285                 );
   4286       case Ist_PutI:
   4287          return IRStmt_PutI(
   4288                    st->Ist.PutI.descr,
   4289                    atbSubst_Expr(env, st->Ist.PutI.ix),
   4290                    st->Ist.PutI.bias,
   4291                    atbSubst_Expr(env, st->Ist.PutI.data)
   4292                 );
   4293 
   4294       case Ist_Exit:
   4295          return IRStmt_Exit(
   4296                    atbSubst_Expr(env, st->Ist.Exit.guard),
   4297                    st->Ist.Exit.jk,
   4298                    st->Ist.Exit.dst
   4299                 );
   4300       case Ist_IMark:
   4301          return IRStmt_IMark(st->Ist.IMark.addr,
   4302                              st->Ist.IMark.len,
   4303                              st->Ist.IMark.delta);
   4304       case Ist_NoOp:
   4305          return IRStmt_NoOp();
   4306       case Ist_MBE:
   4307          return IRStmt_MBE(st->Ist.MBE.event);
   4308       case Ist_CAS:
   4309          cas  = st->Ist.CAS.details;
   4310          cas2 = mkIRCAS(
   4311                    cas->oldHi, cas->oldLo, cas->end,
   4312                    atbSubst_Expr(env, cas->addr),
   4313                    cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
   4314                    atbSubst_Expr(env, cas->expdLo),
   4315                    cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
   4316                    atbSubst_Expr(env, cas->dataLo)
   4317                 );
   4318          return IRStmt_CAS(cas2);
   4319       case Ist_LLSC:
   4320          return IRStmt_LLSC(
   4321                    st->Ist.LLSC.end,
   4322                    st->Ist.LLSC.result,
   4323                    atbSubst_Expr(env, st->Ist.LLSC.addr),
   4324                    st->Ist.LLSC.storedata
   4325                       ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
   4326                 );
   4327       case Ist_Dirty:
   4328          d  = st->Ist.Dirty.details;
   4329          d2 = emptyIRDirty();
   4330          *d2 = *d;
   4331          if (d2->mFx != Ifx_None)
   4332             d2->mAddr = atbSubst_Expr(env, d2->mAddr);
   4333          d2->guard = atbSubst_Expr(env, d2->guard);
   4334          for (i = 0; d2->args[i]; i++)
   4335             d2->args[i] = atbSubst_Expr(env, d2->args[i]);
   4336          return IRStmt_Dirty(d2);
   4337       default:
   4338          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4339          vpanic("atbSubst_Stmt");
   4340    }
   4341 }
   4342 
   4343 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
   4344 {
   4345    Int      i, j, k, m;
   4346    Bool     stmtPuts, stmtStores, invalidateMe;
   4347    IRStmt*  st;
   4348    IRStmt*  st2;
   4349    ATmpInfo env[A_NENV];
   4350 
   4351    Int       n_tmps = bb->tyenv->types_used;
   4352    UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));
   4353 
   4354    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
   4355       temp.  Also count occurrences in the bb->next field. */
   4356 
   4357    for (i = 0; i < n_tmps; i++)
   4358       uses[i] = 0;
   4359 
   4360    for (i = 0; i < bb->stmts_used; i++) {
   4361       st = bb->stmts[i];
   4362       if (st->tag == Ist_NoOp)
   4363          continue;
   4364       aoccCount_Stmt( uses, st );
   4365    }
   4366    aoccCount_Expr(uses, bb->next );
   4367 
   4368 #  if 0
   4369    for (i = 0; i < n_tmps; i++) {
   4370       if (uses[i] == 0)
   4371         continue;
   4372       ppIRTemp( (IRTemp)i );
   4373       vex_printf("  used %d\n", (Int)uses[i] );
   4374    }
   4375 #  endif
   4376 
   4377    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
   4378 
    4379          If the env is full, emit the oldest (end) element.  This
    4380          guarantees there is at least one free slot for what follows.
   4381 
   4382          On seeing 't = E', occ(t)==1,
   4383             let E'=env(E)
   4384             delete this stmt
   4385             add t -> E' to the front of the env
   4386             Examine E' and set the hints for E' appropriately
   4387               (doesLoad? doesGet?)
   4388 
   4389          On seeing any other stmt,
   4390             let stmt' = env(stmt)
   4391             remove from env any 't=E' binds invalidated by stmt
   4392                 emit the invalidated stmts
   4393             emit stmt'
   4394             compact any holes in env
   4395               by sliding entries towards the front
   4396 
   4397       Finally, apply env to bb->next.
   4398    */
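
   /* For example (hypothetical flat fragment), given occ(t1) == 1 and
      occ(t2) == 1,

         t1 = GET:I32(0)
         t2 = Add32(t1,0x4:I32)
         PUT(0) = t2

      is rebuilt as the single statement

         PUT(0) = Add32(GET:I32(0),0x4:I32)

      The compiled-out sketch below (invented names; not VEX code)
      shows the two env operations this relies on: adding a new
      binding at the front, and compacting holes left behind by
      invalidation. */
#  if 0
#  define DEMO_NENV 4

   typedef struct { int binder; const char* bindee; } DemoEnt;

   /* Add a binding at slot 0, sliding existing entries towards the
      back.  The caller must have ensured slot DEMO_NENV-1 is free,
      as the loop below does by emitting the oldest binding first. */
   static void demo_addFront ( DemoEnt* env, int binder,
                               const char* bindee )
   {
      int i;
      for (i = DEMO_NENV-1; i >= 1; i--)
         env[i] = env[i-1];
      env[0].binder = binder;
      env[0].bindee = bindee;
   }

   /* Slide in-use entries towards the front and clear the rest --
      the same compaction step performed after invalidation below. */
   static void demo_compact ( DemoEnt* env )
   {
      int k, m = 0;
      for (k = 0; k < DEMO_NENV; k++)
         if (env[k].bindee != NULL)
            env[m++] = env[k];
      for (; m < DEMO_NENV; m++)
         env[m].bindee = NULL;
   }
#  endif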
   4399 
   4400    for (i = 0; i < A_NENV; i++) {
   4401       env[i].bindee = NULL;
   4402       env[i].binder = IRTemp_INVALID;
   4403    }
   4404 
    4405    /* The stmts in bb are being reordered, and we are guaranteed to
    4406       end up with no more than the number we started with.  Use i as
    4407       the cursor for the current stmt being examined, and j <= i as
    4408       the cursor for the current stmt being written.
    4409    */
   4410    j = 0;
   4411    for (i = 0; i < bb->stmts_used; i++) {
   4412 
   4413       st = bb->stmts[i];
   4414       if (st->tag == Ist_NoOp)
   4415          continue;
   4416 
   4417       /* Ensure there's at least one space in the env, by emitting
   4418          the oldest binding if necessary. */
   4419       if (env[A_NENV-1].bindee != NULL) {
   4420          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
   4421                                       env[A_NENV-1].bindee );
   4422          j++;
   4423          vassert(j <= i);
   4424          env[A_NENV-1].bindee = NULL;
   4425       }
   4426 
   4427       /* Consider current stmt. */
   4428       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
   4429          IRExpr *e, *e2;
   4430 
    4431          /* optional extra: drop dead bindings as we find them.
    4432             Removes the need for a prior dead-code removal pass. */
   4433          if (uses[st->Ist.WrTmp.tmp] == 0) {
    4434             if (0) vex_printf("DEAD binding\n");
   4435             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   4436          }
   4437          vassert(uses[st->Ist.WrTmp.tmp] == 1);
   4438 
   4439          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
   4440             actions. */
   4441          e  = st->Ist.WrTmp.data;
   4442          e2 = atbSubst_Expr(env, e);
   4443          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
   4444          setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
   4445          /* don't advance j, as we are deleting this stmt and instead
   4446             holding it temporarily in the env. */
   4447          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   4448       }
   4449 
   4450       /* we get here for any other kind of statement. */
   4451       /* 'use up' any bindings required by the current statement. */
   4452       st2 = atbSubst_Stmt(env, st);
   4453 
   4454       /* Now, before this stmt, dump any bindings in env that it
   4455          invalidates.  These need to be dumped in the order in which
   4456          they originally entered env -- that means from oldest to
   4457          youngest. */
   4458 
    4459       /* stmtPuts/stmtStores characterise what the stmt under
    4460          consideration does, or might do (always safe to claim True). */
   4461       stmtPuts
   4462          = toBool( st->tag == Ist_Put
   4463                    || st->tag == Ist_PutI
   4464                    || st->tag == Ist_Dirty );
   4465 
    4466       /* stmtStores is True if this stmt writes memory, or might do so
    4467          (==> we don't want to reorder other loads or stores relative
    4468          to it).  Both LL and SC also fall under this classification,
    4469          since we ought to be conservative and not reorder any other
    4470          memory transactions relative to them. */
   4471       stmtStores
   4472          = toBool( st->tag == Ist_Store
   4473                    || st->tag == Ist_Dirty
   4474                    || st->tag == Ist_LLSC );
   4475 
   4476       for (k = A_NENV-1; k >= 0; k--) {
   4477          if (env[k].bindee == NULL)
   4478             continue;
   4479          /* Compare the actions of this stmt with the actions of
   4480             binding 'k', to see if they invalidate the binding. */
   4481          invalidateMe
   4482             = toBool(
   4483               /* a store invalidates loaded data */
   4484               (env[k].doesLoad && stmtStores)
   4485               /* a put invalidates get'd data */
   4486               || (env[k].doesGet && stmtPuts)
   4487               /* a put invalidates loaded data.  Note, we could do
   4488                  much better here in the sense that we only need to
   4489                  invalidate trees containing loads if the Put in
   4490                  question is marked as requiring precise
   4491                  exceptions. */
   4492               || (env[k].doesLoad && stmtPuts)
   4493               /* probably overly conservative: a memory bus event
   4494                  invalidates absolutely everything, so that all
   4495                  computation prior to it is forced to complete before
   4496                  proceeding with the event (fence,lock,unlock). */
   4497               || st->tag == Ist_MBE
   4498               /* also be (probably overly) paranoid re AbiHints */
   4499               || st->tag == Ist_AbiHint
   4500               );
   4501          if (invalidateMe) {
   4502             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
   4503             j++;
   4504             vassert(j <= i);
   4505             env[k].bindee = NULL;
   4506          }
   4507       }
   4508 
   4509       /* Slide in-use entries in env up to the front */
   4510       m = 0;
   4511       for (k = 0; k < A_NENV; k++) {
   4512          if (env[k].bindee != NULL) {
   4513             env[m] = env[k];
   4514             m++;
    4515          }
   4516       }
    4517       for (; m < A_NENV; m++) {
   4518          env[m].bindee = NULL;
   4519       }
   4520 
   4521       /* finally, emit the substituted statement */
   4522       bb->stmts[j] = st2;
   4523       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
   4524       j++;
   4525 
   4526       vassert(j <= i+1);
   4527    } /* for each stmt in the original bb ... */
   4528 
    4529    /* Finally, substitute into the ->next field as much as possible,
    4530       and discard any left-over bindings.  Perhaps there should be no
    4531       left-over bindings at this point; or perhaps any that remain are
    4532       by definition dead? */
   4533    bb->next = atbSubst_Expr(env, bb->next);
   4534    bb->stmts_used = j;
   4535 }
   4536 
   4537 
   4538 /*---------------------------------------------------------------*/
   4539 /*--- iropt main                                              ---*/
   4540 /*---------------------------------------------------------------*/
   4541 
   4542 static Bool iropt_verbose = False; /* True; */
   4543 
   4544 
   4545 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   4546    redundant Put removal, constant propagation, dead code removal,
   4547    clean helper specialisation, and dead code removal (again).
   4548 */
   4549 
   4550 
   4551 static
   4552 IRSB* cheap_transformations (
   4553          IRSB* bb,
   4554          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   4555          Bool (*preciseMemExnsFn)(Int,Int)
   4556       )
   4557 {
   4558    redundant_get_removal_BB ( bb );
   4559    if (iropt_verbose) {
   4560       vex_printf("\n========= REDUNDANT GET\n\n" );
   4561       ppIRSB(bb);
   4562    }
   4563 
   4564    redundant_put_removal_BB ( bb, preciseMemExnsFn );
   4565    if (iropt_verbose) {
   4566       vex_printf("\n========= REDUNDANT PUT\n\n" );
   4567       ppIRSB(bb);
   4568    }
   4569 
   4570    bb = cprop_BB ( bb );
   4571    if (iropt_verbose) {
   4572       vex_printf("\n========= CPROPD\n\n" );
   4573       ppIRSB(bb);
   4574    }
   4575 
   4576    do_deadcode_BB ( bb );
   4577    if (iropt_verbose) {
   4578       vex_printf("\n========= DEAD\n\n" );
   4579       ppIRSB(bb);
   4580    }
   4581 
   4582    bb = spec_helpers_BB ( bb, specHelper );
   4583    do_deadcode_BB ( bb );
   4584    if (iropt_verbose) {
   4585       vex_printf("\n========= SPECd \n\n" );
   4586       ppIRSB(bb);
   4587    }
   4588 
   4589    return bb;
   4590 }
   4591 
   4592 
   4593 /* Do some more expensive transformations on bb, which are aimed at
   4594    optimising as much as possible in the presence of GetI and PutI.  */
   4595 
   4596 static
   4597 IRSB* expensive_transformations( IRSB* bb )
   4598 {
   4599    (void)do_cse_BB( bb );
   4600    collapse_AddSub_chains_BB( bb );
   4601    do_redundant_GetI_elimination( bb );
   4602    do_redundant_PutI_elimination( bb );
   4603    do_deadcode_BB( bb );
   4604    return bb;
   4605 }
   4606 
   4607 
   4608 /* Scan a flattened BB to look for signs that more expensive
   4609    optimisations might be useful:
   4610    - find out if there are any GetIs and PutIs
   4611    - find out if there are any floating or vector-typed temporaries
   4612 */
   4613 
   4614 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
   4615                                  /*OUT*/Bool* hasVorFtemps,
   4616                                  IRSB* bb )
   4617 {
   4618    Int      i, j;
   4619    IRStmt*  st;
   4620    IRDirty* d;
   4621    IRCAS*   cas;
   4622 
   4623    *hasGetIorPutI = False;
   4624    *hasVorFtemps  = False;
   4625 
   4626    for (i = 0; i < bb->stmts_used; i++) {
   4627       st = bb->stmts[i];
   4628       switch (st->tag) {
   4629          case Ist_AbiHint:
   4630             vassert(isIRAtom(st->Ist.AbiHint.base));
   4631             vassert(isIRAtom(st->Ist.AbiHint.nia));
   4632             break;
   4633          case Ist_PutI:
   4634             *hasGetIorPutI = True;
   4635             break;
   4636          case Ist_WrTmp:
   4637             if (st->Ist.WrTmp.data->tag == Iex_GetI)
   4638                *hasGetIorPutI = True;
   4639             switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
   4640                case Ity_I1: case Ity_I8: case Ity_I16:
   4641                case Ity_I32: case Ity_I64: case Ity_I128:
   4642                   break;
   4643                case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
   4644                   *hasVorFtemps = True;
   4645                   break;
   4646                default:
   4647                   goto bad;
   4648             }
   4649             break;
   4650          case Ist_Put:
   4651             vassert(isIRAtom(st->Ist.Put.data));
   4652             break;
   4653          case Ist_Store:
   4654             vassert(isIRAtom(st->Ist.Store.addr));
   4655             vassert(isIRAtom(st->Ist.Store.data));
   4656             break;
   4657          case Ist_CAS:
   4658             cas = st->Ist.CAS.details;
   4659             vassert(isIRAtom(cas->addr));
   4660             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   4661             vassert(isIRAtom(cas->expdLo));
   4662             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   4663             vassert(isIRAtom(cas->dataLo));
   4664             break;
   4665          case Ist_LLSC:
   4666             vassert(isIRAtom(st->Ist.LLSC.addr));
   4667             if (st->Ist.LLSC.storedata)
   4668                vassert(isIRAtom(st->Ist.LLSC.storedata));
   4669             break;
   4670          case Ist_Dirty:
   4671             d = st->Ist.Dirty.details;
   4672             vassert(isIRAtom(d->guard));
   4673             for (j = 0; d->args[j]; j++)
   4674                vassert(isIRAtom(d->args[j]));
   4675             if (d->mFx != Ifx_None)
   4676                vassert(isIRAtom(d->mAddr));
   4677             break;
   4678          case Ist_NoOp:
   4679          case Ist_IMark:
   4680          case Ist_MBE:
   4681             break;
   4682          case Ist_Exit:
   4683             vassert(isIRAtom(st->Ist.Exit.guard));
   4684             break;
   4685          default:
   4686          bad:
   4687             ppIRStmt(st);
   4688             vpanic("considerExpensives");
   4689       }
   4690    }
   4691 }
   4692 
   4693 
   4694 /* ---------------- The main iropt entry point. ---------------- */
   4695 
   4696 /* exported from this file */
   4697 /* Rules of the game:
   4698 
   4699    - IRExpr/IRStmt trees should be treated as immutable, as they
   4700      may get shared.  So never change a field of such a tree node;
   4701      instead construct and return a new one if needed.
   4702 */
   4703 
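/* A compiled-out sketch of the copy-don't-mutate rule stated above,
   using invented types (this is not the IR API): to "change" a field
   of a possibly-shared tree node, build and return a fresh node
   rather than writing to the original. */
#if 0
typedef struct DemoNode_ {
   int                op;
   struct DemoNode_*  left;
   struct DemoNode_*  right;
} DemoNode;

static DemoNode* demo_mk ( int op, DemoNode* left, DemoNode* right )
{
   DemoNode* n = LibVEX_Alloc(sizeof(DemoNode));
   n->op = op;  n->left = left;  n->right = right;
   return n;
}

/* Wrong: "n->left = newLeft;" would be visible to every other tree
   sharing 'n'.  Right: leave 'n' untouched and rebuild. */
static DemoNode* demo_withLeft ( DemoNode* n, DemoNode* newLeft )
{
   return demo_mk(n->op, newLeft, n->right);
}
#endif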
   4704 
   4705 IRSB* do_iropt_BB(
   4706          IRSB* bb0,
   4707          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   4708          Bool (*preciseMemExnsFn)(Int,Int),
   4709          Addr64 guest_addr,
   4710          VexArch guest_arch
   4711       )
   4712 {
   4713    static Int n_total     = 0;
   4714    static Int n_expensive = 0;
   4715 
   4716    Bool hasGetIorPutI, hasVorFtemps;
   4717    IRSB *bb, *bb2;
   4718 
   4719    n_total++;
   4720 
   4721    /* First flatten the block out, since all other
   4722       phases assume flat code. */
   4723 
   4724    bb = flatten_BB ( bb0 );
   4725 
   4726    if (iropt_verbose) {
   4727       vex_printf("\n========= FLAT\n\n" );
   4728       ppIRSB(bb);
   4729    }
   4730 
   4731    /* If at level 0, stop now. */
   4732    if (vex_control.iropt_level <= 0) return bb;
   4733 
   4734    /* Now do a preliminary cleanup pass, and figure out if we also
   4735       need to do 'expensive' optimisations.  Expensive optimisations
   4736       are deemed necessary if the block contains any GetIs or PutIs.
   4737       If needed, do expensive transformations and then another cheap
   4738       cleanup pass. */
   4739 
   4740    bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4741 
   4742    if (guest_arch == VexArchARM) {
   4743       /* Translating Thumb2 code produces a lot of chaff.  We have to
   4744          work extra hard to get rid of it. */
   4745       bb = cprop_BB(bb);
   4746       bb = spec_helpers_BB ( bb, specHelper );
   4747       redundant_put_removal_BB ( bb, preciseMemExnsFn );
   4748       do_cse_BB( bb );
   4749       do_deadcode_BB( bb );
   4750    }
   4751 
   4752    if (vex_control.iropt_level > 1) {
   4753 
   4754       /* Peer at what we have, to decide how much more effort to throw
   4755          at it. */
   4756       considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
   4757 
   4758       if (hasVorFtemps && !hasGetIorPutI) {
   4759          /* If any evidence of FP or Vector activity, CSE, as that
   4760             tends to mop up all manner of lardy code to do with
   4761             rounding modes.  Don't bother if hasGetIorPutI since that
   4762             case leads into the expensive transformations, which do
   4763             CSE anyway. */
   4764          (void)do_cse_BB( bb );
   4765          do_deadcode_BB( bb );
   4766       }
   4767 
   4768       if (hasGetIorPutI) {
   4769          Bool cses;
   4770          n_expensive++;
   4771          if (DEBUG_IROPT)
   4772             vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
   4773          bb = expensive_transformations( bb );
   4774          bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4775          /* Potentially common up GetIs */
   4776          cses = do_cse_BB( bb );
   4777          if (cses)
   4778             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4779       }
   4780 
   4781       /* Now have a go at unrolling simple (single-BB) loops.  If
   4782          successful, clean up the results as much as possible. */
   4783 
   4784       bb2 = maybe_loop_unroll_BB( bb, guest_addr );
   4785       if (bb2) {
   4786          bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
   4787          if (hasGetIorPutI) {
   4788             bb = expensive_transformations( bb );
   4789             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4790          } else {
   4791             /* at least do CSE and dead code removal */
   4792             do_cse_BB( bb );
   4793             do_deadcode_BB( bb );
   4794          }
   4795          if (0) vex_printf("vex iropt: unrolled a loop\n");
   4796       }
   4797 
   4798    }
   4799 
   4800    return bb;
   4801 }
   4802 
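/* Hypothetical call site (not real VEX code; compiled out): a front
   end that has just built a flat superblock 'bb0' might invoke the
   optimiser as below, supplying its own helper-specialisation and
   precise-exception callbacks.  The callback shapes follow the
   do_iropt_BB signature above; the bodies are placeholders. */
#if 0
static IRExpr* demo_specHelper ( HChar* function_name, IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int n_precedingStmts )
{
   return NULL;   /* no clean-helper specialisations offered */
}

static Bool demo_preciseMemExnsFn ( Int minoff, Int maxoff )
{
   return True;   /* conservatively treat all guest state as precise */
}

static IRSB* demo_optimise ( IRSB* bb0, Addr64 guest_IP )
{
   return do_iropt_BB( bb0, demo_specHelper, demo_preciseMemExnsFn,
                       guest_IP, VexArchAMD64 );
}
#endif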
   4803 
   4804 /*---------------------------------------------------------------*/
   4805 /*--- end                                            ir_opt.c ---*/
   4806 /*---------------------------------------------------------------*/
   4807