      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                          ir_opt.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2010 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 
     40 #include "main_util.h"
     41 #include "main_globals.h"
     42 #include "ir_opt.h"
     43 
     44 
     45 /* Set to 1 for lots of debugging output. */
     46 #define DEBUG_IROPT 0
     47 
     48 
     49 /* What iropt does, 29 Dec 04.
     50 
     51    It takes an IRSB and produces a new one with the same meaning,
     52    defined thus:
     53 
     54    After execution of the new BB, all guest state and guest memory is
     55    the same as after execution of the original.  This is true
     56    regardless of how the block was exited (at the end vs side exit).
     57 
      58    In addition, parts of the guest state will be identical to those
      59    created by execution of the original at the following observation
     60    points:
     61 
      62    * In a dirty helper call, any parts of the guest state that the
      63      helper declares it reads or modifies will be up to date.
     64      Also, guest memory will be up to date.  Parts of the guest state
     65      not marked as being read or modified by the helper cannot be
     66      assumed to be up-to-date at the point where the helper is called.
     67 
     68    * Immediately prior to any load or store, those parts of the guest
     69      state marked as requiring precise exceptions will be up to date.
     70      Also, guest memory will be up to date.  Parts of the guest state
     71      not marked as requiring precise exceptions cannot be assumed to
     72      be up-to-date at the point of the load/store.
     73 
     74    The relative order of loads and stores (including loads/stores of
     75    guest memory done by dirty helpers annotated as such) is not
     76    changed.  However, the relative order of loads with no intervening
     77    stores/modifies may be changed.
     78 
     79    Transformation order
     80    ~~~~~~~~~~~~~~~~~~~~
     81 
     82    There are three levels of optimisation, controlled by
     83    vex_control.iropt_level.  Define first:
     84 
     85    "Cheap transformations" are the following sequence:
     86       * Redundant-Get removal
     87       * Redundant-Put removal
     88       * Constant propagation/folding
     89       * Dead code removal
     90       * Specialisation of clean helper functions
     91       * Dead code removal
     92 
     93    "Expensive transformations" are the following sequence:
     94       * CSE
     95       * Folding of add/sub chains
     96       * Redundant-GetI removal
     97       * Redundant-PutI removal
     98       * Dead code removal
     99 
    100    Then the transformations are as follows, as defined by
    101    vex_control.iropt_level:
    102 
    103    Level 0:
    104       * Flatten into atomic form.
    105 
    106    Level 1: the following sequence:
    107       * Flatten into atomic form.
    108       * Cheap transformations.
    109 
     110    Level 2: the following sequence:
    111       * Flatten into atomic form.
    112       * Cheap transformations.
    113       * If block contains any floating or vector types, CSE.
    114       * If block contains GetI or PutI, Expensive transformations.
    115       * Try unrolling loops.  Three possible outcomes:
    116         - No effect: do nothing more.
    117         - Unrolled a loop, and block does not contain GetI or PutI:
    118           Do: * CSE
    119               * Dead code removal
    120         - Unrolled a loop, and block contains GetI or PutI:
    121           Do: * Expensive transformations
    122               * Cheap transformations
    123 */
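
         /* Illustrative consequence of the reordering rules above (an
            assumed example, not from the original source): given

               t1 = LDle:I32(a1)
               t2 = LDle:I32(a2)
               STle(a3) = t9

            the two loads may legally be exchanged, since no store intervenes
            between them, but neither load may be moved past the store. */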
    124 
    125 /* Implementation notes, 29 Dec 04.
    126 
     127    TODO (important): I think rPutI removal ignores precise exceptions
     128    and is therefore, in a sense, wrong: it assumes that PutIs do not
     129    write parts of the guest state that we need to have up-to-date at
     130    loads/stores.  So far on the x86 guest that has not mattered,
     131    since indeed only the x87 FP registers and tags are accessed
     132    using GetI/PutI, and there is no need so far for them to be up to
     133    date at memory exception points.  The rPutI pass should be
     134    fixed.
    135 
    136    TODO: improve pessimistic handling of precise exceptions
    137      in the tree builder.
    138 
    139    TODO: check interaction of rGetI and dirty helpers.
    140 
    141    F64i constants are treated differently from other constants.
    142    They are not regarded as atoms, and instead lifted off and
    143    bound to temps.  This allows them to participate in CSE, which
    144    is important for getting good performance for x86 guest code.
    145 
     146    TODO: CSE up F64 literals (F64i literals are already handled)
    147 
    148    CSE: consider carefully the requirement for precise exns
    149         prior to making CSE any more aggressive.  */
    150 
    151 
    152 /*---------------------------------------------------------------*/
    153 /*--- Finite mappery, of a sort                               ---*/
    154 /*---------------------------------------------------------------*/
    155 
     156 /* General map from HWord-sized thing to HWord-sized thing.  Could be
     157    done by hashing, but it's not clear whether or not this would
     158    really be any faster. */
    159 
    160 typedef
    161    struct {
    162       Bool*  inuse;
    163       HWord* key;
    164       HWord* val;
    165       Int    size;
    166       Int    used;
    167    }
    168    HashHW;
    169 
    170 static HashHW* newHHW ( void )
    171 {
    172    HashHW* h = LibVEX_Alloc(sizeof(HashHW));
    173    h->size   = 8;
    174    h->used   = 0;
    175    h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
    176    h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
    177    h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
    178    return h;
    179 }
    180 
    181 
    182 /* Look up key in the map. */
    183 
    184 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    185 {
    186    Int i;
    187    /* vex_printf("lookupHHW(%llx)\n", key ); */
    188    for (i = 0; i < h->used; i++) {
    189       if (h->inuse[i] && h->key[i] == key) {
    190          if (val)
    191             *val = h->val[i];
    192          return True;
    193       }
    194    }
    195    return False;
    196 }
    197 
    198 
    199 /* Add key->val to the map.  Replaces any existing binding for key. */
    200 
    201 static void addToHHW ( HashHW* h, HWord key, HWord val )
    202 {
    203    Int i, j;
    204    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    205 
    206    /* Find and replace existing binding, if any. */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          h->val[i] = val;
    210          return;
    211       }
    212    }
    213 
    214    /* Ensure a space is available. */
    215    if (h->used == h->size) {
    216       /* Copy into arrays twice the size. */
    217       Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
    218       HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    219       HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    220       for (i = j = 0; i < h->size; i++) {
    221          if (!h->inuse[i]) continue;
    222          inuse2[j] = True;
    223          key2[j] = h->key[i];
    224          val2[j] = h->val[i];
    225          j++;
    226       }
    227       h->used = j;
    228       h->size *= 2;
    229       h->inuse = inuse2;
    230       h->key = key2;
    231       h->val = val2;
    232    }
    233 
    234    /* Finally, add it. */
    235    vassert(h->used < h->size);
    236    h->inuse[h->used] = True;
    237    h->key[h->used] = key;
    238    h->val[h->used] = val;
    239    h->used++;
    240 }
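
         /* Illustrative usage sketch of the map above (not part of the
            original source).  Keys and values are opaque HWords; callers
            cast packed offset ranges or IRExpr* pointers in and out as
            needed:

               HashHW* h = newHHW();
               HWord   v;
               addToHHW( h, (HWord)0x12, (HWord)0x34 );
               vassert( lookupHHW( h, &v, (HWord)0x12 ) && v == 0x34 );
               addToHHW( h, (HWord)0x12, (HWord)0x56 );
               vassert( lookupHHW( h, &v, (HWord)0x12 ) && v == 0x56 );
         */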
    241 
    242 
    243 /*---------------------------------------------------------------*/
    244 /*--- Flattening out a BB into atomic SSA form                ---*/
    245 /*---------------------------------------------------------------*/
    246 
    247 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    248    copies made by flattening.  If in doubt return False. */
    249 
    250 static Bool isFlat ( IRExpr* e )
    251 {
    252    if (e->tag == Iex_Get)
    253       return True;
    254    if (e->tag == Iex_Binop)
    255       return toBool( isIRAtom(e->Iex.Binop.arg1)
    256                      && isIRAtom(e->Iex.Binop.arg2) );
    257    if (e->tag == Iex_Load)
    258       return isIRAtom(e->Iex.Load.addr);
    259    return False;
    260 }
    261 
    262 /* Flatten out 'ex' so it is atomic, returning a new expression with
    263    the same value, after having appended extra IRTemp assignments to
    264    the end of 'bb'. */
    265 
    266 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
    267 {
    268    Int i;
    269    IRExpr** newargs;
    270    IRType ty = typeOfIRExpr(bb->tyenv, ex);
    271    IRTemp t1;
    272 
    273    switch (ex->tag) {
    274 
    275       case Iex_GetI:
    276          t1 = newIRTemp(bb->tyenv, ty);
    277          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    278             IRExpr_GetI(ex->Iex.GetI.descr,
    279                         flatten_Expr(bb, ex->Iex.GetI.ix),
    280                         ex->Iex.GetI.bias)));
    281          return IRExpr_RdTmp(t1);
    282 
    283       case Iex_Get:
    284          t1 = newIRTemp(bb->tyenv, ty);
    285          addStmtToIRSB(bb,
    286             IRStmt_WrTmp(t1, ex));
    287          return IRExpr_RdTmp(t1);
    288 
    289       case Iex_Qop:
    290          t1 = newIRTemp(bb->tyenv, ty);
    291          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    292             IRExpr_Qop(ex->Iex.Qop.op,
    293                          flatten_Expr(bb, ex->Iex.Qop.arg1),
    294                          flatten_Expr(bb, ex->Iex.Qop.arg2),
    295                          flatten_Expr(bb, ex->Iex.Qop.arg3),
    296                          flatten_Expr(bb, ex->Iex.Qop.arg4))));
    297          return IRExpr_RdTmp(t1);
    298 
    299       case Iex_Triop:
    300          t1 = newIRTemp(bb->tyenv, ty);
    301          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    302             IRExpr_Triop(ex->Iex.Triop.op,
    303                          flatten_Expr(bb, ex->Iex.Triop.arg1),
    304                          flatten_Expr(bb, ex->Iex.Triop.arg2),
    305                          flatten_Expr(bb, ex->Iex.Triop.arg3))));
    306          return IRExpr_RdTmp(t1);
    307 
    308       case Iex_Binop:
    309          t1 = newIRTemp(bb->tyenv, ty);
    310          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    311             IRExpr_Binop(ex->Iex.Binop.op,
    312                          flatten_Expr(bb, ex->Iex.Binop.arg1),
    313                          flatten_Expr(bb, ex->Iex.Binop.arg2))));
    314          return IRExpr_RdTmp(t1);
    315 
    316       case Iex_Unop:
    317          t1 = newIRTemp(bb->tyenv, ty);
    318          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    319             IRExpr_Unop(ex->Iex.Unop.op,
    320                         flatten_Expr(bb, ex->Iex.Unop.arg))));
    321          return IRExpr_RdTmp(t1);
    322 
    323       case Iex_Load:
    324          t1 = newIRTemp(bb->tyenv, ty);
    325          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    326             IRExpr_Load(ex->Iex.Load.end,
    327                         ex->Iex.Load.ty,
    328                         flatten_Expr(bb, ex->Iex.Load.addr))));
    329          return IRExpr_RdTmp(t1);
    330 
    331       case Iex_CCall:
    332          newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
    333          for (i = 0; newargs[i]; i++)
    334             newargs[i] = flatten_Expr(bb, newargs[i]);
    335          t1 = newIRTemp(bb->tyenv, ty);
    336          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    337             IRExpr_CCall(ex->Iex.CCall.cee,
    338                          ex->Iex.CCall.retty,
    339                          newargs)));
    340          return IRExpr_RdTmp(t1);
    341 
    342       case Iex_Mux0X:
    343          t1 = newIRTemp(bb->tyenv, ty);
    344          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    345             IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
    346                          flatten_Expr(bb, ex->Iex.Mux0X.expr0),
    347                          flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
    348          return IRExpr_RdTmp(t1);
    349 
    350       case Iex_Const:
    351          /* Lift F64i constants out onto temps so they can be CSEd
    352             later. */
    353          if (ex->Iex.Const.con->tag == Ico_F64i) {
    354             t1 = newIRTemp(bb->tyenv, ty);
    355             addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    356                IRExpr_Const(ex->Iex.Const.con)));
    357             return IRExpr_RdTmp(t1);
    358          } else {
    359             /* Leave all other constants alone. */
    360             return ex;
    361          }
    362 
    363       case Iex_RdTmp:
    364          return ex;
    365 
    366       default:
    367          vex_printf("\n");
    368          ppIRExpr(ex);
    369          vex_printf("\n");
    370          vpanic("flatten_Expr");
    371    }
    372 }
    373 
    374 
    375 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    376 
    377 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
    378 {
    379    Int i;
    380    IRExpr  *e1, *e2, *e3, *e4, *e5;
    381    IRDirty *d,  *d2;
    382    IRCAS   *cas, *cas2;
    383    switch (st->tag) {
    384       case Ist_Put:
    385          if (isIRAtom(st->Ist.Put.data)) {
    386             /* optimisation to reduce the amount of heap wasted
    387                by the flattener */
    388             addStmtToIRSB(bb, st);
    389          } else {
    390             /* general case, always correct */
    391             e1 = flatten_Expr(bb, st->Ist.Put.data);
    392             addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
    393          }
    394          break;
    395       case Ist_PutI:
    396          e1 = flatten_Expr(bb, st->Ist.PutI.ix);
    397          e2 = flatten_Expr(bb, st->Ist.PutI.data);
    398          addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
    399                                        e1,
    400                                        st->Ist.PutI.bias,
    401                                        e2));
    402          break;
    403       case Ist_WrTmp:
    404          if (isFlat(st->Ist.WrTmp.data)) {
    405             /* optimisation, to reduce the number of tmp-tmp
    406                copies generated */
    407             addStmtToIRSB(bb, st);
    408          } else {
    409             /* general case, always correct */
    410             e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
    411             addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
    412          }
    413          break;
    414       case Ist_Store:
    415          e1 = flatten_Expr(bb, st->Ist.Store.addr);
    416          e2 = flatten_Expr(bb, st->Ist.Store.data);
    417          addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
    418          break;
    419       case Ist_CAS:
    420          cas  = st->Ist.CAS.details;
    421          e1   = flatten_Expr(bb, cas->addr);
    422          e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
    423          e3   = flatten_Expr(bb, cas->expdLo);
    424          e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
    425          e5   = flatten_Expr(bb, cas->dataLo);
    426          cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
    427                          e1, e2, e3, e4, e5 );
    428          addStmtToIRSB(bb, IRStmt_CAS(cas2));
    429          break;
    430       case Ist_LLSC:
    431          e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
    432          e2 = st->Ist.LLSC.storedata
    433                  ? flatten_Expr(bb, st->Ist.LLSC.storedata)
    434                  : NULL;
    435          addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
    436                                        st->Ist.LLSC.result, e1, e2));
    437          break;
    438       case Ist_Dirty:
    439          d = st->Ist.Dirty.details;
    440          d2 = emptyIRDirty();
    441          *d2 = *d;
    442          d2->args = shallowCopyIRExprVec(d2->args);
    443          if (d2->mFx != Ifx_None) {
    444             d2->mAddr = flatten_Expr(bb, d2->mAddr);
    445          } else {
    446             vassert(d2->mAddr == NULL);
    447          }
    448          d2->guard = flatten_Expr(bb, d2->guard);
    449          for (i = 0; d2->args[i]; i++)
    450             d2->args[i] = flatten_Expr(bb, d2->args[i]);
    451          addStmtToIRSB(bb, IRStmt_Dirty(d2));
    452          break;
    453       case Ist_NoOp:
    454       case Ist_MBE:
    455       case Ist_IMark:
    456          addStmtToIRSB(bb, st);
    457          break;
    458       case Ist_AbiHint:
    459          e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
    460          e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
    461          addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
    462          break;
    463       case Ist_Exit:
    464          e1 = flatten_Expr(bb, st->Ist.Exit.guard);
    465          addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
    466                                            st->Ist.Exit.dst));
    467          break;
    468       default:
    469          vex_printf("\n");
    470          ppIRStmt(st);
    471          vex_printf("\n");
    472          vpanic("flatten_Stmt");
    473    }
    474 }
    475 
    476 
    477 static IRSB* flatten_BB ( IRSB* in )
    478 {
    479    Int   i;
    480    IRSB* out;
    481    out = emptyIRSB();
    482    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    483    for (i = 0; i < in->stmts_used; i++)
    484       if (in->stmts[i])
    485          flatten_Stmt( out, in->stmts[i] );
    486    out->next     = flatten_Expr( out, in->next );
    487    out->jumpkind = in->jumpkind;
    488    return out;
    489 }
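
         /* Illustrative example of flattening (assumed IR, not from the
            original source):

               PUT(0) = Add32(GET:I32(0),0x1:I32)

            becomes

               t1 = GET:I32(0)
               t2 = Add32(t1,0x1:I32)
               PUT(0) = t2

            so that every operator, load and store argument is an atom, that
            is, an IRTemp or an IRConst. */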
    490 
    491 
    492 /*---------------------------------------------------------------*/
    493 /*--- In-place removal of redundant GETs                      ---*/
    494 /*---------------------------------------------------------------*/
    495 
    496 /* Scan forwards, building up an environment binding (min offset, max
    497    offset) pairs to values, which will either be temps or constants.
    498 
    499    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    500    env and if it matches, replace the Get with the stored value.  If
    501    there is no match, add a (minoff,maxoff) :-> t binding.
    502 
    503    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    504    any binding which fully or partially overlaps with (minoff,maxoff).
    505    Then add a new (minoff,maxoff) :-> t or c binding.  */
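
         /* Illustrative example (assumed IR, not from the original source):

               t1 = GET:I32(0)           t1 = GET:I32(0)
               t2 = GET:I32(0)           t2 = t1       -- reuses (0,3) :-> t1
               PUT(0) = t3         ==>   PUT(0) = t3   -- kills (0,3), rebinds to t3
               t4 = GET:I32(0)           t4 = t3       -- reuses the Put's data
               PUT(4) = t5               PUT(4) = t5   -- (4,7) only; (0,3) survives
         */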
    506 
    507 /* Extract the min/max offsets from a guest state array descriptor. */
    508 
    509 inline
    510 static void getArrayBounds ( IRRegArray* descr,
    511                              UInt* minoff, UInt* maxoff )
    512 {
    513    *minoff = descr->base;
    514    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    515    vassert((*minoff & ~0xFFFF) == 0);
    516    vassert((*maxoff & ~0xFFFF) == 0);
    517    vassert(*minoff <= *maxoff);
    518 }
    519 
    520 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    521 
    522 static UInt mk_key_GetPut ( Int offset, IRType ty )
    523 {
    524    /* offset should fit in 16 bits. */
    525    UInt minoff = offset;
    526    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    527    vassert((minoff & ~0xFFFF) == 0);
    528    vassert((maxoff & ~0xFFFF) == 0);
    529    return (minoff << 16) | maxoff;
    530 }
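
         /* Worked example (illustrative): a Put of an Ity_I32 value at guest
            offset 24 gives minoff = 24 (0x18) and maxoff = 24+4-1 = 27 (0x1B),
            so the key is (0x18 << 16) | 0x1B = 0x0018001B. */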
    531 
    532 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    533 {
    534    UInt minoff, maxoff;
    535    getArrayBounds( descr, &minoff, &maxoff );
    536    vassert((minoff & ~0xFFFF) == 0);
    537    vassert((maxoff & ~0xFFFF) == 0);
    538    return (minoff << 16) | maxoff;
    539 }
    540 
    541 /* Supposing h has keys of the form generated by mk_key_GetPut and
    542    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    543    .. k_hi).
    544 */
    545 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    546 {
    547    Int  j;
    548    UInt e_lo, e_hi;
    549    vassert(k_lo <= k_hi);
    550    /* invalidate any env entries which in any way overlap (k_lo
    551       .. k_hi) */
    552    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    553 
    554    for (j = 0; j < h->used; j++) {
    555       if (!h->inuse[j])
    556          continue;
    557       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    558       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    559       vassert(e_lo <= e_hi);
    560       if (e_hi < k_lo || k_hi < e_lo)
    561          continue; /* no overlap possible */
    562       else
    563          /* overlap; invalidate */
    564          h->inuse[j] = False;
    565    }
    566 }
    567 
    568 
    569 static void redundant_get_removal_BB ( IRSB* bb )
    570 {
    571    HashHW* env = newHHW();
    572    UInt    key = 0; /* keep gcc -O happy */
    573    Int     i, j;
    574    HWord   val;
    575 
    576    for (i = 0; i < bb->stmts_used; i++) {
    577       IRStmt* st = bb->stmts[i];
    578 
    579       if (st->tag == Ist_NoOp)
    580          continue;
    581 
    582       /* Deal with Gets */
    583       if (st->tag == Ist_WrTmp
    584           && st->Ist.WrTmp.data->tag == Iex_Get) {
    585          /* st is 't = Get(...)'.  Look up in the environment and see
    586             if the Get can be replaced. */
    587          IRExpr* get = st->Ist.WrTmp.data;
    588          key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
    589                                      get->Iex.Get.ty );
    590          if (lookupHHW(env, &val, (HWord)key)) {
    591             /* found it */
    592             /* Note, we could do better here.  If the types are
    593                different we don't do the substitution, since doing so
    594                could lead to invalidly-typed IR.  An improvement would
    595                be to stick in a reinterpret-style cast, although that
    596                would make maintaining flatness more difficult. */
    597             IRExpr* valE    = (IRExpr*)val;
    598             Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
    599                                       == st->Ist.WrTmp.data->Iex.Get.ty );
    600             if (typesOK && DEBUG_IROPT) {
    601                vex_printf("rGET: "); ppIRExpr(get);
    602                vex_printf("  ->  "); ppIRExpr(valE);
    603                vex_printf("\n");
    604             }
    605             if (typesOK)
    606                bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
    607          } else {
    608             /* Not found, but at least we know that t and the Get(...)
    609                are now associated.  So add a binding to reflect that
    610                fact. */
    611             addToHHW( env, (HWord)key,
    612                            (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
    613          }
    614       }
    615 
    616       /* Deal with Puts: invalidate any env entries overlapped by this
    617          Put */
    618       if (st->tag == Ist_Put || st->tag == Ist_PutI) {
    619          UInt k_lo, k_hi;
    620          if (st->tag == Ist_Put) {
    621             key = mk_key_GetPut( st->Ist.Put.offset,
    622                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    623          } else {
    624             vassert(st->tag == Ist_PutI);
    625             key = mk_key_GetIPutI( st->Ist.PutI.descr );
    626          }
    627 
    628          k_lo = (key >> 16) & 0xFFFF;
    629          k_hi = key & 0xFFFF;
    630          invalidateOverlaps(env, k_lo, k_hi);
    631       }
    632       else
    633       if (st->tag == Ist_Dirty) {
    634          /* Deal with dirty helpers which write or modify guest state.
    635             Invalidate the entire env.  We could do a lot better
    636             here. */
    637          IRDirty* d      = st->Ist.Dirty.details;
    638          Bool     writes = False;
    639          for (j = 0; j < d->nFxState; j++) {
    640             if (d->fxState[j].fx == Ifx_Modify
    641                 || d->fxState[j].fx == Ifx_Write)
     642                writes = True;
    643          }
    644          if (writes) {
    645             /* dump the entire env (not clever, but correct ...) */
    646             for (j = 0; j < env->used; j++)
    647                env->inuse[j] = False;
    648             if (0) vex_printf("rGET: trash env due to dirty helper\n");
    649          }
    650       }
    651 
    652       /* add this one to the env, if appropriate */
    653       if (st->tag == Ist_Put) {
    654          vassert(isIRAtom(st->Ist.Put.data));
    655          addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
    656       }
    657 
    658    } /* for (i = 0; i < bb->stmts_used; i++) */
    659 
    660 }
    661 
    662 
    663 /*---------------------------------------------------------------*/
    664 /*--- In-place removal of redundant PUTs                      ---*/
    665 /*---------------------------------------------------------------*/
    666 
    667 /* Find any Get uses in st and invalidate any partially or fully
    668    overlapping ranges listed in env.  Due to the flattening phase, the
    669    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    670 
    671 static void handle_gets_Stmt (
    672                HashHW* env,
    673                IRStmt* st,
    674                Bool (*preciseMemExnsFn)(Int,Int)
    675             )
    676 {
    677    Int     j;
    678    UInt    key = 0; /* keep gcc -O happy */
    679    Bool    isGet;
    680    Bool    memRW = False;
    681    IRExpr* e;
    682 
    683    switch (st->tag) {
    684 
    685       /* This is the only interesting case.  Deal with Gets in the RHS
    686          expression. */
    687       case Ist_WrTmp:
    688          e = st->Ist.WrTmp.data;
    689          switch (e->tag) {
    690             case Iex_Get:
    691                isGet = True;
    692                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    693                break;
    694             case Iex_GetI:
    695                isGet = True;
    696                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    697                break;
    698             case Iex_Load:
    699                isGet = False;
    700                memRW = True;
    701                break;
    702             default:
    703                isGet = False;
    704          }
    705          if (isGet) {
    706             UInt k_lo, k_hi;
    707             k_lo = (key >> 16) & 0xFFFF;
    708             k_hi = key & 0xFFFF;
    709             invalidateOverlaps(env, k_lo, k_hi);
    710          }
    711          break;
    712 
    713       /* Be very conservative for dirty helper calls; dump the entire
    714          environment.  The helper might read guest state, in which
    715          case it needs to be flushed first.  Also, the helper might
    716          access guest memory, in which case all parts of the guest
     717          state requiring precise exceptions need to be flushed.  The
     718          crude solution is just to flush everything; we could easily
     719          do a lot better if needed. */
    720       /* Probably also overly-conservative, but also dump everything
    721          if we hit a memory bus event (fence, lock, unlock).  Ditto
    722          AbiHints, CASs, LLs and SCs. */
    723       case Ist_AbiHint:
    724          vassert(isIRAtom(st->Ist.AbiHint.base));
    725          vassert(isIRAtom(st->Ist.AbiHint.nia));
    726          /* fall through */
    727       case Ist_MBE:
    728       case Ist_Dirty:
    729       case Ist_CAS:
    730       case Ist_LLSC:
    731          for (j = 0; j < env->used; j++)
    732             env->inuse[j] = False;
    733          break;
    734 
    735       /* all other cases are boring. */
    736       case Ist_Store:
    737          vassert(isIRAtom(st->Ist.Store.addr));
    738          vassert(isIRAtom(st->Ist.Store.data));
    739          memRW = True;
    740          break;
    741 
    742       case Ist_Exit:
    743          vassert(isIRAtom(st->Ist.Exit.guard));
    744          break;
    745 
    746       case Ist_PutI:
    747          vassert(isIRAtom(st->Ist.PutI.ix));
    748          vassert(isIRAtom(st->Ist.PutI.data));
    749          break;
    750 
    751       case Ist_NoOp:
    752       case Ist_IMark:
    753          break;
    754 
    755       default:
    756          vex_printf("\n");
    757          ppIRStmt(st);
    758          vex_printf("\n");
    759          vpanic("handle_gets_Stmt");
    760    }
    761 
    762    if (memRW) {
    763       /* This statement accesses memory.  So we need to dump all parts
    764          of the environment corresponding to guest state that may not
    765          be reordered with respect to memory references.  That means
    766          at least the stack pointer. */
    767       for (j = 0; j < env->used; j++) {
    768          if (!env->inuse[j])
    769             continue;
    770          if (vex_control.iropt_precise_memory_exns) {
    771             /* Precise exceptions required.  Flush all guest state. */
    772             env->inuse[j] = False;
    773          } else {
    774             /* Just flush the minimal amount required, as computed by
    775                preciseMemExnsFn. */
    776             HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    777             HWord k_hi = env->key[j] & 0xFFFF;
    778             if (preciseMemExnsFn( k_lo, k_hi ))
    779                env->inuse[j] = False;
    780          }
    781       }
    782    } /* if (memRW) */
    783 
    784 }
    785 
    786 
    787 /* Scan backwards, building up a set of (min offset, max
    788    offset) pairs, indicating those parts of the guest state
    789    for which the next event is a write.
    790 
    791    On seeing a conditional exit, empty the set.
    792 
    793    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    794    completely within the set, remove the Put.  Otherwise, add
    795    (minoff,maxoff) to the set.
    796 
    797    On seeing 'Get (minoff,maxoff)', remove any part of the set
    798    overlapping (minoff,maxoff).  The same has to happen for any events
    799    which implicitly read parts of the guest state: dirty helper calls
    800    and loads/stores.
    801 */
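
         /* Illustrative example (assumed IR, not from the original source).
            The right-hand column shows the effect of the backwards scan
            described above, annotated per statement as it is reached:

               PUT(0) = t1               IR-NoOp          -- (0,3) in set: dead
               PUT(0) = t2         ==>   PUT(0) = t2      -- adds (0,3) to set
               t3 = GET:I32(0)           t3 = GET:I32(0)  -- removes (0,3)
               PUT(0) = t4               PUT(0) = t4      -- adds (0,3) to set

            Only the first Put is removable: the Get means that the next
            event for offsets (0,3) after it is a read, so the Puts below it
            survive. */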
    802 
    803 static void redundant_put_removal_BB (
    804                IRSB* bb,
    805                Bool (*preciseMemExnsFn)(Int,Int)
    806             )
    807 {
    808    Int     i, j;
    809    Bool    isPut;
    810    IRStmt* st;
    811    UInt    key = 0; /* keep gcc -O happy */
    812 
    813    HashHW* env = newHHW();
    814    for (i = bb->stmts_used-1; i >= 0; i--) {
    815       st = bb->stmts[i];
    816 
    817       if (st->tag == Ist_NoOp)
    818          continue;
    819 
    820       /* Deal with conditional exits. */
    821       if (st->tag == Ist_Exit) {
    822          /* Since control may not get beyond this point, we must empty
     823             out the set: we can no longer claim that the next
    824             event for any part of the guest state is definitely a
    825             write. */
    826          vassert(isIRAtom(st->Ist.Exit.guard));
    827          for (j = 0; j < env->used; j++)
    828             env->inuse[j] = False;
    829          continue;
    830       }
    831 
    832       /* Deal with Puts */
    833       switch (st->tag) {
    834          case Ist_Put:
    835             isPut = True;
    836             key = mk_key_GetPut( st->Ist.Put.offset,
    837                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    838             vassert(isIRAtom(st->Ist.Put.data));
    839             break;
    840          case Ist_PutI:
    841             isPut = True;
    842             key = mk_key_GetIPutI( st->Ist.PutI.descr );
    843             vassert(isIRAtom(st->Ist.PutI.ix));
    844             vassert(isIRAtom(st->Ist.PutI.data));
    845             break;
    846          default:
    847             isPut = False;
    848       }
    849       if (isPut && st->tag != Ist_PutI) {
    850          /* See if any single entry in env overlaps this Put.  This is
    851             simplistic in that the transformation is valid if, say, two
    852             or more entries in the env overlap this Put, but the use of
    853             lookupHHW will only find a single entry which exactly
    854             overlaps this Put.  This is suboptimal but safe. */
    855          if (lookupHHW(env, NULL, (HWord)key)) {
    856             /* This Put is redundant because a later one will overwrite
    857                it.  So NULL (nop) it out. */
    858             if (DEBUG_IROPT) {
    859                vex_printf("rPUT: "); ppIRStmt(st);
    860                vex_printf("\n");
    861             }
    862             bb->stmts[i] = IRStmt_NoOp();
    863          } else {
    864             /* We can't demonstrate that this Put is redundant, so add it
    865                to the running collection. */
    866             addToHHW(env, (HWord)key, 0);
    867          }
    868          continue;
    869       }
    870 
    871       /* Deal with Gets.  These remove bits of the environment since
     872          the appearance of a Get means that the next event for that slice
    873          of the guest state is no longer a write, but a read.  Also
    874          deals with implicit reads of guest state needed to maintain
    875          precise exceptions. */
    876       handle_gets_Stmt( env, st, preciseMemExnsFn );
    877    }
    878 }
    879 
    880 
    881 /*---------------------------------------------------------------*/
    882 /*--- Constant propagation and folding                        ---*/
    883 /*---------------------------------------------------------------*/
    884 
    885 /* The env in this section is a map from IRTemp to IRExpr*,
    886    that is, an array indexed by IRTemp. */
    887 
    888 /* Are both expressions simply the same IRTemp ? */
    889 static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
    890 {
    891    return toBool( e1->tag == Iex_RdTmp
    892                   && e2->tag == Iex_RdTmp
    893                   && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
    894 }
    895 
    896 static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
    897 {
    898    return toBool( e1->tag == Iex_Const
    899                   && e2->tag == Iex_Const
    900                   && e1->Iex.Const.con->tag == Ico_U32
    901                   && e2->Iex.Const.con->tag == Ico_U32
    902                   && e1->Iex.Const.con->Ico.U32
    903                      == e2->Iex.Const.con->Ico.U32 );
    904 }
    905 
     906 /* Are both expressions either the same IRTemp or IRConst-U32s?  If
    907    in doubt, say No. */
    908 static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
    909 {
    910    switch (e1->tag) {
    911       case Iex_RdTmp:
    912          return sameIRTemps(e1, e2);
    913       case Iex_Const:
    914          return sameIcoU32s(e1, e2);
    915       default:
    916          return False;
    917    }
    918 }
    919 
    920 static Bool notBool ( Bool b )
    921 {
    922    if (b == True) return False;
    923    if (b == False) return True;
    924    vpanic("notBool");
    925 }
    926 
    927 /* Make a zero which has the same type as the result of the given
    928    primop. */
    929 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
    930 {
    931    switch (op) {
    932       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
    933       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
    934       case Iop_Sub32:
    935       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
    936       case Iop_Sub64:
    937       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
    938       case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
    939       default: vpanic("mkZeroOfPrimopResultType: bad primop");
    940    }
    941 }
    942 
    943 /* Make a value containing all 1-bits, which has the same type as the
    944    result of the given primop. */
    945 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
    946 {
    947    switch (op) {
    948       case Iop_CmpEQ64:
    949          return IRExpr_Const(IRConst_U1(toBool(1)));
    950       case Iop_CmpEQ8x8:
    951          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
    952       case Iop_CmpEQ8x16:
    953          return IRExpr_Const(IRConst_V128(0xFFFF));
    954       default:
    955          vpanic("mkOnesOfPrimopResultType: bad primop");
    956    }
    957 }
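
         /* Illustrative examples of the constant foldings performed below
            (assumed IR, not from the original source):

               Add32(0x3:I32,0x4:I32)    -->  0x7:I32
               1Sto32(1:I1)              -->  0xFFFFFFFF:I32
               CmpEQ64(0x5:I64,0x5:I64)  -->  1:I1

            Each case rewrites an all-constant expression to the equivalent
            IRConst, so later passes see a simple atom. */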
    958 
    959 
    960 static IRExpr* fold_Expr ( IRExpr* e )
    961 {
    962    Int     shift;
    963    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
    964 
    965    /* UNARY ops */
    966    if (e->tag == Iex_Unop
    967        && e->Iex.Unop.arg->tag == Iex_Const) {
    968       switch (e->Iex.Unop.op) {
    969          case Iop_1Uto8:
    970             e2 = IRExpr_Const(IRConst_U8(toUChar(
    971                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    972                     ? 1 : 0)));
    973             break;
    974          case Iop_1Uto32:
    975             e2 = IRExpr_Const(IRConst_U32(
    976                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    977                     ? 1 : 0));
    978             break;
    979          case Iop_1Uto64:
    980             e2 = IRExpr_Const(IRConst_U64(
    981                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    982                     ? 1 : 0));
    983             break;
    984 
    985          case Iop_1Sto8:
    986             e2 = IRExpr_Const(IRConst_U8(toUChar(
    987                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    988                     ? 0xFF : 0)));
    989             break;
    990          case Iop_1Sto16:
    991             e2 = IRExpr_Const(IRConst_U16(toUShort(
    992                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    993                     ? 0xFFFF : 0)));
    994             break;
    995          case Iop_1Sto32:
    996             e2 = IRExpr_Const(IRConst_U32(
    997                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
    998                     ? 0xFFFFFFFF : 0));
    999             break;
   1000          case Iop_1Sto64:
   1001             e2 = IRExpr_Const(IRConst_U64(
   1002                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1003                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1004             break;
   1005 
   1006          case Iop_8Sto32: {
   1007             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1008             s32 <<= 24;
   1009             s32 >>= 24;
   1010             e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1011             break;
   1012          }
   1013          case Iop_8Uto64:
   1014             e2 = IRExpr_Const(IRConst_U64(
   1015                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1016             break;
   1017          case Iop_16Uto64:
   1018             e2 = IRExpr_Const(IRConst_U64(
   1019                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1020             break;
   1021          case Iop_8Uto32:
   1022             e2 = IRExpr_Const(IRConst_U32(
   1023                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1024             break;
   1025          case Iop_16Uto32:
   1026             e2 = IRExpr_Const(IRConst_U32(
   1027                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1028             break;
   1029          case Iop_32to16:
   1030             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1031                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1032             break;
   1033          case Iop_32to8:
   1034             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1035                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1036             break;
   1037          case Iop_32to1:
   1038             e2 = IRExpr_Const(IRConst_U1(toBool(
   1039                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1040                  )));
   1041             break;
   1042          case Iop_64to1:
   1043             e2 = IRExpr_Const(IRConst_U1(toBool(
   1044                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1045                  )));
   1046             break;
   1047 
   1048          case Iop_Not64:
   1049             e2 = IRExpr_Const(IRConst_U64(
   1050                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1051             break;
   1052          case Iop_Not32:
   1053             e2 = IRExpr_Const(IRConst_U32(
   1054                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1055             break;
   1056          case Iop_Not16:
   1057             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1058                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1059             break;
   1060          case Iop_Not8:
   1061             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1062                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1063             break;
   1064 
   1065          case Iop_Not1:
   1066             e2 = IRExpr_Const(IRConst_U1(
   1067                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1068             break;
   1069 
   1070          case Iop_64to8: {
   1071             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1072             w64 &= 0xFFULL;
   1073             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1074             break;
   1075          }
   1076          case Iop_64to16: {
   1077             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1078             w64 &= 0xFFFFULL;
   1079             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1080             break;
   1081          }
   1082          case Iop_64to32: {
   1083             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1084             w64 &= 0x00000000FFFFFFFFULL;
   1085             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1086             break;
   1087          }
   1088          case Iop_64HIto32: {
   1089             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1090             w64 >>= 32;
   1091             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1092             break;
   1093          }
   1094          case Iop_32Uto64:
   1095             e2 = IRExpr_Const(IRConst_U64(
   1096                     0xFFFFFFFFULL
   1097                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1098             break;
   1099          case Iop_32Sto64: {
   1100             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1101             s64 <<= 32;
   1102             s64 >>= 32;
   1103             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1104             break;
   1105          }
   1106          case Iop_CmpNEZ8:
   1107             e2 = IRExpr_Const(IRConst_U1(toBool(
   1108                     0 !=
   1109                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1110                  )));
   1111             break;
   1112          case Iop_CmpNEZ32:
   1113             e2 = IRExpr_Const(IRConst_U1(toBool(
   1114                     0 !=
   1115                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1116                  )));
   1117             break;
   1118          case Iop_CmpNEZ64:
   1119             e2 = IRExpr_Const(IRConst_U1(toBool(
   1120                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1121                  )));
   1122             break;
   1123 
   1124          case Iop_CmpwNEZ32: {
   1125             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1126             if (w32 == 0)
   1127                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1128             else
   1129                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1130             break;
   1131          }
   1132          case Iop_CmpwNEZ64: {
   1133             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1134             if (w64 == 0)
   1135                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1136             else
   1137                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1138             break;
   1139          }
   1140 
   1141          case Iop_Left32: {
   1142             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1143             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1144             s32 = (s32 | (-s32));
   1145             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1146             break;
   1147          }
   1148 
   1149          case Iop_Left64: {
   1150             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1151             Long  s64 = (Long)u64;
   1152             s64 = (s64 | (-s64));
   1153             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1154             break;
   1155          }
   1156 
   1157          default:
   1158             goto unhandled;
   1159       }
   1160    }
   1161 
   1162    /* BINARY ops */
   1163    if (e->tag == Iex_Binop) {
   1164       if (e->Iex.Binop.arg1->tag == Iex_Const
   1165           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1166          /* cases where both args are consts */
   1167          switch (e->Iex.Binop.op) {
   1168 
   1169             /* -- Or -- */
   1170             case Iop_Or8:
   1171                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1172                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1173                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1174                break;
   1175             case Iop_Or16:
   1176                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1177                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1178                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1179                break;
   1180             case Iop_Or32:
   1181                e2 = IRExpr_Const(IRConst_U32(
   1182                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1183                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1184                break;
   1185             case Iop_Or64:
   1186                e2 = IRExpr_Const(IRConst_U64(
   1187                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1188                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1189                break;
   1190 
   1191             /* -- Xor -- */
   1192             case Iop_Xor8:
   1193                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1194                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1195                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1196                break;
   1197             case Iop_Xor16:
   1198                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1199                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1200                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1201                break;
   1202             case Iop_Xor32:
   1203                e2 = IRExpr_Const(IRConst_U32(
   1204                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1205                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1206                break;
   1207             case Iop_Xor64:
   1208                e2 = IRExpr_Const(IRConst_U64(
   1209                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1210                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1211                break;
   1212 
   1213             /* -- And -- */
   1214             case Iop_And8:
   1215                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1216                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1217                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1218                break;
   1219             case Iop_And32:
   1220                e2 = IRExpr_Const(IRConst_U32(
   1221                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1222                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1223                break;
   1224             case Iop_And64:
   1225                e2 = IRExpr_Const(IRConst_U64(
   1226                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1227                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1228                break;
   1229 
   1230             /* -- Add -- */
   1231             case Iop_Add8:
   1232                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1233                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1234                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1235                break;
   1236             case Iop_Add32:
   1237                e2 = IRExpr_Const(IRConst_U32(
   1238                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1239                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1240                break;
   1241             case Iop_Add64:
   1242                e2 = IRExpr_Const(IRConst_U64(
   1243                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1244                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1245                break;
   1246 
   1247             /* -- Sub -- */
   1248             case Iop_Sub8:
   1249                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1250                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1251                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1252                break;
   1253             case Iop_Sub32:
   1254                e2 = IRExpr_Const(IRConst_U32(
   1255                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1256                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1257                break;
   1258             case Iop_Sub64:
   1259                e2 = IRExpr_Const(IRConst_U64(
   1260                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1261                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1262                break;
   1263 
   1264             /* -- Max32U -- */
   1265             case Iop_Max32U: {
   1266                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1267                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1268                UInt res  = u32a > u32b ? u32a : u32b;
   1269                e2 = IRExpr_Const(IRConst_U32(res));
   1270                break;
   1271             }
   1272 
   1273             /* -- Mul -- */
   1274             case Iop_Mul32:
   1275                e2 = IRExpr_Const(IRConst_U32(
   1276                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1277                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1278                break;
   1279             case Iop_Mul64:
   1280                e2 = IRExpr_Const(IRConst_U64(
   1281                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1282                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1283                break;
   1284 
   1285             case Iop_MullS32: {
   1286                /* very paranoid */
   1287                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1288                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1289                Int   s32a = (Int)u32a;
   1290                Int   s32b = (Int)u32b;
   1291                Long  s64a = (Long)s32a;
   1292                Long  s64b = (Long)s32b;
   1293                Long  sres = s64a * s64b;
   1294                ULong ures = (ULong)sres;
   1295                e2 = IRExpr_Const(IRConst_U64(ures));
   1296                break;
   1297             }
   1298 
   1299             /* -- Shl -- */
   1300             case Iop_Shl32:
   1301                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1302                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1303                if (shift >= 0 && shift <= 31)
   1304                   e2 = IRExpr_Const(IRConst_U32(
   1305                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1306                            << shift)));
   1307                break;
   1308             case Iop_Shl64:
   1309                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1310                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1311                if (shift >= 0 && shift <= 63)
   1312                   e2 = IRExpr_Const(IRConst_U64(
   1313                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1314                            << shift)));
   1315                break;
   1316 
   1317             /* -- Sar -- */
   1318             case Iop_Sar32: {
   1319                /* paranoid ... */
   1320                /*signed*/ Int s32;
   1321                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1322                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1323                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1324                if (shift >= 0 && shift <= 31) {
   1325                   s32 >>=/*signed*/ shift;
   1326                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1327                }
   1328                break;
   1329             }
   1330             case Iop_Sar64: {
   1331                /* paranoid ... */
   1332                /*signed*/ Long s64;
   1333                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1334                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1335                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1336                if (shift >= 0 && shift <= 63) {
   1337                   s64 >>=/*signed*/ shift;
   1338                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1339                }
   1340                break;
   1341             }
   1342 
   1343             /* -- Shr -- */
   1344             case Iop_Shr32: {
   1345                /* paranoid ... */
   1346                /*unsigned*/ UInt u32;
   1347                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1348                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1349                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1350                if (shift >= 0 && shift <= 31) {
   1351                   u32 >>=/*unsigned*/ shift;
   1352                   e2 = IRExpr_Const(IRConst_U32(u32));
   1353                }
   1354                break;
   1355             }
   1356             case Iop_Shr64: {
   1357                /* paranoid ... */
   1358                /*unsigned*/ ULong u64;
   1359                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1360                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1361                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1362                if (shift >= 0 && shift <= 63) {
   1363                   u64 >>=/*unsigned*/ shift;
   1364                   e2 = IRExpr_Const(IRConst_U64(u64));
   1365                }
   1366                break;
   1367             }
   1368 
   1369             /* -- CmpEQ -- */
   1370             case Iop_CmpEQ32:
   1371                e2 = IRExpr_Const(IRConst_U1(toBool(
   1372                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1373                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1374                break;
   1375             case Iop_CmpEQ64:
   1376                e2 = IRExpr_Const(IRConst_U1(toBool(
   1377                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1378                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1379                break;
   1380 
   1381             /* -- CmpNE -- */
   1382             case Iop_CmpNE8:
   1383                e2 = IRExpr_Const(IRConst_U1(toBool(
   1384                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1385                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1386                break;
   1387             case Iop_CmpNE32:
   1388                e2 = IRExpr_Const(IRConst_U1(toBool(
   1389                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1390                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1391                break;
   1392             case Iop_CmpNE64:
   1393                e2 = IRExpr_Const(IRConst_U1(toBool(
   1394                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1395                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1396                break;
   1397 
   1398             /* -- CmpLEU -- */
   1399             case Iop_CmpLE32U:
   1400                e2 = IRExpr_Const(IRConst_U1(toBool(
   1401                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1402                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1403                break;
   1404 
   1405             /* -- CmpLES -- */
   1406             case Iop_CmpLE32S:
   1407                e2 = IRExpr_Const(IRConst_U1(toBool(
   1408                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1409                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1410                break;
   1411 
   1412             /* -- CmpLTS -- */
   1413             case Iop_CmpLT32S:
   1414                e2 = IRExpr_Const(IRConst_U1(toBool(
   1415                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1416                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1417                break;
   1418 
   1419             /* -- CmpLTU -- */
   1420             case Iop_CmpLT32U:
   1421                e2 = IRExpr_Const(IRConst_U1(toBool(
   1422                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1423                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1424                break;
   1425 
   1426             /* -- CmpORD -- */
   1427             case Iop_CmpORD32S: {
   1428                /* very paranoid */
   1429                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1430                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1431                Int   s32a = (Int)u32a;
   1432                Int   s32b = (Int)u32b;
   1433                Int   r = 0x2; /* EQ */
   1434                if (s32a < s32b) {
   1435                   r = 0x8; /* LT */
   1436                }
   1437                else if (s32a > s32b) {
   1438                   r = 0x4; /* GT */
   1439                }
   1440                e2 = IRExpr_Const(IRConst_U32(r));
   1441                break;
   1442             }
   1443 
   1444             /* -- nHLto2n -- */
   1445             case Iop_32HLto64:
   1446                e2 = IRExpr_Const(IRConst_U64(
   1447                        (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
   1448                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   1449                     ));
   1450                break;
   1451             case Iop_64HLto128:
   1452                /* We can't fold this, because there is no way to
   1453                   express the result in IR, but at least pretend to
   1454                   handle it, so as to stop getting blasted with
   1455                   no-rule-for-this-primop messages. */
   1456                break;
   1457 
   1458             default:
   1459                goto unhandled;
   1460          }
   1461 
   1462       } else {
   1463 
   1464          /* other cases (identities, etc) */
   1465 
   1466          /* Shl64/Shr64(x,0) ==> x */
   1467          if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
   1468              && e->Iex.Binop.arg2->tag == Iex_Const
   1469              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1470             e2 = e->Iex.Binop.arg1;
   1471          } else
   1472 
   1473          /* Shl32/Shr32(x,0) ==> x */
   1474          if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
   1475              && e->Iex.Binop.arg2->tag == Iex_Const
   1476              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1477             e2 = e->Iex.Binop.arg1;
   1478          } else
   1479 
   1480          /* Or8(x,0) ==> x */
   1481          if ((e->Iex.Binop.op == Iop_Or8)
   1482              && e->Iex.Binop.arg2->tag == Iex_Const
   1483              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
   1484             e2 = e->Iex.Binop.arg1;
   1485          } else
   1486 
   1487          /* Or16(x,0) ==> x */
   1488          if ((e->Iex.Binop.op == Iop_Or16)
   1489              && e->Iex.Binop.arg2->tag == Iex_Const
   1490              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
   1491             e2 = e->Iex.Binop.arg1;
   1492          } else
   1493 
   1494          /* Or32/Add32/Max32U(x,0) ==> x */
   1495          if ((e->Iex.Binop.op == Iop_Add32
   1496               || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
   1497              && e->Iex.Binop.arg2->tag == Iex_Const
   1498              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
   1499             e2 = e->Iex.Binop.arg1;
   1500          } else
   1501 
   1502          /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
   1503             x+x produces a defined least significant bit, and it seems
   1504             simplest just to get rid of the problem by rewriting it
   1505             out, since the opportunity to do so exists. */
   1506          if (e->Iex.Binop.op == Iop_Add32
   1507              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1508              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1509              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1510                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1511             e2 = IRExpr_Binop(Iop_Shl32,
   1512                               e->Iex.Binop.arg1,
   1513                               IRExpr_Const(IRConst_U8(1)));
   1514          } else
   1515 
   1516          /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
   1517          if (e->Iex.Binop.op == Iop_Add64
   1518              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1519              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1520              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1521                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1522             e2 = IRExpr_Binop(Iop_Shl64,
   1523                               e->Iex.Binop.arg1,
   1524                               IRExpr_Const(IRConst_U8(1)));
   1525          } else
   1526 
   1527          /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
   1528          if (e->Iex.Binop.op == Iop_Add8
   1529              && e->Iex.Binop.arg1->tag == Iex_RdTmp
   1530              && e->Iex.Binop.arg2->tag == Iex_RdTmp
   1531              && e->Iex.Binop.arg1->Iex.RdTmp.tmp
   1532                 == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
   1533             e2 = IRExpr_Binop(Iop_Shl8,
   1534                               e->Iex.Binop.arg1,
   1535                               IRExpr_Const(IRConst_U8(1)));
   1536          } else
   1537          /* NB no Add16(t,t) case yet as no known test case exists */
   1538 
   1539          /* Or64/Add64(x,0) ==> x */
   1540          if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
   1541              && e->Iex.Binop.arg2->tag == Iex_Const
   1542              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
   1543             e2 = e->Iex.Binop.arg1;
   1544          } else
   1545 
   1546          /* And32(x,0xFFFFFFFF) ==> x */
   1547          if (e->Iex.Binop.op == Iop_And32
   1548              && e->Iex.Binop.arg2->tag == Iex_Const
   1549              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
   1550             e2 = e->Iex.Binop.arg1;
   1551          } else
   1552 
   1553          /* And32(x,0) ==> 0 */
   1554          if (e->Iex.Binop.op == Iop_And32
   1555              && e->Iex.Binop.arg2->tag == Iex_Const
   1556              && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
   1557             e2 = IRExpr_Const(IRConst_U32(0));
   1558          } else
   1559 
   1560          /* And32/Shl32(0,x) ==> 0 */
   1561          if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
   1562              && e->Iex.Binop.arg1->tag == Iex_Const
   1563              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
   1564             e2 = IRExpr_Const(IRConst_U32(0));
   1565          } else
   1566 
   1567          /* Or8(0,x) ==> x */
   1568          if (e->Iex.Binop.op == Iop_Or8
   1569              && e->Iex.Binop.arg1->tag == Iex_Const
   1570              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
   1571             e2 = e->Iex.Binop.arg2;
   1572          } else
   1573 
   1574          /* Or32/Max32U(0,x) ==> x */
   1575          if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
   1576              && e->Iex.Binop.arg1->tag == Iex_Const
   1577              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
   1578             e2 = e->Iex.Binop.arg2;
   1579          } else
   1580 
   1581          /* Or64(0,x) ==> x */
   1582          if (e->Iex.Binop.op == Iop_Or64
   1583              && e->Iex.Binop.arg1->tag == Iex_Const
   1584              && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
   1585             e2 = e->Iex.Binop.arg2;
   1586          } else
   1587 
   1588          /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
   1589          /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
   1590          /* Max32U(t,t) ==> t, for some IRTemp t */
   1591          switch (e->Iex.Binop.op) {
   1592             case Iop_And64: case Iop_And32:
   1593             case Iop_And16: case Iop_And8:
   1594             case Iop_Or64: case Iop_Or32:
   1595             case Iop_Or16: case Iop_Or8: case Iop_OrV128:
   1596             case Iop_Max32U:
   1597                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1598                   e2 = e->Iex.Binop.arg1;
   1599                break;
   1600             default:
   1601                break;
   1602          }
   1603 
   1604          /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   1605          /* Sub32/64(t,t) ==> 0, for some IRTemp t */
   1606          switch (e->Iex.Binop.op) {
   1607             case Iop_Xor64: case Iop_Xor32:
   1608             case Iop_Xor16: case Iop_Xor8:
   1609             case Iop_XorV128:
   1610             case Iop_Sub64: case Iop_Sub32:
   1611                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1612                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   1613                break;
   1614             default:
   1615                break;
   1616          }
   1617 
   1618          switch (e->Iex.Binop.op) {
   1619             case Iop_CmpEQ64:
   1620             case Iop_CmpEQ8x8:
   1621             case Iop_CmpEQ8x16:
   1622                if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
   1623                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   1624                break;
   1625             default:
   1626                break;
   1627          }
   1628 
   1629       }
   1630    }
   1631 
   1632    /* Mux0X */
   1633    if (e->tag == Iex_Mux0X) {
   1634       /* is the discriminant a constant? */
   1635       if (e->Iex.Mux0X.cond->tag == Iex_Const) {
   1636          Bool zero;
   1637          /* assured us by the IR type rules */
   1638          /* this is assured by the IR type rules */
   1639          zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
   1640                                      ->Iex.Const.con->Ico.U8));
   1641          e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
   1642       }
   1643       else
   1644       /* are the arms identical? (pretty weedy test) */
   1645       if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
   1646                                e->Iex.Mux0X.exprX)) {
   1647          e2 = e->Iex.Mux0X.expr0;
   1648       }
   1649    }
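
           /* For example, Mux0X(0:I8,t3,t4) folds to t3 via the
              constant-discriminant rule above, and Mux0X(c,t3,t3) folds
              to t3 via the identical-arms rule. */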
   1650 
   1651    /* Show cases where we've found but not folded 'op(t,t)'. */
   1652    if (0 && e == e2 && e->tag == Iex_Binop
   1653        && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   1654       vex_printf("IDENT: ");
   1655       ppIRExpr(e); vex_printf("\n");
   1656    }
   1657 
   1658    /* Show the overall results of folding. */
   1659    if (DEBUG_IROPT && e2 != e) {
   1660       vex_printf("FOLD: ");
   1661       ppIRExpr(e); vex_printf("  ->  ");
   1662       ppIRExpr(e2); vex_printf("\n");
   1663    }
   1664 
   1665    return e2;
   1666 
   1667  unhandled:
   1668 #  if 0
   1669    vex_printf("\n\n");
   1670    ppIRExpr(e);
   1671    vpanic("fold_Expr: no rule for the above");
   1672 #  else
   1673    if (vex_control.iropt_verbosity > 0) {
   1674       vex_printf("vex iropt: fold_Expr: no rule for: ");
   1675       ppIRExpr(e);
   1676       vex_printf("\n");
   1677    }
   1678    return e2;
   1679 #  endif
   1680 }
   1681 
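        /* Illustrative sketch, disabled and purely for exposition: the
           effect of one const-const rule above on a concrete input, using
           only constructors already employed in this file; the helper name
           below is made up for the example. */
        #  if 0
        static void example_fold_CmpLT32S ( void )
        {
           IRExpr* e = IRExpr_Binop( Iop_CmpLT32S,
                                     IRExpr_Const(IRConst_U32(0xFFFFFFFF)),
                                     IRExpr_Const(IRConst_U32(0)) );
           IRExpr* f = fold_Expr(e);
           /* 0xFFFFFFFF reads as -1 when treated as signed, so the
              Iop_CmpLT32S rule above folds this to the constant 1:I1. */
           vassert(f->tag == Iex_Const
                   && f->Iex.Const.con->tag == Ico_U1
                   && f->Iex.Const.con->Ico.U1 == True);
        }
        #  endif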
   1682 
   1683 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   1684    1-level due to previous flattening pass. */
   1685 
   1686 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   1687 {
   1688    switch (ex->tag) {
   1689       case Iex_RdTmp:
   1690          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   1691             return env[(Int)ex->Iex.RdTmp.tmp];
   1692          } else {
   1693             /* not bound in env */
   1694             return ex;
   1695          }
   1696 
   1697       case Iex_Const:
   1698       case Iex_Get:
   1699          return ex;
   1700 
   1701       case Iex_GetI:
   1702          vassert(isIRAtom(ex->Iex.GetI.ix));
   1703          return IRExpr_GetI(
   1704             ex->Iex.GetI.descr,
   1705             subst_Expr(env, ex->Iex.GetI.ix),
   1706             ex->Iex.GetI.bias
   1707          );
   1708 
   1709       case Iex_Qop:
   1710          vassert(isIRAtom(ex->Iex.Qop.arg1));
   1711          vassert(isIRAtom(ex->Iex.Qop.arg2));
   1712          vassert(isIRAtom(ex->Iex.Qop.arg3));
   1713          vassert(isIRAtom(ex->Iex.Qop.arg4));
   1714          return IRExpr_Qop(
   1715                    ex->Iex.Qop.op,
   1716                    subst_Expr(env, ex->Iex.Qop.arg1),
   1717                    subst_Expr(env, ex->Iex.Qop.arg2),
   1718                    subst_Expr(env, ex->Iex.Qop.arg3),
   1719                    subst_Expr(env, ex->Iex.Qop.arg4)
   1720                 );
   1721 
   1722       case Iex_Triop:
   1723          vassert(isIRAtom(ex->Iex.Triop.arg1));
   1724          vassert(isIRAtom(ex->Iex.Triop.arg2));
   1725          vassert(isIRAtom(ex->Iex.Triop.arg3));
   1726          return IRExpr_Triop(
   1727                    ex->Iex.Triop.op,
   1728                    subst_Expr(env, ex->Iex.Triop.arg1),
   1729                    subst_Expr(env, ex->Iex.Triop.arg2),
   1730                    subst_Expr(env, ex->Iex.Triop.arg3)
   1731                 );
   1732 
   1733       case Iex_Binop:
   1734          vassert(isIRAtom(ex->Iex.Binop.arg1));
   1735          vassert(isIRAtom(ex->Iex.Binop.arg2));
   1736          return IRExpr_Binop(
   1737                    ex->Iex.Binop.op,
   1738                    subst_Expr(env, ex->Iex.Binop.arg1),
   1739                    subst_Expr(env, ex->Iex.Binop.arg2)
   1740                 );
   1741 
   1742       case Iex_Unop:
   1743          vassert(isIRAtom(ex->Iex.Unop.arg));
   1744          return IRExpr_Unop(
   1745                    ex->Iex.Unop.op,
   1746                    subst_Expr(env, ex->Iex.Unop.arg)
   1747                 );
   1748 
   1749       case Iex_Load:
   1750          vassert(isIRAtom(ex->Iex.Load.addr));
   1751          return IRExpr_Load(
   1752                    ex->Iex.Load.end,
   1753                    ex->Iex.Load.ty,
   1754                    subst_Expr(env, ex->Iex.Load.addr)
   1755                 );
   1756 
   1757       case Iex_CCall: {
   1758          Int      i;
   1759          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   1760          for (i = 0; args2[i]; i++) {
   1761             vassert(isIRAtom(args2[i]));
   1762             args2[i] = subst_Expr(env, args2[i]);
   1763          }
   1764          return IRExpr_CCall(
   1765                    ex->Iex.CCall.cee,
   1766                    ex->Iex.CCall.retty,
   1767                    args2
   1768                 );
   1769       }
   1770 
   1771       case Iex_Mux0X:
   1772          vassert(isIRAtom(ex->Iex.Mux0X.cond));
   1773          vassert(isIRAtom(ex->Iex.Mux0X.expr0));
   1774          vassert(isIRAtom(ex->Iex.Mux0X.exprX));
   1775          return IRExpr_Mux0X(
   1776                    subst_Expr(env, ex->Iex.Mux0X.cond),
   1777                    subst_Expr(env, ex->Iex.Mux0X.expr0),
   1778                    subst_Expr(env, ex->Iex.Mux0X.exprX)
   1779                 );
   1780 
   1781       default:
   1782          vex_printf("\n\n"); ppIRExpr(ex);
   1783          vpanic("subst_Expr");
   1784 
   1785    }
   1786 }
   1787 
   1788 
   1789 /* Apply the subst to stmt, then fold the result as much as possible.
   1790    Much simplified due to stmt being previously flattened.  As a
   1791    result of this, the stmt may wind up being turned into a no-op.
   1792 */
   1793 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
   1794 {
   1795 #  if 0
   1796    vex_printf("\nsubst and fold stmt\n");
   1797    ppIRStmt(st);
   1798    vex_printf("\n");
   1799 #  endif
   1800 
   1801    switch (st->tag) {
   1802       case Ist_AbiHint:
   1803          vassert(isIRAtom(st->Ist.AbiHint.base));
   1804          vassert(isIRAtom(st->Ist.AbiHint.nia));
   1805          return IRStmt_AbiHint(
   1806                    fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
   1807                    st->Ist.AbiHint.len,
   1808                    fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
   1809                 );
   1810       case Ist_Put:
   1811          vassert(isIRAtom(st->Ist.Put.data));
   1812          return IRStmt_Put(
   1813                    st->Ist.Put.offset,
   1814                    fold_Expr(subst_Expr(env, st->Ist.Put.data))
   1815                 );
   1816 
   1817       case Ist_PutI:
   1818          vassert(isIRAtom(st->Ist.PutI.ix));
   1819          vassert(isIRAtom(st->Ist.PutI.data));
   1820          return IRStmt_PutI(
   1821                    st->Ist.PutI.descr,
   1822                    fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
   1823                    st->Ist.PutI.bias,
   1824                    fold_Expr(subst_Expr(env, st->Ist.PutI.data))
   1825                 );
   1826 
   1827       case Ist_WrTmp:
   1828          /* This is the one place where an expr (st->Ist.WrTmp.data) is
   1829             allowed to be more than just a constant or a tmp. */
   1830          return IRStmt_WrTmp(
   1831                    st->Ist.WrTmp.tmp,
   1832                    fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
   1833                 );
   1834 
   1835       case Ist_Store:
   1836          vassert(isIRAtom(st->Ist.Store.addr));
   1837          vassert(isIRAtom(st->Ist.Store.data));
   1838          return IRStmt_Store(
   1839                    st->Ist.Store.end,
   1840                    fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
   1841                    fold_Expr(subst_Expr(env, st->Ist.Store.data))
   1842                 );
   1843 
   1844       case Ist_CAS: {
   1845          IRCAS *cas, *cas2;
   1846          cas = st->Ist.CAS.details;
   1847          vassert(isIRAtom(cas->addr));
   1848          vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   1849          vassert(isIRAtom(cas->expdLo));
   1850          vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   1851          vassert(isIRAtom(cas->dataLo));
   1852          cas2 = mkIRCAS(
   1853                    cas->oldHi, cas->oldLo, cas->end,
   1854                    fold_Expr(subst_Expr(env, cas->addr)),
   1855                    cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
   1856                    fold_Expr(subst_Expr(env, cas->expdLo)),
   1857                    cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
   1858                    fold_Expr(subst_Expr(env, cas->dataLo))
   1859                 );
   1860          return IRStmt_CAS(cas2);
   1861       }
   1862 
   1863       case Ist_LLSC:
   1864          vassert(isIRAtom(st->Ist.LLSC.addr));
   1865          if (st->Ist.LLSC.storedata)
   1866             vassert(isIRAtom(st->Ist.LLSC.storedata));
   1867          return IRStmt_LLSC(
   1868                    st->Ist.LLSC.end,
   1869                    st->Ist.LLSC.result,
   1870                    fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
   1871                    st->Ist.LLSC.storedata
   1872                       ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
   1873                       : NULL
   1874                 );
   1875 
   1876       case Ist_Dirty: {
   1877          Int     i;
   1878          IRDirty *d, *d2;
   1879          d = st->Ist.Dirty.details;
   1880          d2 = emptyIRDirty();
   1881          *d2 = *d;
   1882          d2->args = shallowCopyIRExprVec(d2->args);
   1883          if (d2->mFx != Ifx_None) {
   1884             vassert(isIRAtom(d2->mAddr));
   1885             d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
   1886          }
   1887          vassert(isIRAtom(d2->guard));
   1888          d2->guard = fold_Expr(subst_Expr(env, d2->guard));
   1889          for (i = 0; d2->args[i]; i++) {
   1890             vassert(isIRAtom(d2->args[i]));
   1891             d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
   1892          }
   1893          return IRStmt_Dirty(d2);
   1894       }
   1895 
   1896       case Ist_IMark:
   1897          return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
   1898 
   1899       case Ist_NoOp:
   1900          return IRStmt_NoOp();
   1901 
   1902       case Ist_MBE:
   1903          return IRStmt_MBE(st->Ist.MBE.event);
   1904 
   1905       case Ist_Exit: {
   1906          IRExpr* fcond;
   1907          vassert(isIRAtom(st->Ist.Exit.guard));
   1908          fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
   1909          if (fcond->tag == Iex_Const) {
   1910             /* Interesting.  The condition on this exit has folded down to
   1911                a constant. */
   1912             vassert(fcond->Iex.Const.con->tag == Ico_U1);
   1913             vassert(fcond->Iex.Const.con->Ico.U1 == False
   1914                     || fcond->Iex.Const.con->Ico.U1 == True);
   1915             if (fcond->Iex.Const.con->Ico.U1 == False) {
   1916                /* exit is never going to happen, so dump the statement. */
   1917                return IRStmt_NoOp();
   1918             } else {
   1919                vassert(fcond->Iex.Const.con->Ico.U1 == True);
   1920                /* Hmmm.  The exit has become unconditional.  Leave it
   1921                   as it is for now, since we'd have to truncate the BB
   1922                   at this point, which is tricky.  Such truncation is
   1923                   done later by the dead-code elimination pass. */
   1924                /* fall out into the reconstruct-the-exit code. */
   1925                if (vex_control.iropt_verbosity > 0)
   1926                   /* really a misuse of vex_control.iropt_verbosity */
   1927                   vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
   1928             }
   1929          }
   1930          return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
   1931       }
   1932 
   1933       default:
   1934          vex_printf("\n"); ppIRStmt(st);
   1935          vpanic("subst_and_fold_Stmt");
   1936    }
   1937 }
   1938 
   1939 
   1940 IRSB* cprop_BB ( IRSB* in )
   1941 {
   1942    Int      i;
   1943    IRSB*    out;
   1944    IRStmt*  st2;
   1945    Int      n_tmps = in->tyenv->types_used;
   1946    IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
   1947 
   1948    out = emptyIRSB();
   1949    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   1950 
   1951    /* Set up the env which travels forward.  This holds a
   1952       substitution, mapping IRTemps to atoms, that is, IRExprs which
   1953       are either IRTemps or IRConsts.  Thus, copy and constant
   1954       propagation is done.  The environment is to be applied as we
   1955       move along.  Keys are IRTemps.  Values are IRExpr*s.
   1956    */
   1957    for (i = 0; i < n_tmps; i++)
   1958       env[i] = NULL;
   1959 
   1960    /* For each original SSA-form stmt ... */
   1961    for (i = 0; i < in->stmts_used; i++) {
   1962 
   1963       /* First apply the substitution to the current stmt.  This
   1964          propagates in any constants and tmp-tmp assignments
   1965          accumulated prior to this point.  As part of the
   1966          subst_and_fold_Stmt call, also fold any resulting constant expressions. */
   1967 
   1968       st2 = in->stmts[i];
   1969 
   1970       /* perhaps st2 is already a no-op? */
   1971       if (st2->tag == Ist_NoOp) continue;
   1972 
   1973       st2 = subst_and_fold_Stmt( env, st2 );
   1974 
   1975       /* If the statement has been folded into a no-op, forget it. */
   1976       if (st2->tag == Ist_NoOp) continue;
   1977 
   1978       /* Now consider what the stmt looks like.  If it's of the form
   1979          't = const' or 't1 = t2', add it to the running environment
   1980          and not to the output BB.  Otherwise, add it to the output
   1981          BB.  Note, we choose not to propagate const when const is an
   1982          F64i, so that F64i literals can be CSE'd later.  This helps
   1983          x86 floating point code generation. */
   1984 
   1985       if (st2->tag == Ist_WrTmp
   1986           && st2->Ist.WrTmp.data->tag == Iex_Const
   1987           && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
   1988          /* 't = const' -- add to env.
   1989              The pair (IRTemp, IRExpr*) is added. */
   1990          env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   1991       }
   1992       else
   1993       if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
   1994          /* 't1 = t2' -- add to env.
   1995              The pair (IRTemp, IRExpr*) is added. */
   1996          env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   1997       }
   1998       else {
   1999          /* Not interesting, copy st2 into the output block. */
   2000          addStmtToIRSB( out, st2 );
   2001       }
   2002    }
   2003 
   2004    out->next     = subst_Expr( env, in->next );
   2005    out->jumpkind = in->jumpkind;
   2006    return out;
   2007 }
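
        /* Illustrative example (comment only): given the flattened input
              t1 = 0x10:I32
              t2 = t1
              t3 = Add32(t2,0x4:I32)
              PUT(0) = t3
           the 't = const' and 't1 = t2' bindings go into env rather than
           into the output block, the Add32 then folds, and the returned
           block contains just "PUT(0) = 0x14:I32". */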
   2008 
   2009 
   2010 /*---------------------------------------------------------------*/
   2011 /*--- Dead code (t = E) removal                               ---*/
   2012 /*---------------------------------------------------------------*/
   2013 
   2014 /* As a side effect, also removes all code following an unconditional
   2015    side exit. */
   2016 
   2017 /* The "set" passed around below is a Bool array indexed by IRTemp
   2018    -- really just representing a set of IRTemps.
   2019 */
   2020 
   2021 inline
   2022 static void addUses_Temp ( Bool* set, IRTemp tmp )
   2023 {
   2024    set[(Int)tmp] = True;
   2025 }
   2026 
   2027 static void addUses_Expr ( Bool* set, IRExpr* e )
   2028 {
   2029    Int i;
   2030    switch (e->tag) {
   2031       case Iex_GetI:
   2032          addUses_Expr(set, e->Iex.GetI.ix);
   2033          return;
   2034       case Iex_Mux0X:
   2035          addUses_Expr(set, e->Iex.Mux0X.cond);
   2036          addUses_Expr(set, e->Iex.Mux0X.expr0);
   2037          addUses_Expr(set, e->Iex.Mux0X.exprX);
   2038          return;
   2039       case Iex_CCall:
   2040          for (i = 0; e->Iex.CCall.args[i]; i++)
   2041             addUses_Expr(set, e->Iex.CCall.args[i]);
   2042          return;
   2043       case Iex_Load:
   2044          addUses_Expr(set, e->Iex.Load.addr);
   2045          return;
   2046       case Iex_Qop:
   2047          addUses_Expr(set, e->Iex.Qop.arg1);
   2048          addUses_Expr(set, e->Iex.Qop.arg2);
   2049          addUses_Expr(set, e->Iex.Qop.arg3);
   2050          addUses_Expr(set, e->Iex.Qop.arg4);
   2051          return;
   2052       case Iex_Triop:
   2053          addUses_Expr(set, e->Iex.Triop.arg1);
   2054          addUses_Expr(set, e->Iex.Triop.arg2);
   2055          addUses_Expr(set, e->Iex.Triop.arg3);
   2056          return;
   2057       case Iex_Binop:
   2058          addUses_Expr(set, e->Iex.Binop.arg1);
   2059          addUses_Expr(set, e->Iex.Binop.arg2);
   2060          return;
   2061       case Iex_Unop:
   2062          addUses_Expr(set, e->Iex.Unop.arg);
   2063          return;
   2064       case Iex_RdTmp:
   2065          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2066          return;
   2067       case Iex_Const:
   2068       case Iex_Get:
   2069          return;
   2070       default:
   2071          vex_printf("\n");
   2072          ppIRExpr(e);
   2073          vpanic("addUses_Expr");
   2074    }
   2075 }
   2076 
   2077 static void addUses_Stmt ( Bool* set, IRStmt* st )
   2078 {
   2079    Int      i;
   2080    IRDirty* d;
   2081    IRCAS*   cas;
   2082    switch (st->tag) {
   2083       case Ist_AbiHint:
   2084          addUses_Expr(set, st->Ist.AbiHint.base);
   2085          addUses_Expr(set, st->Ist.AbiHint.nia);
   2086          return;
   2087       case Ist_PutI:
   2088          addUses_Expr(set, st->Ist.PutI.ix);
   2089          addUses_Expr(set, st->Ist.PutI.data);
   2090          return;
   2091       case Ist_WrTmp:
   2092          addUses_Expr(set, st->Ist.WrTmp.data);
   2093          return;
   2094       case Ist_Put:
   2095          addUses_Expr(set, st->Ist.Put.data);
   2096          return;
   2097       case Ist_Store:
   2098          addUses_Expr(set, st->Ist.Store.addr);
   2099          addUses_Expr(set, st->Ist.Store.data);
   2100          return;
   2101       case Ist_CAS:
   2102          cas = st->Ist.CAS.details;
   2103          addUses_Expr(set, cas->addr);
   2104          if (cas->expdHi)
   2105             addUses_Expr(set, cas->expdHi);
   2106          addUses_Expr(set, cas->expdLo);
   2107          if (cas->dataHi)
   2108             addUses_Expr(set, cas->dataHi);
   2109          addUses_Expr(set, cas->dataLo);
   2110          return;
   2111       case Ist_LLSC:
   2112          addUses_Expr(set, st->Ist.LLSC.addr);
   2113          if (st->Ist.LLSC.storedata)
   2114             addUses_Expr(set, st->Ist.LLSC.storedata);
   2115          return;
   2116       case Ist_Dirty:
   2117          d = st->Ist.Dirty.details;
   2118          if (d->mFx != Ifx_None)
   2119             addUses_Expr(set, d->mAddr);
   2120          addUses_Expr(set, d->guard);
   2121          for (i = 0; d->args[i] != NULL; i++)
   2122             addUses_Expr(set, d->args[i]);
   2123          return;
   2124       case Ist_NoOp:
   2125       case Ist_IMark:
   2126       case Ist_MBE:
   2127          return;
   2128       case Ist_Exit:
   2129          addUses_Expr(set, st->Ist.Exit.guard);
   2130          return;
   2131       default:
   2132          vex_printf("\n");
   2133          ppIRStmt(st);
   2134          vpanic("addUses_Stmt");
   2135    }
   2136 }
   2137 
   2138 
   2139 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   2140 static Bool isZeroU1 ( IRExpr* e )
   2141 {
   2142    return toBool( e->tag == Iex_Const
   2143                   && e->Iex.Const.con->tag == Ico_U1
   2144                   && e->Iex.Const.con->Ico.U1 == False );
   2145 }
   2146 
   2147 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   2148 static Bool isOneU1 ( IRExpr* e )
   2149 {
   2150    return toBool( e->tag == Iex_Const
   2151                   && e->Iex.Const.con->tag == Ico_U1
   2152                   && e->Iex.Const.con->Ico.U1 == True );
   2153 }
   2154 
   2155 
   2156 /* Note, this destructively modifies the given IRSB. */
   2157 
   2158 /* Scan backwards through statements, carrying a set of IRTemps which
   2159    are known to be used after the current point.  On encountering 't =
   2160    E', delete the binding if it is not used.  Otherwise, add any temp
   2161    uses to the set and keep on moving backwards.
   2162 
   2163    As an enhancement, the first (backwards) pass searches for IR exits
   2164    with always-taken conditions and notes the location of the earliest
   2165    one in the block.  If any such are found, a second pass copies the
   2166    exit destination and jump kind to the bb-end.  Then, the exit and
   2167    all statements following it are turned into no-ops.
   2168 */
   2169 
   2170 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
   2171 {
   2172    Int     i, i_unconditional_exit;
   2173    Int     n_tmps = bb->tyenv->types_used;
   2174    Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   2175    IRStmt* st;
   2176 
   2177    for (i = 0; i < n_tmps; i++)
   2178       set[i] = False;
   2179 
   2180    /* start off by recording IRTemp uses in the next field. */
   2181    addUses_Expr(set, bb->next);
   2182 
   2183    /* First pass */
   2184 
   2185    /* Work backwards through the stmts */
   2186    i_unconditional_exit = -1;
   2187    for (i = bb->stmts_used-1; i >= 0; i--) {
   2188       st = bb->stmts[i];
   2189       if (st->tag == Ist_NoOp)
   2190          continue;
   2191       /* take note of any unconditional exits */
   2192       if (st->tag == Ist_Exit
   2193           && isOneU1(st->Ist.Exit.guard))
   2194          i_unconditional_exit = i;
   2195       if (st->tag == Ist_WrTmp
   2196           && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
   2197           /* it's an IRTemp which never got used.  Delete it. */
   2198          if (DEBUG_IROPT) {
   2199             vex_printf("DEAD: ");
   2200             ppIRStmt(st);
   2201             vex_printf("\n");
   2202          }
   2203          bb->stmts[i] = IRStmt_NoOp();
   2204       }
   2205       else
   2206       if (st->tag == Ist_Dirty
   2207           && st->Ist.Dirty.details->guard
   2208           && isZeroU1(st->Ist.Dirty.details->guard)) {
   2209          /* This is a dirty helper which will never get called.
   2210             Delete it. */
   2211          bb->stmts[i] = IRStmt_NoOp();
   2212        }
   2213        else {
   2214          /* Note any IRTemp uses made by the current statement. */
   2215          addUses_Stmt(set, st);
   2216       }
   2217    }
   2218 
   2219    /* Optional second pass: if any unconditional exits were found,
   2220       delete them and all following statements. */
   2221 
   2222    if (i_unconditional_exit != -1) {
   2223       if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
   2224                         i_unconditional_exit);
   2225       vassert(i_unconditional_exit >= 0
   2226               && i_unconditional_exit < bb->stmts_used);
   2227       bb->next
   2228          = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
   2229       bb->jumpkind
   2230          = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
   2231       for (i = i_unconditional_exit; i < bb->stmts_used; i++)
   2232          bb->stmts[i] = IRStmt_NoOp();
   2233    }
   2234 }
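
        /* Illustrative example (comment only): in
              t9 = Add32(t3,t4)
              PUT(0) = t3
           nothing uses t9, so the backwards pass turns the first statement
           into a no-op, while t3 stays live because the PUT (visited before
           it in the backwards scan) has already marked t3 as used. */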
   2235 
   2236 
   2237 /*---------------------------------------------------------------*/
   2238 /*--- Specialisation of helper function calls, in             ---*/
   2239 /*--- collaboration with the front end                        ---*/
   2240 /*---------------------------------------------------------------*/
   2241 
   2242 static
   2243 IRSB* spec_helpers_BB(
   2244          IRSB* bb,
   2245          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
   2246       )
   2247 {
   2248    Int     i;
   2249    IRStmt* st;
   2250    IRExpr* ex;
   2251    Bool    any = False;
   2252 
   2253    for (i = bb->stmts_used-1; i >= 0; i--) {
   2254       st = bb->stmts[i];
   2255 
   2256       if (st->tag != Ist_WrTmp
   2257           || st->Ist.WrTmp.data->tag != Iex_CCall)
   2258          continue;
   2259 
   2260       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
   2261                           st->Ist.WrTmp.data->Iex.CCall.args,
   2262                           &bb->stmts[0], i );
   2263       if (!ex)
   2264         /* the front end can't think of a suitable replacement */
   2265         continue;
   2266 
   2267       /* We got something better.  Install it in the bb. */
   2268       any = True;
   2269       bb->stmts[i]
   2270          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
   2271 
   2272       if (0) {
   2273          vex_printf("SPEC: ");
   2274          ppIRExpr(st->Ist.WrTmp.data);
   2275          vex_printf("  -->  ");
   2276          ppIRExpr(ex);
   2277          vex_printf("\n");
   2278       }
   2279    }
   2280 
   2281    if (any)
   2282       bb = flatten_BB(bb);
   2283    return bb;
   2284 }
   2285 
   2286 
   2287 /*---------------------------------------------------------------*/
   2288 /*--- Determination of guest state aliasing relationships     ---*/
   2289 /*---------------------------------------------------------------*/
   2290 
   2291 /* These are helper functions for CSE and GetI/PutI transformations.
   2292 
   2293    Determine, to the extent possible, the relationship between two
   2294    guest state accesses.  The possible outcomes are:
   2295 
   2296    * Exact alias.  These two accesses denote precisely the same
   2297      piece of the guest state.
   2298 
   2299    * Definitely no alias.  These two accesses are guaranteed not to
   2300      overlap any part of the guest state.
   2301 
   2302    * Unknown -- if neither of the above can be established.
   2303 
   2304    If in doubt, return Unknown.  */
   2305 
   2306 typedef
   2307    enum { ExactAlias, NoAlias, UnknownAlias }
   2308    GSAliasing;
   2309 
   2310 
   2311 /* Produces the alias relation between an indexed guest
   2312    state access and a non-indexed access. */
   2313 
   2314 static
   2315 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   2316                                     Int offset2, IRType ty2 )
   2317 {
   2318    UInt minoff1, maxoff1, minoff2, maxoff2;
   2319 
   2320    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2321    minoff2 = offset2;
   2322    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   2323 
   2324    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2325       return NoAlias;
   2326 
   2327    /* Could probably do better here if required.  For the moment
   2328       however just claim not to know anything more. */
   2329    return UnknownAlias;
   2330 }
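
        /* For example, if descr1 spans guest offsets 0 .. 15 and the fixed
           access is 4 bytes at offset 16, the ranges cannot overlap and
           NoAlias is returned; any possible overlap yields UnknownAlias. */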
   2331 
   2332 
   2333 /* Produces the alias relation between two indexed guest state
   2334    accesses. */
   2335 
   2336 static
   2337 GSAliasing getAliasingRelation_II (
   2338               IRRegArray* descr1, IRExpr* ix1, Int bias1,
   2339               IRRegArray* descr2, IRExpr* ix2, Int bias2
   2340            )
   2341 {
   2342    UInt minoff1, maxoff1, minoff2, maxoff2;
   2343    Int  iters;
   2344 
   2345    /* First try hard to show they don't alias. */
   2346    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2347    getArrayBounds( descr2, &minoff2, &maxoff2 );
   2348    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2349       return NoAlias;
   2350 
   2351    /* So the two arrays at least partially overlap.  To get any
   2352       further we'll have to be sure that the descriptors are
   2353       identical. */
   2354    if (!eqIRRegArray(descr1, descr2))
   2355       return UnknownAlias;
   2356 
   2357    /* The descriptors are identical.  Now the only difference can be
   2358       in the index expressions.  If they cannot be shown to be
   2359       identical, we have to say we don't know what the aliasing
   2360       relation will be.  Now, since the IR is flattened, the index
   2361       expressions should be atoms -- either consts or tmps.  So that
   2362       makes the comparison simple. */
   2363    vassert(isIRAtom(ix1));
   2364    vassert(isIRAtom(ix2));
   2365    if (!eqIRAtom(ix1,ix2))
   2366       return UnknownAlias;
   2367 
   2368    /* Ok, the index expressions are identical.  So now the only way
   2369       they can be different is in the bias.  Normalise this
   2370       paranoidly, to reliably establish equality/non-equality. */
   2371 
   2372    /* So now we know that the GetI and PutI index the same array
   2373       with the same base.  Are the offsets the same, modulo the
   2374       array size?  Do this paranoidly. */
   2375    vassert(descr1->nElems == descr2->nElems);
   2376    vassert(descr1->elemTy == descr2->elemTy);
   2377    vassert(descr1->base   == descr2->base);
   2378    iters = 0;
   2379    while (bias1 < 0 || bias2 < 0) {
   2380       bias1 += descr1->nElems;
   2381       bias2 += descr1->nElems;
   2382       iters++;
   2383       if (iters > 10)
   2384          vpanic("getAliasingRelation: iters");
   2385    }
   2386    vassert(bias1 >= 0 && bias2 >= 0);
   2387    bias1 %= descr1->nElems;
   2388    bias2 %= descr1->nElems;
   2389    vassert(bias1 >= 0 && bias1 < descr1->nElems);
   2390    vassert(bias2 >= 0 && bias2 < descr1->nElems);
   2391 
   2392    /* Finally, bias1 and bias2 are normalised into the range
   2393       0 .. descr1->nElems - 1.  And so we can establish
   2394       equality/non-equality. */
   2395 
   2396    return bias1==bias2 ? ExactAlias : NoAlias;
   2397 }
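
        /* For example, with nElems == 8, biases of -3 and 5 both normalise
           to 5, so two accesses that differ only by a whole number of array
           lengths are reported as ExactAlias. */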
   2398 
   2399 
   2400 /*---------------------------------------------------------------*/
   2401 /*--- Common Subexpression Elimination                        ---*/
   2402 /*---------------------------------------------------------------*/
   2403 
   2404 /* Expensive in time and space. */
   2405 
   2406 /* Uses two environments:
   2407    an IRTemp -> IRTemp mapping
   2408    a mapping from AvailExpr* to IRTemp
   2409 */
   2410 
   2411 typedef
   2412    struct {
   2413       enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
   2414       union {
   2415          /* unop(tmp) */
   2416          struct {
   2417             IROp   op;
   2418             IRTemp arg;
   2419          } Ut;
   2420          /* binop(tmp,tmp) */
   2421          struct {
   2422             IROp   op;
   2423             IRTemp arg1;
   2424             IRTemp arg2;
   2425          } Btt;
   2426          /* binop(tmp,const) */
   2427          struct {
   2428             IROp    op;
   2429             IRTemp  arg1;
   2430             IRConst con2;
   2431          } Btc;
   2432          /* binop(const,tmp) */
   2433          struct {
   2434             IROp    op;
   2435             IRConst con1;
   2436             IRTemp  arg2;
   2437          } Bct;
   2438          /* F64i-style const */
   2439          struct {
   2440             ULong f64i;
   2441          } Cf64i;
   2442          /* Mux0X(tmp,tmp,tmp) */
   2443          struct {
   2444             IRTemp co;
   2445             IRTemp e0;
   2446             IRTemp eX;
   2447          } Mttt;
   2448          /* GetI(descr,tmp,bias)*/
   2449          struct {
   2450             IRRegArray* descr;
   2451             IRTemp      ix;
   2452             Int         bias;
   2453          } GetIt;
   2454       } u;
   2455    }
   2456    AvailExpr;
   2457 
   2458 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
   2459 {
   2460    if (a1->tag != a2->tag)
   2461       return False;
   2462    switch (a1->tag) {
   2463       case Ut:
   2464          return toBool(
   2465                 a1->u.Ut.op == a2->u.Ut.op
   2466                 && a1->u.Ut.arg == a2->u.Ut.arg);
   2467       case Btt:
   2468          return toBool(
   2469                 a1->u.Btt.op == a2->u.Btt.op
   2470                 && a1->u.Btt.arg1 == a2->u.Btt.arg1
   2471                 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
   2472       case Btc:
   2473          return toBool(
   2474                 a1->u.Btc.op == a2->u.Btc.op
   2475                 && a1->u.Btc.arg1 == a2->u.Btc.arg1
   2476                 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
   2477       case Bct:
   2478          return toBool(
   2479                 a1->u.Bct.op == a2->u.Bct.op
   2480                 && a1->u.Bct.arg2 == a2->u.Bct.arg2
   2481                 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
   2482       case Cf64i:
   2483          return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
   2484       case Mttt:
   2485          return toBool(a1->u.Mttt.co == a2->u.Mttt.co
   2486                        && a1->u.Mttt.e0 == a2->u.Mttt.e0
   2487                        && a1->u.Mttt.eX == a2->u.Mttt.eX);
   2488       case GetIt:
   2489          return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
   2490                        && a1->u.GetIt.ix == a2->u.GetIt.ix
   2491                        && a1->u.GetIt.bias == a2->u.GetIt.bias);
   2492       default: vpanic("eq_AvailExpr");
   2493    }
   2494 }
   2495 
   2496 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
   2497 {
   2498    IRConst* con;
   2499    switch (ae->tag) {
   2500       case Ut:
   2501          return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
   2502       case Btt:
   2503          return IRExpr_Binop( ae->u.Btt.op,
   2504                               IRExpr_RdTmp(ae->u.Btt.arg1),
   2505                               IRExpr_RdTmp(ae->u.Btt.arg2) );
   2506       case Btc:
   2507          con = LibVEX_Alloc(sizeof(IRConst));
   2508          *con = ae->u.Btc.con2;
   2509          return IRExpr_Binop( ae->u.Btc.op,
   2510                               IRExpr_RdTmp(ae->u.Btc.arg1),
   2511                               IRExpr_Const(con) );
   2512       case Bct:
   2513          con = LibVEX_Alloc(sizeof(IRConst));
   2514          *con = ae->u.Bct.con1;
   2515          return IRExpr_Binop( ae->u.Bct.op,
   2516                               IRExpr_Const(con),
   2517                               IRExpr_RdTmp(ae->u.Bct.arg2) );
   2518       case Cf64i:
   2519          return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
   2520       case Mttt:
   2521          return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
   2522                              IRExpr_RdTmp(ae->u.Mttt.e0),
   2523                              IRExpr_RdTmp(ae->u.Mttt.eX));
   2524       case GetIt:
   2525          return IRExpr_GetI(ae->u.GetIt.descr,
   2526                             IRExpr_RdTmp(ae->u.GetIt.ix),
   2527                             ae->u.GetIt.bias);
   2528       default:
   2529          vpanic("availExpr_to_IRExpr");
   2530    }
   2531 }
   2532 
   2533 inline
   2534 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   2535 {
   2536    HWord res;
   2537    /* env :: IRTemp -> IRTemp */
   2538    if (lookupHHW( env, &res, (HWord)tmp ))
   2539       return (IRTemp)res;
   2540    else
   2541       return tmp;
   2542 }
   2543 
   2544 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   2545 {
   2546    /* env :: IRTemp -> IRTemp */
   2547    switch (ae->tag) {
   2548       case Ut:
   2549          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   2550          break;
   2551       case Btt:
   2552          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   2553          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   2554          break;
   2555       case Btc:
   2556          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   2557          break;
   2558       case Bct:
   2559          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   2560          break;
   2561       case Cf64i:
   2562          break;
   2563       case Mttt:
   2564          ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
   2565          ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
   2566          ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
   2567          break;
   2568       case GetIt:
   2569          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   2570          break;
   2571       default:
   2572          vpanic("subst_AvailExpr");
   2573    }
   2574 }
   2575 
   2576 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
   2577 {
   2578    AvailExpr* ae;
   2579 
   2580    if (e->tag == Iex_Unop
   2581        && e->Iex.Unop.arg->tag == Iex_RdTmp) {
   2582       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2583       ae->tag      = Ut;
   2584       ae->u.Ut.op  = e->Iex.Unop.op;
   2585       ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
   2586       return ae;
   2587    }
   2588 
   2589    if (e->tag == Iex_Binop
   2590        && e->Iex.Binop.arg1->tag == Iex_RdTmp
   2591        && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   2592       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2593       ae->tag        = Btt;
   2594       ae->u.Btt.op   = e->Iex.Binop.op;
   2595       ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2596       ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   2597       return ae;
   2598    }
   2599 
   2600    if (e->tag == Iex_Binop
   2601       && e->Iex.Binop.arg1->tag == Iex_RdTmp
   2602       && e->Iex.Binop.arg2->tag == Iex_Const) {
   2603       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2604       ae->tag        = Btc;
   2605       ae->u.Btc.op   = e->Iex.Binop.op;
   2606       ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2607       ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
   2608       return ae;
   2609    }
   2610 
   2611    if (e->tag == Iex_Binop
   2612       && e->Iex.Binop.arg1->tag == Iex_Const
   2613       && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   2614       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2615       ae->tag        = Bct;
   2616       ae->u.Bct.op   = e->Iex.Binop.op;
   2617       ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   2618       ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
   2619       return ae;
   2620    }
   2621 
   2622    if (e->tag == Iex_Const
   2623        && e->Iex.Const.con->tag == Ico_F64i) {
   2624       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2625       ae->tag          = Cf64i;
   2626       ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
   2627       return ae;
   2628    }
   2629 
   2630    if (e->tag == Iex_Mux0X
   2631        && e->Iex.Mux0X.cond->tag == Iex_RdTmp
   2632        && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
   2633        && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
   2634       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2635       ae->tag       = Mttt;
   2636       ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
   2637       ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
   2638       ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
   2639       return ae;
   2640    }
   2641 
   2642    if (e->tag == Iex_GetI
   2643        && e->Iex.GetI.ix->tag == Iex_RdTmp) {
   2644       ae = LibVEX_Alloc(sizeof(AvailExpr));
   2645       ae->tag           = GetIt;
   2646       ae->u.GetIt.descr = e->Iex.GetI.descr;
   2647       ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
   2648       ae->u.GetIt.bias  = e->Iex.GetI.bias;
   2649       return ae;
   2650    }
   2651 
   2652    return NULL;
   2653 }
   2654 
   2655 
   2656 /* The BB is modified in-place.  Returns True if any changes were
   2657    made. */
   2658 
   2659 static Bool do_cse_BB ( IRSB* bb )
   2660 {
   2661    Int        i, j, paranoia;
   2662    IRTemp     t, q;
   2663    IRStmt*    st;
   2664    AvailExpr* eprime;
   2665    AvailExpr* ae;
   2666    Bool       invalidate;
   2667    Bool       anyDone = False;
   2668 
   2669    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   2670    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
   2671 
   2672    vassert(sizeof(IRTemp) <= sizeof(HWord));
   2673 
   2674    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
   2675 
   2676    /* Iterate forwards over the stmts.
   2677       On seeing "t = E", where E is one of the 7 AvailExpr forms:
   2678          let E' = apply tenv substitution to E
   2679          search aenv for E'
   2680             if a mapping E' -> q is found,
   2681                replace this stmt by "t = q"
   2682                and add binding t -> q to tenv
   2683             else
   2684                add binding E' -> t to aenv
   2685                replace this stmt by "t = E'"
   2686 
   2687       Other statements are only interesting to the extent that they
   2688       might invalidate some of the expressions in aenv.  So there is
   2689       an invalidate-bindings check for each statement seen.
   2690    */
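
           /* For example, on seeing
                 t5 = Add32(t1,t2)
                 t7 = Add32(t1,t2)
              the first statement adds the binding Add32(t1,t2) -> t5 to aenv;
              the second is rewritten as 't7 = t5' and t7 -> t5 is added to
              tenv, so later uses of t7 are redirected to t5. */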
   2691    for (i = 0; i < bb->stmts_used; i++) {
   2692       st = bb->stmts[i];
   2693 
   2694       /* ------ BEGIN invalidate aenv bindings ------ */
   2695       /* This is critical: remove from aenv any E' -> .. bindings
   2696          which might be invalidated by this statement.  The only
   2697          vulnerable kind of bindings are the GetI kind.
   2698             Dirty call - dump (paranoia level -> 2)
   2699             Store      - dump (ditto)
   2700             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
   2701          Uses getAliasingRelation_IC and getAliasingRelation_II
   2702          to do the no-overlap assessments needed for Put/PutI.
   2703       */
   2704       switch (st->tag) {
   2705          case Ist_Dirty: case Ist_Store: case Ist_MBE:
   2706          case Ist_CAS: case Ist_LLSC:
   2707             paranoia = 2; break;
   2708          case Ist_Put: case Ist_PutI:
   2709             paranoia = 1; break;
   2710          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
   2711          case Ist_WrTmp: case Ist_Exit:
   2712             paranoia = 0; break;
   2713          default:
   2714             vpanic("do_cse_BB(1)");
   2715       }
   2716 
   2717       if (paranoia > 0) {
   2718          for (j = 0; j < aenv->used; j++) {
   2719             if (!aenv->inuse[j])
   2720                continue;
   2721             ae = (AvailExpr*)aenv->key[j];
   2722             if (ae->tag != GetIt)
   2723                continue;
   2724             invalidate = False;
   2725             if (paranoia >= 2) {
   2726                invalidate = True;
   2727             } else {
   2728                vassert(paranoia == 1);
   2729                if (st->tag == Ist_Put) {
   2730                   if (getAliasingRelation_IC(
   2731                          ae->u.GetIt.descr,
   2732                          IRExpr_RdTmp(ae->u.GetIt.ix),
   2733                          st->Ist.Put.offset,
   2734                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
   2735                       ) != NoAlias)
   2736                      invalidate = True;
   2737                }
   2738                else
   2739                if (st->tag == Ist_PutI) {
   2740                   if (getAliasingRelation_II(
   2741                          ae->u.GetIt.descr,
   2742                          IRExpr_RdTmp(ae->u.GetIt.ix),
   2743                          ae->u.GetIt.bias,
   2744                          st->Ist.PutI.descr,
   2745                          st->Ist.PutI.ix,
   2746                          st->Ist.PutI.bias
   2747                       ) != NoAlias)
   2748                      invalidate = True;
   2749                }
   2750                else
   2751                   vpanic("do_cse_BB(2)");
   2752             }
   2753 
   2754             if (invalidate) {
   2755                aenv->inuse[j] = False;
   2756                aenv->key[j]   = (HWord)NULL;  /* be sure */
   2757             }
   2758          } /* for j */
   2759       } /* paranoia > 0 */
   2760 
   2761       /* ------ END invalidate aenv bindings ------ */
   2762 
   2763       /* ignore not-interestings */
   2764       if (st->tag != Ist_WrTmp)
   2765          continue;
   2766 
   2767       t = st->Ist.WrTmp.tmp;
   2768       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
   2769       /* ignore if not of AvailExpr form */
   2770       if (!eprime)
   2771          continue;
   2772 
   2773       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
   2774 
   2775       /* apply tenv */
   2776       subst_AvailExpr( tenv, eprime );
   2777 
   2778       /* search aenv for eprime, unfortunately the hard way */
   2779       for (j = 0; j < aenv->used; j++)
   2780          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
   2781             break;
   2782 
   2783       if (j < aenv->used) {
   2784          /* A binding E' -> q was found.  Replace stmt by "t = q" and
   2785             note the t->q binding in tenv. */
   2786          /* (this is the core of the CSE action) */
   2787          q = (IRTemp)aenv->val[j];
   2788          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
   2789          addToHHW( tenv, (HWord)t, (HWord)q );
   2790          anyDone = True;
   2791       } else {
   2792          /* No binding was found, so instead we add E' -> t to our
   2793             collection of available expressions, replace this stmt
   2794             with "t = E'", and move on. */
   2795          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
   2796          addToHHW( aenv, (HWord)eprime, (HWord)t );
   2797       }
   2798    }
   2799 
   2800    /*
   2801    ppIRSB(bb);
   2802    sanityCheckIRSB(bb, Ity_I32);
   2803    vex_printf("\n\n");
   2804    */
   2805    return anyDone;
   2806 }
   2807 
   2808 
   2809 /*---------------------------------------------------------------*/
   2810 /*--- Add32/Sub32 chain collapsing                            ---*/
   2811 /*---------------------------------------------------------------*/
   2812 
   2813 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   2814 
   2815 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   2816    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   2817    root node is Add32, not Sub32. */
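        /* For example (hypothetical temp), Sub32(t7,0x10:I32) yields
           *tmp = t7 and *i32 = -16, i.e. the node is treated as if it
           were Add32(t7,-16). */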
   2818 
   2819 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   2820 {
   2821    if (e->tag != Iex_Binop)
   2822       return False;
   2823    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   2824       return False;
   2825    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   2826       return False;
   2827    if (e->Iex.Binop.arg2->tag != Iex_Const)
   2828       return False;
   2829    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   2830    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   2831    if (e->Iex.Binop.op == Iop_Sub32)
   2832       *i32 = -*i32;
   2833    return True;
   2834 }
   2835 
   2836 
   2837 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   2838    other tmp2.  Scan backwards from the specified start point -- an
   2839    optimisation. */
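        /* Example (hypothetical temps): with earlier statements
              t1 = GET:I32(16)
              t2 = Add32(t1,0x4:I32)
              t3 = Sub32(t2,0x8:I32)
           a query on t3 walks the chain back to t1 and returns
           *tmp2 = t1, *i32 = -4, i.e. t3 is expressible as t1 +32 (-4). */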
   2840 
   2841 static Bool collapseChain ( IRSB* bb, Int startHere,
   2842                             IRTemp tmp,
   2843                             IRTemp* tmp2, Int* i32 )
   2844 {
   2845    Int     j, ii;
   2846    IRTemp  vv;
   2847    IRStmt* st;
   2848    IRExpr* e;
   2849 
   2850    /* the (var, con) pair contains the current 'representation' for
   2851       'tmp'.  We start with 'tmp + 0'.  */
   2852    IRTemp var = tmp;
   2853    Int    con = 0;
   2854 
   2855    /* Scan backwards to see if tmp can be replaced by some other tmp
   2856      +/- a constant. */
   2857    for (j = startHere; j >= 0; j--) {
   2858       st = bb->stmts[j];
   2859       if (st->tag != Ist_WrTmp)
   2860          continue;
   2861       if (st->Ist.WrTmp.tmp != var)
   2862          continue;
   2863       e = st->Ist.WrTmp.data;
   2864       if (!isAdd32OrSub32(e, &vv, &ii))
   2865          break;
   2866       var = vv;
   2867       con += ii;
   2868    }
   2869    if (j == -1)
   2870       /* no earlier binding for var .. ill-formed IR */
   2871       vpanic("collapseChain");
   2872 
   2873    /* so, did we find anything interesting? */
   2874    if (var == tmp)
   2875       return False; /* no .. */
   2876 
   2877    *tmp2 = var;
   2878    *i32  = con;
   2879    return True;
   2880 }
   2881 
   2882 
   2883 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   2884 
   2885 static void collapse_AddSub_chains_BB ( IRSB* bb )
   2886 {
   2887    IRStmt *st;
   2888    IRTemp var, var2;
   2889    Int    i, con, con2;
   2890 
   2891    for (i = bb->stmts_used-1; i >= 0; i--) {
   2892       st = bb->stmts[i];
   2893       if (st->tag == Ist_NoOp)
   2894          continue;
   2895 
   2896       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   2897 
   2898       if (st->tag == Ist_WrTmp
   2899           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   2900 
   2901          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   2902             Find out if var can be expressed as var2 + con2. */
   2903          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   2904             if (DEBUG_IROPT) {
   2905                vex_printf("replacing1 ");
   2906                ppIRStmt(st);
   2907                vex_printf(" with ");
   2908             }
   2909             con2 += con;
   2910             bb->stmts[i]
   2911                = IRStmt_WrTmp(
   2912                     st->Ist.WrTmp.tmp,
   2913                     (con2 >= 0)
   2914                       ? IRExpr_Binop(Iop_Add32,
   2915                                      IRExpr_RdTmp(var2),
   2916                                      IRExpr_Const(IRConst_U32(con2)))
   2917                       : IRExpr_Binop(Iop_Sub32,
   2918                                      IRExpr_RdTmp(var2),
   2919                                      IRExpr_Const(IRConst_U32(-con2)))
   2920                  );
   2921             if (DEBUG_IROPT) {
   2922                ppIRStmt(bb->stmts[i]);
   2923                vex_printf("\n");
   2924             }
   2925          }
   2926 
   2927          continue;
   2928       }
   2929 
   2930       /* Try to collapse 't1 = GetI[t2, con]'. */
   2931 
   2932       if (st->tag == Ist_WrTmp
   2933           && st->Ist.WrTmp.data->tag == Iex_GetI
   2934           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   2935           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   2936                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   2937          if (DEBUG_IROPT) {
   2938             vex_printf("replacing3 ");
   2939             ppIRStmt(st);
   2940             vex_printf(" with ");
   2941          }
   2942          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   2943          bb->stmts[i]
   2944             = IRStmt_WrTmp(
   2945                  st->Ist.WrTmp.tmp,
   2946                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   2947                              IRExpr_RdTmp(var2),
   2948                              con2));
   2949          if (DEBUG_IROPT) {
   2950             ppIRStmt(bb->stmts[i]);
   2951             vex_printf("\n");
   2952          }
   2953          continue;
   2954       }
   2955 
   2956       /* Perhaps st is PutI[t, con] ? */
   2957 
   2958       if (st->tag == Ist_PutI
   2959           && st->Ist.PutI.ix->tag == Iex_RdTmp
   2960           && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
   2961                                &var2, &con2)) {
   2962          if (DEBUG_IROPT) {
   2963             vex_printf("replacing2 ");
   2964             ppIRStmt(st);
   2965             vex_printf(" with ");
   2966          }
   2967          con2 += st->Ist.PutI.bias;
   2968          bb->stmts[i]
   2969            = IRStmt_PutI(st->Ist.PutI.descr,
   2970                          IRExpr_RdTmp(var2),
   2971                          con2,
   2972                          st->Ist.PutI.data);
   2973          if (DEBUG_IROPT) {
   2974             ppIRStmt(bb->stmts[i]);
   2975             vex_printf("\n");
   2976          }
   2977          continue;
   2978       }
   2979 
   2980    } /* for */
   2981 }
   2982 
   2983 
   2984 /*---------------------------------------------------------------*/
   2985 /*--- PutI/GetI transformations                               ---*/
   2986 /*---------------------------------------------------------------*/
   2987 
   2988 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   2989    the given starting point to find, if any, a PutI which writes
   2990    exactly the same piece of guest state, and so return the expression
   2991    that the PutI writes.  This is the core of PutI-GetI forwarding. */
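        /* For example (hypothetical descriptor d and temps), given
              PUTI(d)[t1,0] = t7
              ...
              t9 = GETI(d)[t1,0]
           with no possibly-overlapping Put/PutI and no dirty call with
           guest-state effects in between, findPutI returns t7, and the
           caller may rewrite the GetI as t9 = t7 (after checking that
           t7's type matches the array's element type). */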
   2992 
   2993 static
   2994 IRExpr* findPutI ( IRSB* bb, Int startHere,
   2995                    IRRegArray* descrG, IRExpr* ixG, Int biasG )
   2996 {
   2997    Int        j;
   2998    IRStmt*    st;
   2999    GSAliasing relation;
   3000 
   3001    if (0) {
   3002       vex_printf("\nfindPutI ");
   3003       ppIRRegArray(descrG);
   3004       vex_printf(" ");
   3005       ppIRExpr(ixG);
   3006       vex_printf(" %d\n", biasG);
   3007    }
   3008 
   3009    /* Scan backwards in bb from startHere to find a suitable PutI
   3010       binding for (descrG, ixG, biasG), if any. */
   3011 
   3012    for (j = startHere; j >= 0; j--) {
   3013       st = bb->stmts[j];
   3014       if (st->tag == Ist_NoOp)
   3015          continue;
   3016 
   3017       if (st->tag == Ist_Put) {
   3018          /* Non-indexed Put.  This can't give a binding, but we do
   3019             need to check it doesn't invalidate the search by
   3020             overlapping any part of the indexed guest state. */
   3021 
   3022          relation
   3023             = getAliasingRelation_IC(
   3024                  descrG, ixG,
   3025                  st->Ist.Put.offset,
   3026                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
   3027 
   3028          if (relation == NoAlias) {
   3029             /* we're OK; keep going */
   3030             continue;
   3031          } else {
   3032             /* relation == UnknownAlias || relation == ExactAlias */
   3033             /* If this assertion fails, we've found a Put which writes
   3034                an area of guest state which is read by a GetI.  Which
   3035                is unlikely (although not per se wrong). */
   3036             vassert(relation != ExactAlias);
   3037             /* This Put potentially writes guest state that the GetI
   3038                reads; we must fail. */
   3039             return NULL;
   3040          }
   3041       }
   3042 
   3043       if (st->tag == Ist_PutI) {
   3044 
   3045          relation = getAliasingRelation_II(
   3046                        descrG, ixG, biasG,
   3047                        st->Ist.PutI.descr,
   3048                        st->Ist.PutI.ix,
   3049                        st->Ist.PutI.bias
   3050                     );
   3051 
   3052          if (relation == NoAlias) {
   3053             /* This PutI definitely doesn't overlap.  Ignore it and
   3054                keep going. */
   3055             continue; /* the for j loop */
   3056          }
   3057 
   3058          if (relation == UnknownAlias) {
   3059             /* We don't know whether this PutI writes to the same guest
   3060                state as the GetI reads.  So we have to give up. */
   3061             return NULL;
   3062          }
   3063 
   3064          /* Otherwise, we've found what we're looking for.  */
   3065          vassert(relation == ExactAlias);
   3066          return st->Ist.PutI.data;
   3067 
   3068       } /* if (st->tag == Ist_PutI) */
   3069 
   3070       if (st->tag == Ist_Dirty) {
   3071          /* Be conservative.  If the dirty call has any guest effects at
   3072             all, give up.  We could do better -- only give up if there
   3073             are any guest writes/modifies. */
   3074          if (st->Ist.Dirty.details->nFxState > 0)
   3075             return NULL;
   3076       }
   3077 
   3078    } /* for */
   3079 
   3080    /* No valid replacement was found. */
   3081    return NULL;
   3082 }
   3083 
   3084 
   3085 
   3086 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   3087    that it writes exactly the same piece of guest state) ?  Safe
   3088    answer: False. */
   3089 
   3090 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   3091 {
   3092    vassert(pi->tag == Ist_PutI);
   3093    if (s2->tag != Ist_PutI)
   3094       return False;
   3095 
   3096    return toBool(
   3097           getAliasingRelation_II(
   3098              pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
   3099              s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
   3100           )
   3101           == ExactAlias
   3102           );
   3103 }
   3104 
   3105 
   3106 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   3107    overlap it?  Safe answer: True.  Note, we could do a lot better
   3108    than this if needed. */
   3109 
   3110 static
   3111 Bool guestAccessWhichMightOverlapPutI (
   3112         IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
   3113      )
   3114 {
   3115    GSAliasing relation;
   3116    UInt       minoffP, maxoffP;
   3117 
   3118    vassert(pi->tag == Ist_PutI);
   3119    getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
   3120    switch (s2->tag) {
   3121 
   3122       case Ist_NoOp:
   3123       case Ist_IMark:
   3124          return False;
   3125 
   3126       case Ist_MBE:
   3127       case Ist_AbiHint:
   3128          /* just be paranoid ... these should be rare. */
   3129          return True;
   3130 
   3131       case Ist_CAS:
   3132          /* This is unbelievably lame, but it's probably not
   3133             significant from a performance point of view.  Really, a
   3134             CAS is a load-store op, so it should be safe to say False.
   3135             However .. */
   3136          return True;
   3137 
   3138       case Ist_Dirty:
   3139          /* If the dirty call has any guest effects at all, give up.
   3140             Probably could do better. */
   3141          if (s2->Ist.Dirty.details->nFxState > 0)
   3142             return True;
   3143          return False;
   3144 
   3145       case Ist_Put:
   3146          vassert(isIRAtom(s2->Ist.Put.data));
   3147          relation
   3148             = getAliasingRelation_IC(
   3149                  pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3150                  s2->Ist.Put.offset,
   3151                  typeOfIRExpr(tyenv,s2->Ist.Put.data)
   3152               );
   3153          goto have_relation;
   3154 
   3155       case Ist_PutI:
   3156          vassert(isIRAtom(s2->Ist.PutI.ix));
   3157          vassert(isIRAtom(s2->Ist.PutI.data));
   3158          relation
   3159             = getAliasingRelation_II(
   3160                  pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
   3161                  s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
   3162               );
   3163          goto have_relation;
   3164 
   3165       case Ist_WrTmp:
   3166          if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
   3167             relation
   3168                = getAliasingRelation_II(
   3169                     pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3170                                         pi->Ist.PutI.bias,
   3171                     s2->Ist.WrTmp.data->Iex.GetI.descr,
   3172                     s2->Ist.WrTmp.data->Iex.GetI.ix,
   3173                     s2->Ist.WrTmp.data->Iex.GetI.bias
   3174                  );
   3175             goto have_relation;
   3176          }
   3177          if (s2->Ist.WrTmp.data->tag == Iex_Get) {
   3178             relation
   3179                = getAliasingRelation_IC(
   3180                     pi->Ist.PutI.descr, pi->Ist.PutI.ix,
   3181                     s2->Ist.WrTmp.data->Iex.Get.offset,
   3182                     s2->Ist.WrTmp.data->Iex.Get.ty
   3183                  );
   3184             goto have_relation;
   3185          }
   3186          return False;
   3187 
   3188       case Ist_Store:
   3189          vassert(isIRAtom(s2->Ist.Store.addr));
   3190          vassert(isIRAtom(s2->Ist.Store.data));
   3191          return False;
   3192 
   3193       default:
   3194          vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
   3195          vpanic("guestAccessWhichMightOverlapPutI");
   3196    }
   3197 
   3198   have_relation:
   3199    if (relation == NoAlias)
   3200       return False;
   3201    else
   3202       return True; /* ExactAlias or UnknownAlias */
   3203 }
   3204 
   3205 
   3206 
   3207 /* ---------- PutI/GetI transformations main functions --------- */
   3208 
   3209 /* Remove redundant GetIs, to the extent that they can be detected.
   3210    bb is modified in-place. */
   3211 
   3212 static
   3213 void do_redundant_GetI_elimination ( IRSB* bb )
   3214 {
   3215    Int     i;
   3216    IRStmt* st;
   3217 
   3218    for (i = bb->stmts_used-1; i >= 0; i--) {
   3219       st = bb->stmts[i];
   3220       if (st->tag == Ist_NoOp)
   3221          continue;
   3222 
   3223       if (st->tag == Ist_WrTmp
   3224           && st->Ist.WrTmp.data->tag == Iex_GetI
   3225           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   3226          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   3227          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   3228          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   3229          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   3230          if (replacement
   3231              && isIRAtom(replacement)
   3232              /* Make sure we're doing a type-safe transformation! */
   3233              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   3234             if (DEBUG_IROPT) {
   3235                vex_printf("rGI:  ");
   3236                ppIRExpr(st->Ist.WrTmp.data);
   3237                vex_printf(" -> ");
   3238                ppIRExpr(replacement);
   3239                vex_printf("\n");
   3240             }
   3241             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   3242          }
   3243       }
   3244    }
   3245 
   3246 }
   3247 
   3248 
   3249 /* Remove redundant PutIs, to the extent that they can be detected.
   3250    bb is modified in-place. */
   3251 
   3252 static
   3253 void do_redundant_PutI_elimination ( IRSB* bb )
   3254 {
   3255    Int    i, j;
   3256    Bool   delete;
   3257    IRStmt *st, *stj;
   3258 
   3259    for (i = 0; i < bb->stmts_used; i++) {
   3260       st = bb->stmts[i];
   3261       if (st->tag != Ist_PutI)
   3262          continue;
   3263       /* Ok, search forwards from here to see if we can find another
   3264          PutI which makes this one redundant, and dodging various
   3265          hazards.  Search forwards:
   3266          * If conditional exit, give up (because anything after that
   3267            does not postdominate this put).
   3268          * If a Get which might overlap, give up (because this PutI
   3269            is not necessarily dead).
   3270          * If a Put which is identical, stop with success.
   3271          * If a Put which might overlap, but is not identical, give up.
   3272          * If a dirty helper call which might write guest state, give up.
   3273          * If a Put which definitely doesn't overlap, or any other
   3274            kind of stmt, continue.
   3275       */
   3276       delete = False;
   3277       for (j = i+1; j < bb->stmts_used; j++) {
   3278          stj = bb->stmts[j];
   3279          if (stj->tag == Ist_NoOp)
   3280             continue;
   3281          if (identicalPutIs(st, stj)) {
   3282             /* success! */
   3283             delete = True;
   3284             break;
   3285          }
   3286          if (stj->tag == Ist_Exit)
   3287             /* give up */
   3288             break;
   3289          if (stj->tag == Ist_Dirty)
   3290             /* give up; could do better here */
   3291             break;
   3292          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   3293             /* give up */
   3294            break;
   3295       }
   3296 
   3297       if (delete) {
   3298          if (DEBUG_IROPT) {
   3299             vex_printf("rPI:  ");
   3300             ppIRStmt(st);
   3301             vex_printf("\n");
   3302          }
   3303          bb->stmts[i] = IRStmt_NoOp();
   3304       }
   3305 
   3306    }
   3307 }
   3308 
   3309 
   3310 /*---------------------------------------------------------------*/
   3311 /*--- Loop unrolling                                          ---*/
   3312 /*---------------------------------------------------------------*/
   3313 
   3314 /* Adjust all tmp values (names) in e by delta.  e is destructively
   3315    modified. */
   3316 
   3317 static void deltaIRExpr ( IRExpr* e, Int delta )
   3318 {
   3319    Int i;
   3320    switch (e->tag) {
   3321       case Iex_RdTmp:
   3322          e->Iex.RdTmp.tmp += delta;
   3323          break;
   3324       case Iex_Get:
   3325       case Iex_Const:
   3326          break;
   3327       case Iex_GetI:
   3328          deltaIRExpr(e->Iex.GetI.ix, delta);
   3329          break;
   3330       case Iex_Qop:
   3331          deltaIRExpr(e->Iex.Qop.arg1, delta);
   3332          deltaIRExpr(e->Iex.Qop.arg2, delta);
   3333          deltaIRExpr(e->Iex.Qop.arg3, delta);
   3334          deltaIRExpr(e->Iex.Qop.arg4, delta);
   3335          break;
   3336       case Iex_Triop:
   3337          deltaIRExpr(e->Iex.Triop.arg1, delta);
   3338          deltaIRExpr(e->Iex.Triop.arg2, delta);
   3339          deltaIRExpr(e->Iex.Triop.arg3, delta);
   3340          break;
   3341       case Iex_Binop:
   3342          deltaIRExpr(e->Iex.Binop.arg1, delta);
   3343          deltaIRExpr(e->Iex.Binop.arg2, delta);
   3344          break;
   3345       case Iex_Unop:
   3346          deltaIRExpr(e->Iex.Unop.arg, delta);
   3347          break;
   3348       case Iex_Load:
   3349          deltaIRExpr(e->Iex.Load.addr, delta);
   3350          break;
   3351       case Iex_CCall:
   3352          for (i = 0; e->Iex.CCall.args[i]; i++)
   3353             deltaIRExpr(e->Iex.CCall.args[i], delta);
   3354          break;
   3355       case Iex_Mux0X:
   3356          deltaIRExpr(e->Iex.Mux0X.cond, delta);
   3357          deltaIRExpr(e->Iex.Mux0X.expr0, delta);
   3358          deltaIRExpr(e->Iex.Mux0X.exprX, delta);
   3359          break;
   3360       default:
   3361          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3362          vpanic("deltaIRExpr");
   3363    }
   3364 }
   3365 
   3366 /* Adjust all tmp values (names) in st by delta.  st is destructively
   3367    modified. */
   3368 
   3369 static void deltaIRStmt ( IRStmt* st, Int delta )
   3370 {
   3371    Int      i;
   3372    IRDirty* d;
   3373    switch (st->tag) {
   3374       case Ist_NoOp:
   3375       case Ist_IMark:
   3376       case Ist_MBE:
   3377          break;
   3378       case Ist_AbiHint:
   3379          deltaIRExpr(st->Ist.AbiHint.base, delta);
   3380          deltaIRExpr(st->Ist.AbiHint.nia, delta);
   3381          break;
   3382       case Ist_Put:
   3383          deltaIRExpr(st->Ist.Put.data, delta);
   3384          break;
   3385       case Ist_PutI:
   3386          deltaIRExpr(st->Ist.PutI.ix, delta);
   3387          deltaIRExpr(st->Ist.PutI.data, delta);
   3388          break;
   3389       case Ist_WrTmp:
   3390          st->Ist.WrTmp.tmp += delta;
   3391          deltaIRExpr(st->Ist.WrTmp.data, delta);
   3392          break;
   3393       case Ist_Exit:
   3394          deltaIRExpr(st->Ist.Exit.guard, delta);
   3395          break;
   3396       case Ist_Store:
   3397          deltaIRExpr(st->Ist.Store.addr, delta);
   3398          deltaIRExpr(st->Ist.Store.data, delta);
   3399          break;
   3400       case Ist_CAS:
   3401          if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
   3402             st->Ist.CAS.details->oldHi += delta;
   3403          st->Ist.CAS.details->oldLo += delta;
   3404          deltaIRExpr(st->Ist.CAS.details->addr, delta);
   3405          if (st->Ist.CAS.details->expdHi)
   3406             deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
   3407          deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
   3408          if (st->Ist.CAS.details->dataHi)
   3409             deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
   3410          deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
   3411          break;
   3412       case Ist_LLSC:
   3413          st->Ist.LLSC.result += delta;
   3414          deltaIRExpr(st->Ist.LLSC.addr, delta);
   3415          if (st->Ist.LLSC.storedata)
   3416             deltaIRExpr(st->Ist.LLSC.storedata, delta);
   3417          break;
   3418       case Ist_Dirty:
   3419          d = st->Ist.Dirty.details;
   3420          deltaIRExpr(d->guard, delta);
   3421          for (i = 0; d->args[i]; i++)
   3422             deltaIRExpr(d->args[i], delta);
   3423          if (d->tmp != IRTemp_INVALID)
   3424             d->tmp += delta;
   3425          if (d->mAddr)
   3426             deltaIRExpr(d->mAddr, delta);
   3427          break;
   3428       default:
   3429          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   3430          vpanic("deltaIRStmt");
   3431    }
   3432 }
   3433 
   3434 
   3435 /* If possible, return a loop-unrolled version of bb0.  The original
   3436    is not modified; it is deep-copied first.  If not possible, return NULL.  */
   3437 
   3438 /* The two schemas considered are:
   3439 
   3440      X: BODY; goto X
   3441 
   3442      which unrolls to (eg)  X: BODY;BODY; goto X
   3443 
   3444    and
   3445 
   3446        X: BODY; if (c) goto X; goto Y
   3447    which trivially transforms to
   3448        X: BODY; if (!c) goto Y; goto X;
   3449    so it falls in the scope of the first case.
   3450 
   3451    X and Y must be literal (guest) addresses.
   3452 */
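        /* For example (hypothetical guest addresses), a block at 0x1000
           ending in
              if (t9) goto {Boring} 0x1000:I32
              goto {Boring} 0x1234:I32
           is an instance of the second schema.  Swapping the two
           destinations and negating the guard gives
              if (Not1(t9)) goto {Boring} 0x1234:I32
              goto {Boring} 0x1000:I32
           which is the first schema and can then be unrolled as usual. */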
   3453 
   3454 static Int calc_unroll_factor( IRSB* bb )
   3455 {
   3456    Int n_stmts, i;
   3457 
   3458    n_stmts = 0;
   3459    for (i = 0; i < bb->stmts_used; i++) {
   3460       if (bb->stmts[i]->tag != Ist_NoOp)
   3461          n_stmts++;
   3462    }
   3463 
   3464    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   3465       if (vex_control.iropt_verbosity > 0)
   3466          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   3467                     n_stmts, 8* n_stmts);
   3468       return 8;
   3469    }
   3470    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   3471       if (vex_control.iropt_verbosity > 0)
   3472          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   3473                     n_stmts, 4* n_stmts);
   3474       return 4;
   3475    }
   3476 
   3477    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   3478       if (vex_control.iropt_verbosity > 0)
   3479          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   3480                     n_stmts, 2* n_stmts);
   3481       return 2;
   3482    }
   3483 
   3484    if (vex_control.iropt_verbosity > 0)
   3485       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   3486 
   3487    return 1;
   3488 }
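        /* E.g. with iropt_unroll_thresh = 120, a body of 14 real (non-NoOp)
           stmts is unrolled 8 x (giving 112 stmts), a body of 40 stmts is
           unrolled 2 x, and a body of 70 stmts is not unrolled at all. */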
   3489 
   3490 
   3491 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
   3492 {
   3493    Int      i, j, jmax, n_vars;
   3494    Bool     xxx_known;
   3495    Addr64   xxx_value, yyy_value;
   3496    IRExpr*  udst;
   3497    IRStmt*  st;
   3498    IRConst* con;
   3499    IRSB     *bb1, *bb2;
   3500    Int      unroll_factor;
   3501 
   3502    if (vex_control.iropt_unroll_thresh <= 0)
   3503       return NULL;
   3504 
   3505    /* First off, figure out if we can unroll this loop.  Do this
   3506       without modifying bb0. */
   3507 
   3508    if (bb0->jumpkind != Ijk_Boring)
   3509       return NULL;
   3510 
   3511    xxx_known = False;
   3512    xxx_value = 0;
   3513 
   3514    /* Extract the next-guest address.  If it isn't a literal, we
   3515       have to give up. */
   3516 
   3517    udst = bb0->next;
   3518    if (udst->tag == Iex_Const
   3519        && (udst->Iex.Const.con->tag == Ico_U32
   3520            || udst->Iex.Const.con->tag == Ico_U64)) {
   3521       /* The BB ends in a jump to a literal location. */
   3522       xxx_known = True;
   3523       xxx_value = udst->Iex.Const.con->tag == Ico_U64
   3524                     ?  udst->Iex.Const.con->Ico.U64
   3525                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
   3526    }
   3527 
   3528    if (!xxx_known)
   3529       return NULL;
   3530 
   3531    /* Now we know the BB ends with a jump to a literal location.  If
   3532       it's a jump to itself (viz, idiom #1), move directly to the
   3533       unrolling stage, first cloning the bb so the original isn't
   3534       modified. */
   3535    if (xxx_value == my_addr) {
   3536       unroll_factor = calc_unroll_factor( bb0 );
   3537       if (unroll_factor < 2)
   3538          return NULL;
   3539       bb1 = deepCopyIRSB( bb0 );
   3540       bb0 = NULL;
   3541       udst = NULL; /* is now invalid */
   3542       goto do_unroll;
   3543    }
   3544 
   3545    /* Search for the second idiomatic form:
   3546         X: BODY; if (c) goto X; goto Y
   3547       We know Y, but need to establish that the last stmt
   3548       is 'if (c) goto X'.
   3549    */
   3550    yyy_value = xxx_value;
   3551    for (i = bb0->stmts_used-1; i >= 0; i--)
   3552       if (bb0->stmts[i])
   3553          break;
   3554 
   3555    if (i < 0)
   3556       return NULL; /* block with no stmts.  Strange. */
   3557 
   3558    st = bb0->stmts[i];
   3559    if (st->tag != Ist_Exit)
   3560       return NULL;
   3561    if (st->Ist.Exit.jk != Ijk_Boring)
   3562       return NULL;
   3563 
   3564    con = st->Ist.Exit.dst;
   3565    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   3566 
   3567    xxx_value = con->tag == Ico_U64
   3568                   ? st->Ist.Exit.dst->Ico.U64
   3569                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
   3570 
   3571    /* If this assertion fails, we have some kind of type error. */
   3572    vassert(con->tag == udst->Iex.Const.con->tag);
   3573 
   3574    if (xxx_value != my_addr)
   3575       /* We didn't find either idiom.  Give up. */
   3576       return NULL;
   3577 
   3578    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
   3579       yyy values (which makes it look like idiom #1), and go into
   3580       unrolling proper.  This means finding (again) the last stmt, in
   3581       the copied BB. */
   3582 
   3583    unroll_factor = calc_unroll_factor( bb0 );
   3584    if (unroll_factor < 2)
   3585       return NULL;
   3586 
   3587    bb1 = deepCopyIRSB( bb0 );
   3588    bb0 = NULL;
   3589    udst = NULL; /* is now invalid */
   3590    for (i = bb1->stmts_used-1; i >= 0; i--)
   3591       if (bb1->stmts[i])
   3592          break;
   3593 
   3594    /* The next bunch of assertions should be true since we already
   3595       found and checked the last stmt in the original bb. */
   3596 
   3597    vassert(i >= 0);
   3598 
   3599    st = bb1->stmts[i];
   3600    vassert(st->tag == Ist_Exit);
   3601 
   3602    con = st->Ist.Exit.dst;
   3603    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   3604 
   3605    udst = bb1->next;
   3606    vassert(udst->tag == Iex_Const);
   3607    vassert(udst->Iex.Const.con->tag == Ico_U32
   3608           || udst->Iex.Const.con->tag == Ico_U64);
   3609    vassert(con->tag == udst->Iex.Const.con->tag);
   3610 
   3611    /* switch the xxx and yyy fields around */
   3612    if (con->tag == Ico_U64) {
   3613       udst->Iex.Const.con->Ico.U64 = xxx_value;
   3614       con->Ico.U64 = yyy_value;
   3615    } else {
   3616       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
   3617       con->Ico.U32 = (UInt)yyy_value;
   3618    }
   3619 
   3620    /* negate the test condition */
   3621    st->Ist.Exit.guard
   3622       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
   3623 
   3624    /* --- The unroller proper.  Both idioms have by now --- */
   3625    /* --- been converted to idiom 1. --- */
   3626 
   3627   do_unroll:
   3628 
   3629    vassert(unroll_factor == 2
   3630            || unroll_factor == 4
   3631            || unroll_factor == 8);
   3632 
   3633    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   3634    for (j = 1; j <= jmax; j++) {
   3635 
   3636       n_vars = bb1->tyenv->types_used;
   3637 
   3638       bb2 = deepCopyIRSB(bb1);
   3639       for (i = 0; i < n_vars; i++)
   3640          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
   3641 
   3642       for (i = 0; i < bb2->stmts_used; i++) {
   3643          /* deltaIRStmt destructively modifies the stmt, but
   3644             that's OK since bb2 is a complete fresh copy of bb1. */
   3645          deltaIRStmt(bb2->stmts[i], n_vars);
   3646          addStmtToIRSB(bb1, bb2->stmts[i]);
   3647       }
   3648    }
   3649 
   3650    if (DEBUG_IROPT) {
   3651       vex_printf("\nUNROLLED (%llx)\n", my_addr);
   3652       ppIRSB(bb1);
   3653       vex_printf("\n");
   3654    }
   3655 
   3656    /* Flattening; sigh.  The unroller succeeds in breaking flatness
   3657       by negating the test condition.  This should be fixed properly.
   3658       For the moment use this shotgun approach.  */
   3659    return flatten_BB(bb1);
   3660 }
   3661 
   3662 
   3663 /*---------------------------------------------------------------*/
   3664 /*--- The tree builder                                        ---*/
   3665 /*---------------------------------------------------------------*/
   3666 
   3667 /* This isn't part of IR optimisation.  Really it's a pass done prior
   3668    to instruction selection, which improves the code that the
   3669    instruction selector can produce. */
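        /* The basic idea (hypothetical temps): a single-use binding such as
              t5 = Add32(t1,t2)
              STle(t9) = t5
           becomes
              STle(t9) = Add32(t1,t2)
           so the instruction selector sees one larger expression tree
           rather than two small ones, subject to the load/Get ordering
           constraints tracked below. */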
   3670 
   3671 /* --- The 'tmp' environment is the central data structure here --- */
   3672 
   3673 /* The number of outstanding bindings we're prepared to track.
   3674    The number of times the env becomes full and we have to dump
   3675    the oldest binding (hence reducing code quality) falls very
   3676    rapidly as the env size increases.  The value used below (10) gives
   3677    reasonable performance under most circumstances. */
   3678 #define A_NENV 10
   3679 
   3680 /* bindee == NULL   ===  slot is not in use
   3681    bindee != NULL   ===  slot is in use
   3682 */
   3683 typedef
   3684    struct {
   3685       IRTemp  binder;
   3686       IRExpr* bindee;
   3687       Bool    doesLoad;
   3688       Bool    doesGet;
   3689    }
   3690    ATmpInfo;
   3691 
   3692 __attribute__((unused))
   3693 static void ppAEnv ( ATmpInfo* env )
   3694 {
   3695    Int i;
   3696    for (i = 0; i < A_NENV; i++) {
   3697       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   3698       if (env[i].bindee)
   3699          ppIRExpr(env[i].bindee);
   3700       else
   3701          vex_printf("(null)");
   3702       vex_printf("\n");
   3703    }
   3704 }
   3705 
   3706 /* --- Tree-traversal fns --- */
   3707 
   3708 /* Traverse an expr, and detect if any part of it reads memory or does
   3709    a Get.  Be careful ... this really controls how much the
   3710    tree-builder can reorder the code, so getting it right is critical.
   3711 */
   3712 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
   3713 {
   3714    Int i;
   3715    switch (e->tag) {
   3716       case Iex_CCall:
   3717          for (i = 0; e->Iex.CCall.args[i]; i++)
   3718             setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
   3719          return;
   3720       case Iex_Mux0X:
   3721          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
   3722          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
   3723          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
   3724          return;
   3725       case Iex_Qop:
   3726          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
   3727          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
   3728          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
   3729          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
   3730          return;
   3731       case Iex_Triop:
   3732          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
   3733          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
   3734          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
   3735          return;
   3736       case Iex_Binop:
   3737          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
   3738          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
   3739          return;
   3740       case Iex_Unop:
   3741          setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
   3742          return;
   3743       case Iex_Load:
   3744          *doesLoad = True;
   3745          setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
   3746          return;
   3747       case Iex_Get:
   3748          *doesGet = True;
   3749          return;
   3750       case Iex_GetI:
   3751          *doesGet = True;
   3752          setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
   3753          return;
   3754       case Iex_RdTmp:
   3755       case Iex_Const:
   3756          return;
   3757       default:
   3758          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3759          vpanic("setHints_Expr");
   3760    }
   3761 }
   3762 
   3763 
   3764 /* Add a binding to the front of the env and slide all the rest
   3765    backwards.  It should be the case that the last slot is free. */
   3766 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   3767 {
   3768    Int i;
   3769    vassert(env[A_NENV-1].bindee == NULL);
   3770    for (i = A_NENV-1; i >= 1; i--)
   3771       env[i] = env[i-1];
   3772    env[0].binder   = binder;
   3773    env[0].bindee   = bindee;
   3774    env[0].doesLoad = False; /* filled in later */
   3775    env[0].doesGet  = False; /* filled in later */
   3776 }
   3777 
   3778 /* Given uses :: array of UShort, indexed by IRTemp
   3779    Add the use-occurrences of temps in this expression
   3780    to 'uses'.
   3781 */
   3782 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   3783 {
   3784    Int i;
   3785 
   3786    switch (e->tag) {
   3787 
   3788       case Iex_RdTmp: /* the only interesting case */
   3789          uses[e->Iex.RdTmp.tmp]++;
   3790          return;
   3791 
   3792       case Iex_Mux0X:
   3793          aoccCount_Expr(uses, e->Iex.Mux0X.cond);
   3794          aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
   3795          aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
   3796          return;
   3797 
   3798       case Iex_Qop:
   3799          aoccCount_Expr(uses, e->Iex.Qop.arg1);
   3800          aoccCount_Expr(uses, e->Iex.Qop.arg2);
   3801          aoccCount_Expr(uses, e->Iex.Qop.arg3);
   3802          aoccCount_Expr(uses, e->Iex.Qop.arg4);
   3803          return;
   3804 
   3805       case Iex_Triop:
   3806          aoccCount_Expr(uses, e->Iex.Triop.arg1);
   3807          aoccCount_Expr(uses, e->Iex.Triop.arg2);
   3808          aoccCount_Expr(uses, e->Iex.Triop.arg3);
   3809          return;
   3810 
   3811       case Iex_Binop:
   3812          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   3813          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   3814          return;
   3815 
   3816       case Iex_Unop:
   3817          aoccCount_Expr(uses, e->Iex.Unop.arg);
   3818          return;
   3819 
   3820       case Iex_Load:
   3821          aoccCount_Expr(uses, e->Iex.Load.addr);
   3822          return;
   3823 
   3824       case Iex_CCall:
   3825          for (i = 0; e->Iex.CCall.args[i]; i++)
   3826             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   3827          return;
   3828 
   3829       case Iex_GetI:
   3830          aoccCount_Expr(uses, e->Iex.GetI.ix);
   3831          return;
   3832 
   3833       case Iex_Const:
   3834       case Iex_Get:
   3835          return;
   3836 
   3837       default:
   3838          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3839          vpanic("aoccCount_Expr");
   3840    }
   3841 }
   3842 
   3843 
   3844 /* Given uses :: array of UShort, indexed by IRTemp
   3845    Add the use-occurrences of temps in this statement
   3846    to 'uses'.
   3847 */
   3848 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   3849 {
   3850    Int      i;
   3851    IRDirty* d;
   3852    IRCAS*   cas;
   3853    switch (st->tag) {
   3854       case Ist_AbiHint:
   3855          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   3856          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   3857          return;
   3858       case Ist_WrTmp:
   3859          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   3860          return;
   3861       case Ist_Put:
   3862          aoccCount_Expr(uses, st->Ist.Put.data);
   3863          return;
   3864       case Ist_PutI:
   3865          aoccCount_Expr(uses, st->Ist.PutI.ix);
   3866          aoccCount_Expr(uses, st->Ist.PutI.data);
   3867          return;
   3868       case Ist_Store:
   3869          aoccCount_Expr(uses, st->Ist.Store.addr);
   3870          aoccCount_Expr(uses, st->Ist.Store.data);
   3871          return;
   3872       case Ist_CAS:
   3873          cas = st->Ist.CAS.details;
   3874          aoccCount_Expr(uses, cas->addr);
   3875          if (cas->expdHi)
   3876             aoccCount_Expr(uses, cas->expdHi);
   3877          aoccCount_Expr(uses, cas->expdLo);
   3878          if (cas->dataHi)
   3879             aoccCount_Expr(uses, cas->dataHi);
   3880          aoccCount_Expr(uses, cas->dataLo);
   3881          return;
   3882       case Ist_LLSC:
   3883          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   3884          if (st->Ist.LLSC.storedata)
   3885             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   3886          return;
   3887       case Ist_Dirty:
   3888          d = st->Ist.Dirty.details;
   3889          if (d->mFx != Ifx_None)
   3890             aoccCount_Expr(uses, d->mAddr);
   3891          aoccCount_Expr(uses, d->guard);
   3892          for (i = 0; d->args[i]; i++)
   3893             aoccCount_Expr(uses, d->args[i]);
   3894          return;
   3895       case Ist_NoOp:
   3896       case Ist_IMark:
   3897       case Ist_MBE:
   3898          return;
   3899       case Ist_Exit:
   3900          aoccCount_Expr(uses, st->Ist.Exit.guard);
   3901          return;
   3902       default:
   3903          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   3904          vpanic("aoccCount_Stmt");
   3905    }
   3906 }
   3907 
   3908 /* Look up a binding for tmp in the env.  If found, return the bound
   3909    expression, and set the env's binding to NULL so it is marked as
   3910    used.  If not found, return NULL. */
   3911 
   3912 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   3913 {
   3914    Int i;
   3915    for (i = 0; i < A_NENV; i++) {
   3916       if (env[i].binder == tmp && env[i].bindee != NULL) {
   3917          IRExpr* bindee = env[i].bindee;
   3918          env[i].bindee = NULL;
   3919          return bindee;
   3920       }
   3921    }
   3922    return NULL;
   3923 }
   3924 
   3925 /* Traverse e, looking for temps.  For each observed temp, see if env
   3926    contains a binding for the temp, and if so return the bound value.
   3927    The env has the property that any binding it holds is
   3928    'single-shot', so once a binding is used, it is marked as no longer
   3929    available, by setting its .bindee field to NULL. */
   3930 
   3931 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   3932    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   3933 }
   3934 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   3935    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   3936 }
   3937 
   3938 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   3939 {
   3940    switch (op) {
   3941    case Iop_Or32:
   3942       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   3943       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   3944          return IRExpr_Unop( Iop_CmpwNEZ32,
   3945                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   3946                                                      a2->Iex.Unop.arg ) );
   3947       break;
   3948    default:
   3949       break;
   3950    }
   3951    /* no reduction rule applies */
   3952    return IRExpr_Binop( op, a1, a2 );
   3953 }
   3954 
   3955 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   3956 {
   3957    switch (op) {
   3958    case Iop_CmpwNEZ64:
   3959       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   3960       if (is_Binop(aa, Iop_Or64)
   3961           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   3962          return fold_IRExpr_Unop(
   3963                    Iop_CmpwNEZ64,
   3964                    IRExpr_Binop(Iop_Or64,
   3965                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   3966                                 aa->Iex.Binop.arg2));
   3967       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   3968       if (is_Binop(aa, Iop_Or64)
   3969           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   3970          return fold_IRExpr_Unop(
   3971                    Iop_CmpwNEZ64,
   3972                    IRExpr_Binop(Iop_Or64,
   3973                                 aa->Iex.Binop.arg1,
   3974                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   3975       break;
   3976    case Iop_CmpNEZ64:
   3977       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   3978       if (is_Unop(aa, Iop_Left64))
   3979          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   3980       break;
   3981    case Iop_CmpwNEZ32:
   3982       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   3983       if (is_Unop(aa, Iop_CmpwNEZ32))
   3984          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   3985       break;
   3986    case Iop_CmpNEZ32:
   3987       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   3988       if (is_Unop(aa, Iop_Left32))
   3989          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   3990       break;
   3991    case Iop_Left32:
   3992       /* Left32( Left32(x) ) --> Left32(x) */
   3993       if (is_Unop(aa, Iop_Left32))
   3994          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   3995       break;
   3996    case Iop_32to1:
   3997       /* 32to1( 1Uto32 ( x ) ) --> x */
   3998       if (is_Unop(aa, Iop_1Uto32))
   3999          return aa->Iex.Unop.arg;
   4000       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   4001       if (is_Unop(aa, Iop_CmpwNEZ32))
   4002          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   4003       break;
   4004    case Iop_64to1:
   4005       /* 64to1( 1Uto64 ( x ) ) --> x */
   4006       if (is_Unop(aa, Iop_1Uto64))
   4007          return aa->Iex.Unop.arg;
   4008       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   4009       if (is_Unop(aa, Iop_CmpwNEZ64))
   4010          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   4011       break;
   4012    case Iop_64to32:
   4013       /* 64to32( 32Uto64 ( x )) --> x */
   4014       if (is_Unop(aa, Iop_32Uto64))
   4015          return aa->Iex.Unop.arg;
   4016       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   4017       if (is_Unop(aa, Iop_8Uto64))
   4018          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   4019       break;
   4020 
   4021    case Iop_32Uto64:
   4022       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   4023       if (is_Unop(aa, Iop_8Uto32))
   4024          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   4025       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   4026       if (is_Unop(aa, Iop_16Uto32))
   4027          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   4028       break;
   4029 
   4030    case Iop_1Sto32:
   4031       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   4032       if (is_Unop(aa, Iop_CmpNEZ8)
   4033           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   4034           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   4035           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   4036                      Iop_CmpNEZ32)) {
   4037          return IRExpr_Unop( Iop_CmpwNEZ32,
   4038                              aa->Iex.Unop.arg->Iex.Unop.arg
   4039                                ->Iex.Unop.arg->Iex.Unop.arg);
   4040       }
   4041       break;
   4042 
   4043 
   4044    default:
   4045       break;
   4046    }
   4047    /* no reduction rule applies */
   4048    return IRExpr_Unop( op, aa );
   4049 }
   4050 
   4051 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   4052 {
   4053    IRExpr*  e2;
   4054    IRExpr** args2;
   4055    Int      i;
   4056 
   4057    switch (e->tag) {
   4058 
   4059       case Iex_CCall:
   4060          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   4061          for (i = 0; args2[i]; i++)
   4062             args2[i] = atbSubst_Expr(env,args2[i]);
   4063          return IRExpr_CCall(
   4064                    e->Iex.CCall.cee,
   4065                    e->Iex.CCall.retty,
   4066                    args2
   4067                 );
   4068       case Iex_RdTmp:
   4069          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   4070          return e2 ? e2 : e;
   4071       case Iex_Mux0X:
   4072          return IRExpr_Mux0X(
   4073                    atbSubst_Expr(env, e->Iex.Mux0X.cond),
   4074                    atbSubst_Expr(env, e->Iex.Mux0X.expr0),
   4075                    atbSubst_Expr(env, e->Iex.Mux0X.exprX)
   4076                 );
   4077       case Iex_Qop:
   4078          return IRExpr_Qop(
   4079                    e->Iex.Qop.op,
   4080                    atbSubst_Expr(env, e->Iex.Qop.arg1),
   4081                    atbSubst_Expr(env, e->Iex.Qop.arg2),
   4082                    atbSubst_Expr(env, e->Iex.Qop.arg3),
   4083                    atbSubst_Expr(env, e->Iex.Qop.arg4)
   4084                 );
   4085       case Iex_Triop:
   4086          return IRExpr_Triop(
   4087                    e->Iex.Triop.op,
   4088                    atbSubst_Expr(env, e->Iex.Triop.arg1),
   4089                    atbSubst_Expr(env, e->Iex.Triop.arg2),
   4090                    atbSubst_Expr(env, e->Iex.Triop.arg3)
   4091                 );
   4092       case Iex_Binop:
   4093          return fold_IRExpr_Binop(
   4094                    e->Iex.Binop.op,
   4095                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   4096                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   4097                 );
   4098       case Iex_Unop:
   4099          return fold_IRExpr_Unop(
   4100                    e->Iex.Unop.op,
   4101                    atbSubst_Expr(env, e->Iex.Unop.arg)
   4102                 );
   4103       case Iex_Load:
   4104          return IRExpr_Load(
   4105                    e->Iex.Load.end,
   4106                    e->Iex.Load.ty,
   4107                    atbSubst_Expr(env, e->Iex.Load.addr)
   4108                 );
   4109       case Iex_GetI:
   4110          return IRExpr_GetI(
   4111                    e->Iex.GetI.descr,
   4112                    atbSubst_Expr(env, e->Iex.GetI.ix),
   4113                    e->Iex.GetI.bias
   4114                 );
   4115       case Iex_Const:
   4116       case Iex_Get:
   4117          return e;
   4118       default:
   4119          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4120          vpanic("atbSubst_Expr");
   4121    }
   4122 }
   4123 
   4124 /* Same deal as atbSubst_Expr, except for stmts. */
   4125 
   4126 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
   4127 {
   4128    Int     i;
   4129    IRDirty *d, *d2;
   4130    IRCAS   *cas, *cas2;
   4131    switch (st->tag) {
   4132       case Ist_AbiHint:
   4133          return IRStmt_AbiHint(
   4134                    atbSubst_Expr(env, st->Ist.AbiHint.base),
   4135                    st->Ist.AbiHint.len,
   4136                    atbSubst_Expr(env, st->Ist.AbiHint.nia)
   4137                 );
   4138       case Ist_Store:
   4139          return IRStmt_Store(
   4140                    st->Ist.Store.end,
   4141                    atbSubst_Expr(env, st->Ist.Store.addr),
   4142                    atbSubst_Expr(env, st->Ist.Store.data)
   4143                 );
   4144       case Ist_WrTmp:
   4145          return IRStmt_WrTmp(
   4146                    st->Ist.WrTmp.tmp,
   4147                    atbSubst_Expr(env, st->Ist.WrTmp.data)
   4148                 );
   4149       case Ist_Put:
   4150          return IRStmt_Put(
   4151                    st->Ist.Put.offset,
   4152                    atbSubst_Expr(env, st->Ist.Put.data)
   4153                 );
   4154       case Ist_PutI:
   4155          return IRStmt_PutI(
   4156                    st->Ist.PutI.descr,
   4157                    atbSubst_Expr(env, st->Ist.PutI.ix),
   4158                    st->Ist.PutI.bias,
   4159                    atbSubst_Expr(env, st->Ist.PutI.data)
   4160                 );
   4161 
   4162       case Ist_Exit:
   4163          return IRStmt_Exit(
   4164                    atbSubst_Expr(env, st->Ist.Exit.guard),
   4165                    st->Ist.Exit.jk,
   4166                    st->Ist.Exit.dst
   4167                 );
   4168       case Ist_IMark:
   4169          return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
   4170       case Ist_NoOp:
   4171          return IRStmt_NoOp();
   4172       case Ist_MBE:
   4173          return IRStmt_MBE(st->Ist.MBE.event);
   4174       case Ist_CAS:
   4175          cas  = st->Ist.CAS.details;
   4176          cas2 = mkIRCAS(
   4177                    cas->oldHi, cas->oldLo, cas->end,
   4178                    atbSubst_Expr(env, cas->addr),
   4179                    cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
   4180                    atbSubst_Expr(env, cas->expdLo),
   4181                    cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
   4182                    atbSubst_Expr(env, cas->dataLo)
   4183                 );
   4184          return IRStmt_CAS(cas2);
   4185       case Ist_LLSC:
   4186          return IRStmt_LLSC(
   4187                    st->Ist.LLSC.end,
   4188                    st->Ist.LLSC.result,
   4189                    atbSubst_Expr(env, st->Ist.LLSC.addr),
   4190                    st->Ist.LLSC.storedata
   4191                       ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
   4192                 );
   4193       case Ist_Dirty:
   4194          d  = st->Ist.Dirty.details;
   4195          d2 = emptyIRDirty();
   4196          *d2 = *d;
   4197          if (d2->mFx != Ifx_None)
   4198             d2->mAddr = atbSubst_Expr(env, d2->mAddr);
   4199          d2->guard = atbSubst_Expr(env, d2->guard);
   4200          for (i = 0; d2->args[i]; i++)
   4201             d2->args[i] = atbSubst_Expr(env, d2->args[i]);
   4202          return IRStmt_Dirty(d2);
   4203       default:
   4204          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4205          vpanic("atbSubst_Stmt");
   4206    }
   4207 }
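
        /* Purely illustrative (hypothetical IR): if env currently binds
                 t3 -> Add32(t1,0x4:I32)
           then atbSubst_Stmt applied to
                 STle(t3) = t7
           yields
                 STle(Add32(t1,0x4:I32)) = t7
           and the binding for t3 is consumed from env in the process. */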
   4208 
   4209 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
   4210 {
   4211    Int      i, j, k, m;
   4212    Bool     stmtPuts, stmtStores, invalidateMe;
   4213    IRStmt*  st;
   4214    IRStmt*  st2;
   4215    ATmpInfo env[A_NENV];
   4216 
   4217    Int       n_tmps = bb->tyenv->types_used;
   4218    UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));
   4219 
   4220    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
   4221       temp.  Also count occurrences in the bb->next field. */
   4222 
   4223    for (i = 0; i < n_tmps; i++)
   4224       uses[i] = 0;
   4225 
   4226    for (i = 0; i < bb->stmts_used; i++) {
   4227       st = bb->stmts[i];
   4228       if (st->tag == Ist_NoOp)
   4229          continue;
   4230       aoccCount_Stmt( uses, st );
   4231    }
   4232    aoccCount_Expr(uses, bb->next );
   4233 
   4234 #  if 0
   4235    for (i = 0; i < n_tmps; i++) {
   4236       if (uses[i] == 0)
   4237         continue;
   4238       ppIRTemp( (IRTemp)i );
   4239       vex_printf("  used %d\n", (Int)uses[i] );
   4240    }
   4241 #  endif
   4242 
   4243    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
   4244 
   4245          If the env is full, emit the oldest (end) element.  This
   4246          guarantees there is at least one free slot for what follows.
   4247 
   4248          On seeing 't = E', occ(t)==1,
   4249             let E'=env(E)
   4250             delete this stmt
   4251             add t -> E' to the front of the env
   4252             Examine E' and set the hints for E' appropriately
   4253               (doesLoad? doesGet?)
   4254 
   4255          On seeing any other stmt,
   4256             let stmt' = env(stmt)
   4257             remove from env any 't=E' bindings invalidated by stmt
   4258                 emit the invalidated stmts
   4259             emit stmt'
   4260             compact any holes in env
   4261               by sliding entries towards the front
   4262 
   4263       Finally, apply env to bb->next.
   4264    */
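
           /* A purely illustrative example of the Phase 2 transformation
              (hypothetical IR, not taken from any particular guest).  Given
              occ(t1) == occ(t2) == 1, the sequence
                    t1 = GET:I32(16)
                    t2 = Add32(t1,0x4:I32)
                    PUT(24) = t2
              is rebuilt as the single tree
                    PUT(24) = Add32(GET:I32(16),0x4:I32)
              with both bindings consumed from env along the way. */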
   4265 
   4266    for (i = 0; i < A_NENV; i++) {
   4267       env[i].bindee = NULL;
   4268       env[i].binder = IRTemp_INVALID;
   4269    }
   4270 
   4271    /* The stmts in bb are being reordered, and we are guaranteed to
   4272       end up with no more than the number we started with.  Use i as
   4273       the cursor for the stmt currently being examined, and j <= i as
   4274       the cursor for the next stmt to be written.
   4275    */
   4276    j = 0;
   4277    for (i = 0; i < bb->stmts_used; i++) {
   4278 
   4279       st = bb->stmts[i];
   4280       if (st->tag == Ist_NoOp)
   4281          continue;
   4282 
   4283       /* Ensure there's at least one free slot in the env by emitting
   4284          the oldest binding if necessary. */
   4285       if (env[A_NENV-1].bindee != NULL) {
   4286          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
   4287                                       env[A_NENV-1].bindee );
   4288          j++;
   4289          vassert(j <= i);
   4290          env[A_NENV-1].bindee = NULL;
   4291       }
   4292 
   4293       /* Consider current stmt. */
   4294       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
   4295          IRExpr *e, *e2;
   4296 
   4297          /* Optional extra: discard dead bindings (occ(t)==0) as we
   4298             find them.  Removes the need for a prior dead-code removal pass. */
   4299          if (uses[st->Ist.WrTmp.tmp] == 0) {
   4300             if (0) vex_printf("DEAD binding\n");
   4301             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   4302          }
   4303          vassert(uses[st->Ist.WrTmp.tmp] == 1);
   4304 
   4305          /* OK, we have 't = E' with occ(t)==1.  Do the actions
   4306             described above. */
   4307          e  = st->Ist.WrTmp.data;
   4308          e2 = atbSubst_Expr(env, e);
   4309          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
   4310          setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
   4311          /* don't advance j, as we are deleting this stmt and instead
   4312             holding it temporarily in the env. */
   4313          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   4314       }
   4315 
   4316       /* we get here for any other kind of statement. */
   4317       /* 'use up' any bindings required by the current statement. */
   4318       st2 = atbSubst_Stmt(env, st);
   4319 
   4320       /* Now, before this stmt, dump any bindings in env that it
   4321          invalidates.  These need to be dumped in the order in which
   4322          they originally entered env -- that means from oldest to
   4323          youngest. */
   4324 
   4325       /* stmtPuts/stmtStores characterise what the stmt under
   4326          consideration does, or might do (erring towards True is safe). */
   4327       stmtPuts
   4328          = toBool( st->tag == Ist_Put
   4329                    || st->tag == Ist_PutI
   4330                    || st->tag == Ist_Dirty );
   4331 
   4332       /* stmtStores is True if this stmt writes memory, or might do
   4333          so (==> we don't want to reorder other loads or stores
   4334          relative to it).  Both LL and SC also fall under this
   4335          classification, since we ought to be conservative and not
   4336          reorder any other memory transactions relative to them. */
   4337       stmtStores
   4338          = toBool( st->tag == Ist_Store
   4339                    || st->tag == Ist_Dirty
   4340                    || st->tag == Ist_LLSC );
   4341 
   4342       for (k = A_NENV-1; k >= 0; k--) {
   4343          if (env[k].bindee == NULL)
   4344             continue;
   4345          /* Compare the actions of this stmt with the actions of
   4346             binding 'k', to see if they invalidate the binding. */
   4347          invalidateMe
   4348             = toBool(
   4349               /* a store invalidates loaded data */
   4350               (env[k].doesLoad && stmtStores)
   4351               /* a put invalidates get'd data */
   4352               || (env[k].doesGet && stmtPuts)
   4353               /* a put invalidates loaded data.  Note, we could do
   4354                  much better here in the sense that we only need to
   4355                  invalidate trees containing loads if the Put in
   4356                  question is marked as requiring precise
   4357                  exceptions. */
   4358               || (env[k].doesLoad && stmtPuts)
   4359               /* probably overly conservative: a memory bus event
   4360                  invalidates absolutely everything, so that all
   4361                  computation prior to it is forced to complete before
   4362                  proceeding with the event (fence, lock, unlock). */
   4363               || st->tag == Ist_MBE
   4364               /* also be (probably overly) paranoid re AbiHints */
   4365               || st->tag == Ist_AbiHint
   4366               );
   4367          if (invalidateMe) {
   4368             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
   4369             j++;
   4370             vassert(j <= i);
   4371             env[k].bindee = NULL;
   4372          }
   4373       }
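
              /* Purely illustrative (hypothetical IR): if env holds the
                 binding
                       t1 = GET:I32(16)
                 (so doesGet is set for it), then a following statement
                       PUT(32) = 0x0:I32
                 makes stmtPuts True, and the binding is flushed back out
                 here as a WrTmp ahead of the Put, preserving the original
                 Get/Put ordering. */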
   4374 
   4375       /* Slide in-use entries in env up to the front */
   4376       m = 0;
   4377       for (k = 0; k < A_NENV; k++) {
   4378          if (env[k].bindee != NULL) {
   4379             env[m] = env[k];
   4380             m++;
   4381          }
   4382       }
   4383       for (; m < A_NENV; m++) {
   4384          env[m].bindee = NULL;
   4385       }
   4386 
   4387       /* finally, emit the substituted statement */
   4388       bb->stmts[j] = st2;
   4389       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
   4390       j++;
   4391 
   4392       vassert(j <= i+1);
   4393    } /* for each stmt in the original bb ... */
   4394 
   4395    /* Finally, substitute into the ->next field as far as possible.
   4396       Any bindings still left in env were never referenced again --
   4397       their single use must have been in a binding that was itself
   4398       dropped as dead -- so they too are dead and can be discarded. */
   4399    bb->next = atbSubst_Expr(env, bb->next);
   4400    bb->stmts_used = j;
   4401 }
   4402 
   4403 
   4404 /*---------------------------------------------------------------*/
   4405 /*--- iropt main                                              ---*/
   4406 /*---------------------------------------------------------------*/
   4407 
   4408 static Bool iropt_verbose = False; /* True; */
   4409 
   4410 
   4411 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   4412    redundant Put removal, constant propagation, dead code removal,
   4413    clean helper specialisation, and dead code removal (again).
   4414 */
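
        /* A purely illustrative example (hypothetical IR) of the first of
           these passes: redundant Get removal turns
                 PUT(16) = t3
                 t7 = GET:I32(16)
           into
                 PUT(16) = t3
                 t7 = t3
           and the resulting copy is then mopped up by the constant
           propagation and dead code removal passes below. */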
   4415 
   4416 
   4417 static
   4418 IRSB* cheap_transformations (
   4419          IRSB* bb,
   4420          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   4421          Bool (*preciseMemExnsFn)(Int,Int)
   4422       )
   4423 {
   4424    redundant_get_removal_BB ( bb );
   4425    if (iropt_verbose) {
   4426       vex_printf("\n========= REDUNDANT GET\n\n" );
   4427       ppIRSB(bb);
   4428    }
   4429 
   4430    redundant_put_removal_BB ( bb, preciseMemExnsFn );
   4431    if (iropt_verbose) {
   4432       vex_printf("\n========= REDUNDANT PUT\n\n" );
   4433       ppIRSB(bb);
   4434    }
   4435 
   4436    bb = cprop_BB ( bb );
   4437    if (iropt_verbose) {
   4438       vex_printf("\n========= CPROPD\n\n" );
   4439       ppIRSB(bb);
   4440    }
   4441 
   4442    do_deadcode_BB ( bb );
   4443    if (iropt_verbose) {
   4444       vex_printf("\n========= DEAD\n\n" );
   4445       ppIRSB(bb);
   4446    }
   4447 
   4448    bb = spec_helpers_BB ( bb, specHelper );
   4449    do_deadcode_BB ( bb );
   4450    if (iropt_verbose) {
   4451       vex_printf("\n========= SPECd \n\n" );
   4452       ppIRSB(bb);
   4453    }
   4454 
   4455    return bb;
   4456 }
   4457 
   4458 
   4459 /* Do some more expensive transformations on bb, which are aimed at
   4460    optimising as much as possible in the presence of GetI and PutI.  */
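
        /* A purely illustrative example (hypothetical IR, with a
           rotating-register-style descriptor): redundant GetI elimination
           can turn
                 PUTI(72:8xF64)[t1,0] = t5
                 t9 = GETI(72:8xF64)[t1,0]
           into
                 PUTI(72:8xF64)[t1,0] = t5
                 t9 = t5
           provided the descriptors, index expressions and biases match. */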
   4461 
   4462 static
   4463 IRSB* expensive_transformations( IRSB* bb )
   4464 {
   4465    (void)do_cse_BB( bb );
   4466    collapse_AddSub_chains_BB( bb );
   4467    do_redundant_GetI_elimination( bb );
   4468    do_redundant_PutI_elimination( bb );
   4469    do_deadcode_BB( bb );
   4470    return bb;
   4471 }
   4472 
   4473 
   4474 /* Scan a flattened BB to look for signs that more expensive
   4475    optimisations might be useful:
   4476    - find out if there are any GetIs and PutIs
   4477    - find out if there are any floating or vector-typed temporaries
   4478 */
   4479 
   4480 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
   4481                                  /*OUT*/Bool* hasVorFtemps,
   4482                                  IRSB* bb )
   4483 {
   4484    Int      i, j;
   4485    IRStmt*  st;
   4486    IRDirty* d;
   4487    IRCAS*   cas;
   4488 
   4489    *hasGetIorPutI = False;
   4490    *hasVorFtemps  = False;
   4491 
   4492    for (i = 0; i < bb->stmts_used; i++) {
   4493       st = bb->stmts[i];
   4494       switch (st->tag) {
   4495          case Ist_AbiHint:
   4496             vassert(isIRAtom(st->Ist.AbiHint.base));
   4497             vassert(isIRAtom(st->Ist.AbiHint.nia));
   4498             break;
   4499          case Ist_PutI:
   4500             *hasGetIorPutI = True;
   4501             break;
   4502          case Ist_WrTmp:
   4503             if (st->Ist.WrTmp.data->tag == Iex_GetI)
   4504                *hasGetIorPutI = True;
   4505             switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
   4506                case Ity_I1: case Ity_I8: case Ity_I16:
   4507                case Ity_I32: case Ity_I64: case Ity_I128:
   4508                   break;
   4509                case Ity_F32: case Ity_F64: case Ity_V128:
   4510                   *hasVorFtemps = True;
   4511                   break;
   4512                default:
   4513                   goto bad;
   4514             }
   4515             break;
   4516          case Ist_Put:
   4517             vassert(isIRAtom(st->Ist.Put.data));
   4518             break;
   4519          case Ist_Store:
   4520             vassert(isIRAtom(st->Ist.Store.addr));
   4521             vassert(isIRAtom(st->Ist.Store.data));
   4522             break;
   4523          case Ist_CAS:
   4524             cas = st->Ist.CAS.details;
   4525             vassert(isIRAtom(cas->addr));
   4526             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   4527             vassert(isIRAtom(cas->expdLo));
   4528             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   4529             vassert(isIRAtom(cas->dataLo));
   4530             break;
   4531          case Ist_LLSC:
   4532             vassert(isIRAtom(st->Ist.LLSC.addr));
   4533             if (st->Ist.LLSC.storedata)
   4534                vassert(isIRAtom(st->Ist.LLSC.storedata));
   4535             break;
   4536          case Ist_Dirty:
   4537             d = st->Ist.Dirty.details;
   4538             vassert(isIRAtom(d->guard));
   4539             for (j = 0; d->args[j]; j++)
   4540                vassert(isIRAtom(d->args[j]));
   4541             if (d->mFx != Ifx_None)
   4542                vassert(isIRAtom(d->mAddr));
   4543             break;
   4544          case Ist_NoOp:
   4545          case Ist_IMark:
   4546          case Ist_MBE:
   4547             break;
   4548          case Ist_Exit:
   4549             vassert(isIRAtom(st->Ist.Exit.guard));
   4550             break;
   4551          default:
   4552          bad:
   4553             ppIRStmt(st);
   4554             vpanic("considerExpensives");
   4555       }
   4556    }
   4557 }
   4558 
   4559 
   4560 /* ---------------- The main iropt entry point. ---------------- */
   4561 
   4562 /* exported from this file */
   4563 /* Rules of the game:
   4564 
   4565    - IRExpr/IRStmt trees should be treated as immutable, as they
   4566      may get shared.  So never change a field of such a tree node;
   4567      instead construct and return a new one if needed.
   4568 */
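
        /* A minimal sketch of the copy-then-modify idiom this implies,
           shown only as a comment (it is the same pattern atbSubst_Stmt
           uses above for IRDirty nodes):

              IRDirty* d2 = emptyIRDirty();
              *d2 = *d;                      // shallow-copy the node
              d2->guard = newGuard;          // then retarget fields of the
              return IRStmt_Dirty(d2);       // copy; d itself is untouched

           where 'newGuard' stands for whatever replacement expression is
           wanted. */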
   4569 
   4570 
   4571 IRSB* do_iropt_BB(
   4572          IRSB* bb0,
   4573          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   4574          Bool (*preciseMemExnsFn)(Int,Int),
   4575          Addr64 guest_addr,
   4576          VexArch guest_arch
   4577       )
   4578 {
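           /* Informal road map of the schedule below: flatten; stop if
              iropt_level <= 0; cheap pass; extra cprop/spec/Put-removal/
              deadcode round for ARM; then, if iropt_level > 1: CSE +
              deadcode when there are FP/Vector temps but no GetI/PutI,
              the expensive + cheap passes when GetI/PutI are present,
              and finally an attempt at single-BB loop unrolling, with a
              further clean-up if it succeeds. */
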
   4579    static Int n_total     = 0;
   4580    static Int n_expensive = 0;
   4581 
   4582    Bool hasGetIorPutI, hasVorFtemps;
   4583    IRSB *bb, *bb2;
   4584 
   4585    n_total++;
   4586 
   4587    /* First flatten the block out, since all other
   4588       phases assume flat code. */
   4589 
   4590    bb = flatten_BB ( bb0 );
   4591 
   4592    if (iropt_verbose) {
   4593       vex_printf("\n========= FLAT\n\n" );
   4594       ppIRSB(bb);
   4595    }
   4596 
   4597    /* If at level 0, stop now. */
   4598    if (vex_control.iropt_level <= 0) return bb;
   4599 
   4600    /* Now do a preliminary cleanup pass, and figure out if we also
   4601       need to do 'expensive' optimisations.  Expensive optimisations
   4602       are deemed necessary if the block contains any GetIs or PutIs.
   4603       If needed, do expensive transformations and then another cheap
   4604       cleanup pass. */
   4605 
   4606    bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4607 
   4608    if (guest_arch == VexArchARM) {
   4609       /* Translating Thumb2 code produces a lot of chaff.  We have to
   4610          work extra hard to get rid of it. */
   4611       bb = cprop_BB(bb);
   4612       bb = spec_helpers_BB ( bb, specHelper );
   4613       redundant_put_removal_BB ( bb, preciseMemExnsFn );
   4614       do_deadcode_BB( bb );
   4615    }
   4616 
   4617    if (vex_control.iropt_level > 1) {
   4618 
   4619       /* Peer at what we have, to decide how much more effort to throw
   4620          at it. */
   4621       considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
   4622 
   4623       if (hasVorFtemps && !hasGetIorPutI) {
   4624          /* If there is any evidence of FP or Vector activity, run
   4625             CSE, as that tends to mop up the large amount of redundant
   4626             code to do with rounding modes.  Don't bother if
   4627             hasGetIorPutI, since that case leads into the expensive
   4628             transformations, which do CSE anyway. */
   4629          (void)do_cse_BB( bb );
   4630          do_deadcode_BB( bb );
   4631       }
   4632 
   4633       if (hasGetIorPutI) {
   4634          Bool cses;
   4635          n_expensive++;
   4636          if (DEBUG_IROPT)
   4637             vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
   4638          bb = expensive_transformations( bb );
   4639          bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4640          /* Potentially common up GetIs */
   4641          cses = do_cse_BB( bb );
   4642          if (cses)
   4643             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4644       }
   4645 
   4646       /* Now have a go at unrolling simple (single-BB) loops.  If
   4647          successful, clean up the results as much as possible. */
   4648 
   4649       bb2 = maybe_loop_unroll_BB( bb, guest_addr );
   4650       if (bb2) {
   4651          bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
   4652          if (hasGetIorPutI) {
   4653             bb = expensive_transformations( bb );
   4654             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   4655          } else {
   4656             /* at least do CSE and dead code removal */
   4657             do_cse_BB( bb );
   4658             do_deadcode_BB( bb );
   4659          }
   4660          if (0) vex_printf("vex iropt: unrolled a loop\n");
   4661       }
   4662 
   4663    }
   4664 
   4665    return bb;
   4666 }
   4667 
   4668 
   4669 /*---------------------------------------------------------------*/
   4670 /*--- end                                            ir_opt.c ---*/
   4671 /*---------------------------------------------------------------*/
   4672