      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*---------------------------------------------------------------*/
      4 /*--- begin                                          ir_opt.c ---*/
      5 /*---------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2004-2012 OpenWorks LLP
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #include "libvex_basictypes.h"
     38 #include "libvex_ir.h"
     39 #include "libvex.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "ir_opt.h"
     44 
     45 
     46 /* Set to 1 for lots of debugging output. */
     47 #define DEBUG_IROPT 0
     48 
     49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
     50 #define STATS_IROPT 0
     51 
     52 
     53 /* What iropt does, 29 Dec 04.
     54 
     55    It takes an IRSB and produces a new one with the same meaning,
     56    defined thus:
     57 
     58    After execution of the new BB, all guest state and guest memory is
     59    the same as after execution of the original.  This is true
     60    regardless of how the block was exited (at the end vs side exit).
     61 
     62    In addition, parts of the guest state will be identical to that
     63    created by execution of the original at the following observation
     64    points:
     65 
     66    * In a dirty helper call, any parts of the guest state that the
      67      helper declares it reads or modifies will be up to date.
     68      Also, guest memory will be up to date.  Parts of the guest state
     69      not marked as being read or modified by the helper cannot be
     70      assumed to be up-to-date at the point where the helper is called.
     71 
     72    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     73      Immediately prior to any load or store, those parts of the guest
     74      state marked as requiring precise exceptions will be up to date.
     75      Also, guest memory will be up to date.  Parts of the guest state
     76      not marked as requiring precise exceptions cannot be assumed to
     77      be up-to-date at the point of the load/store.
     78 
     79      If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     80      Same as minimal, but all the guest state is up to date at memory
     81      exception points.
     82 
     83      If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     84      Guest state is up to date at each instruction.
     85 
     86    The relative order of loads and stores (including loads/stores of
     87    guest memory done by dirty helpers annotated as such) is not
     88    changed.  However, the relative order of loads with no intervening
     89    stores/modifies may be changed.
     90 
     91    Transformation order
     92    ~~~~~~~~~~~~~~~~~~~~
     93 
     94    There are three levels of optimisation, controlled by
     95    vex_control.iropt_level.  Define first:
     96 
     97    "Cheap transformations" are the following sequence:
     98       * Redundant-Get removal
     99       * Redundant-Put removal
    100       * Constant propagation/folding
    101       * Dead code removal
    102       * Specialisation of clean helper functions
    103       * Dead code removal
    104 
    105    "Expensive transformations" are the following sequence:
    106       * CSE
    107       * Folding of add/sub chains
    108       * Redundant-GetI removal
    109       * Redundant-PutI removal
    110       * Dead code removal
    111 
    112    Then the transformations are as follows, as defined by
    113    vex_control.iropt_level:
    114 
    115    Level 0:
    116       * Flatten into atomic form.
    117 
    118    Level 1: the following sequence:
    119       * Flatten into atomic form.
    120       * Cheap transformations.
    121 
     122    Level 2: the following sequence:
    123       * Flatten into atomic form.
    124       * Cheap transformations.
    125       * If block contains any floating or vector types, CSE.
    126       * If block contains GetI or PutI, Expensive transformations.
    127       * Try unrolling loops.  Three possible outcomes:
    128         - No effect: do nothing more.
    129         - Unrolled a loop, and block does not contain GetI or PutI:
    130           Do: * CSE
    131               * Dead code removal
    132         - Unrolled a loop, and block contains GetI or PutI:
    133           Do: * Expensive transformations
    134               * Cheap transformations
    135 */
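
         /* Schematically, and only as an illustration, the level-2 pipeline
            described above amounts to something like the following.  The
            helper names here are placeholders for the pass sequences listed
            above; they are not necessarily the names of real functions in
            this file.

               bb = flatten_BB(bb);                          // all levels
               if (level >= 1) cheap_transformations(bb);
               if (level >= 2) {
                  if (block_uses_float_or_vector_types(bb)) cse(bb);
                  if (block_uses_GetI_or_PutI(bb)) expensive_transformations(bb);
                  switch (try_to_unroll_loops(bb)) {
                     case NoEffect:
                        break;
                     case Unrolled_without_GetI_PutI:
                        cse(bb);
                        dead_code_removal(bb);
                        break;
                     case Unrolled_with_GetI_PutI:
                        expensive_transformations(bb);
                        cheap_transformations(bb);
                        break;
                  }
               }
         */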
    136 
    137 /* Implementation notes, 29 Dec 04.
    138 
    139    TODO (important): I think rPutI removal ignores precise exceptions
     140    and is therefore, in a sense, wrong: it assumes that PutIs do not
     141    write parts of the guest state that we need to have up-to-date at
     142    loads/stores.  So far on the x86 guest that has not
    143    mattered since indeed only the x87 FP registers and tags are
    144    accessed using GetI/PutI, and there is no need so far for them to
    145    be up to date at mem exception points.  The rPutI pass should be
    146    fixed.
    147 
    148    TODO: improve pessimistic handling of precise exceptions
    149      in the tree builder.
    150 
    151    TODO: check interaction of rGetI and dirty helpers.
    152 
    153    F64i constants are treated differently from other constants.
    154    They are not regarded as atoms, and instead lifted off and
    155    bound to temps.  This allows them to participate in CSE, which
    156    is important for getting good performance for x86 guest code.
    157 
    158    CSE up F64 literals (already doing F64is)
    159 
    160    CSE: consider carefully the requirement for precise exns
    161         prior to making CSE any more aggressive.  */
    162 
    163 
    164 /*---------------------------------------------------------------*/
    165 /*--- Finite mappery, of a sort                               ---*/
    166 /*---------------------------------------------------------------*/
    167 
     168 /* General map from HWord-sized thing to HWord-sized thing.  Could be by
    169    hashing, but it's not clear whether or not this would really be any
    170    faster. */
    171 
    172 typedef
    173    struct {
    174       Bool*  inuse;
    175       HWord* key;
    176       HWord* val;
    177       Int    size;
    178       Int    used;
    179    }
    180    HashHW;
    181 
    182 static HashHW* newHHW ( void )
    183 {
    184    HashHW* h = LibVEX_Alloc(sizeof(HashHW));
    185    h->size   = 8;
    186    h->used   = 0;
    187    h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
    188    h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
    189    h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
    190    return h;
    191 }
    192 
    193 
    194 /* Look up key in the map. */
    195 
    196 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    197 {
    198    Int i;
    199    /* vex_printf("lookupHHW(%llx)\n", key ); */
    200    for (i = 0; i < h->used; i++) {
    201       if (h->inuse[i] && h->key[i] == key) {
    202          if (val)
    203             *val = h->val[i];
    204          return True;
    205       }
    206    }
    207    return False;
    208 }
    209 
    210 
    211 /* Add key->val to the map.  Replaces any existing binding for key. */
    212 
    213 static void addToHHW ( HashHW* h, HWord key, HWord val )
    214 {
    215    Int i, j;
    216    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    217 
    218    /* Find and replace existing binding, if any. */
    219    for (i = 0; i < h->used; i++) {
    220       if (h->inuse[i] && h->key[i] == key) {
    221          h->val[i] = val;
    222          return;
    223       }
    224    }
    225 
    226    /* Ensure a space is available. */
    227    if (h->used == h->size) {
    228       /* Copy into arrays twice the size. */
    229       Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
    230       HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    231       HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
    232       for (i = j = 0; i < h->size; i++) {
    233          if (!h->inuse[i]) continue;
    234          inuse2[j] = True;
    235          key2[j] = h->key[i];
    236          val2[j] = h->val[i];
    237          j++;
    238       }
    239       h->used = j;
    240       h->size *= 2;
    241       h->inuse = inuse2;
    242       h->key = key2;
    243       h->val = val2;
    244    }
    245 
    246    /* Finally, add it. */
    247    vassert(h->used < h->size);
    248    h->inuse[h->used] = True;
    249    h->key[h->used] = key;
    250    h->val[h->used] = val;
    251    h->used++;
    252 }
    253 
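         /* A minimal usage sketch of this map (illustrative only; not called
            from anywhere).  Keys and values are plain HWords; the passes
            below simply cast IRExpr* values to HWord when storing them. */
         #if 0
         static void example_HashHW_usage ( void )
         {
            HWord   v   = 0;
            HashHW* map = newHHW();
            addToHHW( map, (HWord)0x10, (HWord)0xAA );
            addToHHW( map, (HWord)0x20, (HWord)0xBB );
            addToHHW( map, (HWord)0x10, (HWord)0xCC ); /* replaces 0x10->0xAA */
            vassert( lookupHHW(map, &v, (HWord)0x10) && v == (HWord)0xCC );
            vassert( !lookupHHW(map, &v, (HWord)0x30) );
         }
         #endif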
    254 
    255 /*---------------------------------------------------------------*/
    256 /*--- Flattening out a BB into atomic SSA form                ---*/
    257 /*---------------------------------------------------------------*/
    258 
    259 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    260    copies made by flattening.  If in doubt return False. */
    261 
    262 static Bool isFlat ( IRExpr* e )
    263 {
    264    if (e->tag == Iex_Get)
    265       return True;
    266    if (e->tag == Iex_Binop)
    267       return toBool( isIRAtom(e->Iex.Binop.arg1)
    268                      && isIRAtom(e->Iex.Binop.arg2) );
    269    if (e->tag == Iex_Load)
    270       return isIRAtom(e->Iex.Load.addr);
    271    return False;
    272 }
    273 
    274 /* Flatten out 'ex' so it is atomic, returning a new expression with
    275    the same value, after having appended extra IRTemp assignments to
    276    the end of 'bb'. */
    277 
    278 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
    279 {
    280    Int i;
    281    IRExpr** newargs;
    282    IRType ty = typeOfIRExpr(bb->tyenv, ex);
    283    IRTemp t1;
    284 
    285    switch (ex->tag) {
    286 
    287       case Iex_GetI:
    288          t1 = newIRTemp(bb->tyenv, ty);
    289          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    290             IRExpr_GetI(ex->Iex.GetI.descr,
    291                         flatten_Expr(bb, ex->Iex.GetI.ix),
    292                         ex->Iex.GetI.bias)));
    293          return IRExpr_RdTmp(t1);
    294 
    295       case Iex_Get:
    296          t1 = newIRTemp(bb->tyenv, ty);
    297          addStmtToIRSB(bb,
    298             IRStmt_WrTmp(t1, ex));
    299          return IRExpr_RdTmp(t1);
    300 
    301       case Iex_Qop: {
    302          IRQop* qop = ex->Iex.Qop.details;
    303          t1 = newIRTemp(bb->tyenv, ty);
    304          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    305             IRExpr_Qop(qop->op,
    306                          flatten_Expr(bb, qop->arg1),
    307                          flatten_Expr(bb, qop->arg2),
    308                          flatten_Expr(bb, qop->arg3),
    309                          flatten_Expr(bb, qop->arg4))));
    310          return IRExpr_RdTmp(t1);
    311       }
    312 
    313       case Iex_Triop: {
    314          IRTriop* triop = ex->Iex.Triop.details;
    315          t1 = newIRTemp(bb->tyenv, ty);
    316          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    317             IRExpr_Triop(triop->op,
    318                          flatten_Expr(bb, triop->arg1),
    319                          flatten_Expr(bb, triop->arg2),
    320                          flatten_Expr(bb, triop->arg3))));
    321          return IRExpr_RdTmp(t1);
    322       }
    323 
    324       case Iex_Binop:
    325          t1 = newIRTemp(bb->tyenv, ty);
    326          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    327             IRExpr_Binop(ex->Iex.Binop.op,
    328                          flatten_Expr(bb, ex->Iex.Binop.arg1),
    329                          flatten_Expr(bb, ex->Iex.Binop.arg2))));
    330          return IRExpr_RdTmp(t1);
    331 
    332       case Iex_Unop:
    333          t1 = newIRTemp(bb->tyenv, ty);
    334          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    335             IRExpr_Unop(ex->Iex.Unop.op,
    336                         flatten_Expr(bb, ex->Iex.Unop.arg))));
    337          return IRExpr_RdTmp(t1);
    338 
    339       case Iex_Load:
    340          t1 = newIRTemp(bb->tyenv, ty);
    341          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    342             IRExpr_Load(ex->Iex.Load.end,
    343                         ex->Iex.Load.ty,
    344                         flatten_Expr(bb, ex->Iex.Load.addr))));
    345          return IRExpr_RdTmp(t1);
    346 
    347       case Iex_CCall:
    348          newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
    349          for (i = 0; newargs[i]; i++)
    350             newargs[i] = flatten_Expr(bb, newargs[i]);
    351          t1 = newIRTemp(bb->tyenv, ty);
    352          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    353             IRExpr_CCall(ex->Iex.CCall.cee,
    354                          ex->Iex.CCall.retty,
    355                          newargs)));
    356          return IRExpr_RdTmp(t1);
    357 
    358       case Iex_Mux0X:
    359          t1 = newIRTemp(bb->tyenv, ty);
    360          addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    361             IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
    362                          flatten_Expr(bb, ex->Iex.Mux0X.expr0),
    363                          flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
    364          return IRExpr_RdTmp(t1);
    365 
    366       case Iex_Const:
    367          /* Lift F64i constants out onto temps so they can be CSEd
    368             later. */
    369          if (ex->Iex.Const.con->tag == Ico_F64i) {
    370             t1 = newIRTemp(bb->tyenv, ty);
    371             addStmtToIRSB(bb, IRStmt_WrTmp(t1,
    372                IRExpr_Const(ex->Iex.Const.con)));
    373             return IRExpr_RdTmp(t1);
    374          } else {
    375             /* Leave all other constants alone. */
    376             return ex;
    377          }
    378 
    379       case Iex_RdTmp:
    380          return ex;
    381 
    382       default:
    383          vex_printf("\n");
    384          ppIRExpr(ex);
    385          vex_printf("\n");
    386          vpanic("flatten_Expr");
    387    }
    388 }
    389 
    390 
    391 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    392 
    393 static void flatten_Stmt ( IRSB* bb, IRStmt* st )
    394 {
    395    Int i;
    396    IRExpr  *e1, *e2, *e3, *e4, *e5;
    397    IRDirty *d,  *d2;
    398    IRCAS   *cas, *cas2;
    399    IRPutI  *puti, *puti2;
    400    switch (st->tag) {
    401       case Ist_Put:
    402          if (isIRAtom(st->Ist.Put.data)) {
    403             /* optimisation to reduce the amount of heap wasted
    404                by the flattener */
    405             addStmtToIRSB(bb, st);
    406          } else {
    407             /* general case, always correct */
    408             e1 = flatten_Expr(bb, st->Ist.Put.data);
    409             addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
    410          }
    411          break;
    412       case Ist_PutI:
    413          puti = st->Ist.PutI.details;
    414          e1 = flatten_Expr(bb, puti->ix);
    415          e2 = flatten_Expr(bb, puti->data);
    416          puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
    417          addStmtToIRSB(bb, IRStmt_PutI(puti2));
    418          break;
    419       case Ist_WrTmp:
    420          if (isFlat(st->Ist.WrTmp.data)) {
    421             /* optimisation, to reduce the number of tmp-tmp
    422                copies generated */
    423             addStmtToIRSB(bb, st);
    424          } else {
    425             /* general case, always correct */
    426             e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
    427             addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
    428          }
    429          break;
    430       case Ist_Store:
    431          e1 = flatten_Expr(bb, st->Ist.Store.addr);
    432          e2 = flatten_Expr(bb, st->Ist.Store.data);
    433          addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
    434          break;
    435       case Ist_CAS:
    436          cas  = st->Ist.CAS.details;
    437          e1   = flatten_Expr(bb, cas->addr);
    438          e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
    439          e3   = flatten_Expr(bb, cas->expdLo);
    440          e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
    441          e5   = flatten_Expr(bb, cas->dataLo);
    442          cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
    443                          e1, e2, e3, e4, e5 );
    444          addStmtToIRSB(bb, IRStmt_CAS(cas2));
    445          break;
    446       case Ist_LLSC:
    447          e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
    448          e2 = st->Ist.LLSC.storedata
    449                  ? flatten_Expr(bb, st->Ist.LLSC.storedata)
    450                  : NULL;
    451          addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
    452                                        st->Ist.LLSC.result, e1, e2));
    453          break;
    454       case Ist_Dirty:
    455          d = st->Ist.Dirty.details;
    456          d2 = emptyIRDirty();
    457          *d2 = *d;
    458          d2->args = shallowCopyIRExprVec(d2->args);
    459          if (d2->mFx != Ifx_None) {
    460             d2->mAddr = flatten_Expr(bb, d2->mAddr);
    461          } else {
    462             vassert(d2->mAddr == NULL);
    463          }
    464          d2->guard = flatten_Expr(bb, d2->guard);
    465          for (i = 0; d2->args[i]; i++)
    466             d2->args[i] = flatten_Expr(bb, d2->args[i]);
    467          addStmtToIRSB(bb, IRStmt_Dirty(d2));
    468          break;
    469       case Ist_NoOp:
    470       case Ist_MBE:
    471       case Ist_IMark:
    472          addStmtToIRSB(bb, st);
    473          break;
    474       case Ist_AbiHint:
    475          e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
    476          e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
    477          addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
    478          break;
    479       case Ist_Exit:
    480          e1 = flatten_Expr(bb, st->Ist.Exit.guard);
    481          addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
    482                                        st->Ist.Exit.dst,
    483                                        st->Ist.Exit.offsIP));
    484          break;
    485       default:
    486          vex_printf("\n");
    487          ppIRStmt(st);
    488          vex_printf("\n");
    489          vpanic("flatten_Stmt");
    490    }
    491 }
    492 
    493 
    494 static IRSB* flatten_BB ( IRSB* in )
    495 {
    496    Int   i;
    497    IRSB* out;
    498    out = emptyIRSB();
    499    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    500    for (i = 0; i < in->stmts_used; i++)
    501       if (in->stmts[i])
    502          flatten_Stmt( out, in->stmts[i] );
    503    out->next     = flatten_Expr( out, in->next );
    504    out->jumpkind = in->jumpkind;
    505    out->offsIP   = in->offsIP;
    506    return out;
    507 }
    508 
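         /* A small illustration of flattening (not called from anywhere; the
            guest state offset and the block tail used here are arbitrary).
            The nested Add32 is split up so that every operator argument in
            the result is an atom, i.e. an IRTemp or an IRConst. */
         #if 0
         static void example_flatten_BB ( void )
         {
            IRSB*   bb = emptyIRSB();
            IRTemp  t0 = newIRTemp(bb->tyenv, Ity_I32);
            IRExpr* e
               = IRExpr_Binop(Iop_Add32,
                              IRExpr_Binop(Iop_Add32,
                                           IRExpr_RdTmp(t0), IRExpr_RdTmp(t0)),
                              IRExpr_Const(IRConst_U32(1)));
            addStmtToIRSB(bb, IRStmt_Put(16/*arbitrary offset*/, e));
            bb->next     = IRExpr_Const(IRConst_U32(0));
            bb->jumpkind = Ijk_Boring;
            /* The flattened block assigns the inner and the outer Add32 to
               new temps and rewrites the Put to use only atoms; the U32
               constant stays inline, since only F64i constants are lifted. */
            ppIRSB( flatten_BB(bb) );
         }
         #endif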
    509 
    510 /*---------------------------------------------------------------*/
    511 /*--- In-place removal of redundant GETs                      ---*/
    512 /*---------------------------------------------------------------*/
    513 
    514 /* Scan forwards, building up an environment binding (min offset, max
    515    offset) pairs to values, which will either be temps or constants.
    516 
    517    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    518    env and if it matches, replace the Get with the stored value.  If
    519    there is no match, add a (minoff,maxoff) :-> t binding.
    520 
    521    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    522    any binding which fully or partially overlaps with (minoff,maxoff).
    523    Then add a new (minoff,maxoff) :-> t or c binding.  */
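
         /* For example (illustrative only), given the flattened statements

               t1 = GET:I32(0)
               t2 = GET:I32(0)
               PUT(0) = 0x2A:I32
               t3 = GET:I32(0)

            the second GET is rewritten to 't2 = t1', since (0,3) is already
            bound to t1 at that point.  The PUT invalidates that binding and
            rebinds (0,3) to the stored value, so the third GET is rewritten
            to 't3 = 0x2A:I32'. */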
    524 
    525 /* Extract the min/max offsets from a guest state array descriptor. */
    526 
    527 inline
    528 static void getArrayBounds ( IRRegArray* descr,
    529                              UInt* minoff, UInt* maxoff )
    530 {
    531    *minoff = descr->base;
    532    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    533    vassert((*minoff & ~0xFFFF) == 0);
    534    vassert((*maxoff & ~0xFFFF) == 0);
    535    vassert(*minoff <= *maxoff);
    536 }
    537 
    538 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    539 
    540 static UInt mk_key_GetPut ( Int offset, IRType ty )
    541 {
    542    /* offset should fit in 16 bits. */
    543    UInt minoff = offset;
    544    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    545    vassert((minoff & ~0xFFFF) == 0);
    546    vassert((maxoff & ~0xFFFF) == 0);
    547    return (minoff << 16) | maxoff;
    548 }
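
         /* For example, a 32-bit access at offset 24 covers bytes (24 .. 27)
            and so yields the key (24 << 16) | 27 = 0x0018001B. */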
    549 
    550 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    551 {
    552    UInt minoff, maxoff;
    553    getArrayBounds( descr, &minoff, &maxoff );
    554    vassert((minoff & ~0xFFFF) == 0);
    555    vassert((maxoff & ~0xFFFF) == 0);
    556    return (minoff << 16) | maxoff;
    557 }
    558 
    559 /* Supposing h has keys of the form generated by mk_key_GetPut and
    560    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    561    .. k_hi).
    562 */
    563 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    564 {
    565    Int  j;
    566    UInt e_lo, e_hi;
    567    vassert(k_lo <= k_hi);
    568    /* invalidate any env entries which in any way overlap (k_lo
    569       .. k_hi) */
    570    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    571 
    572    for (j = 0; j < h->used; j++) {
    573       if (!h->inuse[j])
    574          continue;
    575       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    576       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    577       vassert(e_lo <= e_hi);
    578       if (e_hi < k_lo || k_hi < e_lo)
    579          continue; /* no overlap possible */
    580       else
    581          /* overlap; invalidate */
    582          h->inuse[j] = False;
    583    }
    584 }
    585 
    586 
    587 static void redundant_get_removal_BB ( IRSB* bb )
    588 {
    589    HashHW* env = newHHW();
    590    UInt    key = 0; /* keep gcc -O happy */
    591    Int     i, j;
    592    HWord   val;
    593 
    594    for (i = 0; i < bb->stmts_used; i++) {
    595       IRStmt* st = bb->stmts[i];
    596 
    597       if (st->tag == Ist_NoOp)
    598          continue;
    599 
    600       /* Deal with Gets */
    601       if (st->tag == Ist_WrTmp
    602           && st->Ist.WrTmp.data->tag == Iex_Get) {
    603          /* st is 't = Get(...)'.  Look up in the environment and see
    604             if the Get can be replaced. */
    605          IRExpr* get = st->Ist.WrTmp.data;
    606          key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
    607                                      get->Iex.Get.ty );
    608          if (lookupHHW(env, &val, (HWord)key)) {
    609             /* found it */
    610             /* Note, we could do better here.  If the types are
    611                different we don't do the substitution, since doing so
    612                could lead to invalidly-typed IR.  An improvement would
    613                be to stick in a reinterpret-style cast, although that
    614                would make maintaining flatness more difficult. */
    615             IRExpr* valE    = (IRExpr*)val;
    616             Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
    617                                       == st->Ist.WrTmp.data->Iex.Get.ty );
    618             if (typesOK && DEBUG_IROPT) {
    619                vex_printf("rGET: "); ppIRExpr(get);
    620                vex_printf("  ->  "); ppIRExpr(valE);
    621                vex_printf("\n");
    622             }
    623             if (typesOK)
    624                bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
    625          } else {
    626             /* Not found, but at least we know that t and the Get(...)
    627                are now associated.  So add a binding to reflect that
    628                fact. */
    629             addToHHW( env, (HWord)key,
    630                            (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
    631          }
    632       }
    633 
    634       /* Deal with Puts: invalidate any env entries overlapped by this
    635          Put */
    636       if (st->tag == Ist_Put || st->tag == Ist_PutI) {
    637          UInt k_lo, k_hi;
    638          if (st->tag == Ist_Put) {
    639             key = mk_key_GetPut( st->Ist.Put.offset,
    640                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    641          } else {
    642             vassert(st->tag == Ist_PutI);
    643             key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
    644          }
    645 
    646          k_lo = (key >> 16) & 0xFFFF;
    647          k_hi = key & 0xFFFF;
    648          invalidateOverlaps(env, k_lo, k_hi);
    649       }
    650       else
    651       if (st->tag == Ist_Dirty) {
    652          /* Deal with dirty helpers which write or modify guest state.
    653             Invalidate the entire env.  We could do a lot better
    654             here. */
    655          IRDirty* d      = st->Ist.Dirty.details;
    656          Bool     writes = False;
    657          for (j = 0; j < d->nFxState; j++) {
    658             if (d->fxState[j].fx == Ifx_Modify
    659                 || d->fxState[j].fx == Ifx_Write)
     660                writes = True;
    661          }
    662          if (writes) {
    663             /* dump the entire env (not clever, but correct ...) */
    664             for (j = 0; j < env->used; j++)
    665                env->inuse[j] = False;
    666             if (0) vex_printf("rGET: trash env due to dirty helper\n");
    667          }
    668       }
    669 
    670       /* add this one to the env, if appropriate */
    671       if (st->tag == Ist_Put) {
    672          vassert(isIRAtom(st->Ist.Put.data));
    673          addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
    674       }
    675 
    676    } /* for (i = 0; i < bb->stmts_used; i++) */
    677 
    678 }
    679 
    680 
    681 /*---------------------------------------------------------------*/
    682 /*--- In-place removal of redundant PUTs                      ---*/
    683 /*---------------------------------------------------------------*/
    684 
    685 /* Find any Get uses in st and invalidate any partially or fully
    686    overlapping ranges listed in env.  Due to the flattening phase, the
    687    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    688 
    689 static void handle_gets_Stmt (
    690                HashHW* env,
    691                IRStmt* st,
    692                Bool (*preciseMemExnsFn)(Int,Int)
    693             )
    694 {
    695    Int     j;
    696    UInt    key = 0; /* keep gcc -O happy */
    697    Bool    isGet;
    698    Bool    memRW = False;
    699    IRExpr* e;
    700 
    701    switch (st->tag) {
    702 
    703       /* This is the only interesting case.  Deal with Gets in the RHS
    704          expression. */
    705       case Ist_WrTmp:
    706          e = st->Ist.WrTmp.data;
    707          switch (e->tag) {
    708             case Iex_Get:
    709                isGet = True;
    710                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    711                break;
    712             case Iex_GetI:
    713                isGet = True;
    714                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    715                break;
    716             case Iex_Load:
    717                isGet = False;
    718                memRW = True;
    719                break;
    720             default:
    721                isGet = False;
    722          }
    723          if (isGet) {
    724             UInt k_lo, k_hi;
    725             k_lo = (key >> 16) & 0xFFFF;
    726             k_hi = key & 0xFFFF;
    727             invalidateOverlaps(env, k_lo, k_hi);
    728          }
    729          break;
    730 
    731       /* Be very conservative for dirty helper calls; dump the entire
    732          environment.  The helper might read guest state, in which
    733          case it needs to be flushed first.  Also, the helper might
    734          access guest memory, in which case all parts of the guest
     735          state requiring precise exceptions need to be flushed.  The
    736          crude solution is just to flush everything; we could easily
    737          enough do a lot better if needed. */
    738       /* Probably also overly-conservative, but also dump everything
    739          if we hit a memory bus event (fence, lock, unlock).  Ditto
    740          AbiHints, CASs, LLs and SCs. */
    741       case Ist_AbiHint:
    742          vassert(isIRAtom(st->Ist.AbiHint.base));
    743          vassert(isIRAtom(st->Ist.AbiHint.nia));
    744          /* fall through */
    745       case Ist_MBE:
    746       case Ist_Dirty:
    747       case Ist_CAS:
    748       case Ist_LLSC:
    749          for (j = 0; j < env->used; j++)
    750             env->inuse[j] = False;
    751          break;
    752 
    753       /* all other cases are boring. */
    754       case Ist_Store:
    755          vassert(isIRAtom(st->Ist.Store.addr));
    756          vassert(isIRAtom(st->Ist.Store.data));
    757          memRW = True;
    758          break;
    759 
    760       case Ist_Exit:
    761          vassert(isIRAtom(st->Ist.Exit.guard));
    762          break;
    763 
    764       case Ist_PutI:
    765          vassert(isIRAtom(st->Ist.PutI.details->ix));
    766          vassert(isIRAtom(st->Ist.PutI.details->data));
    767          break;
    768 
    769       case Ist_NoOp:
    770       case Ist_IMark:
    771          break;
    772 
    773       default:
    774          vex_printf("\n");
    775          ppIRStmt(st);
    776          vex_printf("\n");
    777          vpanic("handle_gets_Stmt");
    778    }
    779 
    780    if (memRW) {
    781       /* This statement accesses memory.  So we need to dump all parts
    782          of the environment corresponding to guest state that may not
    783          be reordered with respect to memory references.  That means
    784          at least the stack pointer. */
    785       switch (vex_control.iropt_register_updates) {
    786          case VexRegUpdAllregsAtMemAccess:
    787             /* Precise exceptions required at mem access.
    788                Flush all guest state. */
    789             for (j = 0; j < env->used; j++)
    790                env->inuse[j] = False;
    791             break;
    792          case VexRegUpdUnwindregsAtMemAccess:
    793             for (j = 0; j < env->used; j++) {
    794                if (!env->inuse[j])
    795                   continue;
    796                /* Just flush the minimal amount required, as computed by
    797                   preciseMemExnsFn. */
    798                HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    799                HWord k_hi = env->key[j] & 0xFFFF;
    800                if (preciseMemExnsFn( k_lo, k_hi ))
    801                   env->inuse[j] = False;
    802             }
    803             break;
    804          default:
    805             // VexRegUpdAllregsAtEachInsn cannot happen here.
    806             // Neither any rubbish other value.
    807             vassert(0);
    808       }
    809    } /* if (memRW) */
    810 
    811 }
    812 
    813 
    814 /* Scan backwards, building up a set of (min offset, max
    815    offset) pairs, indicating those parts of the guest state
    816    for which the next event is a write.
    817 
    818    On seeing a conditional exit, empty the set.
    819 
    820    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    821    completely within the set, remove the Put.  Otherwise, add
    822    (minoff,maxoff) to the set.
    823 
    824    On seeing 'Get (minoff,maxoff)', remove any part of the set
    825    overlapping (minoff,maxoff).  The same has to happen for any events
    826    which implicitly read parts of the guest state: dirty helper calls
    827    and loads/stores.
    828 */
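
         /* For example (illustrative only), in

               PUT(0) = t1
               t7 = GET:I32(8)
               PUT(0) = t2

            the backward scan records (0,3) when it meets the second PUT, so
            when it reaches the first PUT, (0,3) is already in the set and
            that PUT is turned into a NoOp.  The GET at offset 8 does not
            overlap (0,3) and so does not prevent this; a GET:I32(0) or a
            conditional exit in between would. */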
    829 
    830 static void redundant_put_removal_BB (
    831                IRSB* bb,
    832                Bool (*preciseMemExnsFn)(Int,Int)
    833             )
    834 {
    835    Int     i, j;
    836    Bool    isPut;
    837    IRStmt* st;
    838    UInt    key = 0; /* keep gcc -O happy */
    839 
    840    vassert
    841       (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
    842        || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);
    843 
    844    HashHW* env = newHHW();
    845 
    846    /* Initialise the running env with the fact that the final exit
     847       writes the IP (or whatever it claims to write; we don't
     848       care). */
    849    key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
    850    addToHHW(env, (HWord)key, 0);
    851 
    852    /* And now scan backwards through the statements. */
    853    for (i = bb->stmts_used-1; i >= 0; i--) {
    854       st = bb->stmts[i];
    855 
    856       if (st->tag == Ist_NoOp)
    857          continue;
    858 
    859       /* Deal with conditional exits. */
    860       if (st->tag == Ist_Exit) {
    861          //Bool re_add;
    862          /* Need to throw out from the env, any part of it which
    863             doesn't overlap with the guest state written by this exit.
    864             Since the exit only writes one section, it's simplest to
    865             do this: (1) check whether env contains a write that
    866             completely overlaps the write done by this exit; (2) empty
    867             out env; and (3) if (1) was true, add the write done by
    868             this exit.
    869 
    870             To make (1) a bit simpler, merely search for a write that
    871             exactly matches the one done by this exit.  That's safe
    872             because it will fail as often or more often than a full
    873             overlap check, and failure to find an overlapping write in
    874             env is the safe case (we just nuke env if that
    875             happens). */
    876          //vassert(isIRAtom(st->Ist.Exit.guard));
    877          /* (1) */
    878          //key = mk_key_GetPut(st->Ist.Exit.offsIP,
    879          //                    typeOfIRConst(st->Ist.Exit.dst));
    880          //re_add = lookupHHW(env, NULL, key);
    881          /* (2) */
    882          for (j = 0; j < env->used; j++)
    883             env->inuse[j] = False;
    884          /* (3) */
    885          //if (0 && re_add)
    886          //   addToHHW(env, (HWord)key, 0);
    887          continue;
    888       }
    889 
    890       /* Deal with Puts */
    891       switch (st->tag) {
    892          case Ist_Put:
    893             isPut = True;
    894             key = mk_key_GetPut( st->Ist.Put.offset,
    895                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    896             vassert(isIRAtom(st->Ist.Put.data));
    897             break;
    898          case Ist_PutI:
    899             isPut = True;
    900             key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
    901             vassert(isIRAtom(st->Ist.PutI.details->ix));
    902             vassert(isIRAtom(st->Ist.PutI.details->data));
    903             break;
    904          default:
    905             isPut = False;
    906       }
    907       if (isPut && st->tag != Ist_PutI) {
    908          /* See if any single entry in env overlaps this Put.  This is
    909             simplistic in that the transformation is valid if, say, two
    910             or more entries in the env overlap this Put, but the use of
    911             lookupHHW will only find a single entry which exactly
    912             overlaps this Put.  This is suboptimal but safe. */
    913          if (lookupHHW(env, NULL, (HWord)key)) {
    914             /* This Put is redundant because a later one will overwrite
    915                it.  So NULL (nop) it out. */
    916             if (DEBUG_IROPT) {
    917                vex_printf("rPUT: "); ppIRStmt(st);
    918                vex_printf("\n");
    919             }
    920             bb->stmts[i] = IRStmt_NoOp();
    921          } else {
    922             /* We can't demonstrate that this Put is redundant, so add it
    923                to the running collection. */
    924             addToHHW(env, (HWord)key, 0);
    925          }
    926          continue;
    927       }
    928 
    929       /* Deal with Gets.  These remove bits of the environment since
    930          appearance of a Get means that the next event for that slice
    931          of the guest state is no longer a write, but a read.  Also
    932          deals with implicit reads of guest state needed to maintain
    933          precise exceptions. */
    934       handle_gets_Stmt( env, st, preciseMemExnsFn );
    935    }
    936 }
    937 
    938 
    939 /*---------------------------------------------------------------*/
    940 /*--- Constant propagation and folding                        ---*/
    941 /*---------------------------------------------------------------*/
    942 
    943 #if STATS_IROPT
    944 /* How often sameIRExprs was invoked */
    945 static UInt invocation_count;
    946 /* How often sameIRExprs recursed through IRTemp assignments */
    947 static UInt recursion_count;
    948 /* How often sameIRExprs found identical IRExprs */
    949 static UInt success_count;
    950 /* How often recursing through assignments to IRTemps helped
     951    establish equality. */
    952 static UInt recursion_success_count;
    953 /* Whether or not recursing through an IRTemp assignment helped
     954    establish IRExpr equality for a given sameIRExprs invocation. */
    955 static Bool recursion_helped;
    956 /* Whether or not a given sameIRExprs invocation recursed through an
    957    IRTemp assignment */
    958 static Bool recursed;
    959 /* Maximum number of nodes ever visited when comparing two IRExprs. */
    960 static UInt max_nodes_visited;
    961 #endif /* STATS_IROPT */
    962 
    963 /* Count the number of nodes visited for a given sameIRExprs invocation. */
    964 static UInt num_nodes_visited;
    965 
    966 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
    967    This is to guard against performance degradation by visiting large
    968    trees without success. */
    969 #define NODE_LIMIT 30
    970 
    971 
    972 /* The env in this section is a map from IRTemp to IRExpr*,
    973    that is, an array indexed by IRTemp. */
    974 
    975 /* Do both expressions compute the same value? The answer is generally
    976    conservative, i.e. it will report that the expressions do not compute
    977    the same value when in fact they do. The reason is that we do not
     978    keep track of changes in the guest state and memory. Thus, two
     979    Gets, GetIs or Loads, even when accessing the same location, will be
     980    assumed to compute different values. After all, the accesses may happen
    981    at different times and the guest state / memory can have changed in
    982    the meantime.
    983 
    984    XXX IMPORTANT XXX the two expressions must have the same IR type.
    985    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
    986 
    987 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
    988    wrapper that deals with the common tags-don't-match case, and a
    989    slower out of line general case.  Saves a few insns. */
    990 
    991 __attribute__((noinline))
    992 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
    993 
    994 inline
    995 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
    996 {
    997    if (e1->tag != e2->tag) return False;
    998    return sameIRExprs_aux2(env, e1, e2);
    999 }
   1000 
   1001 __attribute__((noinline))
   1002 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1003 {
   1004    if (num_nodes_visited++ > NODE_LIMIT) return False;
   1005 
   1006    switch (e1->tag) {
   1007       case Iex_RdTmp:
   1008          if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;
   1009 
   1010          if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
   1011             Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
   1012                                         env[e2->Iex.RdTmp.tmp]);
   1013 #if STATS_IROPT
   1014             recursed = True;
   1015             if (same) recursion_helped = True;
   1016 #endif
   1017             return same;
   1018          }
   1019          return False;
   1020 
   1021       case Iex_Get:
   1022       case Iex_GetI:
   1023       case Iex_Load:
   1024          /* Guest state / memory could have changed in the meantime. */
   1025          return False;
   1026 
   1027       case Iex_Binop:
   1028          return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
   1029                         && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
   1030                                                  e2->Iex.Binop.arg1 )
   1031                         && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
   1032                                                  e2->Iex.Binop.arg2 ));
   1033 
   1034       case Iex_Unop:
   1035          return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
   1036                         && sameIRExprs_aux( env, e1->Iex.Unop.arg,
   1037                                                  e2->Iex.Unop.arg ));
   1038 
   1039       case Iex_Const: {
   1040          IRConst *c1 = e1->Iex.Const.con;
   1041          IRConst *c2 = e2->Iex.Const.con;
   1042          vassert(c1->tag == c2->tag);
   1043          switch (c1->tag) {
   1044             case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
   1045             case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
   1046             case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
   1047             case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
   1048             case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
   1049             default: break;
   1050          }
   1051          return False;
   1052       }
   1053 
   1054       case Iex_Triop: {
   1055          IRTriop *tri1 = e1->Iex.Triop.details;
   1056          IRTriop *tri2 = e2->Iex.Triop.details;
   1057          return toBool( tri1->op == tri2->op
   1058                         && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
   1059                         && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
   1060                         && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
   1061       }
   1062 
   1063       case Iex_Mux0X:
   1064          return toBool(    sameIRExprs_aux( env, e1->Iex.Mux0X.cond,
   1065                                                  e2->Iex.Mux0X.cond )
   1066                         && sameIRExprs_aux( env, e1->Iex.Mux0X.expr0,
   1067                                                  e2->Iex.Mux0X.expr0 )
   1068                         && sameIRExprs_aux( env, e1->Iex.Mux0X.exprX,
   1069                                                  e2->Iex.Mux0X.exprX ));
   1070 
   1071       default:
   1072          /* Not very likely to be "same". */
   1073          break;
   1074    }
   1075 
   1076    return False;
   1077 }
   1078 
   1079 inline
   1080 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1081 {
   1082    Bool same;
   1083 
   1084    num_nodes_visited = 0;
   1085    same = sameIRExprs_aux(env, e1, e2);
   1086 
   1087 #if STATS_IROPT
   1088    ++invocation_count;
   1089    if (recursed) ++recursion_count;
   1090    success_count += same;
   1091    if (same && recursion_helped)
   1092       ++recursion_success_count;
   1093    if (num_nodes_visited > max_nodes_visited)
   1094       max_nodes_visited = num_nodes_visited;
   1095    recursed = False; /* reset */
   1096    recursion_helped = False;  /* reset */
   1097 #endif /* STATS_IROPT */
   1098 
   1099    return same;
   1100 }
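
         /* A small illustration (not called from anywhere): with an empty
            temp environment, two structurally identical Add32 trees compare
            equal, whereas two GETs of the same offset do not, for the
            reasons explained above. */
         #if 0
         static void example_sameIRExprs ( void )
         {
            Int     i;
            IRExpr* env[4];
            IRExpr* a;
            IRExpr* b;
            for (i = 0; i < 4; i++) env[i] = NULL;
            a = IRExpr_Binop(Iop_Add32, IRExpr_RdTmp((IRTemp)1),
                                        IRExpr_Const(IRConst_U32(7)));
            b = IRExpr_Binop(Iop_Add32, IRExpr_RdTmp((IRTemp)1),
                                        IRExpr_Const(IRConst_U32(7)));
            vassert( sameIRExprs(env, a, b) );
            vassert( !sameIRExprs(env, IRExpr_Get(0, Ity_I32),
                                       IRExpr_Get(0, Ity_I32)) );
         }
         #endif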
   1101 
   1102 
   1103 /* Debugging-only hack (not used in production runs): make a guess
   1104    whether sameIRExprs might assert due to the two args being of
   1105    different types.  If in doubt return False.  Is only used when
   1106    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
    1107    --vex-iropt-verbosity > 0, that is, vex_control.iropt_verbosity > 0.
   1108    comment on the single use point below for rationale. */
   1109 static
   1110 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   1111 {
   1112    if (e1->tag != e2->tag) return False;
   1113    switch (e1->tag) {
   1114       case Iex_Const: {
   1115          /* The only interesting case */
   1116          IRConst *c1 = e1->Iex.Const.con;
   1117          IRConst *c2 = e2->Iex.Const.con;
   1118          return c1->tag != c2->tag;
   1119       }
   1120       default:
   1121          break;
   1122    }
   1123    return False;
   1124 }
   1125 
   1126 
   1127 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
   1128 static Bool isZeroU32 ( IRExpr* e )
   1129 {
   1130    return toBool( e->tag == Iex_Const
   1131                   && e->Iex.Const.con->tag == Ico_U32
   1132                   && e->Iex.Const.con->Ico.U32 == 0);
   1133 }
   1134 
   1135 /* Is this literally IRExpr_Const(IRConst_U32(1---1)) ? */
   1136 static Bool isOnesU32 ( IRExpr* e )
   1137 {
   1138    return toBool( e->tag == Iex_Const
   1139                   && e->Iex.Const.con->tag == Ico_U32
   1140                   && e->Iex.Const.con->Ico.U32 == 0xFFFFFFFF );
   1141 }
   1142 
   1143 /* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
   1144 static Bool isZeroU64 ( IRExpr* e )
   1145 {
   1146    return toBool( e->tag == Iex_Const
   1147                   && e->Iex.Const.con->tag == Ico_U64
   1148                   && e->Iex.Const.con->Ico.U64 == 0);
   1149 }
   1150 
   1151 /* Is this an integer constant with value 0 ? */
   1152 static Bool isZeroU ( IRExpr* e )
   1153 {
   1154    if (e->tag != Iex_Const) return False;
   1155    switch (e->Iex.Const.con->tag) {
   1156       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
   1157       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
   1158       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
   1159       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
   1160       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
   1161       default: vpanic("isZeroU");
   1162    }
   1163 }
   1164 
   1165 /* Is this an integer constant with value 1---1b ? */
   1166 static Bool isOnesU ( IRExpr* e )
   1167 {
   1168    if (e->tag != Iex_Const) return False;
   1169    switch (e->Iex.Const.con->tag) {
   1170       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
   1171       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
   1172       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
   1173                                      == 0xFFFFFFFF);
   1174       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
   1175                                      == 0xFFFFFFFFFFFFFFFFULL);
   1176       default: ppIRExpr(e); vpanic("isOnesU");
   1177    }
   1178 }
   1179 
   1180 static Bool notBool ( Bool b )
   1181 {
   1182    if (b == True) return False;
   1183    if (b == False) return True;
   1184    vpanic("notBool");
   1185 }
   1186 
   1187 /* Make a zero which has the same type as the result of the given
   1188    primop. */
   1189 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
   1190 {
   1191    switch (op) {
   1192       case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
   1193       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
   1194       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
   1195       case Iop_Sub32:
   1196       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
   1197       case Iop_Sub64:
   1198       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
   1199       case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
   1200       default: vpanic("mkZeroOfPrimopResultType: bad primop");
   1201    }
   1202 }
   1203 
   1204 /* Make a value containing all 1-bits, which has the same type as the
   1205    result of the given primop. */
   1206 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
   1207 {
   1208    switch (op) {
   1209       case Iop_CmpEQ32:
   1210       case Iop_CmpEQ64:
   1211          return IRExpr_Const(IRConst_U1(toBool(1)));
   1212       case Iop_Or8:
   1213          return IRExpr_Const(IRConst_U8(0xFF));
   1214       case Iop_Or16:
   1215          return IRExpr_Const(IRConst_U16(0xFFFF));
   1216       case Iop_Or32:
   1217          return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
   1218       case Iop_CmpEQ8x8:
   1219       case Iop_Or64:
   1220          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
   1221       case Iop_CmpEQ8x16:
   1222       case Iop_CmpEQ16x8:
   1223       case Iop_CmpEQ32x4:
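                  /* Ico_V128 constants are stored as a 16-bit summary, one
                     bit per byte of the 128-bit value, so 0xFFFF denotes
                     all-ones. */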
   1224          return IRExpr_Const(IRConst_V128(0xFFFF));
   1225       default:
   1226          ppIROp(op);
   1227          vpanic("mkOnesOfPrimopResultType: bad primop");
   1228    }
   1229 }
   1230 
   1231 /* Helpers for folding Clz32/64. */
   1232 static UInt fold_Clz64 ( ULong value )
   1233 {
   1234    UInt i;
   1235    vassert(value != 0ULL); /* no defined semantics for arg==0 */
   1236    for (i = 0; i < 64; ++i) {
   1237       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   1238    }
   1239    vassert(0);
   1240    /*NOTREACHED*/
   1241    return 0;
   1242 }
   1243 
   1244 static UInt fold_Clz32 ( UInt value )
   1245 {
   1246    UInt i;
   1247    vassert(value != 0); /* no defined semantics for arg==0 */
   1248    for (i = 0; i < 32; ++i) {
   1249       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   1250    }
   1251    vassert(0);
   1252    /*NOTREACHED*/
   1253    return 0;
   1254 }
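
         /* For instance, fold_Clz32(0x00010000) == 15 (bit 16 is the highest
            set bit), and fold_Clz64(1ULL) == 63. */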
   1255 
   1256 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   1257    the corresponding real constant. */
   1258 //XXX re-check this before use
   1259 //static ULong de_summarise_V64 ( UChar v64 )
   1260 //{
   1261 //   ULong r = 0;
   1262 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
   1263 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
   1264 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
   1265 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
   1266 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
   1267 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
   1268 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
   1269 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
   1270 //   return r;
   1271 //}
   1272 
   1273 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
   1274 {
   1275    Int     shift;
   1276    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1277 
   1278    switch (e->tag) {
   1279    case Iex_Unop:
   1280       /* UNARY ops */
   1281       if (e->Iex.Unop.arg->tag == Iex_Const) {
   1282          switch (e->Iex.Unop.op) {
   1283          case Iop_1Uto8:
   1284             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1285                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1286                     ? 1 : 0)));
   1287             break;
   1288          case Iop_1Uto32:
   1289             e2 = IRExpr_Const(IRConst_U32(
   1290                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1291                     ? 1 : 0));
   1292             break;
   1293          case Iop_1Uto64:
   1294             e2 = IRExpr_Const(IRConst_U64(
   1295                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1296                     ? 1 : 0));
   1297             break;
   1298 
   1299          case Iop_1Sto8:
   1300             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1301                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1302                     ? 0xFF : 0)));
   1303             break;
   1304          case Iop_1Sto16:
   1305             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1306                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1307                     ? 0xFFFF : 0)));
   1308             break;
   1309          case Iop_1Sto32:
   1310             e2 = IRExpr_Const(IRConst_U32(
   1311                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1312                     ? 0xFFFFFFFF : 0));
   1313             break;
   1314          case Iop_1Sto64:
   1315             e2 = IRExpr_Const(IRConst_U64(
   1316                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1317                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1318             break;
   1319 
   1320          case Iop_8Sto32: {
   1321             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1322             s32 <<= 24;
   1323             s32 >>= 24;
   1324             e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1325             break;
   1326          }
   1327          case Iop_16Sto32: {
   1328             /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1329             s32 <<= 16;
   1330             s32 >>= 16;
   1331             e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
   1332             break;
   1333          }
   1334          case Iop_8Uto64:
   1335             e2 = IRExpr_Const(IRConst_U64(
   1336                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1337             break;
   1338          case Iop_16Uto64:
   1339             e2 = IRExpr_Const(IRConst_U64(
   1340                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1341             break;
   1342          case Iop_8Uto32:
   1343             e2 = IRExpr_Const(IRConst_U32(
   1344                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1345             break;
   1346          case Iop_8Sto16: {
   1347             /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1348             s16 <<= 8;
   1349             s16 >>= 8;
   1350             e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
   1351             break;
   1352          }
   1353          case Iop_8Uto16:
   1354             e2 = IRExpr_Const(IRConst_U16(
   1355                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1356             break;
   1357          case Iop_16Uto32:
   1358             e2 = IRExpr_Const(IRConst_U32(
   1359                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1360             break;
   1361          case Iop_32to16:
   1362             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1363                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1364             break;
   1365          case Iop_32to8:
   1366             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1367                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1368             break;
   1369          case Iop_32to1:
   1370             e2 = IRExpr_Const(IRConst_U1(toBool(
   1371                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1372                  )));
   1373             break;
   1374          case Iop_64to1:
   1375             e2 = IRExpr_Const(IRConst_U1(toBool(
   1376                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1377                  )));
   1378             break;
   1379 
   1380          case Iop_NotV128:
   1381             e2 = IRExpr_Const(IRConst_V128(
   1382                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
   1383             break;
   1384          case Iop_Not64:
   1385             e2 = IRExpr_Const(IRConst_U64(
   1386                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1387             break;
   1388          case Iop_Not32:
   1389             e2 = IRExpr_Const(IRConst_U32(
   1390                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1391             break;
   1392          case Iop_Not16:
   1393             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1394                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1395             break;
   1396          case Iop_Not8:
   1397             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1398                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1399             break;
   1400 
   1401          case Iop_Not1:
   1402             e2 = IRExpr_Const(IRConst_U1(
   1403                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1404             break;
   1405 
   1406          case Iop_64to8: {
   1407             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1408             w64 &= 0xFFULL;
   1409             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1410             break;
   1411          }
   1412          case Iop_64to16: {
   1413             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1414             w64 &= 0xFFFFULL;
   1415             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1416             break;
   1417          }
   1418          case Iop_64to32: {
   1419             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1420             w64 &= 0x00000000FFFFFFFFULL;
   1421             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1422             break;
   1423          }
   1424          case Iop_64HIto32: {
   1425             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1426             w64 >>= 32;
   1427             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1428             break;
   1429          }
   1430          case Iop_32Uto64:
   1431             e2 = IRExpr_Const(IRConst_U64(
   1432                     0xFFFFFFFFULL
   1433                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1434             break;
   1435          case Iop_16Sto64: {
   1436             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1437             s64 <<= 48;
   1438             s64 >>= 48;
   1439             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1440             break;
   1441          }
   1442          case Iop_32Sto64: {
   1443             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1444             s64 <<= 32;
   1445             s64 >>= 32;
   1446             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1447             break;
   1448          }
   1449 
   1450          case Iop_16to8: {
   1451             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1452             w16 &= 0xFF;
   1453             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1454             break;
   1455          }
   1456          case Iop_16HIto8: {
   1457             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1458             w16 >>= 8;
   1459             w16 &= 0xFF;
   1460             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1461             break;
   1462          }
   1463 
   1464          case Iop_CmpNEZ8:
   1465             e2 = IRExpr_Const(IRConst_U1(toBool(
   1466                     0 !=
   1467                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1468                  )));
   1469             break;
   1470          case Iop_CmpNEZ32:
   1471             e2 = IRExpr_Const(IRConst_U1(toBool(
   1472                     0 !=
   1473                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1474                  )));
   1475             break;
   1476          case Iop_CmpNEZ64:
   1477             e2 = IRExpr_Const(IRConst_U1(toBool(
   1478                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1479                  )));
   1480             break;
   1481 
   1482          case Iop_CmpwNEZ32: {
   1483             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1484             if (w32 == 0)
   1485                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1486             else
   1487                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1488             break;
   1489          }
   1490          case Iop_CmpwNEZ64: {
   1491             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1492             if (w64 == 0)
   1493                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1494             else
   1495                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1496             break;
   1497          }
   1498 
   1499          case Iop_Left32: {
   1500             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1501             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1502             s32 = (s32 | (-s32));
   1503             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1504             break;
   1505          }
   1506 
   1507          case Iop_Left64: {
   1508             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1509             Long  s64 = (Long)u64;
   1510             s64 = (s64 | (-s64));
   1511             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1512             break;
   1513          }
   1514 
   1515          case Iop_Clz32: {
   1516             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1517             if (u32 != 0)
   1518                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
   1519             break;
   1520          }
   1521          case Iop_Clz64: {
   1522             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1523             if (u64 != 0ULL)
   1524                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
   1525             break;
   1526          }
   1527 
   1528          /* For these vector ones, can't fold all cases, but at least
   1529             do the most obvious one.  Could do better here using
   1530             summarise/desummarise of vector constants, but too
   1531             difficult to verify; hence just handle the zero cases. */
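         /* For instance (illustrative): 32UtoV128(0x0:I32) folds to
            the all-zeroes V128 constant below, while any nonzero
            argument falls through to the 'unhandled' path. */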
   1532          case Iop_32UtoV128: {
   1533             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1534             if (0 == u32) {
   1535                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1536             } else {
   1537                goto unhandled;
   1538             }
   1539             break;
   1540          }
   1541          case Iop_V128to64: {
   1542             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1543             if (0 == ((v128 >> 0) & 0xFF)) {
   1544                e2 = IRExpr_Const(IRConst_U64(0));
   1545             } else {
   1546                goto unhandled;
   1547             }
   1548             break;
   1549          }
   1550          case Iop_V128HIto64: {
   1551             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1552             if (0 == ((v128 >> 8) & 0xFF)) {
   1553                e2 = IRExpr_Const(IRConst_U64(0));
   1554             } else {
   1555                goto unhandled;
   1556             }
   1557             break;
   1558          }
   1559          case Iop_64UtoV128: {
   1560             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1561             if (0 == u64) {
   1562                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1563             } else {
   1564                goto unhandled;
   1565             }
   1566             break;
   1567          }
   1568 
   1569          /* Even stupider (although still correct ..) */
   1570          case Iop_V256to64_0: case Iop_V256to64_1:
   1571          case Iop_V256to64_2: case Iop_V256to64_3: {
   1572             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
   1573             if (v256 == 0x00000000) {
   1574                e2 = IRExpr_Const(IRConst_U64(0));
   1575             } else {
   1576                goto unhandled;
   1577             }
   1578             break;
   1579          }
   1580 
   1581          default:
   1582             goto unhandled;
   1583          }
   1584       }
   1585       break;
   1586 
   1587    case Iex_Binop:
   1588       /* BINARY ops */
   1589       if (e->Iex.Binop.arg1->tag == Iex_Const
   1590           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1591          /* cases where both args are consts */
   1592          switch (e->Iex.Binop.op) {
   1593 
   1594             /* -- Or -- */
   1595             case Iop_Or8:
   1596                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1597                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1598                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1599                break;
   1600             case Iop_Or16:
   1601                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1602                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1603                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1604                break;
   1605             case Iop_Or32:
   1606                e2 = IRExpr_Const(IRConst_U32(
   1607                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1608                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1609                break;
   1610             case Iop_Or64:
   1611                e2 = IRExpr_Const(IRConst_U64(
   1612                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1613                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1614                break;
   1615             case Iop_OrV128:
   1616                e2 = IRExpr_Const(IRConst_V128(
   1617                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1618                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1619                break;
   1620 
   1621             /* -- Xor -- */
   1622             case Iop_Xor8:
   1623                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1624                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1625                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1626                break;
   1627             case Iop_Xor16:
   1628                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1629                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1630                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1631                break;
   1632             case Iop_Xor32:
   1633                e2 = IRExpr_Const(IRConst_U32(
   1634                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1635                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1636                break;
   1637             case Iop_Xor64:
   1638                e2 = IRExpr_Const(IRConst_U64(
   1639                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1640                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1641                break;
   1642             case Iop_XorV128:
   1643                e2 = IRExpr_Const(IRConst_V128(
   1644                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1645                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1646                break;
   1647 
   1648             /* -- And -- */
   1649             case Iop_And8:
   1650                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1651                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1652                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1653                break;
   1654             case Iop_And16:
   1655                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1656                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1657                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1658                break;
   1659             case Iop_And32:
   1660                e2 = IRExpr_Const(IRConst_U32(
   1661                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1662                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1663                break;
   1664             case Iop_And64:
   1665                e2 = IRExpr_Const(IRConst_U64(
   1666                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1667                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1668                break;
   1669             case Iop_AndV128:
   1670                e2 = IRExpr_Const(IRConst_V128(
   1671                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1672                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1673                break;
   1674 
   1675             /* -- Add -- */
   1676             case Iop_Add8:
   1677                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1678                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1679                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1680                break;
   1681             case Iop_Add32:
   1682                e2 = IRExpr_Const(IRConst_U32(
   1683                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1684                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1685                break;
   1686             case Iop_Add64:
   1687                e2 = IRExpr_Const(IRConst_U64(
   1688                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1689                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1690                break;
   1691 
   1692             /* -- Sub -- */
   1693             case Iop_Sub8:
   1694                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1695                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1696                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1697                break;
   1698             case Iop_Sub32:
   1699                e2 = IRExpr_Const(IRConst_U32(
   1700                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1701                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1702                break;
   1703             case Iop_Sub64:
   1704                e2 = IRExpr_Const(IRConst_U64(
   1705                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1706                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1707                break;
   1708 
   1709             /* -- Max32U -- */
   1710             case Iop_Max32U: {
   1711                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1712                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1713                UInt res  = u32a > u32b ? u32a : u32b;
   1714                e2 = IRExpr_Const(IRConst_U32(res));
   1715                break;
   1716             }
   1717 
   1718             /* -- Mul -- */
   1719             case Iop_Mul32:
   1720                e2 = IRExpr_Const(IRConst_U32(
   1721                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1722                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1723                break;
   1724             case Iop_Mul64:
   1725                e2 = IRExpr_Const(IRConst_U64(
   1726                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1727                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1728                break;
   1729 
   1730             case Iop_MullS32: {
   1731                /* very paranoid */
   1732                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1733                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1734                Int   s32a = (Int)u32a;
   1735                Int   s32b = (Int)u32b;
   1736                Long  s64a = (Long)s32a;
   1737                Long  s64b = (Long)s32b;
   1738                Long  sres = s64a * s64b;
   1739                ULong ures = (ULong)sres;
   1740                e2 = IRExpr_Const(IRConst_U64(ures));
   1741                break;
   1742             }
   1743 
   1744             /* -- Shl -- */
   1745             case Iop_Shl32:
   1746                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1747                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1748                if (shift >= 0 && shift <= 31)
   1749                   e2 = IRExpr_Const(IRConst_U32(
   1750                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1751                            << shift)));
   1752                break;
   1753             case Iop_Shl64:
   1754                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1755                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1756                if (shift >= 0 && shift <= 63)
   1757                   e2 = IRExpr_Const(IRConst_U64(
   1758                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1759                            << shift)));
   1760                break;
   1761 
   1762             /* -- Sar -- */
   1763             case Iop_Sar32: {
   1764                /* paranoid ... */
   1765                /*signed*/ Int s32;
   1766                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1767                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1768                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1769                if (shift >= 0 && shift <= 31) {
   1770                   s32 >>=/*signed*/ shift;
   1771                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1772                }
   1773                break;
   1774             }
   1775             case Iop_Sar64: {
   1776                /* paranoid ... */
   1777                /*signed*/ Long s64;
   1778                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1779                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1780                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1781                if (shift >= 0 && shift <= 63) {
   1782                   s64 >>=/*signed*/ shift;
   1783                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1784                }
   1785                break;
   1786             }
   1787 
   1788             /* -- Shr -- */
   1789             case Iop_Shr32: {
   1790                /* paranoid ... */
   1791                /*unsigned*/ UInt u32;
   1792                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1793                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1794                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1795                if (shift >= 0 && shift <= 31) {
   1796                   u32 >>=/*unsigned*/ shift;
   1797                   e2 = IRExpr_Const(IRConst_U32(u32));
   1798                }
   1799                break;
   1800             }
   1801             case Iop_Shr64: {
   1802                /* paranoid ... */
   1803                /*unsigned*/ ULong u64;
   1804                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1805                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1806                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1807                if (shift >= 0 && shift <= 63) {
   1808                   u64 >>=/*unsigned*/ shift;
   1809                   e2 = IRExpr_Const(IRConst_U64(u64));
   1810                }
   1811                break;
   1812             }
   1813 
   1814             /* -- CmpEQ -- */
   1815             case Iop_CmpEQ32:
   1816                e2 = IRExpr_Const(IRConst_U1(toBool(
   1817                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1818                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1819                break;
   1820             case Iop_CmpEQ64:
   1821                e2 = IRExpr_Const(IRConst_U1(toBool(
   1822                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1823                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1824                break;
   1825 
   1826             /* -- CmpNE -- */
   1827             case Iop_CmpNE8:
   1828                e2 = IRExpr_Const(IRConst_U1(toBool(
   1829                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1830                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1831                break;
   1832             case Iop_CmpNE32:
   1833                e2 = IRExpr_Const(IRConst_U1(toBool(
   1834                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1835                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1836                break;
   1837             case Iop_CmpNE64:
   1838                e2 = IRExpr_Const(IRConst_U1(toBool(
   1839                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1840                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1841                break;
   1842 
   1843             /* -- CmpLEU -- */
   1844             case Iop_CmpLE32U:
   1845                e2 = IRExpr_Const(IRConst_U1(toBool(
   1846                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1847                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1848                break;
   1849             case Iop_CmpLE64U:
   1850                e2 = IRExpr_Const(IRConst_U1(toBool(
   1851                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1852                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1853                break;
   1854 
   1855             /* -- CmpLES -- */
   1856             case Iop_CmpLE32S:
   1857                e2 = IRExpr_Const(IRConst_U1(toBool(
   1858                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1859                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1860                break;
   1861             case Iop_CmpLE64S:
   1862                e2 = IRExpr_Const(IRConst_U1(toBool(
   1863                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1864                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1865                break;
   1866 
   1867             /* -- CmpLTS -- */
   1868             case Iop_CmpLT32S:
   1869                e2 = IRExpr_Const(IRConst_U1(toBool(
   1870                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1871                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1872                break;
   1873             case Iop_CmpLT64S:
   1874                e2 = IRExpr_Const(IRConst_U1(toBool(
   1875                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1876                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1877                break;
   1878 
   1879             /* -- CmpLTU -- */
   1880             case Iop_CmpLT32U:
   1881                e2 = IRExpr_Const(IRConst_U1(toBool(
   1882                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1883                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1884                break;
   1885             case Iop_CmpLT64U:
   1886                e2 = IRExpr_Const(IRConst_U1(toBool(
   1887                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1888                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1889                break;
   1890 
   1891             /* -- CmpORD -- */
   1892             case Iop_CmpORD32S: {
   1893                /* very paranoid */
   1894                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1895                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1896                Int   s32a = (Int)u32a;
   1897                Int   s32b = (Int)u32b;
   1898                Int   r = 0x2; /* EQ */
   1899                if (s32a < s32b) {
   1900                   r = 0x8; /* LT */
   1901                }
   1902                else if (s32a > s32b) {
   1903                   r = 0x4; /* GT */
   1904                }
   1905                e2 = IRExpr_Const(IRConst_U32(r));
   1906                break;
   1907             }
   1908 
   1909             /* -- nHLto2n -- */
   1910             case Iop_32HLto64:
   1911                e2 = IRExpr_Const(IRConst_U64(
   1912                        (((ULong)(e->Iex.Binop.arg1
   1913                                   ->Iex.Const.con->Ico.U32)) << 32)
   1914                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   1915                     ));
   1916                break;
   1917             case Iop_64HLto128:
   1918                /* We can't fold this, because there is no way to
   1919                   express the result in IR, but at least pretend to
   1920                   handle it, so as to stop getting blasted with
   1921                   no-rule-for-this-primop messages. */
   1922                break;
   1923             /* For this vector one, can't fold all cases, but at
   1924                least do the most obvious one.  Could do better here
   1925                using summarise/desummarise of vector constants, but
   1926                too difficult to verify; hence just handle the zero
   1927                cases. */
   1928             case Iop_64HLtoV128: {
   1929                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
   1930                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   1931                if (0 == argHi && 0 == argLo) {
   1932                   e2 = IRExpr_Const(IRConst_V128(0));
   1933                } else {
   1934                   goto unhandled;
   1935                }
   1936                break;
   1937             }
   1938 
   1939             /* -- V128 stuff -- */
   1940             case Iop_InterleaveLO8x16: {
   1941                /* This turns up a lot in Memcheck instrumentation of
   1942                   Icc generated code.  I don't know why. */
   1943                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
   1944                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
   1945                if (0 == arg1 && 0 == arg2) {
   1946                   e2 = IRExpr_Const(IRConst_V128(0));
   1947                } else {
   1948                   goto unhandled;
   1949                }
   1950                break;
   1951             }
   1952 
   1953             default:
   1954                goto unhandled;
   1955          }
   1956 
   1957       } else {
   1958 
   1959          /* other cases (identities, etc) */
   1960          switch (e->Iex.Binop.op) {
   1961 
   1962             case Iop_Shl32:
   1963             case Iop_Shl64:
   1964             case Iop_Shr64:
   1965                /* Shl32/Shl64/Shr64(x,0) ==> x */
   1966                if (isZeroU(e->Iex.Binop.arg2)) {
   1967                   e2 = e->Iex.Binop.arg1;
   1968                   break;
   1969                }
   1970                /* Shl32/Shl64/Shr64(0,x) ==> 0 */
   1971                if (isZeroU(e->Iex.Binop.arg1)) {
   1972                   e2 = e->Iex.Binop.arg1;
   1973                   break;
   1974                }
   1975                break;
   1976 
   1977             case Iop_Shr32:
   1978                /* Shr32(x,0) ==> x */
   1979                if (isZeroU(e->Iex.Binop.arg2)) {
   1980                   e2 = e->Iex.Binop.arg1;
   1981                   break;
   1982                }
   1983                break;
   1984 
   1985             case Iop_Or8:
   1986             case Iop_Or16:
   1987             case Iop_Or32:
   1988             case Iop_Or64:
   1989             case Iop_Max32U:
   1990                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
   1991                if (isZeroU(e->Iex.Binop.arg2)) {
   1992                   e2 = e->Iex.Binop.arg1;
   1993                   break;
   1994                }
   1995                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
   1996                if (isZeroU(e->Iex.Binop.arg1)) {
   1997                   e2 = e->Iex.Binop.arg2;
   1998                   break;
   1999                }
   2000                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
   2001                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
   2002                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
   2003                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2004                   break;
   2005                }
   2006                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
   2007                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2008                   e2 = e->Iex.Binop.arg1;
   2009                   break;
   2010                }
   2011                break;
   2012 
   2013             case Iop_Add8:
   2014                /* Add8(t,t) ==> t << 1.
   2015                   Memcheck doesn't understand that
   2016                   x+x produces a defined least significant bit, and it seems
   2017                   simplest just to get rid of the problem by rewriting it
   2018                   out, since the opportunity to do so exists. */
   2019                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2020                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
   2021                                     IRExpr_Const(IRConst_U8(1)));
   2022                   break;
   2023                }
   2024                break;
   2025 
   2026                /* NB no Add16(t,t) case yet as no known test case exists */
   2027 
   2028             case Iop_Add32:
   2029             case Iop_Add64:
   2030                /* Add32/Add64(x,0) ==> x */
   2031                if (isZeroU(e->Iex.Binop.arg2)) {
   2032                   e2 = e->Iex.Binop.arg1;
   2033                   break;
   2034                }
   2035                /* Add32/Add64(0,x) ==> x */
   2036                if (isZeroU(e->Iex.Binop.arg1)) {
   2037                   e2 = e->Iex.Binop.arg2;
   2038                   break;
   2039                }
   2040                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
   2041                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2042                   e2 = IRExpr_Binop(
   2043                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
   2044                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
   2045                   break;
   2046                }
   2047                break;
   2048 
   2049             case Iop_Sub64:
   2050                /* Sub64(x,0) ==> x */
   2051                if (isZeroU64(e->Iex.Binop.arg2)) {
   2052                   e2 = e->Iex.Binop.arg1;
   2053                   break;
   2054                }
   2055                /* Sub64(t,t) ==> 0, for some IRTemp t */
   2056                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2057                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2058                   break;
   2059                }
   2060                break;
   2061 
   2062             case Iop_And32:
   2063                /* And32(x,0xFFFFFFFF) ==> x */
   2064                if (isOnesU32(e->Iex.Binop.arg2)) {
   2065                   e2 = e->Iex.Binop.arg1;
   2066                   break;
   2067                }
   2068                /* And32(x,0) ==> 0 */
   2069                if (isZeroU32(e->Iex.Binop.arg2)) {
   2070                   e2 = e->Iex.Binop.arg2;
   2071                   break;
   2072                }
   2073                /* And32(0,x) ==> 0 */
   2074                if (isZeroU32(e->Iex.Binop.arg1)) {
   2075                   e2 = e->Iex.Binop.arg1;
   2076                   break;
   2077                }
   2078                /* And32(t,t) ==> t, for some IRTemp t */
   2079                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2080                   e2 = e->Iex.Binop.arg1;
   2081                   break;
   2082                }
   2083                break;
   2084 
   2085             case Iop_And8:
   2086             case Iop_And16:
   2087             case Iop_And64:
   2088             case Iop_AndV128:
   2089             case Iop_AndV256:
   2090                /* And8/And16/And64/AndV128/AndV256(t,t)
   2091                   ==> t, for some IRTemp t */
   2092                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2093                   e2 = e->Iex.Binop.arg1;
   2094                   break;
   2095                }
   2096                break;
   2097 
   2098             case Iop_OrV128:
   2099             case Iop_OrV256:
   2100                /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
   2101                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2102                   e2 = e->Iex.Binop.arg1;
   2103                   break;
   2104                }
   2105                break;
   2106 
   2107             case Iop_Xor8:
   2108             case Iop_Xor16:
   2109             case Iop_Xor32:
   2110             case Iop_Xor64:
   2111             case Iop_XorV128:
   2112                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   2113                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2114                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2115                   break;
   2116                }
   2117                break;
   2118 
   2119             case Iop_Sub32:
   2120             case Iop_CmpNE32:
   2121                /* Sub32/CmpNE32(t,t) ==> 0, for some IRTemp t */
   2122                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2123                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2124                   break;
   2125                }
   2126                break;
   2127 
   2128             case Iop_CmpEQ32:
   2129             case Iop_CmpEQ64:
   2130             case Iop_CmpEQ8x8:
   2131             case Iop_CmpEQ8x16:
   2132             case Iop_CmpEQ16x8:
   2133             case Iop_CmpEQ32x4:
   2134                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2135                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2136                   break;
   2137                }
   2138                break;
   2139 
   2140             default:
   2141                break;
   2142          }
   2143       }
   2144       break;
   2145 
   2146    case Iex_Mux0X:
   2147       /* Mux0X */
   2148 
   2149       /* is the discriminant a constant? */
   2150       if (e->Iex.Mux0X.cond->tag == Iex_Const) {
   2151          Bool zero;
   2152          /* assured to us by the IR type rules */
   2153          vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
   2154          zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
   2155                                      ->Iex.Const.con->Ico.U8));
   2156          e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
   2157       }
   2158       else
   2159       /* are the arms identical? (pretty weedy test) */
   2160       if (sameIRExprs(env, e->Iex.Mux0X.expr0,
   2161                       e->Iex.Mux0X.exprX)) {
   2162          e2 = e->Iex.Mux0X.expr0;
   2163       }
   2164       break;
   2165 
   2166    default:
   2167       /* not considered */
   2168       break;
   2169    }
   2170 
   2171    /* Show cases where we've found but not folded 'op(t,t)'.  Be
   2172       careful not to call sameIRExprs with values of different types,
   2173       though, else it will assert (and so it should!).  We can't
   2174       conveniently call typeOfIRExpr on the two args without a whole
   2175       bunch of extra plumbing to pass in a type env, so just use a
   2176       hacky test to check the arguments are not anything that might
   2177       hacky test to check the arguments are not anything that might
   2178       cause sameIRExprs to assert.  This is only OK because this kludge is
   2179       "real" operation (ie, all other calls to sameIRExprs), it is
   2180       essential that the two args have the same type.
   2181 
   2182       The "right" solution is to plumb the containing block's
   2183       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
   2184       that's a bunch of extra parameter passing which will just slow
   2185       down the normal case, for no purpose. */
   2186    if (vex_control.iropt_verbosity > 0
   2187        && e == e2
   2188        && e->tag == Iex_Binop
   2189        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
   2190                                                     e->Iex.Binop.arg2)
   2191        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2192       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
   2193       ppIRExpr(e);
   2194       vex_printf("\n");
   2195    }
   2196 
   2197    /* Show the overall results of folding. */
   2198    if (DEBUG_IROPT && e2 != e) {
   2199       vex_printf("FOLD: ");
   2200       ppIRExpr(e); vex_printf("  ->  ");
   2201       ppIRExpr(e2); vex_printf("\n");
   2202    }
   2203 
   2204    return e2;
   2205 
   2206  unhandled:
   2207 #  if 0
   2208    vex_printf("\n\n");
   2209    ppIRExpr(e);
   2210    vpanic("fold_Expr: no rule for the above");
   2211 #  else
   2212    if (vex_control.iropt_verbosity > 0) {
   2213       vex_printf("vex iropt: fold_Expr: no const rule for: ");
   2214       ppIRExpr(e);
   2215       vex_printf("\n");
   2216    }
   2217    return e2;
   2218 #  endif
   2219 }
   2220 
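/* Minimal, disabled sketch of what fold_Expr produces for an
   all-constant Add32.  'example_fold_Add32' is a hypothetical helper
   shown for illustration only; 'env' is not consulted here since no
   IRTemps appear in the expression. */
#  if 0
static void example_fold_Add32 ( IRExpr** env )
{
   /* Add32(0x10:I32, 0x4:I32) folds to the constant 0x14:I32. */
   IRExpr* e = IRExpr_Binop( Iop_Add32,
                             IRExpr_Const(IRConst_U32(0x10)),
                             IRExpr_Const(IRConst_U32(0x4)) );
   IRExpr* f = fold_Expr( env, e );
   vassert(f->tag == Iex_Const
           && f->Iex.Const.con->Ico.U32 == 0x14);
}
#  endif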
   2221 
   2222 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   2223    1-level due to previous flattening pass. */
   2224 
   2225 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   2226 {
   2227    switch (ex->tag) {
   2228       case Iex_RdTmp:
   2229          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   2230             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
   2231             if (rhs->tag == Iex_RdTmp)
   2232                return rhs;
   2233             if (rhs->tag == Iex_Const
   2234                 && rhs->Iex.Const.con->tag != Ico_F64i)
   2235                return rhs;
   2236          }
   2237          /* not bound in env */
   2238          return ex;
   2239 
   2240       case Iex_Const:
   2241       case Iex_Get:
   2242          return ex;
   2243 
   2244       case Iex_GetI:
   2245          vassert(isIRAtom(ex->Iex.GetI.ix));
   2246          return IRExpr_GetI(
   2247             ex->Iex.GetI.descr,
   2248             subst_Expr(env, ex->Iex.GetI.ix),
   2249             ex->Iex.GetI.bias
   2250          );
   2251 
   2252       case Iex_Qop: {
   2253          IRQop* qop = ex->Iex.Qop.details;
   2254          vassert(isIRAtom(qop->arg1));
   2255          vassert(isIRAtom(qop->arg2));
   2256          vassert(isIRAtom(qop->arg3));
   2257          vassert(isIRAtom(qop->arg4));
   2258          return IRExpr_Qop(
   2259                    qop->op,
   2260                    subst_Expr(env, qop->arg1),
   2261                    subst_Expr(env, qop->arg2),
   2262                    subst_Expr(env, qop->arg3),
   2263                    subst_Expr(env, qop->arg4)
   2264                 );
   2265       }
   2266 
   2267       case Iex_Triop: {
   2268          IRTriop* triop = ex->Iex.Triop.details;
   2269          vassert(isIRAtom(triop->arg1));
   2270          vassert(isIRAtom(triop->arg2));
   2271          vassert(isIRAtom(triop->arg3));
   2272          return IRExpr_Triop(
   2273                    triop->op,
   2274                    subst_Expr(env, triop->arg1),
   2275                    subst_Expr(env, triop->arg2),
   2276                    subst_Expr(env, triop->arg3)
   2277                 );
   2278       }
   2279 
   2280       case Iex_Binop:
   2281          vassert(isIRAtom(ex->Iex.Binop.arg1));
   2282          vassert(isIRAtom(ex->Iex.Binop.arg2));
   2283          return IRExpr_Binop(
   2284                    ex->Iex.Binop.op,
   2285                    subst_Expr(env, ex->Iex.Binop.arg1),
   2286                    subst_Expr(env, ex->Iex.Binop.arg2)
   2287                 );
   2288 
   2289       case Iex_Unop:
   2290          vassert(isIRAtom(ex->Iex.Unop.arg));
   2291          return IRExpr_Unop(
   2292                    ex->Iex.Unop.op,
   2293                    subst_Expr(env, ex->Iex.Unop.arg)
   2294                 );
   2295 
   2296       case Iex_Load:
   2297          vassert(isIRAtom(ex->Iex.Load.addr));
   2298          return IRExpr_Load(
   2299                    ex->Iex.Load.end,
   2300                    ex->Iex.Load.ty,
   2301                    subst_Expr(env, ex->Iex.Load.addr)
   2302                 );
   2303 
   2304       case Iex_CCall: {
   2305          Int      i;
   2306          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   2307          for (i = 0; args2[i]; i++) {
   2308             vassert(isIRAtom(args2[i]));
   2309             args2[i] = subst_Expr(env, args2[i]);
   2310          }
   2311          return IRExpr_CCall(
   2312                    ex->Iex.CCall.cee,
   2313                    ex->Iex.CCall.retty,
   2314                    args2
   2315                 );
   2316       }
   2317 
   2318       case Iex_Mux0X:
   2319          vassert(isIRAtom(ex->Iex.Mux0X.cond));
   2320          vassert(isIRAtom(ex->Iex.Mux0X.expr0));
   2321          vassert(isIRAtom(ex->Iex.Mux0X.exprX));
   2322          return IRExpr_Mux0X(
   2323                    subst_Expr(env, ex->Iex.Mux0X.cond),
   2324                    subst_Expr(env, ex->Iex.Mux0X.expr0),
   2325                    subst_Expr(env, ex->Iex.Mux0X.exprX)
   2326                 );
   2327 
   2328       default:
   2329          vex_printf("\n\n"); ppIRExpr(ex);
   2330          vpanic("subst_Expr");
   2331 
   2332    }
   2333 }
   2334 
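/* Minimal, disabled sketch of the interesting case above: if an
   IRTemp is bound in 'env' to another IRTemp or to a non-F64i
   constant, a use of it is replaced by the binding; anything more
   complex is left in place.  'example_subst_RdTmp' is a hypothetical
   helper for illustration only. */
#  if 0
static void example_subst_RdTmp ( IRExpr** env, IRTemp t )
{
   IRExpr* r;
   env[(Int)t] = IRExpr_Const(IRConst_U32(7));
   r = subst_Expr( env, IRExpr_RdTmp(t) );
   vassert(r->tag == Iex_Const && r->Iex.Const.con->Ico.U32 == 7);
}
#  endif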
   2335 
   2336 /* Apply the subst to stmt, then fold the result as much as possible.
   2337    Much simplified due to stmt being previously flattened.  As a
   2338    result of this, the stmt may wind up being turned into a no-op.
   2339 */
   2340 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
   2341 {
   2342 #  if 0
   2343    vex_printf("\nsubst and fold stmt\n");
   2344    ppIRStmt(st);
   2345    vex_printf("\n");
   2346 #  endif
   2347 
   2348    switch (st->tag) {
   2349       case Ist_AbiHint:
   2350          vassert(isIRAtom(st->Ist.AbiHint.base));
   2351          vassert(isIRAtom(st->Ist.AbiHint.nia));
   2352          return IRStmt_AbiHint(
   2353                    fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
   2354                    st->Ist.AbiHint.len,
   2355                    fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
   2356                 );
   2357       case Ist_Put:
   2358          vassert(isIRAtom(st->Ist.Put.data));
   2359          return IRStmt_Put(
   2360                    st->Ist.Put.offset,
   2361                    fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
   2362                 );
   2363 
   2364       case Ist_PutI: {
   2365          IRPutI *puti, *puti2;
   2366          puti = st->Ist.PutI.details;
   2367          vassert(isIRAtom(puti->ix));
   2368          vassert(isIRAtom(puti->data));
   2369          puti2 = mkIRPutI(puti->descr,
   2370                           fold_Expr(env, subst_Expr(env, puti->ix)),
   2371                           puti->bias,
   2372                           fold_Expr(env, subst_Expr(env, puti->data)));
   2373          return IRStmt_PutI(puti2);
   2374       }
   2375 
   2376       case Ist_WrTmp:
   2377          /* This is the one place where an expr (st->Ist.WrTmp.data) is
   2378             allowed to be more than just a constant or a tmp. */
   2379          return IRStmt_WrTmp(
   2380                    st->Ist.WrTmp.tmp,
   2381                    fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
   2382                 );
   2383 
   2384       case Ist_Store:
   2385          vassert(isIRAtom(st->Ist.Store.addr));
   2386          vassert(isIRAtom(st->Ist.Store.data));
   2387          return IRStmt_Store(
   2388                    st->Ist.Store.end,
   2389                    fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
   2390                    fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
   2391                 );
   2392 
   2393       case Ist_CAS: {
   2394          IRCAS *cas, *cas2;
   2395          cas = st->Ist.CAS.details;
   2396          vassert(isIRAtom(cas->addr));
   2397          vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   2398          vassert(isIRAtom(cas->expdLo));
   2399          vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   2400          vassert(isIRAtom(cas->dataLo));
   2401          cas2 = mkIRCAS(
   2402                    cas->oldHi, cas->oldLo, cas->end,
   2403                    fold_Expr(env, subst_Expr(env, cas->addr)),
   2404                    cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
   2405                                : NULL,
   2406                    fold_Expr(env, subst_Expr(env, cas->expdLo)),
   2407                    cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
   2408                                : NULL,
   2409                    fold_Expr(env, subst_Expr(env, cas->dataLo))
   2410                 );
   2411          return IRStmt_CAS(cas2);
   2412       }
   2413 
   2414       case Ist_LLSC:
   2415          vassert(isIRAtom(st->Ist.LLSC.addr));
   2416          if (st->Ist.LLSC.storedata)
   2417             vassert(isIRAtom(st->Ist.LLSC.storedata));
   2418          return IRStmt_LLSC(
   2419                    st->Ist.LLSC.end,
   2420                    st->Ist.LLSC.result,
   2421                    fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
   2422                    st->Ist.LLSC.storedata
   2423                       ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
   2424                       : NULL
   2425                 );
   2426 
   2427       case Ist_Dirty: {
   2428          Int     i;
   2429          IRDirty *d, *d2;
   2430          d = st->Ist.Dirty.details;
   2431          d2 = emptyIRDirty();
   2432          *d2 = *d;
   2433          d2->args = shallowCopyIRExprVec(d2->args);
   2434          if (d2->mFx != Ifx_None) {
   2435             vassert(isIRAtom(d2->mAddr));
   2436             d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
   2437          }
   2438          vassert(isIRAtom(d2->guard));
   2439          d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
   2440          for (i = 0; d2->args[i]; i++) {
   2441             vassert(isIRAtom(d2->args[i]));
   2442             d2->args[i] = fold_Expr(env, subst_Expr(env, d2->args[i]));
   2443          }
   2444          return IRStmt_Dirty(d2);
   2445       }
   2446 
   2447       case Ist_IMark:
   2448          return IRStmt_IMark(st->Ist.IMark.addr,
   2449                              st->Ist.IMark.len,
   2450                              st->Ist.IMark.delta);
   2451 
   2452       case Ist_NoOp:
   2453          return IRStmt_NoOp();
   2454 
   2455       case Ist_MBE:
   2456          return IRStmt_MBE(st->Ist.MBE.event);
   2457 
   2458       case Ist_Exit: {
   2459          IRExpr* fcond;
   2460          vassert(isIRAtom(st->Ist.Exit.guard));
   2461          fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
   2462          if (fcond->tag == Iex_Const) {
   2463             /* Interesting.  The condition on this exit has folded down to
   2464                a constant. */
   2465             vassert(fcond->Iex.Const.con->tag == Ico_U1);
   2466             vassert(fcond->Iex.Const.con->Ico.U1 == False
   2467                     || fcond->Iex.Const.con->Ico.U1 == True);
   2468             if (fcond->Iex.Const.con->Ico.U1 == False) {
   2469                /* exit is never going to happen, so dump the statement. */
   2470                return IRStmt_NoOp();
   2471             } else {
   2472                vassert(fcond->Iex.Const.con->Ico.U1 == True);
   2473                /* Hmmm.  The exit has become unconditional.  Leave it
   2474                   as it is for now, since we'd have to truncate the BB
   2475                   at this point, which is tricky.  Such truncation is
   2476                   done later by the dead-code elimination pass. */
   2477                /* fall out into the reconstruct-the-exit code. */
   2478                if (vex_control.iropt_verbosity > 0)
   2479                   /* really a misuse of vex_control.iropt_verbosity */
   2480                   vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
   2481             }
   2482          }
   2483          return IRStmt_Exit(fcond, st->Ist.Exit.jk,
   2484                                    st->Ist.Exit.dst, st->Ist.Exit.offsIP);
   2485       }
   2486 
   2487    default:
   2488       vex_printf("\n"); ppIRStmt(st);
   2489       vpanic("subst_and_fold_Stmt");
   2490    }
   2491 }
   2492 
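/* Minimal, disabled sketch: a WrTmp whose rhs is entirely constant
   comes back with the rhs folded down to a single constant, which
   cprop_BB (below) will then record in 'env' rather than emit.
   'example_fold_WrTmp' is a hypothetical helper for illustration
   only. */
#  if 0
static void example_fold_WrTmp ( IRExpr** env, IRTemp t )
{
   IRStmt* st  = IRStmt_WrTmp( t,
                    IRExpr_Binop( Iop_And32,
                                  IRExpr_Const(IRConst_U32(0xFF)),
                                  IRExpr_Const(IRConst_U32(0x1234)) ) );
   IRStmt* st2 = subst_and_fold_Stmt( env, st );
   vassert(st2->tag == Ist_WrTmp
           && st2->Ist.WrTmp.data->tag == Iex_Const
           && st2->Ist.WrTmp.data->Iex.Const.con->Ico.U32 == 0x34);
}
#  endif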
   2493 
   2494 IRSB* cprop_BB ( IRSB* in )
   2495 {
   2496    Int      i;
   2497    IRSB*    out;
   2498    IRStmt*  st2;
   2499    Int      n_tmps = in->tyenv->types_used;
   2500    IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
   2501 
   2502    out = emptyIRSB();
   2503    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   2504 
   2505    /* Set up the env which travels forward.  This holds a
   2506       substitution, mapping IRTemps to IRExprs. The environment
   2507       is to be applied as we move along.  Keys are IRTemps.
   2508       Values are IRExpr*s.
   2509    */
   2510    for (i = 0; i < n_tmps; i++)
   2511       env[i] = NULL;
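
   /* Illustrative scenario (assumed, not taken from any particular
      guest): given
         t3 = Add32(t1,t2)
         t4 = Add32(t1,t2)
         t5 = Xor32(t3,t4)
      env records the rhs of t3 and t4; neither is propagated into t5
      by subst_Expr (they are not constants or tmp-tmp copies), but
      sameIRExprs can look through them, so fold_Expr rewrites t5's
      rhs to the constant 0x0:I32. */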
   2512 
   2513    /* For each original SSA-form stmt ... */
   2514    for (i = 0; i < in->stmts_used; i++) {
   2515 
   2516       /* First apply the substitution to the current stmt.  This
   2517          propagates in any constants and tmp-tmp assignments
   2518          accumulated prior to this point.  As part of the call to
   2519          subst_and_fold_Stmt, also fold any constant expressions that result. */
   2520 
   2521       st2 = in->stmts[i];
   2522 
   2523       /* perhaps st2 is already a no-op? */
   2524       if (st2->tag == Ist_NoOp) continue;
   2525 
   2526       st2 = subst_and_fold_Stmt( env, st2 );
   2527 
   2528       /* If the statement has been folded into a no-op, forget it. */
   2529       if (st2->tag == Ist_NoOp) continue;
   2530 
   2531       /* If the statement assigns to an IRTemp add it to the running
   2532          environment. This is for the benefit of copy propagation
   2533          and to allow sameIRExprs to look through IRTemps. */
   2534       if (st2->tag == Ist_WrTmp) {
   2535          vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
   2536          env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   2537 
   2538          /* 't1 = t2' -- don't add to BB; will be optimized out */
   2539          if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) continue;
   2540 
   2541          /* 't = const' && 'const != F64i' -- don't add to BB
   2542             Note, we choose not to propagate const when const is an
   2543             F64i, so that F64i literals can be CSE'd later.  This helps
   2544             x86 floating point code generation. */
   2545          if (st2->Ist.WrTmp.data->tag == Iex_Const
   2546              && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) continue;
   2547       }
   2548 
   2549       /* Not interesting, copy st2 into the output block. */
   2550       addStmtToIRSB( out, st2 );
   2551    }
   2552 
   2553 #if STATS_IROPT
   2554    vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
   2555               invocation_count, recursion_count, success_count,
   2556               recursion_success_count, max_nodes_visited);
   2557 #endif
   2558 
   2559    out->next     = subst_Expr( env, in->next );
   2560    out->jumpkind = in->jumpkind;
   2561    out->offsIP   = in->offsIP;
   2562    return out;
   2563 }
   2564 
   2565 
   2566 /*---------------------------------------------------------------*/
   2567 /*--- Dead code (t = E) removal                               ---*/
   2568 /*---------------------------------------------------------------*/
   2569 
   2570 /* As a side effect, also removes all code following an unconditional
   2571    side exit. */
   2572 
   2573 /* The 'set' tracked below is conceptually a map from IRTemp to nothing
   2574    -- really just representing a set of IRTemps.
   2575 */
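
/* Disabled sketch of the representation actually used below: the set
   is a Bool array indexed by IRTemp, sized from the block's type
   environment.  'example_mk_useset' is a hypothetical helper for
   illustration only. */
#  if 0
static Bool* example_mk_useset ( IRSB* bb )
{
   Int   i;
   Int   n   = bb->tyenv->types_used;
   Bool* set = LibVEX_Alloc(n * sizeof(Bool));
   for (i = 0; i < n; i++)
      set[i] = False;
   return set;
}
#  endif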
   2576 
   2577 inline
   2578 static void addUses_Temp ( Bool* set, IRTemp tmp )
   2579 {
   2580    set[(Int)tmp] = True;
   2581 }
   2582 
   2583 static void addUses_Expr ( Bool* set, IRExpr* e )
   2584 {
   2585    Int i;
   2586    switch (e->tag) {
   2587       case Iex_GetI:
   2588          addUses_Expr(set, e->Iex.GetI.ix);
   2589          return;
   2590       case Iex_Mux0X:
   2591          addUses_Expr(set, e->Iex.Mux0X.cond);
   2592          addUses_Expr(set, e->Iex.Mux0X.expr0);
   2593          addUses_Expr(set, e->Iex.Mux0X.exprX);
   2594          return;
   2595       case Iex_CCall:
   2596          for (i = 0; e->Iex.CCall.args[i]; i++)
   2597             addUses_Expr(set, e->Iex.CCall.args[i]);
   2598          return;
   2599       case Iex_Load:
   2600          addUses_Expr(set, e->Iex.Load.addr);
   2601          return;
   2602       case Iex_Qop:
   2603          addUses_Expr(set, e->Iex.Qop.details->arg1);
   2604          addUses_Expr(set, e->Iex.Qop.details->arg2);
   2605          addUses_Expr(set, e->Iex.Qop.details->arg3);
   2606          addUses_Expr(set, e->Iex.Qop.details->arg4);
   2607          return;
   2608       case Iex_Triop:
   2609          addUses_Expr(set, e->Iex.Triop.details->arg1);
   2610          addUses_Expr(set, e->Iex.Triop.details->arg2);
   2611          addUses_Expr(set, e->Iex.Triop.details->arg3);
   2612          return;
   2613       case Iex_Binop:
   2614          addUses_Expr(set, e->Iex.Binop.arg1);
   2615          addUses_Expr(set, e->Iex.Binop.arg2);
   2616          return;
   2617       case Iex_Unop:
   2618          addUses_Expr(set, e->Iex.Unop.arg);
   2619          return;
   2620       case Iex_RdTmp:
   2621          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2622          return;
   2623       case Iex_Const:
   2624       case Iex_Get:
   2625          return;
   2626       default:
   2627          vex_printf("\n");
   2628          ppIRExpr(e);
   2629          vpanic("addUses_Expr");
   2630    }
   2631 }
   2632 
   2633 static void addUses_Stmt ( Bool* set, IRStmt* st )
   2634 {
   2635    Int      i;
   2636    IRDirty* d;
   2637    IRCAS*   cas;
   2638    switch (st->tag) {
   2639       case Ist_AbiHint:
   2640          addUses_Expr(set, st->Ist.AbiHint.base);
   2641          addUses_Expr(set, st->Ist.AbiHint.nia);
   2642          return;
   2643       case Ist_PutI:
   2644          addUses_Expr(set, st->Ist.PutI.details->ix);
   2645          addUses_Expr(set, st->Ist.PutI.details->data);
   2646          return;
   2647       case Ist_WrTmp:
   2648          addUses_Expr(set, st->Ist.WrTmp.data);
   2649          return;
   2650       case Ist_Put:
   2651          addUses_Expr(set, st->Ist.Put.data);
   2652          return;
   2653       case Ist_Store:
   2654          addUses_Expr(set, st->Ist.Store.addr);
   2655          addUses_Expr(set, st->Ist.Store.data);
   2656          return;
   2657       case Ist_CAS:
   2658          cas = st->Ist.CAS.details;
   2659          addUses_Expr(set, cas->addr);
   2660          if (cas->expdHi)
   2661             addUses_Expr(set, cas->expdHi);
   2662          addUses_Expr(set, cas->expdLo);
   2663          if (cas->dataHi)
   2664             addUses_Expr(set, cas->dataHi);
   2665          addUses_Expr(set, cas->dataLo);
   2666          return;
   2667       case Ist_LLSC:
   2668          addUses_Expr(set, st->Ist.LLSC.addr);
   2669          if (st->Ist.LLSC.storedata)
   2670             addUses_Expr(set, st->Ist.LLSC.storedata);
   2671          return;
   2672       case Ist_Dirty:
   2673          d = st->Ist.Dirty.details;
   2674          if (d->mFx != Ifx_None)
   2675             addUses_Expr(set, d->mAddr);
   2676          addUses_Expr(set, d->guard);
   2677          for (i = 0; d->args[i] != NULL; i++)
   2678             addUses_Expr(set, d->args[i]);
   2679          return;
   2680       case Ist_NoOp:
   2681       case Ist_IMark:
   2682       case Ist_MBE:
   2683          return;
   2684       case Ist_Exit:
   2685          addUses_Expr(set, st->Ist.Exit.guard);
   2686          return;
   2687       default:
   2688          vex_printf("\n");
   2689          ppIRStmt(st);
   2690          vpanic("addUses_Stmt");
   2691    }
   2692 }
   2693 
   2694 
   2695 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   2696 static Bool isZeroU1 ( IRExpr* e )
   2697 {
   2698    return toBool( e->tag == Iex_Const
   2699                   && e->Iex.Const.con->tag == Ico_U1
   2700                   && e->Iex.Const.con->Ico.U1 == False );
   2701 }
   2702 
   2703 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   2704 static Bool isOneU1 ( IRExpr* e )
   2705 {
   2706    return toBool( e->tag == Iex_Const
   2707                   && e->Iex.Const.con->tag == Ico_U1
   2708                   && e->Iex.Const.con->Ico.U1 == True );
   2709 }
   2710 
   2711 
   2712 /* Note, this destructively modifies the given IRSB. */
   2713 
   2714 /* Scan backwards through statements, carrying a set of IRTemps which
   2715    are known to be used after the current point.  On encountering 't =
   2716    E', delete the binding if it is not used.  Otherwise, add any temp
   2717    uses to the set and keep on moving backwards.
   2718 
   2719    As an enhancement, the first (backwards) pass searches for IR exits
   2720    with always-taken conditions and notes the location of the earliest
   2721    one in the block.  If any such are found, a second pass copies the
   2722    exit destination and jump kind to the bb-end.  Then, the exit and
   2723    all statements following it are turned into no-ops.
   2724 */
   2725 
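         /* Illustrative example (invented temporaries; assume bb->next does
            not mention t3):
               t1 = GET:I32(0)
               t2 = Add32(t1,0x4:I32)
               t3 = Add32(t1,0x8:I32)
               PUT(0) = t2
            Scanning backwards, the PUT marks t2 as used, so the t2 binding
            survives and in turn marks t1 as used.  Nothing ever marks t3,
            so its binding is rewritten to a NoOp. */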
   2726 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
   2727 {
   2728    Int     i, i_unconditional_exit;
   2729    Int     n_tmps = bb->tyenv->types_used;
   2730    Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   2731    IRStmt* st;
   2732 
   2733    for (i = 0; i < n_tmps; i++)
   2734       set[i] = False;
   2735 
   2736    /* start off by recording IRTemp uses in the next field. */
   2737    addUses_Expr(set, bb->next);
   2738 
   2739    /* First pass */
   2740 
   2741    /* Work backwards through the stmts */
   2742    i_unconditional_exit = -1;
   2743    for (i = bb->stmts_used-1; i >= 0; i--) {
   2744       st = bb->stmts[i];
   2745       if (st->tag == Ist_NoOp)
   2746          continue;
   2747       /* take note of any unconditional exits */
   2748       if (st->tag == Ist_Exit
   2749           && isOneU1(st->Ist.Exit.guard))
   2750          i_unconditional_exit = i;
   2751       if (st->tag == Ist_WrTmp
   2752           && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
   2753           /* it's an IRTemp which never got used.  Delete it. */
   2754          if (DEBUG_IROPT) {
   2755             vex_printf("DEAD: ");
   2756             ppIRStmt(st);
   2757             vex_printf("\n");
   2758          }
   2759          bb->stmts[i] = IRStmt_NoOp();
   2760       }
   2761       else
   2762       if (st->tag == Ist_Dirty
   2763           && st->Ist.Dirty.details->guard
   2764           && isZeroU1(st->Ist.Dirty.details->guard)) {
   2765          /* This is a dirty helper which will never get called.
   2766             Delete it. */
   2767          bb->stmts[i] = IRStmt_NoOp();
   2768        }
   2769        else {
   2770          /* Note any IRTemp uses made by the current statement. */
   2771          addUses_Stmt(set, st);
   2772       }
   2773    }
   2774 
   2775    /* Optional second pass: if any unconditional exits were found,
   2776       delete them and all following statements. */
   2777 
   2778    if (i_unconditional_exit != -1) {
   2779       if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
   2780                         i_unconditional_exit);
   2781       vassert(i_unconditional_exit >= 0
   2782               && i_unconditional_exit < bb->stmts_used);
   2783       bb->next
   2784          = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
   2785       bb->jumpkind
   2786          = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
   2787       bb->offsIP
   2788          = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
   2789       for (i = i_unconditional_exit; i < bb->stmts_used; i++)
   2790          bb->stmts[i] = IRStmt_NoOp();
   2791    }
   2792 }
   2793 
   2794 
   2795 /*---------------------------------------------------------------*/
   2796 /*--- Specialisation of helper function calls, in             ---*/
   2797 /*--- collaboration with the front end                        ---*/
   2798 /*---------------------------------------------------------------*/
   2799 
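         /* Offer each clean-helper call (Iex_CCall) in the block back to the
            front end via the supplied specHelper callback, together with the
            statements that precede it.  If the callback can produce a cheaper
            equivalent IRExpr, it is installed in place of the call; otherwise
            the call is kept.  For instance (purely as an illustration), the
            x86 front end can often fold a call to x86g_calculate_condition
            down to a simple comparison on the flag-thunk values when the
            condition-code argument is a constant.  If anything was replaced,
            the block is re-flattened, since replacements need not be flat. */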
   2800 static
   2801 IRSB* spec_helpers_BB(
   2802          IRSB* bb,
   2803          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
   2804       )
   2805 {
   2806    Int     i;
   2807    IRStmt* st;
   2808    IRExpr* ex;
   2809    Bool    any = False;
   2810 
   2811    for (i = bb->stmts_used-1; i >= 0; i--) {
   2812       st = bb->stmts[i];
   2813 
   2814       if (st->tag != Ist_WrTmp
   2815           || st->Ist.WrTmp.data->tag != Iex_CCall)
   2816          continue;
   2817 
   2818       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
   2819                           st->Ist.WrTmp.data->Iex.CCall.args,
   2820                           &bb->stmts[0], i );
   2821       if (!ex)
   2822         /* the front end can't think of a suitable replacement */
   2823         continue;
   2824 
   2825       /* We got something better.  Install it in the bb. */
   2826       any = True;
   2827       bb->stmts[i]
   2828          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
   2829 
   2830       if (0) {
   2831          vex_printf("SPEC: ");
   2832          ppIRExpr(st->Ist.WrTmp.data);
   2833          vex_printf("  -->  ");
   2834          ppIRExpr(ex);
   2835          vex_printf("\n");
   2836       }
   2837    }
   2838 
   2839    if (any)
   2840       bb = flatten_BB(bb);
   2841    return bb;
   2842 }
   2843 
   2844 
   2845 /*---------------------------------------------------------------*/
   2846 /*--- Determination of guest state aliasing relationships     ---*/
   2847 /*---------------------------------------------------------------*/
   2848 
   2849 /* These are helper functions for CSE and GetI/PutI transformations.
   2850 
   2851    Determine, to the extent possible, the relationship between two
   2852    guest state accesses.  The possible outcomes are:
   2853 
   2854    * Exact alias.  These two accesses denote precisely the same
   2855      piece of the guest state.
   2856 
   2857    * Definitely no alias.  These two accesses are guaranteed not to
   2858      overlap any part of the guest state.
   2859 
   2860    * Unknown -- if neither of the above can be established.
   2861 
   2862    If in doubt, return Unknown.  */
   2863 
   2864 typedef
   2865    enum { ExactAlias, NoAlias, UnknownAlias }
   2866    GSAliasing;
   2867 
   2868 
   2869 /* Produces the alias relation between an indexed guest
   2870    state access and a non-indexed access. */
   2871 
   2872 static
   2873 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   2874                                     Int offset2, IRType ty2 )
   2875 {
   2876    UInt minoff1, maxoff1, minoff2, maxoff2;
   2877 
   2878    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2879    minoff2 = offset2;
   2880    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   2881 
   2882    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2883       return NoAlias;
   2884 
   2885    /* Could probably do better here if required.  For the moment
   2886       however just claim not to know anything more. */
   2887    return UnknownAlias;
   2888 }
   2889 
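         /* Worked example (illustrative numbers only): for a register-array
            descriptor covering guest offsets 100 .. 163 and a non-indexed
            I32 access at offset 168, we get maxoff1 == 163 < minoff2 == 168,
            so the accesses cannot overlap and NoAlias is returned.  An I32
            access at offset 160 would fall inside the array's byte range, in
            which case the function plays safe and returns UnknownAlias. */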
   2890 
   2891 /* Produces the alias relation between two indexed guest state
   2892    accesses. */
   2893 
   2894 static
   2895 GSAliasing getAliasingRelation_II (
   2896               IRRegArray* descr1, IRExpr* ix1, Int bias1,
   2897               IRRegArray* descr2, IRExpr* ix2, Int bias2
   2898            )
   2899 {
   2900    UInt minoff1, maxoff1, minoff2, maxoff2;
   2901    Int  iters;
   2902 
   2903    /* First try hard to show they don't alias. */
   2904    getArrayBounds( descr1, &minoff1, &maxoff1 );
   2905    getArrayBounds( descr2, &minoff2, &maxoff2 );
   2906    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   2907       return NoAlias;
   2908 
   2909    /* So the two arrays at least partially overlap.  To get any
   2910       further we'll have to be sure that the descriptors are
   2911       identical. */
   2912    if (!eqIRRegArray(descr1, descr2))
   2913       return UnknownAlias;
   2914 
   2915    /* The descriptors are identical.  Now the only difference can be
   2916       in the index expressions.  If they cannot be shown to be
   2917       identical, we have to say we don't know what the aliasing
   2918       relation will be.  Now, since the IR is flattened, the index
   2919       expressions should be atoms -- either consts or tmps.  So that
   2920       makes the comparison simple. */
   2921    vassert(isIRAtom(ix1));
   2922    vassert(isIRAtom(ix2));
   2923    if (!eqIRAtom(ix1,ix2))
   2924       return UnknownAlias;
   2925 
   2926    /* Ok, the index expressions are identical.  So now the only way
   2927       they can be different is in the bias.  Normalise this
   2928       paranoidly, to reliably establish equality/non-equality. */
   2929 
   2930    /* So now we know that the GetI and PutI index the same array
   2931       with the same base.  Are the offsets the same, modulo the
   2932       array size?  Do this paranoidly. */
   2933    vassert(descr1->nElems == descr2->nElems);
   2934    vassert(descr1->elemTy == descr2->elemTy);
   2935    vassert(descr1->base   == descr2->base);
   2936    iters = 0;
   2937    while (bias1 < 0 || bias2 < 0) {
   2938       bias1 += descr1->nElems;
   2939       bias2 += descr1->nElems;
   2940       iters++;
   2941       if (iters > 10)
   2942          vpanic("getAliasingRelation: iters");
   2943    }
   2944    vassert(bias1 >= 0 && bias2 >= 0);
   2945    bias1 %= descr1->nElems;
   2946    bias2 %= descr1->nElems;
   2947    vassert(bias1 >= 0 && bias1 < descr1->nElems);
   2948    vassert(bias2 >= 0 && bias2 < descr1->nElems);
   2949 
    2950    /* Finally, bias1 and bias2 are normalised into the range
    2951       0 .. descr1->nElems - 1.  And so we can establish
    2952       equality/non-equality. */
   2953 
   2954    return bias1==bias2 ? ExactAlias : NoAlias;
   2955 }
   2956 
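         /* Worked example (illustrative numbers only): suppose both accesses
            use identical descriptors with nElems == 8 and the same index tmp,
            with bias1 == -3 and bias2 == 5.  The loop above adds 8 to both,
            giving 5 and 13, and reducing mod 8 gives 5 and 5, so the accesses
            hit the same element and ExactAlias is returned.  Had bias2 been 6,
            the residues would differ (5 vs 6) and the result would be NoAlias,
            since a shared index with distinct residues can never select the
            same element. */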
   2957 
   2958 /*---------------------------------------------------------------*/
   2959 /*--- Common Subexpression Elimination                        ---*/
   2960 /*---------------------------------------------------------------*/
   2961 
   2962 /* Expensive in time and space. */
   2963 
   2964 /* Uses two environments:
    2965    an IRTemp -> IRTemp mapping, and
    2966    a mapping from AvailExpr* to IRTemp.
   2967 */
   2968 
   2969 typedef
   2970    struct {
   2971       enum { TCc, TCt } tag;
   2972       union { IRTemp tmp; IRConst* con; } u;
   2973    }
   2974    TmpOrConst;
   2975 
   2976 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
   2977 {
   2978    if (tc1->tag != tc2->tag)
   2979       return False;
   2980    switch (tc1->tag) {
   2981       case TCc:
   2982          return eqIRConst(tc1->u.con, tc2->u.con);
   2983       case TCt:
   2984          return tc1->u.tmp == tc2->u.tmp;
   2985       default:
   2986          vpanic("eqTmpOrConst");
   2987    }
   2988 }
   2989 
   2990 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
   2991 {
   2992    Bool eq = cee1->addr == cee2->addr;
   2993    if (eq) {
   2994       vassert(cee1->regparms == cee2->regparms);
   2995       vassert(cee1->mcx_mask == cee2->mcx_mask);
   2996       /* Names should be the same too, but we don't bother to
   2997          check. */
   2998    }
   2999    return eq;
   3000 }
   3001 
   3002 /* Convert a NULL terminated IRExpr* vector to an array of
   3003    TmpOrConsts, and a length. */
   3004 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
   3005                                        /*OUT*/Int* nOuts,
   3006                                        IRExpr** ins )
   3007 {
   3008    Int i, n;
   3009    /* We have to make two passes, one to count, one to copy. */
   3010    for (n = 0; ins[n]; n++)
   3011       ;
   3012    *outs  = LibVEX_Alloc(n * sizeof(TmpOrConst));
   3013    *nOuts = n;
   3014    /* and now copy .. */
   3015    for (i = 0; i < n; i++) {
   3016       IRExpr*       arg = ins[i];
   3017       TmpOrConst* dst = &(*outs)[i];
   3018       if (arg->tag == Iex_RdTmp) {
   3019          dst->tag   = TCt;
   3020          dst->u.tmp = arg->Iex.RdTmp.tmp;
   3021       }
   3022       else if (arg->tag == Iex_Const) {
   3023          dst->tag   = TCc;
   3024          dst->u.con = arg->Iex.Const.con;
   3025       }
   3026       else {
   3027          /* Failure of this is serious; it means that the presented arg
   3028             isn't an IR atom, as it should be. */
   3029          vpanic("irExprVec_to_TmpOrConsts");
   3030       }
   3031    }
   3032 }
   3033 
   3034 typedef
   3035    struct {
   3036       enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt, CCall } tag;
   3037       union {
   3038          /* unop(tmp) */
   3039          struct {
   3040             IROp   op;
   3041             IRTemp arg;
   3042          } Ut;
   3043          /* binop(tmp,tmp) */
   3044          struct {
   3045             IROp   op;
   3046             IRTemp arg1;
   3047             IRTemp arg2;
   3048          } Btt;
   3049          /* binop(tmp,const) */
   3050          struct {
   3051             IROp    op;
   3052             IRTemp  arg1;
   3053             IRConst con2;
   3054          } Btc;
   3055          /* binop(const,tmp) */
   3056          struct {
   3057             IROp    op;
   3058             IRConst con1;
   3059             IRTemp  arg2;
   3060          } Bct;
   3061          /* F64i-style const */
   3062          struct {
   3063             ULong f64i;
   3064          } Cf64i;
   3065          /* Mux0X(tmp,tmp,tmp) */
   3066          struct {
   3067             IRTemp co;
   3068             IRTemp e0;
   3069             IRTemp eX;
   3070          } Mttt;
   3071          /* GetI(descr,tmp,bias)*/
   3072          struct {
   3073             IRRegArray* descr;
   3074             IRTemp      ix;
   3075             Int         bias;
   3076          } GetIt;
   3077          /* Clean helper call */
   3078          struct {
   3079             IRCallee*   cee;
   3080             TmpOrConst* args;
   3081             Int         nArgs;
   3082             IRType      retty;
   3083          } CCall;
   3084       } u;
   3085    }
   3086    AvailExpr;
   3087 
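         /* For orientation, some illustrative mappings from (flat) IR onto
            the AvailExpr forms above, with invented temporaries:
               unop(tmp),        e.g. 32Uto64(t3)        -> Ut
               binop(tmp,tmp),   e.g. Add32(t1,t2)       -> Btt
               binop(tmp,const), e.g. Add32(t1,0x4:I32)  -> Btc
               binop(const,tmp), e.g. Sub32(0x0:I32,t2)  -> Bct
               F64i literals                             -> Cf64i
               Mux0X over three tmps                     -> Mttt
               GetI with a tmp index                     -> GetIt
               clean helper calls (atomic args only)     -> CCall
            Anything else is not representable here and hence never CSE'd. */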
   3088 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
   3089 {
   3090    if (LIKELY(a1->tag != a2->tag))
   3091       return False;
   3092    switch (a1->tag) {
   3093       case Ut:
   3094          return toBool(
   3095                 a1->u.Ut.op == a2->u.Ut.op
   3096                 && a1->u.Ut.arg == a2->u.Ut.arg);
   3097       case Btt:
   3098          return toBool(
   3099                 a1->u.Btt.op == a2->u.Btt.op
   3100                 && a1->u.Btt.arg1 == a2->u.Btt.arg1
   3101                 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
   3102       case Btc:
   3103          return toBool(
   3104                 a1->u.Btc.op == a2->u.Btc.op
   3105                 && a1->u.Btc.arg1 == a2->u.Btc.arg1
   3106                 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
   3107       case Bct:
   3108          return toBool(
   3109                 a1->u.Bct.op == a2->u.Bct.op
   3110                 && a1->u.Bct.arg2 == a2->u.Bct.arg2
   3111                 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
   3112       case Cf64i:
   3113          return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
   3114       case Mttt:
   3115          return toBool(a1->u.Mttt.co == a2->u.Mttt.co
   3116                        && a1->u.Mttt.e0 == a2->u.Mttt.e0
   3117                        && a1->u.Mttt.eX == a2->u.Mttt.eX);
   3118       case GetIt:
   3119          return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
   3120                        && a1->u.GetIt.ix == a2->u.GetIt.ix
   3121                        && a1->u.GetIt.bias == a2->u.GetIt.bias);
   3122       case CCall: {
   3123          Int  i, n;
   3124          Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
   3125                    && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
   3126          if (eq) {
   3127             n = a1->u.CCall.nArgs;
   3128             for (i = 0; i < n; i++) {
   3129                if (!eqTmpOrConst( &a1->u.CCall.args[i],
   3130                                   &a2->u.CCall.args[i] )) {
   3131                   eq = False;
   3132                   break;
   3133                }
   3134             }
   3135          }
   3136          if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
   3137          return eq;
   3138       }
   3139       default: vpanic("eq_AvailExpr");
   3140    }
   3141 }
   3142 
   3143 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
   3144 {
   3145    IRConst* con;
   3146    switch (ae->tag) {
   3147       case Ut:
   3148          return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
   3149       case Btt:
   3150          return IRExpr_Binop( ae->u.Btt.op,
   3151                               IRExpr_RdTmp(ae->u.Btt.arg1),
   3152                               IRExpr_RdTmp(ae->u.Btt.arg2) );
   3153       case Btc:
   3154          con = LibVEX_Alloc(sizeof(IRConst));
   3155          *con = ae->u.Btc.con2;
   3156          return IRExpr_Binop( ae->u.Btc.op,
   3157                               IRExpr_RdTmp(ae->u.Btc.arg1),
   3158                               IRExpr_Const(con) );
   3159       case Bct:
   3160          con = LibVEX_Alloc(sizeof(IRConst));
   3161          *con = ae->u.Bct.con1;
   3162          return IRExpr_Binop( ae->u.Bct.op,
   3163                               IRExpr_Const(con),
   3164                               IRExpr_RdTmp(ae->u.Bct.arg2) );
   3165       case Cf64i:
   3166          return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
   3167       case Mttt:
   3168          return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
   3169                              IRExpr_RdTmp(ae->u.Mttt.e0),
   3170                              IRExpr_RdTmp(ae->u.Mttt.eX));
   3171       case GetIt:
   3172          return IRExpr_GetI(ae->u.GetIt.descr,
   3173                             IRExpr_RdTmp(ae->u.GetIt.ix),
   3174                             ae->u.GetIt.bias);
   3175       case CCall: {
   3176          Int i, n = ae->u.CCall.nArgs;
   3177          vassert(n >= 0);
   3178          IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*));
   3179          vec[n] = NULL;
   3180          for (i = 0; i < n; i++) {
   3181             TmpOrConst* tc = &ae->u.CCall.args[i];
   3182             if (tc->tag == TCc) {
   3183                vec[i] = IRExpr_Const(tc->u.con);
   3184             }
   3185             else if (tc->tag == TCt) {
   3186                vec[i] = IRExpr_RdTmp(tc->u.tmp);
   3187             }
   3188             else vpanic("availExpr_to_IRExpr:CCall-arg");
   3189          }
   3190          return IRExpr_CCall(ae->u.CCall.cee,
   3191                              ae->u.CCall.retty,
   3192                              vec);
   3193       }
   3194       default:
   3195          vpanic("availExpr_to_IRExpr");
   3196    }
   3197 }
   3198 
   3199 inline
   3200 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   3201 {
   3202    HWord res;
   3203    /* env :: IRTemp -> IRTemp */
   3204    if (lookupHHW( env, &res, (HWord)tmp ))
   3205       return (IRTemp)res;
   3206    else
   3207       return tmp;
   3208 }
   3209 
   3210 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   3211 {
   3212    /* env :: IRTemp -> IRTemp */
   3213    switch (ae->tag) {
   3214       case Ut:
   3215          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   3216          break;
   3217       case Btt:
   3218          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   3219          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   3220          break;
   3221       case Btc:
   3222          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   3223          break;
   3224       case Bct:
   3225          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   3226          break;
   3227       case Cf64i:
   3228          break;
   3229       case Mttt:
   3230          ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
   3231          ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
   3232          ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
   3233          break;
   3234       case GetIt:
   3235          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   3236          break;
   3237       case CCall: {
    3238          Int i, n = ae->u.CCall.nArgs;
   3239          for (i = 0; i < n; i++) {
   3240             TmpOrConst* tc = &ae->u.CCall.args[i];
   3241             if (tc->tag == TCt) {
   3242                tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   3243             }
   3244          }
   3245          break;
   3246       }
   3247       default:
   3248          vpanic("subst_AvailExpr");
   3249    }
   3250 }
   3251 
   3252 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
   3253 {
   3254    AvailExpr* ae;
   3255 
   3256    if (e->tag == Iex_Unop
   3257        && e->Iex.Unop.arg->tag == Iex_RdTmp) {
   3258       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3259       ae->tag      = Ut;
   3260       ae->u.Ut.op  = e->Iex.Unop.op;
   3261       ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
   3262       return ae;
   3263    }
   3264 
   3265    if (e->tag == Iex_Binop
   3266        && e->Iex.Binop.arg1->tag == Iex_RdTmp
   3267        && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   3268       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3269       ae->tag        = Btt;
   3270       ae->u.Btt.op   = e->Iex.Binop.op;
   3271       ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3272       ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   3273       return ae;
   3274    }
   3275 
   3276    if (e->tag == Iex_Binop
   3277       && e->Iex.Binop.arg1->tag == Iex_RdTmp
   3278       && e->Iex.Binop.arg2->tag == Iex_Const) {
   3279       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3280       ae->tag        = Btc;
   3281       ae->u.Btc.op   = e->Iex.Binop.op;
   3282       ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3283       ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
   3284       return ae;
   3285    }
   3286 
   3287    if (e->tag == Iex_Binop
   3288       && e->Iex.Binop.arg1->tag == Iex_Const
   3289       && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
   3290       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3291       ae->tag        = Bct;
   3292       ae->u.Bct.op   = e->Iex.Binop.op;
   3293       ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
   3294       ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
   3295       return ae;
   3296    }
   3297 
   3298    if (e->tag == Iex_Const
   3299        && e->Iex.Const.con->tag == Ico_F64i) {
   3300       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3301       ae->tag          = Cf64i;
   3302       ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
   3303       return ae;
   3304    }
   3305 
   3306    if (e->tag == Iex_Mux0X
   3307        && e->Iex.Mux0X.cond->tag == Iex_RdTmp
   3308        && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
   3309        && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
   3310       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3311       ae->tag       = Mttt;
   3312       ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
   3313       ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
   3314       ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
   3315       return ae;
   3316    }
   3317 
   3318    if (e->tag == Iex_GetI
   3319        && e->Iex.GetI.ix->tag == Iex_RdTmp) {
   3320       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3321       ae->tag           = GetIt;
   3322       ae->u.GetIt.descr = e->Iex.GetI.descr;
   3323       ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
   3324       ae->u.GetIt.bias  = e->Iex.GetI.bias;
   3325       return ae;
   3326    }
   3327 
   3328    if (e->tag == Iex_CCall) {
   3329       ae = LibVEX_Alloc(sizeof(AvailExpr));
   3330       ae->tag = CCall;
   3331       /* Ok to share only the cee, since it is immutable. */
   3332       ae->u.CCall.cee   = e->Iex.CCall.cee;
   3333       ae->u.CCall.retty = e->Iex.CCall.retty;
   3334       /* irExprVec_to_TmpOrConsts will assert if the args are
   3335          neither tmps nor constants, but that's ok .. that's all they
   3336          should be. */
   3337       irExprVec_to_TmpOrConsts(
   3338          &ae->u.CCall.args, &ae->u.CCall.nArgs,
   3339          e->Iex.CCall.args
   3340       );
   3341       return ae;
   3342    }
   3343 
   3344    return NULL;
   3345 }
   3346 
   3347 
   3348 /* The BB is modified in-place.  Returns True if any changes were
   3349    made. */
   3350 
   3351 static Bool do_cse_BB ( IRSB* bb )
   3352 {
   3353    Int        i, j, paranoia;
   3354    IRTemp     t, q;
   3355    IRStmt*    st;
   3356    AvailExpr* eprime;
   3357    AvailExpr* ae;
   3358    Bool       invalidate;
   3359    Bool       anyDone = False;
   3360 
   3361    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   3362    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
   3363 
   3364    vassert(sizeof(IRTemp) <= sizeof(HWord));
   3365 
   3366    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
   3367 
   3368    /* Iterate forwards over the stmts.
    3369       On seeing "t = E", where E is one of the AvailExpr forms above:
   3370          let E' = apply tenv substitution to E
   3371          search aenv for E'
   3372             if a mapping E' -> q is found,
   3373                replace this stmt by "t = q"
   3374                and add binding t -> q to tenv
   3375             else
   3376                add binding E' -> t to aenv
   3377                replace this stmt by "t = E'"
   3378 
   3379       Other statements are only interesting to the extent that they
   3380       might invalidate some of the expressions in aenv.  So there is
   3381       an invalidate-bindings check for each statement seen.
   3382    */
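            /* Illustrative run (invented temporaries):
                  t1 = Add32(t9,t10)
                  t2 = Add32(t9,t10)
                  t3 = Add32(t2,0x4:I32)
               Stmt 1: aenv has no entry for Add32(t9,t10), so the binding
               "Add32(t9,t10) -> t1" is added.  Stmt 2: that binding is
               found, so the stmt becomes "t2 = t1" and "t2 -> t1" goes
               into tenv.  Stmt 3: the tenv substitution first rewrites the
               RHS to Add32(t1,0x4:I32), which then gets its own aenv
               entry. */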
   3383    for (i = 0; i < bb->stmts_used; i++) {
   3384       st = bb->stmts[i];
   3385 
   3386       /* ------ BEGIN invalidate aenv bindings ------ */
   3387       /* This is critical: remove from aenv any E' -> .. bindings
   3388          which might be invalidated by this statement.  The only
    3389          vulnerable kind of binding is the GetI kind.
   3390             Dirty call - dump (paranoia level -> 2)
   3391             Store      - dump (ditto)
   3392             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
   3393          Uses getAliasingRelation_IC and getAliasingRelation_II
   3394          to do the no-overlap assessments needed for Put/PutI.
   3395       */
   3396       switch (st->tag) {
   3397          case Ist_Dirty: case Ist_Store: case Ist_MBE:
   3398          case Ist_CAS: case Ist_LLSC:
   3399             paranoia = 2; break;
   3400          case Ist_Put: case Ist_PutI:
   3401             paranoia = 1; break;
   3402          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
   3403          case Ist_WrTmp: case Ist_Exit:
   3404             paranoia = 0; break;
   3405          default:
   3406             vpanic("do_cse_BB(1)");
   3407       }
   3408 
   3409       if (paranoia > 0) {
   3410          for (j = 0; j < aenv->used; j++) {
   3411             if (!aenv->inuse[j])
   3412                continue;
   3413             ae = (AvailExpr*)aenv->key[j];
   3414             if (ae->tag != GetIt)
   3415                continue;
   3416             invalidate = False;
   3417             if (paranoia >= 2) {
   3418                invalidate = True;
   3419             } else {
   3420                vassert(paranoia == 1);
   3421                if (st->tag == Ist_Put) {
   3422                   if (getAliasingRelation_IC(
   3423                          ae->u.GetIt.descr,
   3424                          IRExpr_RdTmp(ae->u.GetIt.ix),
   3425                          st->Ist.Put.offset,
   3426                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
   3427                       ) != NoAlias)
   3428                      invalidate = True;
   3429                }
   3430                else
   3431                if (st->tag == Ist_PutI) {
   3432                   IRPutI *puti = st->Ist.PutI.details;
   3433                   if (getAliasingRelation_II(
   3434                          ae->u.GetIt.descr,
   3435                          IRExpr_RdTmp(ae->u.GetIt.ix),
   3436                          ae->u.GetIt.bias,
   3437                          puti->descr,
   3438                          puti->ix,
   3439                          puti->bias
   3440                       ) != NoAlias)
   3441                      invalidate = True;
   3442                }
   3443                else
   3444                   vpanic("do_cse_BB(2)");
   3445             }
   3446 
   3447             if (invalidate) {
   3448                aenv->inuse[j] = False;
   3449                aenv->key[j]   = (HWord)NULL;  /* be sure */
   3450             }
   3451          } /* for j */
   3452       } /* paranoia > 0 */
   3453 
    3454       /* ------ END invalidate aenv bindings ------ */
   3455 
   3456       /* ignore not-interestings */
   3457       if (st->tag != Ist_WrTmp)
   3458          continue;
   3459 
   3460       t = st->Ist.WrTmp.tmp;
   3461       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
   3462       /* ignore if not of AvailExpr form */
   3463       if (!eprime)
   3464          continue;
   3465 
   3466       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
   3467 
   3468       /* apply tenv */
   3469       subst_AvailExpr( tenv, eprime );
   3470 
   3471       /* search aenv for eprime, unfortunately the hard way */
   3472       for (j = 0; j < aenv->used; j++)
   3473          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
   3474             break;
   3475 
   3476       if (j < aenv->used) {
   3477          /* A binding E' -> q was found.  Replace stmt by "t = q" and
   3478             note the t->q binding in tenv. */
   3479          /* (this is the core of the CSE action) */
   3480          q = (IRTemp)aenv->val[j];
   3481          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
   3482          addToHHW( tenv, (HWord)t, (HWord)q );
   3483          anyDone = True;
   3484       } else {
   3485          /* No binding was found, so instead we add E' -> t to our
   3486             collection of available expressions, replace this stmt
   3487             with "t = E'", and move on. */
   3488          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
   3489          addToHHW( aenv, (HWord)eprime, (HWord)t );
   3490       }
   3491    }
   3492 
   3493    /*
   3494    ppIRSB(bb);
   3495    sanityCheckIRSB(bb, Ity_I32);
   3496    vex_printf("\n\n");
   3497    */
   3498    return anyDone;
   3499 }
   3500 
   3501 
   3502 /*---------------------------------------------------------------*/
   3503 /*--- Add32/Sub32 chain collapsing                            ---*/
   3504 /*---------------------------------------------------------------*/
   3505 
   3506 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   3507 
   3508 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   3509    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   3510    root node is Add32, not Sub32. */
   3511 
   3512 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   3513 {
   3514    if (e->tag != Iex_Binop)
   3515       return False;
   3516    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   3517       return False;
   3518    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   3519       return False;
   3520    if (e->Iex.Binop.arg2->tag != Iex_Const)
   3521       return False;
   3522    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   3523    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   3524    if (e->Iex.Binop.op == Iop_Sub32)
   3525       *i32 = -*i32;
   3526    return True;
   3527 }
   3528 
   3529 
   3530 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   3531    other tmp2.  Scan backwards from the specified start point -- an
   3532    optimisation. */
   3533 
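         /* Illustrative example (invented temporaries; t1 is assumed to be
            bound earlier in the block, e.g. by a GET):
               t2 = Add32(t1,0x4:I32)
               t3 = Sub32(t2,0x1:I32)
               t4 = Add32(t3,0x8:I32)
            collapseChain applied to t3 reports t3 == t1 + 3 (note that
            isAdd32OrSub32 negates the constant for Sub32), so the final
            statement can be rewritten by collapse_AddSub_chains_BB below
            as t4 = Add32(t1,0xB:I32). */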
   3534 static Bool collapseChain ( IRSB* bb, Int startHere,
   3535                             IRTemp tmp,
   3536                             IRTemp* tmp2, Int* i32 )
   3537 {
   3538    Int     j, ii;
   3539    IRTemp  vv;
   3540    IRStmt* st;
   3541    IRExpr* e;
   3542 
    3543    /* the (var, con) pair contains the current 'representation' for
   3544       'tmp'.  We start with 'tmp + 0'.  */
   3545    IRTemp var = tmp;
   3546    Int    con = 0;
   3547 
   3548    /* Scan backwards to see if tmp can be replaced by some other tmp
   3549      +/- a constant. */
   3550    for (j = startHere; j >= 0; j--) {
   3551       st = bb->stmts[j];
   3552       if (st->tag != Ist_WrTmp)
   3553          continue;
   3554       if (st->Ist.WrTmp.tmp != var)
   3555          continue;
   3556       e = st->Ist.WrTmp.data;
   3557       if (!isAdd32OrSub32(e, &vv, &ii))
   3558          break;
   3559       var = vv;
   3560       con += ii;
   3561    }
   3562    if (j == -1)
   3563       /* no earlier binding for var .. ill-formed IR */
   3564       vpanic("collapseChain");
   3565 
   3566    /* so, did we find anything interesting? */
   3567    if (var == tmp)
   3568       return False; /* no .. */
   3569 
   3570    *tmp2 = var;
   3571    *i32  = con;
   3572    return True;
   3573 }
   3574 
   3575 
   3576 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   3577 
   3578 static void collapse_AddSub_chains_BB ( IRSB* bb )
   3579 {
   3580    IRStmt *st;
   3581    IRTemp var, var2;
   3582    Int    i, con, con2;
   3583 
   3584    for (i = bb->stmts_used-1; i >= 0; i--) {
   3585       st = bb->stmts[i];
   3586       if (st->tag == Ist_NoOp)
   3587          continue;
   3588 
   3589       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   3590 
   3591       if (st->tag == Ist_WrTmp
   3592           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   3593 
    3594          /* So the RHS is of the form Add32(var,con) or Sub32(var,-con).
   3595             Find out if var can be expressed as var2 + con2. */
   3596          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   3597             if (DEBUG_IROPT) {
   3598                vex_printf("replacing1 ");
   3599                ppIRStmt(st);
   3600                vex_printf(" with ");
   3601             }
   3602             con2 += con;
   3603             bb->stmts[i]
   3604                = IRStmt_WrTmp(
   3605                     st->Ist.WrTmp.tmp,
   3606                     (con2 >= 0)
   3607                       ? IRExpr_Binop(Iop_Add32,
   3608                                      IRExpr_RdTmp(var2),
   3609                                      IRExpr_Const(IRConst_U32(con2)))
   3610                       : IRExpr_Binop(Iop_Sub32,
   3611                                      IRExpr_RdTmp(var2),
   3612                                      IRExpr_Const(IRConst_U32(-con2)))
   3613                  );
   3614             if (DEBUG_IROPT) {
   3615                ppIRStmt(bb->stmts[i]);
   3616                vex_printf("\n");
   3617             }
   3618          }
   3619 
   3620          continue;
   3621       }
   3622 
   3623       /* Try to collapse 't1 = GetI[t2, con]'. */
   3624 
   3625       if (st->tag == Ist_WrTmp
   3626           && st->Ist.WrTmp.data->tag == Iex_GetI
   3627           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   3628           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   3629                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   3630          if (DEBUG_IROPT) {
   3631             vex_printf("replacing3 ");
   3632             ppIRStmt(st);
   3633             vex_printf(" with ");
   3634          }
   3635          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   3636          bb->stmts[i]
   3637             = IRStmt_WrTmp(
   3638                  st->Ist.WrTmp.tmp,
   3639                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   3640                              IRExpr_RdTmp(var2),
   3641                              con2));
   3642          if (DEBUG_IROPT) {
   3643             ppIRStmt(bb->stmts[i]);
   3644             vex_printf("\n");
   3645          }
   3646          continue;
   3647       }
   3648 
   3649       /* Perhaps st is PutI[t, con] ? */
   3650       IRPutI *puti = st->Ist.PutI.details;
   3651       if (st->tag == Ist_PutI
   3652           && puti->ix->tag == Iex_RdTmp
   3653           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
   3654                                &var2, &con2)) {
   3655          if (DEBUG_IROPT) {
   3656             vex_printf("replacing2 ");
   3657             ppIRStmt(st);
   3658             vex_printf(" with ");
   3659          }
   3660          con2 += puti->bias;
   3661          bb->stmts[i]
   3662             = IRStmt_PutI(mkIRPutI(puti->descr,
   3663                                    IRExpr_RdTmp(var2),
   3664                                    con2,
   3665                                    puti->data));
   3666          if (DEBUG_IROPT) {
   3667             ppIRStmt(bb->stmts[i]);
   3668             vex_printf("\n");
   3669          }
   3670          continue;
   3671       }
   3672 
   3673    } /* for */
   3674 }
   3675 
   3676 
   3677 /*---------------------------------------------------------------*/
   3678 /*--- PutI/GetI transformations                               ---*/
   3679 /*---------------------------------------------------------------*/
   3680 
   3681 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   3682    the given starting point to find, if any, a PutI which writes
   3683    exactly the same piece of guest state, and so return the expression
   3684    that the PutI writes.  This is the core of PutI-GetI forwarding. */
   3685 
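         /* Illustrative example (invented names): given
               PUTI(descr)[t1,7] = t5
               ... nothing in between that might touch that state ...
               t9 = GETI(descr)[t1,7]
            the GetI reads back exactly what the PutI wrote, so findPutI
            returns t5 and do_redundant_GetI_elimination below turns the
            last statement into "t9 = t5". */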
   3686 static
   3687 IRExpr* findPutI ( IRSB* bb, Int startHere,
   3688                    IRRegArray* descrG, IRExpr* ixG, Int biasG )
   3689 {
   3690    Int        j;
   3691    IRStmt*    st;
   3692    GSAliasing relation;
   3693 
   3694    if (0) {
   3695       vex_printf("\nfindPutI ");
   3696       ppIRRegArray(descrG);
   3697       vex_printf(" ");
   3698       ppIRExpr(ixG);
   3699       vex_printf(" %d\n", biasG);
   3700    }
   3701 
   3702    /* Scan backwards in bb from startHere to find a suitable PutI
   3703       binding for (descrG, ixG, biasG), if any. */
   3704 
   3705    for (j = startHere; j >= 0; j--) {
   3706       st = bb->stmts[j];
   3707       if (st->tag == Ist_NoOp)
   3708          continue;
   3709 
   3710       if (st->tag == Ist_Put) {
   3711          /* Non-indexed Put.  This can't give a binding, but we do
   3712             need to check it doesn't invalidate the search by
   3713             overlapping any part of the indexed guest state. */
   3714 
   3715          relation
   3716             = getAliasingRelation_IC(
   3717                  descrG, ixG,
   3718                  st->Ist.Put.offset,
   3719                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
   3720 
   3721          if (relation == NoAlias) {
   3722             /* we're OK; keep going */
   3723             continue;
   3724          } else {
   3725             /* relation == UnknownAlias || relation == ExactAlias */
   3726             /* If this assertion fails, we've found a Put which writes
   3727                an area of guest state which is read by a GetI.  Which
   3728                is unlikely (although not per se wrong). */
   3729             vassert(relation != ExactAlias);
   3730             /* This Put potentially writes guest state that the GetI
   3731                reads; we must fail. */
   3732             return NULL;
   3733          }
   3734       }
   3735 
   3736       if (st->tag == Ist_PutI) {
   3737          IRPutI *puti = st->Ist.PutI.details;
   3738 
   3739          relation = getAliasingRelation_II(
   3740                        descrG, ixG, biasG,
   3741                        puti->descr,
   3742                        puti->ix,
   3743                        puti->bias
   3744                     );
   3745 
   3746          if (relation == NoAlias) {
   3747             /* This PutI definitely doesn't overlap.  Ignore it and
   3748                keep going. */
   3749             continue; /* the for j loop */
   3750          }
   3751 
   3752          if (relation == UnknownAlias) {
    3753             /* We don't know whether or not this PutI writes to the
    3754                same guest state as the GetI.  So we have to give up. */
   3755             return NULL;
   3756          }
   3757 
   3758          /* Otherwise, we've found what we're looking for.  */
   3759          vassert(relation == ExactAlias);
   3760          return puti->data;
   3761 
   3762       } /* if (st->tag == Ist_PutI) */
   3763 
   3764       if (st->tag == Ist_Dirty) {
   3765          /* Be conservative.  If the dirty call has any guest effects at
   3766             all, give up.  We could do better -- only give up if there
   3767             are any guest writes/modifies. */
   3768          if (st->Ist.Dirty.details->nFxState > 0)
   3769             return NULL;
   3770       }
   3771 
   3772    } /* for */
   3773 
   3774    /* No valid replacement was found. */
   3775    return NULL;
   3776 }
   3777 
   3778 
   3779 
   3780 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   3781    that it writes exactly the same piece of guest state) ?  Safe
   3782    answer: False. */
   3783 
   3784 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   3785 {
   3786    vassert(pi->tag == Ist_PutI);
   3787    if (s2->tag != Ist_PutI)
   3788       return False;
   3789 
   3790    IRPutI *p1 = pi->Ist.PutI.details;
   3791    IRPutI *p2 = s2->Ist.PutI.details;
   3792 
   3793    return toBool(
   3794           getAliasingRelation_II(
   3795              p1->descr, p1->ix, p1->bias,
   3796              p2->descr, p2->ix, p2->bias
   3797           )
   3798           == ExactAlias
   3799           );
   3800 }
   3801 
   3802 
   3803 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   3804    overlap it?  Safe answer: True.  Note, we could do a lot better
   3805    than this if needed. */
   3806 
   3807 static
   3808 Bool guestAccessWhichMightOverlapPutI (
   3809         IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
   3810      )
   3811 {
   3812    GSAliasing relation;
   3813    UInt       minoffP, maxoffP;
   3814 
   3815    vassert(pi->tag == Ist_PutI);
   3816 
   3817    IRPutI *p1 = pi->Ist.PutI.details;
   3818 
   3819    getArrayBounds(p1->descr, &minoffP, &maxoffP);
   3820    switch (s2->tag) {
   3821 
   3822       case Ist_NoOp:
   3823       case Ist_IMark:
   3824          return False;
   3825 
   3826       case Ist_MBE:
   3827       case Ist_AbiHint:
   3828          /* just be paranoid ... these should be rare. */
   3829          return True;
   3830 
   3831       case Ist_CAS:
   3832          /* This is unbelievably lame, but it's probably not
   3833             significant from a performance point of view.  Really, a
   3834             CAS is a load-store op, so it should be safe to say False.
   3835             However .. */
   3836          return True;
   3837 
   3838       case Ist_Dirty:
   3839          /* If the dirty call has any guest effects at all, give up.
   3840             Probably could do better. */
   3841          if (s2->Ist.Dirty.details->nFxState > 0)
   3842             return True;
   3843          return False;
   3844 
   3845       case Ist_Put:
   3846          vassert(isIRAtom(s2->Ist.Put.data));
   3847          relation
   3848             = getAliasingRelation_IC(
   3849                  p1->descr, p1->ix,
   3850                  s2->Ist.Put.offset,
   3851                  typeOfIRExpr(tyenv,s2->Ist.Put.data)
   3852               );
   3853          goto have_relation;
   3854 
   3855       case Ist_PutI: {
   3856          IRPutI *p2 = s2->Ist.PutI.details;
   3857 
   3858          vassert(isIRAtom(p2->ix));
   3859          vassert(isIRAtom(p2->data));
   3860          relation
   3861             = getAliasingRelation_II(
   3862                  p1->descr, p1->ix, p1->bias,
   3863                  p2->descr, p2->ix, p2->bias
   3864               );
   3865          goto have_relation;
   3866       }
   3867 
   3868       case Ist_WrTmp:
   3869          if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
   3870             relation
   3871                = getAliasingRelation_II(
   3872                     p1->descr, p1->ix, p1->bias,
   3873                     s2->Ist.WrTmp.data->Iex.GetI.descr,
   3874                     s2->Ist.WrTmp.data->Iex.GetI.ix,
   3875                     s2->Ist.WrTmp.data->Iex.GetI.bias
   3876                  );
   3877             goto have_relation;
   3878          }
   3879          if (s2->Ist.WrTmp.data->tag == Iex_Get) {
   3880             relation
   3881                = getAliasingRelation_IC(
   3882                     p1->descr, p1->ix,
   3883                     s2->Ist.WrTmp.data->Iex.Get.offset,
   3884                     s2->Ist.WrTmp.data->Iex.Get.ty
   3885                  );
   3886             goto have_relation;
   3887          }
   3888          return False;
   3889 
   3890       case Ist_Store:
   3891          vassert(isIRAtom(s2->Ist.Store.addr));
   3892          vassert(isIRAtom(s2->Ist.Store.data));
   3893          return False;
   3894 
   3895       default:
   3896          vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
   3897          vpanic("guestAccessWhichMightOverlapPutI");
   3898    }
   3899 
   3900   have_relation:
   3901    if (relation == NoAlias)
   3902       return False;
   3903    else
   3904       return True; /* ExactAlias or UnknownAlias */
   3905 }
   3906 
   3907 
   3908 
   3909 /* ---------- PutI/GetI transformations main functions --------- */
   3910 
   3911 /* Remove redundant GetIs, to the extent that they can be detected.
   3912    bb is modified in-place. */
   3913 
   3914 static
   3915 void do_redundant_GetI_elimination ( IRSB* bb )
   3916 {
   3917    Int     i;
   3918    IRStmt* st;
   3919 
   3920    for (i = bb->stmts_used-1; i >= 0; i--) {
   3921       st = bb->stmts[i];
   3922       if (st->tag == Ist_NoOp)
   3923          continue;
   3924 
   3925       if (st->tag == Ist_WrTmp
   3926           && st->Ist.WrTmp.data->tag == Iex_GetI
   3927           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   3928          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   3929          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   3930          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   3931          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   3932          if (replacement
   3933              && isIRAtom(replacement)
   3934              /* Make sure we're doing a type-safe transformation! */
   3935              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   3936             if (DEBUG_IROPT) {
   3937                vex_printf("rGI:  ");
   3938                ppIRExpr(st->Ist.WrTmp.data);
   3939                vex_printf(" -> ");
   3940                ppIRExpr(replacement);
   3941                vex_printf("\n");
   3942             }
   3943             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   3944          }
   3945       }
   3946    }
   3947 
   3948 }
   3949 
   3950 
    3951 /* Remove redundant PutIs, to the extent that they can be detected.
   3952    bb is modified in-place. */
   3953 
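         /* Illustrative example (invented names): in
               PUTI(descr)[t1,3] = t4
               t7 = Add32(t2,t3)
               PUTI(descr)[t1,3] = t7
            the first PutI is overwritten by an identical one before anything
            can observe it (no exit, no overlapping Get/GetI/Put, no dirty
            call intervenes), so the forward scan below turns it into a
            NoOp. */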
   3954 static
   3955 void do_redundant_PutI_elimination ( IRSB* bb )
   3956 {
   3957    Int    i, j;
   3958    Bool   delete;
   3959    IRStmt *st, *stj;
   3960 
   3961    vassert
   3962       (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
   3963        || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);
   3964 
   3965    for (i = 0; i < bb->stmts_used; i++) {
   3966       st = bb->stmts[i];
   3967       if (st->tag != Ist_PutI)
   3968          continue;
   3969       /* Ok, search forwards from here to see if we can find another
   3970          PutI which makes this one redundant, and dodging various
   3971          hazards.  Search forwards:
   3972          * If conditional exit, give up (because anything after that
   3973            does not postdominate this put).
   3974          * If a Get which might overlap, give up (because this PutI
    3975            is not necessarily dead).
   3976          * If a Put which is identical, stop with success.
   3977          * If a Put which might overlap, but is not identical, give up.
   3978          * If a dirty helper call which might write guest state, give up.
   3979          * If a Put which definitely doesn't overlap, or any other
   3980            kind of stmt, continue.
   3981       */
   3982       delete = False;
   3983       for (j = i+1; j < bb->stmts_used; j++) {
   3984          stj = bb->stmts[j];
   3985          if (stj->tag == Ist_NoOp)
   3986             continue;
   3987          if (identicalPutIs(st, stj)) {
   3988             /* success! */
   3989             delete = True;
   3990             break;
   3991          }
   3992          if (stj->tag == Ist_Exit)
   3993             /* give up */
   3994             break;
    3995          if (stj->tag == Ist_Dirty)
    3996             /* give up; could do better here */
    3997             break;
    3998          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
    3999             /* give up */
    4000             break;
   4001       }
   4002 
   4003       if (delete) {
   4004          if (DEBUG_IROPT) {
   4005             vex_printf("rPI:  ");
   4006             ppIRStmt(st);
   4007             vex_printf("\n");
   4008          }
   4009          bb->stmts[i] = IRStmt_NoOp();
   4010       }
   4011 
   4012    }
   4013 }
   4014 
   4015 
   4016 /*---------------------------------------------------------------*/
   4017 /*--- Loop unrolling                                          ---*/
   4018 /*---------------------------------------------------------------*/
   4019 
   4020 /* Adjust all tmp values (names) in e by delta.  e is destructively
   4021    modified. */
   4022 
   4023 static void deltaIRExpr ( IRExpr* e, Int delta )
   4024 {
   4025    Int i;
   4026    switch (e->tag) {
   4027       case Iex_RdTmp:
   4028          e->Iex.RdTmp.tmp += delta;
   4029          break;
   4030       case Iex_Get:
   4031       case Iex_Const:
   4032          break;
   4033       case Iex_GetI:
   4034          deltaIRExpr(e->Iex.GetI.ix, delta);
   4035          break;
   4036       case Iex_Qop:
   4037          deltaIRExpr(e->Iex.Qop.details->arg1, delta);
   4038          deltaIRExpr(e->Iex.Qop.details->arg2, delta);
   4039          deltaIRExpr(e->Iex.Qop.details->arg3, delta);
   4040          deltaIRExpr(e->Iex.Qop.details->arg4, delta);
   4041          break;
   4042       case Iex_Triop:
   4043          deltaIRExpr(e->Iex.Triop.details->arg1, delta);
   4044          deltaIRExpr(e->Iex.Triop.details->arg2, delta);
   4045          deltaIRExpr(e->Iex.Triop.details->arg3, delta);
   4046          break;
   4047       case Iex_Binop:
   4048          deltaIRExpr(e->Iex.Binop.arg1, delta);
   4049          deltaIRExpr(e->Iex.Binop.arg2, delta);
   4050          break;
   4051       case Iex_Unop:
   4052          deltaIRExpr(e->Iex.Unop.arg, delta);
   4053          break;
   4054       case Iex_Load:
   4055          deltaIRExpr(e->Iex.Load.addr, delta);
   4056          break;
   4057       case Iex_CCall:
   4058          for (i = 0; e->Iex.CCall.args[i]; i++)
   4059             deltaIRExpr(e->Iex.CCall.args[i], delta);
   4060          break;
   4061       case Iex_Mux0X:
   4062          deltaIRExpr(e->Iex.Mux0X.cond, delta);
   4063          deltaIRExpr(e->Iex.Mux0X.expr0, delta);
   4064          deltaIRExpr(e->Iex.Mux0X.exprX, delta);
   4065          break;
   4066       default:
   4067          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4068          vpanic("deltaIRExpr");
   4069    }
   4070 }
   4071 
   4072 /* Adjust all tmp values (names) in st by delta.  st is destructively
   4073    modified. */
   4074 
   4075 static void deltaIRStmt ( IRStmt* st, Int delta )
   4076 {
   4077    Int      i;
   4078    IRDirty* d;
   4079    switch (st->tag) {
   4080       case Ist_NoOp:
   4081       case Ist_IMark:
   4082       case Ist_MBE:
   4083          break;
   4084       case Ist_AbiHint:
   4085          deltaIRExpr(st->Ist.AbiHint.base, delta);
   4086          deltaIRExpr(st->Ist.AbiHint.nia, delta);
   4087          break;
   4088       case Ist_Put:
   4089          deltaIRExpr(st->Ist.Put.data, delta);
   4090          break;
   4091       case Ist_PutI:
   4092          deltaIRExpr(st->Ist.PutI.details->ix, delta);
   4093          deltaIRExpr(st->Ist.PutI.details->data, delta);
   4094          break;
   4095       case Ist_WrTmp:
   4096          st->Ist.WrTmp.tmp += delta;
   4097          deltaIRExpr(st->Ist.WrTmp.data, delta);
   4098          break;
   4099       case Ist_Exit:
   4100          deltaIRExpr(st->Ist.Exit.guard, delta);
   4101          break;
   4102       case Ist_Store:
   4103          deltaIRExpr(st->Ist.Store.addr, delta);
   4104          deltaIRExpr(st->Ist.Store.data, delta);
   4105          break;
   4106       case Ist_CAS:
   4107          if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
   4108             st->Ist.CAS.details->oldHi += delta;
   4109          st->Ist.CAS.details->oldLo += delta;
   4110          deltaIRExpr(st->Ist.CAS.details->addr, delta);
   4111          if (st->Ist.CAS.details->expdHi)
   4112             deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
   4113          deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
   4114          if (st->Ist.CAS.details->dataHi)
   4115             deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
   4116          deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
   4117          break;
   4118       case Ist_LLSC:
   4119          st->Ist.LLSC.result += delta;
   4120          deltaIRExpr(st->Ist.LLSC.addr, delta);
   4121          if (st->Ist.LLSC.storedata)
   4122             deltaIRExpr(st->Ist.LLSC.storedata, delta);
   4123          break;
   4124       case Ist_Dirty:
   4125          d = st->Ist.Dirty.details;
   4126          deltaIRExpr(d->guard, delta);
   4127          for (i = 0; d->args[i]; i++)
   4128             deltaIRExpr(d->args[i], delta);
   4129          if (d->tmp != IRTemp_INVALID)
   4130             d->tmp += delta;
   4131          if (d->mAddr)
   4132             deltaIRExpr(d->mAddr, delta);
   4133          break;
   4134       default:
   4135          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4136          vpanic("deltaIRStmt");
   4137    }
   4138 }
   4139 
   4140 
    4141 /* If possible, return a loop-unrolled version of bb0; bb0 itself is
    4142    left unmodified (it is deep-copied first).  If not possible, return NULL.  */
   4143 
   4144 /* The two schemas considered are:
   4145 
   4146      X: BODY; goto X
   4147 
    4148      which unrolls to (e.g.)  X: BODY;BODY; goto X
   4149 
   4150    and
   4151 
   4152        X: BODY; if (c) goto X; goto Y
   4153    which trivially transforms to
   4154        X: BODY; if (!c) goto Y; goto X;
   4155    so it falls in the scope of the first case.
   4156 
   4157    X and Y must be literal (guest) addresses.
   4158 */
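         /* Illustrative sketch (invented addresses, schematic IR): if this
            block sits at guest address 0x1000 and matches idiom #2,

               0x1000:  BODY; if (t9) goto 0x1000; goto 0x1040

            then, by swapping the two literal targets and negating the guard
            (exactly what the code below does), it takes the idiom #1 shape

               0x1000:  BODY; if (Not1(t9)) goto 0x1040; goto 0x1000

            which the unroller proper can then duplicate in place. */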
   4159 
   4160 static Int calc_unroll_factor( IRSB* bb )
   4161 {
   4162    Int n_stmts, i;
   4163 
   4164    n_stmts = 0;
   4165    for (i = 0; i < bb->stmts_used; i++) {
   4166       if (bb->stmts[i]->tag != Ist_NoOp)
   4167          n_stmts++;
   4168    }
   4169 
   4170    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   4171       if (vex_control.iropt_verbosity > 0)
   4172          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   4173                     n_stmts, 8* n_stmts);
   4174       return 8;
   4175    }
   4176    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   4177       if (vex_control.iropt_verbosity > 0)
   4178          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   4179                     n_stmts, 4* n_stmts);
   4180       return 4;
   4181    }
   4182 
   4183    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   4184       if (vex_control.iropt_verbosity > 0)
   4185          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   4186                     n_stmts, 2* n_stmts);
   4187       return 2;
   4188    }
   4189 
   4190    if (vex_control.iropt_verbosity > 0)
   4191       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   4192 
   4193    return 1;
   4194 }
   4195 
   4196 
   4197 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
   4198 {
   4199    Int      i, j, jmax, n_vars;
   4200    Bool     xxx_known;
   4201    Addr64   xxx_value, yyy_value;
   4202    IRExpr*  udst;
   4203    IRStmt*  st;
   4204    IRConst* con;
   4205    IRSB     *bb1, *bb2;
   4206    Int      unroll_factor;
   4207 
   4208    if (vex_control.iropt_unroll_thresh <= 0)
   4209       return NULL;
   4210 
   4211    /* First off, figure out if we can unroll this loop.  Do this
   4212       without modifying bb0. */
   4213 
   4214    if (bb0->jumpkind != Ijk_Boring)
   4215       return NULL;
   4216 
   4217    xxx_known = False;
   4218    xxx_value = 0;
   4219 
   4220    /* Extract the next-guest address.  If it isn't a literal, we
   4221       have to give up. */
   4222 
   4223    udst = bb0->next;
   4224    if (udst->tag == Iex_Const
   4225        && (udst->Iex.Const.con->tag == Ico_U32
   4226            || udst->Iex.Const.con->tag == Ico_U64)) {
   4227       /* The BB ends in a jump to a literal location. */
   4228       xxx_known = True;
   4229       xxx_value = udst->Iex.Const.con->tag == Ico_U64
   4230                     ?  udst->Iex.Const.con->Ico.U64
   4231                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
   4232    }
   4233 
   4234    if (!xxx_known)
   4235       return NULL;
   4236 
    4237    /* Now we know the BB ends with a jump to a literal location.  If
   4238       it's a jump to itself (viz, idiom #1), move directly to the
   4239       unrolling stage, first cloning the bb so the original isn't
   4240       modified. */
   4241    if (xxx_value == my_addr) {
   4242       unroll_factor = calc_unroll_factor( bb0 );
   4243       if (unroll_factor < 2)
   4244          return NULL;
   4245       bb1 = deepCopyIRSB( bb0 );
   4246       bb0 = NULL;
   4247       udst = NULL; /* is now invalid */
   4248       goto do_unroll;
   4249    }
   4250 
   4251    /* Search for the second idiomatic form:
   4252         X: BODY; if (c) goto X; goto Y
   4253       We know Y, but need to establish that the last stmt
   4254       is 'if (c) goto X'.
   4255    */
   4256    yyy_value = xxx_value;
   4257    for (i = bb0->stmts_used-1; i >= 0; i--)
   4258       if (bb0->stmts[i])
   4259          break;
   4260 
   4261    if (i < 0)
   4262       return NULL; /* block with no stmts.  Strange. */
   4263 
   4264    st = bb0->stmts[i];
   4265    if (st->tag != Ist_Exit)
   4266       return NULL;
   4267    if (st->Ist.Exit.jk != Ijk_Boring)
   4268       return NULL;
   4269 
   4270    con = st->Ist.Exit.dst;
   4271    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   4272 
   4273    xxx_value = con->tag == Ico_U64
   4274                   ? st->Ist.Exit.dst->Ico.U64
   4275                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
   4276 
   4277    /* If this assertion fails, we have some kind of type error. */
   4278    vassert(con->tag == udst->Iex.Const.con->tag);
   4279 
   4280    if (xxx_value != my_addr)
   4281       /* We didn't find either idiom.  Give up. */
   4282       return NULL;
   4283 
   4284    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
   4285       yyy values (which makes it look like idiom #1), and go into
   4286       unrolling proper.  This means finding (again) the last stmt, in
   4287       the copied BB. */
   4288 
   4289    unroll_factor = calc_unroll_factor( bb0 );
   4290    if (unroll_factor < 2)
   4291       return NULL;
   4292 
   4293    bb1 = deepCopyIRSB( bb0 );
   4294    bb0 = NULL;
   4295    udst = NULL; /* is now invalid */
   4296    for (i = bb1->stmts_used-1; i >= 0; i--)
   4297       if (bb1->stmts[i])
   4298          break;
   4299 
   4300    /* The next bunch of assertions should be true since we already
   4301       found and checked the last stmt in the original bb. */
   4302 
   4303    vassert(i >= 0);
   4304 
   4305    st = bb1->stmts[i];
   4306    vassert(st->tag == Ist_Exit);
   4307 
   4308    con = st->Ist.Exit.dst;
   4309    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
   4310 
   4311    udst = bb1->next;
   4312    vassert(udst->tag == Iex_Const);
   4313    vassert(udst->Iex.Const.con->tag == Ico_U32
   4314           || udst->Iex.Const.con->tag == Ico_U64);
   4315    vassert(con->tag == udst->Iex.Const.con->tag);
   4316 
   4317    /* switch the xxx and yyy fields around */
   4318    if (con->tag == Ico_U64) {
   4319       udst->Iex.Const.con->Ico.U64 = xxx_value;
   4320       con->Ico.U64 = yyy_value;
   4321    } else {
   4322       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
   4323       con->Ico.U32 = (UInt)yyy_value;
   4324    }
   4325 
   4326    /* negate the test condition */
   4327    st->Ist.Exit.guard
   4328       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
   4329 
    4330    /* --- The unroller proper.  Both idioms have by now --- */
    4331    /* --- been converted to idiom 1.                    --- */
   4332 
   4333   do_unroll:
   4334 
   4335    vassert(unroll_factor == 2
   4336            || unroll_factor == 4
   4337            || unroll_factor == 8);
   4338 
   4339    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
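            /* Each pass of the loop below doubles bb1 in place: bb1 is deep
               copied, the copy's temps are renumbered past bb1's existing
               ones, and the renumbered stmts are appended to bb1.  So jmax
               doublings yield 2^jmax == unroll_factor copies of the body;
               e.g. (illustrative) unroll_factor == 4 gives jmax == 2 and the
               body count goes 1 -> 2 -> 4. */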
   4340    for (j = 1; j <= jmax; j++) {
   4341 
   4342       n_vars = bb1->tyenv->types_used;
   4343 
   4344       bb2 = deepCopyIRSB(bb1);
   4345       for (i = 0; i < n_vars; i++)
   4346          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
   4347 
   4348       for (i = 0; i < bb2->stmts_used; i++) {
   4349          /* deltaIRStmt destructively modifies the stmt, but
   4350             that's OK since bb2 is a complete fresh copy of bb1. */
   4351          deltaIRStmt(bb2->stmts[i], n_vars);
   4352          addStmtToIRSB(bb1, bb2->stmts[i]);
   4353       }
   4354    }
   4355 
   4356    if (DEBUG_IROPT) {
   4357       vex_printf("\nUNROLLED (%llx)\n", my_addr);
   4358       ppIRSB(bb1);
   4359       vex_printf("\n");
   4360    }
   4361 
   4362    /* Flattening; sigh.  The unroller succeeds in breaking flatness
   4363       by negating the test condition.  This should be fixed properly.
   4364       For the moment use this shotgun approach.  */
   4365    return flatten_BB(bb1);
   4366 }
   4367 
   4368 
   4369 /*---------------------------------------------------------------*/
   4370 /*--- The tree builder                                        ---*/
   4371 /*---------------------------------------------------------------*/
   4372 
   4373 /* This isn't part of IR optimisation.  Really it's a pass done prior
   4374    to instruction selection, which improves the code that the
   4375    instruction selector can produce. */
   4376 
   4377 /* --- The 'tmp' environment is the central data structure here --- */
   4378 
   4379 /* The number of outstanding bindings we're prepared to track.
   4380    The number of times the env becomes full and we have to dump
   4381    the oldest binding (hence reducing code quality) falls very
    4382    rapidly as the env size increases.  A value around 8-10 (10 is used
    4383    below) gives reasonable performance under most circumstances. */
   4384 #define A_NENV 10
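         /* Sketch of how the array is maintained: addToEnvFront below
            installs the newest binding at slot 0 and slides the others back,
            and the compaction step in ado_treebuild_BB closes up any holes
            left by used-up bindings.  So, after compaction, the live bindings
            occupy a prefix of env[], youngest first, and env[A_NENV-1], when
            occupied, is the oldest outstanding binding. */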
   4385 
   4386 /* bindee == NULL   ===  slot is not in use
   4387    bindee != NULL   ===  slot is in use
   4388 */
   4389 typedef
   4390    struct {
   4391       IRTemp  binder;
   4392       IRExpr* bindee;
   4393       Bool    doesLoad;
   4394       Bool    doesGet;
   4395    }
   4396    ATmpInfo;
   4397 
   4398 __attribute__((unused))
   4399 static void ppAEnv ( ATmpInfo* env )
   4400 {
   4401    Int i;
   4402    for (i = 0; i < A_NENV; i++) {
   4403       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   4404       if (env[i].bindee)
   4405          ppIRExpr(env[i].bindee);
   4406       else
   4407          vex_printf("(null)");
   4408       vex_printf("\n");
   4409    }
   4410 }
   4411 
   4412 /* --- Tree-traversal fns --- */
   4413 
   4414 /* Traverse an expr, and detect if any part of it reads memory or does
   4415    a Get.  Be careful ... this really controls how much the
   4416    tree-builder can reorder the code, so getting it right is critical.
   4417 */
   4418 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
   4419 {
   4420    Int i;
   4421    switch (e->tag) {
   4422       case Iex_CCall:
   4423          for (i = 0; e->Iex.CCall.args[i]; i++)
   4424             setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
   4425          return;
   4426       case Iex_Mux0X:
   4427          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
   4428          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
   4429          setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
   4430          return;
   4431       case Iex_Qop:
   4432          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg1);
   4433          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg2);
   4434          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg3);
   4435          setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg4);
   4436          return;
   4437       case Iex_Triop:
   4438          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg1);
   4439          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg2);
   4440          setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg3);
   4441          return;
   4442       case Iex_Binop:
   4443          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
   4444          setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
   4445          return;
   4446       case Iex_Unop:
   4447          setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
   4448          return;
   4449       case Iex_Load:
   4450          *doesLoad = True;
   4451          setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
   4452          return;
   4453       case Iex_Get:
   4454          *doesGet = True;
   4455          return;
   4456       case Iex_GetI:
   4457          *doesGet = True;
   4458          setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
   4459          return;
   4460       case Iex_RdTmp:
   4461       case Iex_Const:
   4462          return;
   4463       default:
   4464          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4465          vpanic("setHints_Expr");
   4466    }
   4467 }
   4468 
   4469 
   4470 /* Add a binding to the front of the env and slide all the rest
   4471    backwards.  It should be the case that the last slot is free. */
   4472 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   4473 {
   4474    Int i;
   4475    vassert(env[A_NENV-1].bindee == NULL);
   4476    for (i = A_NENV-1; i >= 1; i--)
   4477       env[i] = env[i-1];
   4478    env[0].binder   = binder;
   4479    env[0].bindee   = bindee;
   4480    env[0].doesLoad = False; /* filled in later */
   4481    env[0].doesGet  = False; /* filled in later */
   4482 }
   4483 
   4484 /* Given uses :: array of UShort, indexed by IRTemp
   4485    Add the use-occurrences of temps in this expression
    4486    to the uses array.
   4487 */
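         /* For example (illustrative): counting the expression
            Add32(RdTmp(t3),Mul32(RdTmp(t3),RdTmp(t7))) bumps uses[3] by 2 and
            uses[7] by 1; constants and Gets contribute nothing. */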
   4488 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   4489 {
   4490    Int i;
   4491 
   4492    switch (e->tag) {
   4493 
   4494       case Iex_RdTmp: /* the only interesting case */
   4495          uses[e->Iex.RdTmp.tmp]++;
   4496          return;
   4497 
   4498       case Iex_Mux0X:
   4499          aoccCount_Expr(uses, e->Iex.Mux0X.cond);
   4500          aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
   4501          aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
   4502          return;
   4503 
   4504       case Iex_Qop:
   4505          aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
   4506          aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
   4507          aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
   4508          aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
   4509          return;
   4510 
   4511       case Iex_Triop:
   4512          aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
   4513          aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
   4514          aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
   4515          return;
   4516 
   4517       case Iex_Binop:
   4518          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   4519          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   4520          return;
   4521 
   4522       case Iex_Unop:
   4523          aoccCount_Expr(uses, e->Iex.Unop.arg);
   4524          return;
   4525 
   4526       case Iex_Load:
   4527          aoccCount_Expr(uses, e->Iex.Load.addr);
   4528          return;
   4529 
   4530       case Iex_CCall:
   4531          for (i = 0; e->Iex.CCall.args[i]; i++)
   4532             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   4533          return;
   4534 
   4535       case Iex_GetI:
   4536          aoccCount_Expr(uses, e->Iex.GetI.ix);
   4537          return;
   4538 
   4539       case Iex_Const:
   4540       case Iex_Get:
   4541          return;
   4542 
   4543       default:
   4544          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4545          vpanic("aoccCount_Expr");
   4546     }
   4547 }
   4548 
   4549 
   4550 /* Given uses :: array of UShort, indexed by IRTemp
   4551    Add the use-occurrences of temps in this statement
    4552    to the uses array.
   4553 */
   4554 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   4555 {
   4556    Int      i;
   4557    IRDirty* d;
   4558    IRCAS*   cas;
   4559    switch (st->tag) {
   4560       case Ist_AbiHint:
   4561          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   4562          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   4563          return;
   4564       case Ist_WrTmp:
   4565          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   4566          return;
   4567       case Ist_Put:
   4568          aoccCount_Expr(uses, st->Ist.Put.data);
   4569          return;
   4570       case Ist_PutI:
   4571          aoccCount_Expr(uses, st->Ist.PutI.details->ix);
   4572          aoccCount_Expr(uses, st->Ist.PutI.details->data);
   4573          return;
   4574       case Ist_Store:
   4575          aoccCount_Expr(uses, st->Ist.Store.addr);
   4576          aoccCount_Expr(uses, st->Ist.Store.data);
   4577          return;
   4578       case Ist_CAS:
   4579          cas = st->Ist.CAS.details;
   4580          aoccCount_Expr(uses, cas->addr);
   4581          if (cas->expdHi)
   4582             aoccCount_Expr(uses, cas->expdHi);
   4583          aoccCount_Expr(uses, cas->expdLo);
   4584          if (cas->dataHi)
   4585             aoccCount_Expr(uses, cas->dataHi);
   4586          aoccCount_Expr(uses, cas->dataLo);
   4587          return;
   4588       case Ist_LLSC:
   4589          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   4590          if (st->Ist.LLSC.storedata)
   4591             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   4592          return;
   4593       case Ist_Dirty:
   4594          d = st->Ist.Dirty.details;
   4595          if (d->mFx != Ifx_None)
   4596             aoccCount_Expr(uses, d->mAddr);
   4597          aoccCount_Expr(uses, d->guard);
   4598          for (i = 0; d->args[i]; i++)
   4599             aoccCount_Expr(uses, d->args[i]);
   4600          return;
   4601       case Ist_NoOp:
   4602       case Ist_IMark:
   4603       case Ist_MBE:
   4604          return;
   4605       case Ist_Exit:
   4606          aoccCount_Expr(uses, st->Ist.Exit.guard);
   4607          return;
   4608       default:
   4609          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4610          vpanic("aoccCount_Stmt");
   4611    }
   4612 }
   4613 
   4614 /* Look up a binding for tmp in the env.  If found, return the bound
   4615    expression, and set the env's binding to NULL so it is marked as
   4616    used.  If not found, return NULL. */
   4617 
   4618 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   4619 {
   4620    Int i;
   4621    for (i = 0; i < A_NENV; i++) {
   4622       if (env[i].binder == tmp && env[i].bindee != NULL) {
   4623          IRExpr* bindee = env[i].bindee;
   4624          env[i].bindee = NULL;
   4625          return bindee;
   4626       }
   4627    }
   4628    return NULL;
   4629 }
   4630 
   4631 /* Traverse e, looking for temps.  For each observed temp, see if env
   4632    contains a binding for the temp, and if so return the bound value.
   4633    The env has the property that any binding it holds is
   4634    'single-shot', so once a binding is used, it is marked as no longer
   4635    available, by setting its .bindee field to NULL. */
   4636 
   4637 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   4638    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   4639 }
   4640 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   4641    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   4642 }
   4643 
   4644 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   4645 {
   4646    switch (op) {
   4647    case Iop_Or32:
   4648       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   4649       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   4650          return IRExpr_Unop( Iop_CmpwNEZ32,
   4651                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   4652                                                      a2->Iex.Unop.arg ) );
   4653       break;
   4654 
   4655    case Iop_CmpNE32:
   4656       /* Since X has type Ity_I1 we can simplify:
   4657          CmpNE32(1Uto32(X),0)) ==> X */
   4658       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   4659          return a1->Iex.Unop.arg;
   4660       break;
   4661 
   4662    default:
   4663       break;
   4664    }
   4665    /* no reduction rule applies */
   4666    return IRExpr_Binop( op, a1, a2 );
   4667 }
   4668 
   4669 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   4670 {
   4671    switch (op) {
   4672    case Iop_CmpwNEZ64:
   4673       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   4674       if (is_Binop(aa, Iop_Or64)
   4675           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   4676          return fold_IRExpr_Unop(
   4677                    Iop_CmpwNEZ64,
   4678                    IRExpr_Binop(Iop_Or64,
   4679                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   4680                                 aa->Iex.Binop.arg2));
   4681       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   4682       if (is_Binop(aa, Iop_Or64)
   4683           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   4684          return fold_IRExpr_Unop(
   4685                    Iop_CmpwNEZ64,
   4686                    IRExpr_Binop(Iop_Or64,
   4687                                 aa->Iex.Binop.arg1,
   4688                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   4689       break;
   4690    case Iop_CmpNEZ64:
   4691       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   4692       if (is_Unop(aa, Iop_Left64))
   4693          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   4694       break;
   4695    case Iop_CmpwNEZ32:
   4696       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   4697       if (is_Unop(aa, Iop_CmpwNEZ32))
   4698          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   4699       break;
   4700    case Iop_CmpNEZ32:
   4701       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   4702       if (is_Unop(aa, Iop_Left32))
   4703          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   4704       /* CmpNEZ32( 1Uto32(X) ) --> X */
   4705       if (is_Unop(aa, Iop_1Uto32))
   4706          return aa->Iex.Unop.arg;
   4707       break;
   4708    case Iop_CmpNEZ8:
   4709       /* CmpNEZ8( 1Uto8(X) ) --> X */
   4710       if (is_Unop(aa, Iop_1Uto8))
   4711          return aa->Iex.Unop.arg;
   4712       break;
   4713    case Iop_Left32:
   4714       /* Left32( Left32(x) ) --> Left32(x) */
   4715       if (is_Unop(aa, Iop_Left32))
   4716          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   4717       break;
   4718    case Iop_32to1:
   4719       /* 32to1( 1Uto32 ( x ) ) --> x */
   4720       if (is_Unop(aa, Iop_1Uto32))
   4721          return aa->Iex.Unop.arg;
   4722       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   4723       if (is_Unop(aa, Iop_CmpwNEZ32))
   4724          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   4725       break;
   4726    case Iop_64to1:
   4727       /* 64to1( 1Uto64 ( x ) ) --> x */
   4728       if (is_Unop(aa, Iop_1Uto64))
   4729          return aa->Iex.Unop.arg;
   4730       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   4731       if (is_Unop(aa, Iop_CmpwNEZ64))
   4732          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   4733       break;
   4734    case Iop_64to32:
   4735       /* 64to32( 32Uto64 ( x )) --> x */
   4736       if (is_Unop(aa, Iop_32Uto64))
   4737          return aa->Iex.Unop.arg;
   4738       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   4739       if (is_Unop(aa, Iop_8Uto64))
   4740          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   4741       break;
   4742 
   4743    case Iop_32Uto64:
   4744       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   4745       if (is_Unop(aa, Iop_8Uto32))
   4746          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   4747       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   4748       if (is_Unop(aa, Iop_16Uto32))
   4749          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   4750       /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
   4751                      --> Shr64( 32Uto64(64to32(x)), sh )) */
   4752       if (is_Unop(aa, Iop_64to32)
   4753           && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
   4754           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   4755           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   4756                      Iop_64to32)) {
   4757          return aa->Iex.Unop.arg;
   4758       }
   4759       /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
   4760          --> 32Uto64(64to32( Shl64(                x,   sh )) */
   4761       if (is_Unop(aa, Iop_64to32)
   4762           && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
   4763           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   4764           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   4765                      Iop_64to32)) {
   4766          return
   4767             IRExpr_Unop(
   4768                Iop_32Uto64,
   4769                IRExpr_Unop(
   4770                   Iop_64to32,
   4771                   IRExpr_Binop(
   4772                      Iop_Shl64,
   4773                      aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
   4774                      aa->Iex.Unop.arg->Iex.Binop.arg2
   4775             )));
   4776       }
   4777       break;
   4778 
   4779    case Iop_1Sto32:
   4780       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   4781       if (is_Unop(aa, Iop_CmpNEZ8)
   4782           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   4783           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   4784           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   4785                      Iop_CmpNEZ32)) {
   4786          return IRExpr_Unop( Iop_CmpwNEZ32,
   4787                              aa->Iex.Unop.arg->Iex.Unop.arg
   4788                                ->Iex.Unop.arg->Iex.Unop.arg);
   4789       }
   4790       break;
   4791 
   4792    default:
   4793       break;
   4794    }
   4795    /* no reduction rule applies */
   4796    return IRExpr_Unop( op, aa );
   4797 }
   4798 
   4799 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   4800 {
   4801    IRExpr*  e2;
   4802    IRExpr** args2;
   4803    Int      i;
   4804 
   4805    switch (e->tag) {
   4806 
   4807       case Iex_CCall:
   4808          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   4809          for (i = 0; args2[i]; i++)
   4810             args2[i] = atbSubst_Expr(env,args2[i]);
   4811          return IRExpr_CCall(
   4812                    e->Iex.CCall.cee,
   4813                    e->Iex.CCall.retty,
   4814                    args2
   4815                 );
   4816       case Iex_RdTmp:
   4817          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   4818          return e2 ? e2 : e;
   4819       case Iex_Mux0X:
   4820          return IRExpr_Mux0X(
   4821                    atbSubst_Expr(env, e->Iex.Mux0X.cond),
   4822                    atbSubst_Expr(env, e->Iex.Mux0X.expr0),
   4823                    atbSubst_Expr(env, e->Iex.Mux0X.exprX)
   4824                 );
   4825       case Iex_Qop:
   4826          return IRExpr_Qop(
   4827                    e->Iex.Qop.details->op,
   4828                    atbSubst_Expr(env, e->Iex.Qop.details->arg1),
   4829                    atbSubst_Expr(env, e->Iex.Qop.details->arg2),
   4830                    atbSubst_Expr(env, e->Iex.Qop.details->arg3),
   4831                    atbSubst_Expr(env, e->Iex.Qop.details->arg4)
   4832                 );
   4833       case Iex_Triop:
   4834          return IRExpr_Triop(
   4835                    e->Iex.Triop.details->op,
   4836                    atbSubst_Expr(env, e->Iex.Triop.details->arg1),
   4837                    atbSubst_Expr(env, e->Iex.Triop.details->arg2),
   4838                    atbSubst_Expr(env, e->Iex.Triop.details->arg3)
   4839                 );
   4840       case Iex_Binop:
   4841          return fold_IRExpr_Binop(
   4842                    e->Iex.Binop.op,
   4843                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   4844                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   4845                 );
   4846       case Iex_Unop:
   4847          return fold_IRExpr_Unop(
   4848                    e->Iex.Unop.op,
   4849                    atbSubst_Expr(env, e->Iex.Unop.arg)
   4850                 );
   4851       case Iex_Load:
   4852          return IRExpr_Load(
   4853                    e->Iex.Load.end,
   4854                    e->Iex.Load.ty,
   4855                    atbSubst_Expr(env, e->Iex.Load.addr)
   4856                 );
   4857       case Iex_GetI:
   4858          return IRExpr_GetI(
   4859                    e->Iex.GetI.descr,
   4860                    atbSubst_Expr(env, e->Iex.GetI.ix),
   4861                    e->Iex.GetI.bias
   4862                 );
   4863       case Iex_Const:
   4864       case Iex_Get:
   4865          return e;
   4866       default:
   4867          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4868          vpanic("atbSubst_Expr");
   4869    }
   4870 }
   4871 
   4872 /* Same deal as atbSubst_Expr, except for stmts. */
   4873 
   4874 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
   4875 {
   4876    Int     i;
   4877    IRDirty *d, *d2;
   4878    IRCAS   *cas, *cas2;
   4879    IRPutI  *puti, *puti2;
   4880 
   4881    switch (st->tag) {
   4882       case Ist_AbiHint:
   4883          return IRStmt_AbiHint(
   4884                    atbSubst_Expr(env, st->Ist.AbiHint.base),
   4885                    st->Ist.AbiHint.len,
   4886                    atbSubst_Expr(env, st->Ist.AbiHint.nia)
   4887                 );
   4888       case Ist_Store:
   4889          return IRStmt_Store(
   4890                    st->Ist.Store.end,
   4891                    atbSubst_Expr(env, st->Ist.Store.addr),
   4892                    atbSubst_Expr(env, st->Ist.Store.data)
   4893                 );
   4894       case Ist_WrTmp:
   4895          return IRStmt_WrTmp(
   4896                    st->Ist.WrTmp.tmp,
   4897                    atbSubst_Expr(env, st->Ist.WrTmp.data)
   4898                 );
   4899       case Ist_Put:
   4900          return IRStmt_Put(
   4901                    st->Ist.Put.offset,
   4902                    atbSubst_Expr(env, st->Ist.Put.data)
   4903                 );
   4904       case Ist_PutI:
   4905          puti  = st->Ist.PutI.details;
   4906          puti2 = mkIRPutI(puti->descr,
   4907                           atbSubst_Expr(env, puti->ix),
   4908                           puti->bias,
   4909                           atbSubst_Expr(env, puti->data));
   4910          return IRStmt_PutI(puti2);
   4911 
   4912       case Ist_Exit:
   4913          return IRStmt_Exit(
   4914                    atbSubst_Expr(env, st->Ist.Exit.guard),
   4915                    st->Ist.Exit.jk,
   4916                    st->Ist.Exit.dst,
   4917                    st->Ist.Exit.offsIP
   4918                 );
   4919       case Ist_IMark:
   4920          return IRStmt_IMark(st->Ist.IMark.addr,
   4921                              st->Ist.IMark.len,
   4922                              st->Ist.IMark.delta);
   4923       case Ist_NoOp:
   4924          return IRStmt_NoOp();
   4925       case Ist_MBE:
   4926          return IRStmt_MBE(st->Ist.MBE.event);
   4927       case Ist_CAS:
   4928          cas  = st->Ist.CAS.details;
   4929          cas2 = mkIRCAS(
   4930                    cas->oldHi, cas->oldLo, cas->end,
   4931                    atbSubst_Expr(env, cas->addr),
   4932                    cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
   4933                    atbSubst_Expr(env, cas->expdLo),
   4934                    cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
   4935                    atbSubst_Expr(env, cas->dataLo)
   4936                 );
   4937          return IRStmt_CAS(cas2);
   4938       case Ist_LLSC:
   4939          return IRStmt_LLSC(
   4940                    st->Ist.LLSC.end,
   4941                    st->Ist.LLSC.result,
   4942                    atbSubst_Expr(env, st->Ist.LLSC.addr),
   4943                    st->Ist.LLSC.storedata
   4944                       ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
   4945                 );
   4946       case Ist_Dirty:
   4947          d  = st->Ist.Dirty.details;
   4948          d2 = emptyIRDirty();
   4949          *d2 = *d;
   4950          if (d2->mFx != Ifx_None)
   4951             d2->mAddr = atbSubst_Expr(env, d2->mAddr);
   4952          d2->guard = atbSubst_Expr(env, d2->guard);
   4953          for (i = 0; d2->args[i]; i++)
   4954             d2->args[i] = atbSubst_Expr(env, d2->args[i]);
   4955          return IRStmt_Dirty(d2);
   4956       default:
   4957          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   4958          vpanic("atbSubst_Stmt");
   4959    }
   4960 }
   4961 
   4962 /* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb )
   4963 {
   4964    Int      i, j, k, m;
   4965    Bool     stmtPuts, stmtStores, invalidateMe;
   4966    IRStmt*  st;
   4967    IRStmt*  st2;
   4968    ATmpInfo env[A_NENV];
   4969 
   4970    Bool   max_ga_known = False;
   4971    Addr64 max_ga       = 0;
   4972 
   4973    Int       n_tmps = bb->tyenv->types_used;
   4974    UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));
   4975 
   4976    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
   4977       temp.  Also count occurrences in the bb->next field.  Take the
   4978       opportunity to also find the maximum guest address in the block,
   4979       since that will be needed later for deciding when we can safely
   4980       elide event checks. */
   4981 
   4982    for (i = 0; i < n_tmps; i++)
   4983       uses[i] = 0;
   4984 
   4985    for (i = 0; i < bb->stmts_used; i++) {
   4986       st = bb->stmts[i];
   4987       switch (st->tag) {
   4988          case Ist_NoOp:
   4989             continue;
   4990          case Ist_IMark: {
   4991             Int    len = st->Ist.IMark.len;
   4992             Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
   4993             max_ga_known = True;
   4994             if (mga > max_ga)
   4995                max_ga = mga;
   4996             break;
   4997          }
   4998          default:
   4999             break;
   5000       }
   5001       aoccCount_Stmt( uses, st );
   5002    }
   5003    aoccCount_Expr(uses, bb->next );
   5004 
   5005 #  if 0
   5006    for (i = 0; i < n_tmps; i++) {
   5007       if (uses[i] == 0)
   5008         continue;
   5009       ppIRTemp( (IRTemp)i );
   5010       vex_printf("  used %d\n", (Int)uses[i] );
   5011    }
   5012 #  endif
   5013 
   5014    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
   5015 
    5016          If the env is full, emit the oldest binding (the one in the last
    5017          slot).  This guarantees there is at least one free slot in what follows.
   5018 
   5019          On seeing 't = E', occ(t)==1,
   5020             let E'=env(E)
   5021             delete this stmt
   5022             add t -> E' to the front of the env
   5023             Examine E' and set the hints for E' appropriately
   5024               (doesLoad? doesGet?)
   5025 
   5026          On seeing any other stmt,
   5027             let stmt' = env(stmt)
   5028             remove from env any 't=E' binds invalidated by stmt
   5029                 emit the invalidated stmts
   5030             emit stmt'
   5031             compact any holes in env
   5032               by sliding entries towards the front
   5033 
   5034       Finally, apply env to bb->next.
   5035    */
   5036 
   5037    for (i = 0; i < A_NENV; i++) {
   5038       env[i].bindee = NULL;
   5039       env[i].binder = IRTemp_INVALID;
   5040    }
   5041 
    5042    /* The stmts in bb are being reordered, and we are guaranteed to
    5043       end up with no more than the number we started with.  i is the
    5044       cursor over the stmts being examined, and j <= i is the cursor
    5045       for the stmts being written back.
    5046    */
   5047    j = 0;
   5048    for (i = 0; i < bb->stmts_used; i++) {
   5049 
   5050       st = bb->stmts[i];
   5051       if (st->tag == Ist_NoOp)
   5052          continue;
   5053 
   5054       /* Ensure there's at least one space in the env, by emitting
   5055          the oldest binding if necessary. */
   5056       if (env[A_NENV-1].bindee != NULL) {
   5057          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
   5058                                       env[A_NENV-1].bindee );
   5059          j++;
   5060          vassert(j <= i);
   5061          env[A_NENV-1].bindee = NULL;
   5062       }
   5063 
   5064       /* Consider current stmt. */
   5065       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
   5066          IRExpr *e, *e2;
   5067 
   5068          /* optional extra: dump dead bindings as we find them.
   5069             Removes the need for a prior dead-code removal pass. */
   5070          if (uses[st->Ist.WrTmp.tmp] == 0) {
    5071             if (0) vex_printf("DEAD binding\n");
   5072             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5073          }
   5074          vassert(uses[st->Ist.WrTmp.tmp] == 1);
   5075 
   5076          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
   5077             actions. */
   5078          e  = st->Ist.WrTmp.data;
   5079          e2 = atbSubst_Expr(env, e);
   5080          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
   5081          setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
   5082          /* don't advance j, as we are deleting this stmt and instead
   5083             holding it temporarily in the env. */
   5084          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5085       }
   5086 
   5087       /* we get here for any other kind of statement. */
   5088       /* 'use up' any bindings required by the current statement. */
   5089       st2 = atbSubst_Stmt(env, st);
   5090 
   5091       /* Now, before this stmt, dump any bindings in env that it
   5092          invalidates.  These need to be dumped in the order in which
   5093          they originally entered env -- that means from oldest to
   5094          youngest. */
   5095 
   5096       /* stmtPuts/stmtStores characterise what the stmt under
    5097          consideration does, or might do (over-approximating with True is always safe). */
   5098       stmtPuts
   5099          = toBool( st->tag == Ist_Put
   5100                    || st->tag == Ist_PutI
   5101                    || st->tag == Ist_Dirty );
   5102 
    5103       /* stmtStores is True if this stmt writes memory or might do so (==> we don't
   5104          want to reorder other loads or stores relative to it).  Also,
   5105          both LL and SC fall under this classification, since we
   5106          really ought to be conservative and not reorder any other
   5107          memory transactions relative to them. */
   5108       stmtStores
   5109          = toBool( st->tag == Ist_Store
   5110                    || st->tag == Ist_Dirty
   5111                    || st->tag == Ist_LLSC
   5112                    || st->tag == Ist_CAS );
   5113 
   5114       for (k = A_NENV-1; k >= 0; k--) {
   5115          if (env[k].bindee == NULL)
   5116             continue;
   5117          /* Compare the actions of this stmt with the actions of
   5118             binding 'k', to see if they invalidate the binding. */
   5119          invalidateMe
   5120             = toBool(
   5121               /* a store invalidates loaded data */
   5122               (env[k].doesLoad && stmtStores)
   5123               /* a put invalidates get'd data */
   5124               || (env[k].doesGet && stmtPuts)
   5125               /* a put invalidates loaded data.  Note, we could do
   5126                  much better here in the sense that we only need to
   5127                  invalidate trees containing loads if the Put in
   5128                  question is marked as requiring precise
   5129                  exceptions. */
   5130               || (env[k].doesLoad && stmtPuts)
   5131               /* probably overly conservative: a memory bus event
   5132                  invalidates absolutely everything, so that all
   5133                  computation prior to it is forced to complete before
   5134                  proceeding with the event (fence,lock,unlock). */
   5135               || st->tag == Ist_MBE
   5136               /* also be (probably overly) paranoid re AbiHints */
   5137               || st->tag == Ist_AbiHint
   5138               );
   5139          if (invalidateMe) {
   5140             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
   5141             j++;
   5142             vassert(j <= i);
   5143             env[k].bindee = NULL;
   5144          }
   5145       }
   5146 
   5147       /* Slide in-use entries in env up to the front */
   5148       m = 0;
   5149       for (k = 0; k < A_NENV; k++) {
   5150          if (env[k].bindee != NULL) {
   5151             env[m] = env[k];
   5152             m++;
    5153          }
   5154       }
    5155       for ( ; m < A_NENV; m++) {  /* m carries on from the loop above */
   5156          env[m].bindee = NULL;
   5157       }
   5158 
   5159       /* finally, emit the substituted statement */
   5160       bb->stmts[j] = st2;
   5161       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
   5162       j++;
   5163 
   5164       vassert(j <= i+1);
   5165    } /* for each stmt in the original bb ... */
   5166 
   5167    /* Finally ... substitute the ->next field as much as possible, and
    5168       dump any left-over bindings.  Hmm.  Perhaps there should be no
    5169       left-over bindings?  Or are any left-over bindings
    5170       by definition dead? */
   5171    bb->next = atbSubst_Expr(env, bb->next);
   5172    bb->stmts_used = j;
   5173 
   5174    return max_ga_known ? max_ga : ~(Addr64)0;
   5175 }
   5176 
   5177 
   5178 /*---------------------------------------------------------------*/
   5179 /*--- iropt main                                              ---*/
   5180 /*---------------------------------------------------------------*/
   5181 
   5182 static Bool iropt_verbose = False; /* True; */
   5183 
   5184 
   5185 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   5186    redundant Put removal, constant propagation, dead code removal,
   5187    clean helper specialisation, and dead code removal (again).
   5188 */
   5189 
   5190 
   5191 static
   5192 IRSB* cheap_transformations (
   5193          IRSB* bb,
   5194          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   5195          Bool (*preciseMemExnsFn)(Int,Int)
   5196       )
   5197 {
   5198    redundant_get_removal_BB ( bb );
   5199    if (iropt_verbose) {
   5200       vex_printf("\n========= REDUNDANT GET\n\n" );
   5201       ppIRSB(bb);
   5202    }
   5203 
   5204    if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
   5205       redundant_put_removal_BB ( bb, preciseMemExnsFn );
   5206    }
   5207    if (iropt_verbose) {
   5208       vex_printf("\n========= REDUNDANT PUT\n\n" );
   5209       ppIRSB(bb);
   5210    }
   5211 
   5212    bb = cprop_BB ( bb );
   5213    if (iropt_verbose) {
   5214       vex_printf("\n========= CPROPD\n\n" );
   5215       ppIRSB(bb);
   5216    }
   5217 
   5218    do_deadcode_BB ( bb );
   5219    if (iropt_verbose) {
   5220       vex_printf("\n========= DEAD\n\n" );
   5221       ppIRSB(bb);
   5222    }
   5223 
   5224    bb = spec_helpers_BB ( bb, specHelper );
   5225    do_deadcode_BB ( bb );
   5226    if (iropt_verbose) {
   5227       vex_printf("\n========= SPECd \n\n" );
   5228       ppIRSB(bb);
   5229    }
   5230 
   5231    return bb;
   5232 }
   5233 
   5234 
   5235 /* Do some more expensive transformations on bb, which are aimed at
   5236    optimising as much as possible in the presence of GetI and PutI.  */
   5237 
   5238 static
   5239 IRSB* expensive_transformations( IRSB* bb )
   5240 {
   5241    (void)do_cse_BB( bb );
   5242    collapse_AddSub_chains_BB( bb );
   5243    do_redundant_GetI_elimination( bb );
   5244    if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
   5245       do_redundant_PutI_elimination( bb );
   5246    }
   5247    do_deadcode_BB( bb );
   5248    return bb;
   5249 }
   5250 
   5251 
   5252 /* Scan a flattened BB to look for signs that more expensive
   5253    optimisations might be useful:
   5254    - find out if there are any GetIs and PutIs
   5255    - find out if there are any floating or vector-typed temporaries
   5256 */
   5257 
   5258 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
   5259                                  /*OUT*/Bool* hasVorFtemps,
   5260                                  IRSB* bb )
   5261 {
   5262    Int      i, j;
   5263    IRStmt*  st;
   5264    IRDirty* d;
   5265    IRCAS*   cas;
   5266 
   5267    *hasGetIorPutI = False;
   5268    *hasVorFtemps  = False;
   5269 
   5270    for (i = 0; i < bb->stmts_used; i++) {
   5271       st = bb->stmts[i];
   5272       switch (st->tag) {
   5273          case Ist_AbiHint:
   5274             vassert(isIRAtom(st->Ist.AbiHint.base));
   5275             vassert(isIRAtom(st->Ist.AbiHint.nia));
   5276             break;
   5277          case Ist_PutI:
   5278             *hasGetIorPutI = True;
   5279             break;
   5280          case Ist_WrTmp:
   5281             if (st->Ist.WrTmp.data->tag == Iex_GetI)
   5282                *hasGetIorPutI = True;
   5283             switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
   5284                case Ity_I1: case Ity_I8: case Ity_I16:
   5285                case Ity_I32: case Ity_I64: case Ity_I128:
   5286                   break;
   5287                case Ity_F32: case Ity_F64: case Ity_F128:
   5288                case Ity_V128: case Ity_V256:
   5289                   *hasVorFtemps = True;
   5290                   break;
   5291                case Ity_D32: case Ity_D64: case Ity_D128:
   5292                   *hasVorFtemps = True;
   5293                   break;
   5294                default:
   5295                   goto bad;
   5296             }
   5297             break;
   5298          case Ist_Put:
   5299             vassert(isIRAtom(st->Ist.Put.data));
   5300             break;
   5301          case Ist_Store:
   5302             vassert(isIRAtom(st->Ist.Store.addr));
   5303             vassert(isIRAtom(st->Ist.Store.data));
   5304             break;
   5305          case Ist_CAS:
   5306             cas = st->Ist.CAS.details;
   5307             vassert(isIRAtom(cas->addr));
   5308             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   5309             vassert(isIRAtom(cas->expdLo));
   5310             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   5311             vassert(isIRAtom(cas->dataLo));
   5312             break;
   5313          case Ist_LLSC:
   5314             vassert(isIRAtom(st->Ist.LLSC.addr));
   5315             if (st->Ist.LLSC.storedata)
   5316                vassert(isIRAtom(st->Ist.LLSC.storedata));
   5317             break;
   5318          case Ist_Dirty:
   5319             d = st->Ist.Dirty.details;
   5320             vassert(isIRAtom(d->guard));
   5321             for (j = 0; d->args[j]; j++)
   5322                vassert(isIRAtom(d->args[j]));
   5323             if (d->mFx != Ifx_None)
   5324                vassert(isIRAtom(d->mAddr));
   5325             break;
   5326          case Ist_NoOp:
   5327          case Ist_IMark:
   5328          case Ist_MBE:
   5329             break;
   5330          case Ist_Exit:
   5331             vassert(isIRAtom(st->Ist.Exit.guard));
   5332             break;
   5333          default:
   5334          bad:
   5335             ppIRStmt(st);
   5336             vpanic("considerExpensives");
   5337       }
   5338    }
   5339 }
   5340 
   5341 
   5342 /* ---------------- The main iropt entry point. ---------------- */
   5343 
   5344 /* exported from this file */
   5345 /* Rules of the game:
   5346 
   5347    - IRExpr/IRStmt trees should be treated as immutable, as they
   5348      may get shared.  So never change a field of such a tree node;
   5349      instead construct and return a new one if needed.
   5350 */
   5351 
   5352 
   5353 IRSB* do_iropt_BB(
   5354          IRSB* bb0,
   5355          IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
   5356          Bool (*preciseMemExnsFn)(Int,Int),
   5357          Addr64 guest_addr,
   5358          VexArch guest_arch
   5359       )
   5360 {
   5361    static Int n_total     = 0;
   5362    static Int n_expensive = 0;
   5363 
   5364    Bool hasGetIorPutI, hasVorFtemps;
   5365    IRSB *bb, *bb2;
   5366 
   5367    n_total++;
   5368 
   5369    /* First flatten the block out, since all other
   5370       phases assume flat code. */
   5371 
   5372    bb = flatten_BB ( bb0 );
   5373 
   5374    if (iropt_verbose) {
   5375       vex_printf("\n========= FLAT\n\n" );
   5376       ppIRSB(bb);
   5377    }
   5378 
   5379    /* If at level 0, stop now. */
   5380    if (vex_control.iropt_level <= 0) return bb;
   5381 
   5382    /* Now do a preliminary cleanup pass, and figure out if we also
   5383       need to do 'expensive' optimisations.  Expensive optimisations
   5384       are deemed necessary if the block contains any GetIs or PutIs.
   5385       If needed, do expensive transformations and then another cheap
   5386       cleanup pass. */
   5387 
   5388    bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   5389 
   5390    if (guest_arch == VexArchARM) {
   5391       /* Translating Thumb2 code produces a lot of chaff.  We have to
   5392          work extra hard to get rid of it. */
   5393       bb = cprop_BB(bb);
   5394       bb = spec_helpers_BB ( bb, specHelper );
   5395       if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
   5396          redundant_put_removal_BB ( bb, preciseMemExnsFn );
   5397       }
   5398       do_cse_BB( bb );
   5399       do_deadcode_BB( bb );
   5400    }
   5401 
   5402    if (vex_control.iropt_level > 1) {
   5403 
   5404       /* Peer at what we have, to decide how much more effort to throw
   5405          at it. */
   5406       considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
   5407 
   5408       if (hasVorFtemps && !hasGetIorPutI) {
   5409          /* If any evidence of FP or Vector activity, CSE, as that
   5410             tends to mop up all manner of lardy code to do with
   5411             rounding modes.  Don't bother if hasGetIorPutI since that
   5412             case leads into the expensive transformations, which do
   5413             CSE anyway. */
   5414          (void)do_cse_BB( bb );
   5415          do_deadcode_BB( bb );
   5416       }
   5417 
   5418       if (hasGetIorPutI) {
   5419          Bool cses;
   5420          n_expensive++;
   5421          if (DEBUG_IROPT)
   5422             vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
   5423          bb = expensive_transformations( bb );
   5424          bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   5425          /* Potentially common up GetIs */
   5426          cses = do_cse_BB( bb );
   5427          if (cses)
   5428             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   5429       }
   5430 
   5431       /* Now have a go at unrolling simple (single-BB) loops.  If
   5432          successful, clean up the results as much as possible. */
   5433 
   5434       bb2 = maybe_loop_unroll_BB( bb, guest_addr );
   5435       if (bb2) {
   5436          bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
   5437          if (hasGetIorPutI) {
   5438             bb = expensive_transformations( bb );
   5439             bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
   5440          } else {
   5441             /* at least do CSE and dead code removal */
   5442             do_cse_BB( bb );
   5443             do_deadcode_BB( bb );
   5444          }
   5445          if (0) vex_printf("vex iropt: unrolled a loop\n");
   5446       }
   5447 
   5448    }
   5449 
   5450    return bb;
   5451 }
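         /* Hypothetical calling sketch (the real call sites live in the
            translation pipeline, e.g. main_main.c; the two amd64 helper
            names are used purely as an example of suitable arguments):

               IRSB* bb_opt
                  = do_iropt_BB( bb, guest_amd64_spechelper,
                                 guest_amd64_state_requires_precise_mem_exns,
                                 guest_IP_of_bb, VexArchAMD64 );
         */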
   5452 
   5453 
   5454 /*---------------------------------------------------------------*/
   5455 /*--- end                                            ir_opt.c ---*/
   5456 /*---------------------------------------------------------------*/
   5457