Home | History | Annotate | Download | only in priv
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*---------------------------------------------------------------*/
      4 /*--- begin                                          ir_opt.c ---*/
      5 /*---------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2004-2017 OpenWorks LLP
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #include "libvex_basictypes.h"
     38 #include "libvex_ir.h"
     39 #include "libvex.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "ir_opt.h"
     44 
     45 
     46 /* Set to 1 for lots of debugging output. */
     47 #define DEBUG_IROPT 0
     48 
     49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
     50 #define STATS_IROPT 0
     51 
     52 
     53 /* What iropt does, 29 Dec 04.
     54 
     55    It takes an IRSB and produces a new one with the same meaning,
     56    defined thus:
     57 
     58    After execution of the new BB, all guest state and guest memory is
     59    the same as after execution of the original.  This is true
     60    regardless of how the block was exited (at the end vs side exit).
     61 
     62    In addition, parts of the guest state will be identical to that
     63    created by execution of the original at the following observation
     64    points:
     65 
     66    * In a dirty helper call, any parts of the guest state that the
     67      helper states that it reads or modifies will be up to date.
     68      Also, guest memory will be up to date.  Parts of the guest state
     69      not marked as being read or modified by the helper cannot be
     70      assumed to be up-to-date at the point where the helper is called.
     71 
     72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is up to date only as explained above
     74      (i.e. at SB exits and as specified by dirty helper call).
     75      Also, the stack pointer register is up to date at memory
     76      exception points (as this is needed for the stack extension
     77      logic in m_signals.c).
     78 
     79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     80      Immediately prior to any load or store, those parts of the guest
     81      state marked as requiring precise exceptions will be up to date.
     82      Also, guest memory will be up to date.  Parts of the guest state
     83      not marked as requiring precise exceptions cannot be assumed to
     84      be up-to-date at the point of the load/store.
     85 
     86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     87      Same as minimal, but all the guest state is up to date at memory
     88      exception points.
     89 
     90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     91      Guest state is up to date at each instruction.
     92 
     93    The relative order of loads and stores (including loads/stores of
     94    guest memory done by dirty helpers annotated as such) is not
     95    changed.  However, the relative order of loads with no intervening
     96    stores/modifies may be changed.
     97 
     98    Transformation order
     99    ~~~~~~~~~~~~~~~~~~~~
    100 
    101    There are three levels of optimisation, controlled by
    102    vex_control.iropt_level.  Define first:
    103 
    104    "Cheap transformations" are the following sequence:
    105       * Redundant-Get removal
    106       * Redundant-Put removal
    107       * Constant propagation/folding
    108       * Dead code removal
    109       * Specialisation of clean helper functions
    110       * Dead code removal
    111 
    112    "Expensive transformations" are the following sequence:
    113       * CSE
    114       * Folding of add/sub chains
    115       * Redundant-GetI removal
    116       * Redundant-PutI removal
    117       * Dead code removal
    118 
    119    Then the transformations are as follows, as defined by
    120    vex_control.iropt_level:
    121 
    122    Level 0:
    123       * Flatten into atomic form.
    124 
    125    Level 1: the following sequence:
    126       * Flatten into atomic form.
    127       * Cheap transformations.
    128 
    129    Level 2: the following sequence
    130       * Flatten into atomic form.
    131       * Cheap transformations.
    132       * If block contains any floating or vector types, CSE.
    133       * If block contains GetI or PutI, Expensive transformations.
    134       * Try unrolling loops.  Three possible outcomes:
    135         - No effect: do nothing more.
    136         - Unrolled a loop, and block does not contain GetI or PutI:
    137           Do: * CSE
    138               * Dead code removal
    139         - Unrolled a loop, and block contains GetI or PutI:
    140           Do: * Expensive transformations
    141               * Cheap transformations
    142 */
    143 
    144 /* Implementation notes, 29 Dec 04.
    145 
    146    TODO (important): I think rPutI removal ignores precise exceptions
    147    and is therefore in a sense, wrong.  In the sense that PutIs are
    148    assumed not to write parts of the guest state that we need to have
    149    up-to-date at loads/stores.  So far on x86 guest that has not
    150    mattered since indeed only the x87 FP registers and tags are
    151    accessed using GetI/PutI, and there is no need so far for them to
    152    be up to date at mem exception points.  The rPutI pass should be
    153    fixed.
    154 
    155    TODO: improve pessimistic handling of precise exceptions
    156      in the tree builder.
    157 
    158    TODO: check interaction of rGetI and dirty helpers.
    159 
    160    F64i constants are treated differently from other constants.
    161    They are not regarded as atoms, and instead lifted off and
    162    bound to temps.  This allows them to participate in CSE, which
    163    is important for getting good performance for x86 guest code.
    164 
    165    CSE up F64 literals (already doing F64is)
    166 
    167    CSE: consider carefully the requirement for precise exns
    168         prior to making CSE any more aggressive.  */
    169 
    170 
    171 /*---------------------------------------------------------------*/
    172 /*--- Finite mappery, of a sort                               ---*/
    173 /*---------------------------------------------------------------*/
    174 
/* General map from HWord-sized thing to HWord-sized thing.  Could be by
    176    hashing, but it's not clear whether or not this would really be any
    177    faster. */
    178 
    179 typedef
    180    struct {
    181       Bool*  inuse;
    182       HWord* key;
    183       HWord* val;
    184       Int    size;
    185       Int    used;
    186    }
    187    HashHW;
    188 
    189 static HashHW* newHHW ( void )
    190 {
    191    HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
    192    h->size   = 8;
    193    h->used   = 0;
    194    h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
    195    h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    196    h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    197    return h;
    198 }
    199 
    200 
    201 /* Look up key in the map. */
    202 
    203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    204 {
    205    Int i;
    206    /* vex_printf("lookupHHW(%llx)\n", key ); */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          if (val)
    210             *val = h->val[i];
    211          return True;
    212       }
    213    }
    214    return False;
    215 }
    216 
    217 
    218 /* Add key->val to the map.  Replaces any existing binding for key. */
    219 
    220 static void addToHHW ( HashHW* h, HWord key, HWord val )
    221 {
    222    Int i, j;
    223    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    224 
    225    /* Find and replace existing binding, if any. */
    226    for (i = 0; i < h->used; i++) {
    227       if (h->inuse[i] && h->key[i] == key) {
    228          h->val[i] = val;
    229          return;
    230       }
    231    }
    232 
    233    /* Ensure a space is available. */
    234    if (h->used == h->size) {
    235       /* Copy into arrays twice the size. */
    236       Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
    237       HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    238       HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    239       for (i = j = 0; i < h->size; i++) {
    240          if (!h->inuse[i]) continue;
    241          inuse2[j] = True;
    242          key2[j] = h->key[i];
    243          val2[j] = h->val[i];
    244          j++;
    245       }
    246       h->used = j;
    247       h->size *= 2;
    248       h->inuse = inuse2;
    249       h->key = key2;
    250       h->val = val2;
    251    }
    252 
    253    /* Finally, add it. */
    254    vassert(h->used < h->size);
    255    h->inuse[h->used] = True;
    256    h->key[h->used] = key;
    257    h->val[h->used] = val;
    258    h->used++;
    259 }
    260 
    261 
    262 /*---------------------------------------------------------------*/
    263 /*--- Flattening out a BB into atomic SSA form                ---*/
    264 /*---------------------------------------------------------------*/
    265 
    266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    267    copies made by flattening.  If in doubt return False. */
    268 
    269 static Bool isFlat ( IRExpr* e )
    270 {
    271    if (e->tag == Iex_Get)
    272       return True;
    273    if (e->tag == Iex_Binop)
    274       return toBool( isIRAtom(e->Iex.Binop.arg1)
    275                      && isIRAtom(e->Iex.Binop.arg2) );
    276    if (e->tag == Iex_Load)
    277       return isIRAtom(e->Iex.Load.addr);
    278    return False;
    279 }
    280 
    281 /* Flatten out 'ex' so it is atomic, returning a new expression with
    282    the same value, after having appended extra IRTemp assignments to
    283    the end of 'bb'. */
    284 
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   /* Recursively flatten 'ex': for each non-atomic node, flatten its
      subexpressions, bind the rebuilt node to a fresh temp via a
      WrTmp appended to 'bb', and return RdTmp of that temp.  Atoms
      (RdTmp, and constants other than F64i) are returned as-is. */
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Only the index is an expression; descr and bias are not. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* No subexpressions; just bind the Get itself to a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Shallow-copy the arg vector (NULL-terminated), then
            flatten each argument in place in the copy. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already atomic. */
         return ex;

      default:
         /* Unknown expression tag: print it and give up. */
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
    396 
    397 
    398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    399 
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   /* Flatten every subexpression of 'st' via flatten_Expr (which
      appends the necessary WrTmp statements to 'bb'), then append a
      rebuilt copy of 'st' whose operands are all atoms. */
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         /* Guarded store: addr, data and guard all need flattening. */
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         /* Guarded load: addr, alternative value and guard. */
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* The Hi halves are optional (NULL for single-width CAS). */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Work on a copy so the original dirty call is untouched. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/GSPTR marker args are not real expressions. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         /* Unknown statement tag: print it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
    519 
    520 
    521 static IRSB* flatten_BB ( IRSB* in )
    522 {
    523    Int   i;
    524    IRSB* out;
    525    out = emptyIRSB();
    526    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    527    for (i = 0; i < in->stmts_used; i++)
    528       if (in->stmts[i])
    529          flatten_Stmt( out, in->stmts[i] );
    530    out->next     = flatten_Expr( out, in->next );
    531    out->jumpkind = in->jumpkind;
    532    out->offsIP   = in->offsIP;
    533    return out;
    534 }
    535 
    536 
    537 /*---------------------------------------------------------------*/
    538 /*--- In-place removal of redundant GETs                      ---*/
    539 /*---------------------------------------------------------------*/
    540 
    541 /* Scan forwards, building up an environment binding (min offset, max
    542    offset) pairs to values, which will either be temps or constants.
    543 
    544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    545    env and if it matches, replace the Get with the stored value.  If
    546    there is no match, add a (minoff,maxoff) :-> t binding.
    547 
    548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    549    any binding which fully or partially overlaps with (minoff,maxoff).
    550    Then add a new (minoff,maxoff) :-> t or c binding.  */
    551 
    552 /* Extract the min/max offsets from a guest state array descriptor. */
    553 
    554 inline
    555 static void getArrayBounds ( IRRegArray* descr,
    556                              UInt* minoff, UInt* maxoff )
    557 {
    558    *minoff = descr->base;
    559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    560    vassert((*minoff & ~0xFFFF) == 0);
    561    vassert((*maxoff & ~0xFFFF) == 0);
    562    vassert(*minoff <= *maxoff);
    563 }
    564 
    565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    566 
    567 static UInt mk_key_GetPut ( Int offset, IRType ty )
    568 {
    569    /* offset should fit in 16 bits. */
    570    UInt minoff = offset;
    571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    572    vassert((minoff & ~0xFFFF) == 0);
    573    vassert((maxoff & ~0xFFFF) == 0);
    574    return (minoff << 16) | maxoff;
    575 }
    576 
    577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    578 {
    579    UInt minoff, maxoff;
    580    getArrayBounds( descr, &minoff, &maxoff );
    581    vassert((minoff & ~0xFFFF) == 0);
    582    vassert((maxoff & ~0xFFFF) == 0);
    583    return (minoff << 16) | maxoff;
    584 }
    585 
    586 /* Supposing h has keys of the form generated by mk_key_GetPut and
    587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    588    .. k_hi).
    589 */
    590 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    591 {
    592    Int  j;
    593    UInt e_lo, e_hi;
    594    vassert(k_lo <= k_hi);
    595    /* invalidate any env entries which in any way overlap (k_lo
    596       .. k_hi) */
    597    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    598 
    599    for (j = 0; j < h->used; j++) {
    600       if (!h->inuse[j])
    601          continue;
    602       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    603       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    604       vassert(e_lo <= e_hi);
    605       if (e_hi < k_lo || k_hi < e_lo)
    606          continue; /* no overlap possible */
    607       else
    608          /* overlap; invalidate */
    609          h->inuse[j] = False;
    610    }
    611 }
    612 
    613 
    614 static void redundant_get_removal_BB ( IRSB* bb )
    615 {
    616    HashHW* env = newHHW();
    617    UInt    key = 0; /* keep gcc -O happy */
    618    Int     i, j;
    619    HWord   val;
    620 
    621    for (i = 0; i < bb->stmts_used; i++) {
    622       IRStmt* st = bb->stmts[i];
    623 
    624       if (st->tag == Ist_NoOp)
    625          continue;
    626 
    627       /* Deal with Gets */
    628       if (st->tag == Ist_WrTmp
    629           && st->Ist.WrTmp.data->tag == Iex_Get) {
    630          /* st is 't = Get(...)'.  Look up in the environment and see
    631             if the Get can be replaced. */
    632          IRExpr* get = st->Ist.WrTmp.data;
    633          key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
    634                                      get->Iex.Get.ty );
    635          if (lookupHHW(env, &val, (HWord)key)) {
    636             /* found it */
    637             /* Note, we could do better here.  If the types are
    638                different we don't do the substitution, since doing so
    639                could lead to invalidly-typed IR.  An improvement would
    640                be to stick in a reinterpret-style cast, although that
    641                would make maintaining flatness more difficult. */
    642             IRExpr* valE    = (IRExpr*)val;
    643             Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
    644                                       == st->Ist.WrTmp.data->Iex.Get.ty );
    645             if (typesOK && DEBUG_IROPT) {
    646                vex_printf("rGET: "); ppIRExpr(get);
    647                vex_printf("  ->  "); ppIRExpr(valE);
    648                vex_printf("\n");
    649             }
    650             if (typesOK)
    651                bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
    652          } else {
    653             /* Not found, but at least we know that t and the Get(...)
    654                are now associated.  So add a binding to reflect that
    655                fact. */
    656             addToHHW( env, (HWord)key,
    657                            (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
    658          }
    659       }
    660 
    661       /* Deal with Puts: invalidate any env entries overlapped by this
    662          Put */
    663       if (st->tag == Ist_Put || st->tag == Ist_PutI) {
    664          UInt k_lo, k_hi;
    665          if (st->tag == Ist_Put) {
    666             key = mk_key_GetPut( st->Ist.Put.offset,
    667                                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
    668          } else {
    669             vassert(st->tag == Ist_PutI);
    670             key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
    671          }
    672 
    673          k_lo = (key >> 16) & 0xFFFF;
    674          k_hi = key & 0xFFFF;
    675          invalidateOverlaps(env, k_lo, k_hi);
    676       }
    677       else
    678       if (st->tag == Ist_Dirty) {
    679          /* Deal with dirty helpers which write or modify guest state.
    680             Invalidate the entire env.  We could do a lot better
    681             here. */
    682          IRDirty* d      = st->Ist.Dirty.details;
    683          Bool     writes = False;
    684          for (j = 0; j < d->nFxState; j++) {
    685             if (d->fxState[j].fx == Ifx_Modify
    686                 || d->fxState[j].fx == Ifx_Write)
    687             writes = True;
    688          }
    689          if (writes) {
    690             /* dump the entire env (not clever, but correct ...) */
    691             for (j = 0; j < env->used; j++)
    692                env->inuse[j] = False;
    693             if (0) vex_printf("rGET: trash env due to dirty helper\n");
    694          }
    695       }
    696 
    697       /* add this one to the env, if appropriate */
    698       if (st->tag == Ist_Put) {
    699          vassert(isIRAtom(st->Ist.Put.data));
    700          addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
    701       }
    702 
    703    } /* for (i = 0; i < bb->stmts_used; i++) */
    704 
    705 }
    706 
    707 
    708 /*---------------------------------------------------------------*/
    709 /*--- In-place removal of redundant PUTs                      ---*/
    710 /*---------------------------------------------------------------*/
    711 
    712 /* Find any Get uses in st and invalidate any partially or fully
    713    overlapping ranges listed in env.  Due to the flattening phase, the
    714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    715 
    716 static void handle_gets_Stmt (
    717                HashHW* env,
    718                IRStmt* st,
    719                Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
    720                VexRegisterUpdates pxControl
    721             )
    722 {
    723    Int     j;
    724    UInt    key = 0; /* keep gcc -O happy */
    725    Bool    isGet;
    726    Bool    memRW = False;
    727    IRExpr* e;
    728 
    729    switch (st->tag) {
    730 
    731       /* This is the only interesting case.  Deal with Gets in the RHS
    732          expression. */
    733       case Ist_WrTmp:
    734          e = st->Ist.WrTmp.data;
    735          switch (e->tag) {
    736             case Iex_Get:
    737                isGet = True;
    738                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    739                break;
    740             case Iex_GetI:
    741                isGet = True;
    742                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    743                break;
    744             case Iex_Load:
    745                isGet = False;
    746                memRW = True;
    747                break;
    748             default:
    749                isGet = False;
    750          }
    751          if (isGet) {
    752             UInt k_lo, k_hi;
    753             k_lo = (key >> 16) & 0xFFFF;
    754             k_hi = key & 0xFFFF;
    755             invalidateOverlaps(env, k_lo, k_hi);
    756          }
    757          break;
    758 
    759       /* Be very conservative for dirty helper calls; dump the entire
    760          environment.  The helper might read guest state, in which
    761          case it needs to be flushed first.  Also, the helper might
    762          access guest memory, in which case all parts of the guest
    763          state requiring precise exceptions needs to be flushed.  The
    764          crude solution is just to flush everything; we could easily
    765          enough do a lot better if needed. */
    766       /* Probably also overly-conservative, but also dump everything
    767          if we hit a memory bus event (fence, lock, unlock).  Ditto
    768          AbiHints, CASs, LLs and SCs. */
    769       case Ist_AbiHint:
    770          vassert(isIRAtom(st->Ist.AbiHint.base));
    771          vassert(isIRAtom(st->Ist.AbiHint.nia));
    772          /* fall through */
    773       case Ist_MBE:
    774       case Ist_Dirty:
    775       case Ist_CAS:
    776       case Ist_LLSC:
    777          for (j = 0; j < env->used; j++)
    778             env->inuse[j] = False;
    779          break;
    780 
    781       /* all other cases are boring. */
    782       case Ist_Store:
    783          vassert(isIRAtom(st->Ist.Store.addr));
    784          vassert(isIRAtom(st->Ist.Store.data));
    785          memRW = True;
    786          break;
    787       case Ist_StoreG: {
    788          IRStoreG* sg = st->Ist.StoreG.details;
    789          vassert(isIRAtom(sg->addr));
    790          vassert(isIRAtom(sg->data));
    791          vassert(isIRAtom(sg->guard));
    792          memRW = True;
    793          break;
    794       }
    795       case Ist_LoadG: {
    796          IRLoadG* lg = st->Ist.LoadG.details;
    797          vassert(isIRAtom(lg->addr));
    798          vassert(isIRAtom(lg->alt));
    799          vassert(isIRAtom(lg->guard));
    800          memRW = True;
    801          break;
    802       }
    803       case Ist_Exit:
    804          vassert(isIRAtom(st->Ist.Exit.guard));
    805          break;
    806 
    807       case Ist_Put:
    808          vassert(isIRAtom(st->Ist.Put.data));
    809          break;
    810 
    811       case Ist_PutI:
    812          vassert(isIRAtom(st->Ist.PutI.details->ix));
    813          vassert(isIRAtom(st->Ist.PutI.details->data));
    814          break;
    815 
    816       case Ist_NoOp:
    817       case Ist_IMark:
    818          break;
    819 
    820       default:
    821          vex_printf("\n");
    822          ppIRStmt(st);
    823          vex_printf("\n");
    824          vpanic("handle_gets_Stmt");
    825    }
    826 
    827    if (memRW) {
    828       /* This statement accesses memory.  So we might need to dump all parts
    829          of the environment corresponding to guest state that may not
    830          be reordered with respect to memory references.  That means
    831          at least the stack pointer. */
    832       switch (pxControl) {
    833          case VexRegUpdAllregsAtMemAccess:
    834             /* Precise exceptions required at mem access.
    835                Flush all guest state. */
    836             for (j = 0; j < env->used; j++)
    837                env->inuse[j] = False;
    838             break;
    839          case VexRegUpdSpAtMemAccess:
    840             /* We need to dump the stack pointer
    841                (needed for stack extension in m_signals.c).
    842                preciseMemExnsFn will use vex_control.iropt_register_updates
    843                to verify only the sp is to be checked. */
    844             /* fallthrough */
    845          case VexRegUpdUnwindregsAtMemAccess:
    846             for (j = 0; j < env->used; j++) {
    847                if (!env->inuse[j])
    848                   continue;
    849                /* Just flush the minimal amount required, as computed by
    850                   preciseMemExnsFn. */
    851                HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    852                HWord k_hi = env->key[j] & 0xFFFF;
    853                if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
    854                   env->inuse[j] = False;
    855             }
    856             break;
    857          case VexRegUpdAllregsAtEachInsn:
    858             // VexRegUpdAllregsAtEachInsn cannot happen here.
    859             // fall through
    860          case VexRegUpd_INVALID:
    861          default:
    862             vassert(0);
    863       }
    864    } /* if (memRW) */
    865 
    866 }
    867 
    868 
    869 /* Scan backwards, building up a set of (min offset, max
    870    offset) pairs, indicating those parts of the guest state
    871    for which the next event is a write.
    872 
    873    On seeing a conditional exit, empty the set.
    874 
    875    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    876    completely within the set, remove the Put.  Otherwise, add
    877    (minoff,maxoff) to the set.
    878 
    879    On seeing 'Get (minoff,maxoff)', remove any part of the set
    880    overlapping (minoff,maxoff).  The same has to happen for any events
    881    which implicitly read parts of the guest state: dirty helper calls
    882    and loads/stores.
    883 */
    884 
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* This pass is only valid when precise exceptions are not required
      at every instruction boundary. */
   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   /* env is the running set of (min offset, max offset) guest-state
      ranges whose next event (scanning backwards) is a write. */
   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         /* Currently we take the conservative route: unconditionally
            empty the whole env at every conditional exit.  Steps (1)
            and (3) above are left disabled. */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Note: PutI is deliberately excluded from removal below; its
         written range is only known approximately, so it is never
         safe to delete.  It is still classified as a Put above so
         that it does not fall through to the Get handling. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
    991 
    992 
    993 /*---------------------------------------------------------------*/
    994 /*--- Constant propagation and folding                        ---*/
    995 /*---------------------------------------------------------------*/
    996 
#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation.
   Reset to zero at the start of each top-level sameIRExprs call. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30
   1024 
   1025 
   1026 /* The env in this section is a map from IRTemp to IRExpr*,
   1027    that is, an array indexed by IRTemp. */
   1028 
   1029 /* Do both expressions compute the same value? The answer is generally
   1030    conservative, i.e. it will report that the expressions do not compute
   1031    the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thus, two
   1033    Get's, GetI's or Load's, even when accessing the same location, will be
   1034    assumed to compute different values. After all the accesses may happen
   1035    at different times and the guest state / memory can have changed in
   1036    the meantime.
   1037 
   1038    XXX IMPORTANT XXX the two expressions must have the same IR type.
   1039    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
   1040 
   1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   1042    wrapper that deals with the common tags-don't-match case, and a
   1043    slower out of line general case.  Saves a few insns. */
   1044 
   1045 __attribute__((noinline))
   1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
   1047 
   1048 inline
   1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1050 {
   1051    if (e1->tag != e2->tag) return False;
   1052    return sameIRExprs_aux2(env, e1, e2);
   1053 }
   1054 
/* Out-of-line general case of the structural-equality test.  On entry
   e1->tag == e2->tag is guaranteed by the sameIRExprs_aux wrapper.
   The answer is conservative: False may be returned even for equal
   expressions (e.g. when the NODE_LIMIT budget is exhausted). */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Give up once the per-invocation node budget is spent; guards
      against quadratic blowup on large trees. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different tmps: chase one level through the tmp-binding
            env (entries may be NULL for tmps defined by IRStmts). */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions have the same IR type,
            hence constants of differing tag indicate a caller bug. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
   1132 
/* Top-level entry point for the structural-equality test.  Resets the
   per-invocation node budget, performs the comparison, and (when
   STATS_IROPT is enabled) updates the statistics counters. */
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   /* Fresh budget for this invocation; consumed by sameIRExprs_aux2. */
   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}
   1155 
   1156 
   1157 /* Debugging-only hack (not used in production runs): make a guess
   1158    whether sameIRExprs might assert due to the two args being of
   1159    different types.  If in doubt return False.  Is only used when
   1160    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   1161    Bad because it duplicates functionality from typeOfIRExpr.  See
   1162    comment on the single use point below for rationale. */
   1163 static
   1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   1165 {
   1166    if (e1->tag != e2->tag) return False;
   1167    switch (e1->tag) {
   1168       case Iex_Const: {
   1169          /* The only interesting case */
   1170          IRConst *c1 = e1->Iex.Const.con;
   1171          IRConst *c2 = e2->Iex.Const.con;
   1172          return c1->tag != c2->tag;
   1173       }
   1174       default:
   1175          break;
   1176    }
   1177    return False;
   1178 }
   1179 
   1180 
   1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
   1182 static Bool isZeroU32 ( IRExpr* e )
   1183 {
   1184    return toBool( e->tag == Iex_Const
   1185                   && e->Iex.Const.con->tag == Ico_U32
   1186                   && e->Iex.Const.con->Ico.U32 == 0);
   1187 }
   1188 
/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
#if 0
/* Disabled: kept for symmetry with isZeroU32; re-enable if a caller
   appears. */
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}
#endif
   1199 
   1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
   1201 static Bool isZeroV128 ( IRExpr* e )
   1202 {
   1203    return toBool( e->tag == Iex_Const
   1204                   && e->Iex.Const.con->tag == Ico_V128
   1205                   && e->Iex.Const.con->Ico.V128 == 0x0000);
   1206 }
   1207 
   1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
   1209 static Bool isZeroV256 ( IRExpr* e )
   1210 {
   1211    return toBool( e->tag == Iex_Const
   1212                   && e->Iex.Const.con->tag == Ico_V256
   1213                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
   1214 }
   1215 
   1216 /* Is this an integer constant with value 0 ? */
   1217 static Bool isZeroU ( IRExpr* e )
   1218 {
   1219    if (e->tag != Iex_Const) return False;
   1220    switch (e->Iex.Const.con->tag) {
   1221       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
   1222       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
   1223       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
   1224       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
   1225       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
   1226       case Ico_V256:  return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
   1227       default: vpanic("isZeroU");
   1228    }
   1229 }
   1230 
   1231 /* Is this an integer constant with value 1---1b ? */
   1232 static Bool isOnesU ( IRExpr* e )
   1233 {
   1234    if (e->tag != Iex_Const) return False;
   1235    switch (e->Iex.Const.con->tag) {
   1236       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
   1237       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
   1238       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
   1239                                      == 0xFFFFFFFF);
   1240       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
   1241                                      == 0xFFFFFFFFFFFFFFFFULL);
   1242       default: ppIRExpr(e); vpanic("isOnesU");
   1243    }
   1244 }
   1245 
   1246 static Bool notBool ( Bool b )
   1247 {
   1248    if (b == True) return False;
   1249    if (b == False) return True;
   1250    vpanic("notBool");
   1251 }
   1252 
   1253 /* Make a zero which has the same type as the result of the given
   1254    primop. */
/* Make a zero which has the same type as the result of the given
   primop.  Only the primops actually needed by the folding rules are
   handled; anything else panics. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_XorV256:
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}
   1273 
   1274 /* Make a value containing all 1-bits, which has the same type as the
   1275    result of the given primop. */
/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop.  Only the primops actually needed by
   the folding rules are handled; anything else panics.  Vector
   constants are given in their summarised form (one bit per byte). */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}
   1300 
   1301 /* Helpers for folding Clz32/64. */
   1302 static UInt fold_Clz64 ( ULong value )
   1303 {
   1304    UInt i;
   1305    vassert(value != 0ULL); /* no defined semantics for arg==0 */
   1306    for (i = 0; i < 64; ++i) {
   1307       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   1308    }
   1309    vassert(0);
   1310    /*NOTREACHED*/
   1311    return 0;
   1312 }
   1313 
   1314 static UInt fold_Clz32 ( UInt value )
   1315 {
   1316    UInt i;
   1317    vassert(value != 0); /* no defined semantics for arg==0 */
   1318    for (i = 0; i < 32; ++i) {
   1319       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   1320    }
   1321    vassert(0);
   1322    /*NOTREACHED*/
   1323    return 0;
   1324 }
   1325 
   1326 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   1327    the corresponding real constant. */
   1328 //XXX re-check this before use
   1329 //static ULong de_summarise_V64 ( UChar v64 )
   1330 //{
   1331 //   ULong r = 0;
   1332 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
   1333 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
   1334 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
   1335 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
   1336 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
   1337 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
   1338 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
   1339 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
   1340 //   return r;
   1341 //}
   1342 
   1343 /* Helper for arbitrary expression pattern matching in flat IR.  If
   1344    'e' is a reference to a tmp, look it up in env -- repeatedly, if
   1345    necessary -- until it resolves to a non-tmp.  Note that this can
   1346    return NULL if it can't resolve 'e' to a new expression, which will
   1347    be the case if 'e' is instead defined by an IRStmt (IRDirty or
   1348    LLSC). */
   1349 static IRExpr* chase ( IRExpr** env, IRExpr* e )
   1350 {
   1351    /* Why is this loop guaranteed to terminate?  Because all tmps must
   1352       have definitions before use, hence a tmp cannot be bound
   1353       (directly or indirectly) to itself. */
   1354    while (e->tag == Iex_RdTmp) {
   1355       if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
   1356       e = env[(Int)e->Iex.RdTmp.tmp];
   1357       if (e == NULL) break;
   1358    }
   1359    return e;
   1360 }
   1361 
   1362 /* Similar to |chase|, but follows at most one level of tmp reference. */
   1363 static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
   1364 {
   1365    if (e == NULL || e->tag != Iex_RdTmp)
   1366       return e;
   1367    else
   1368       return env[(Int)e->Iex.RdTmp.tmp];
   1369 }
   1370 
   1371 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
   1372 {
   1373    Int     shift;
   1374    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1375 
   1376    switch (e->tag) {
   1377    case Iex_Unop:
   1378       /* UNARY ops */
   1379       if (e->Iex.Unop.arg->tag == Iex_Const) {
   1380          switch (e->Iex.Unop.op) {
   1381          case Iop_1Uto8:
   1382             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1383                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1384                     ? 1 : 0)));
   1385             break;
   1386          case Iop_1Uto32:
   1387             e2 = IRExpr_Const(IRConst_U32(
   1388                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1389                     ? 1 : 0));
   1390             break;
   1391          case Iop_1Uto64:
   1392             e2 = IRExpr_Const(IRConst_U64(
   1393                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1394                     ? 1 : 0));
   1395             break;
   1396 
   1397          case Iop_1Sto8:
   1398             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1399                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1400                     ? 0xFF : 0)));
   1401             break;
   1402          case Iop_1Sto16:
   1403             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1404                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1405                     ? 0xFFFF : 0)));
   1406             break;
   1407          case Iop_1Sto32:
   1408             e2 = IRExpr_Const(IRConst_U32(
   1409                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1410                     ? 0xFFFFFFFF : 0));
   1411             break;
   1412          case Iop_1Sto64:
   1413             e2 = IRExpr_Const(IRConst_U64(
   1414                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1415                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1416             break;
   1417 
   1418          case Iop_8Sto32: {
   1419             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1420             u32 <<= 24;
   1421             u32 = (Int)u32 >> 24;   /* signed shift */
   1422             e2 = IRExpr_Const(IRConst_U32(u32));
   1423             break;
   1424          }
   1425          case Iop_16Sto32: {
   1426             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1427             u32 <<= 16;
   1428             u32 = (Int)u32 >> 16;   /* signed shift */
   1429             e2 = IRExpr_Const(IRConst_U32(u32));
   1430             break;
   1431          }
   1432          case Iop_8Uto64:
   1433             e2 = IRExpr_Const(IRConst_U64(
   1434                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1435             break;
   1436          case Iop_16Uto64:
   1437             e2 = IRExpr_Const(IRConst_U64(
   1438                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1439             break;
   1440          case Iop_8Uto32:
   1441             e2 = IRExpr_Const(IRConst_U32(
   1442                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1443             break;
   1444          case Iop_8Sto16: {
   1445             UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1446             u16 <<= 8;
   1447             u16 = (Short)u16 >> 8;  /* signed shift */
   1448             e2 = IRExpr_Const(IRConst_U16(u16));
   1449             break;
   1450          }
   1451          case Iop_8Uto16:
   1452             e2 = IRExpr_Const(IRConst_U16(
   1453                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1454             break;
   1455          case Iop_16Uto32:
   1456             e2 = IRExpr_Const(IRConst_U32(
   1457                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1458             break;
   1459          case Iop_32to16:
   1460             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1461                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1462             break;
   1463          case Iop_32to8:
   1464             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1465                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1466             break;
   1467          case Iop_32to1:
   1468             e2 = IRExpr_Const(IRConst_U1(toBool(
   1469                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1470                  )));
   1471             break;
   1472          case Iop_64to1:
   1473             e2 = IRExpr_Const(IRConst_U1(toBool(
   1474                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
   1475                  )));
   1476             break;
   1477 
   1478          case Iop_NotV128:
   1479             e2 = IRExpr_Const(IRConst_V128(
   1480                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
   1481             break;
   1482          case Iop_Not64:
   1483             e2 = IRExpr_Const(IRConst_U64(
   1484                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
   1485             break;
   1486          case Iop_Not32:
   1487             e2 = IRExpr_Const(IRConst_U32(
   1488                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1489             break;
   1490          case Iop_Not16:
   1491             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1492                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
   1493             break;
   1494          case Iop_Not8:
   1495             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1496                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
   1497             break;
   1498 
   1499          case Iop_Not1:
   1500             e2 = IRExpr_Const(IRConst_U1(
   1501                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
   1502             break;
   1503 
   1504          case Iop_64to8: {
   1505             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1506             w64 &= 0xFFULL;
   1507             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
   1508             break;
   1509          }
   1510          case Iop_64to16: {
   1511             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1512             w64 &= 0xFFFFULL;
   1513             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
   1514             break;
   1515          }
   1516          case Iop_64to32: {
   1517             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1518             w64 &= 0x00000000FFFFFFFFULL;
   1519             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1520             break;
   1521          }
   1522          case Iop_64HIto32: {
   1523             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1524             w64 >>= 32;
   1525             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
   1526             break;
   1527          }
   1528          case Iop_32Uto64:
   1529             e2 = IRExpr_Const(IRConst_U64(
   1530                     0xFFFFFFFFULL
   1531                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
   1532             break;
   1533          case Iop_16Sto64: {
   1534             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1535             u64 <<= 48;
   1536             u64 = (Long)u64 >> 48;   /* signed shift */
   1537             e2 = IRExpr_Const(IRConst_U64(u64));
   1538             break;
   1539          }
   1540          case Iop_32Sto64: {
   1541             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1542             u64 <<= 32;
   1543             u64 = (Long)u64 >> 32;   /* signed shift */
   1544             e2 = IRExpr_Const(IRConst_U64(u64));
   1545             break;
   1546          }
   1547 
   1548          case Iop_16to8: {
   1549             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1550             w16 &= 0xFF;
   1551             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1552             break;
   1553          }
   1554          case Iop_16HIto8: {
   1555             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1556             w16 >>= 8;
   1557             w16 &= 0xFF;
   1558             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
   1559             break;
   1560          }
   1561 
   1562          case Iop_CmpNEZ8:
   1563             e2 = IRExpr_Const(IRConst_U1(toBool(
   1564                     0 !=
   1565                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
   1566                  )));
   1567             break;
   1568          case Iop_CmpNEZ32:
   1569             e2 = IRExpr_Const(IRConst_U1(toBool(
   1570                     0 !=
   1571                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1572                  )));
   1573             break;
   1574          case Iop_CmpNEZ64:
   1575             e2 = IRExpr_Const(IRConst_U1(toBool(
   1576                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
   1577                  )));
   1578             break;
   1579 
   1580          case Iop_CmpwNEZ32: {
   1581             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1582             if (w32 == 0)
   1583                e2 = IRExpr_Const(IRConst_U32( 0 ));
   1584             else
   1585                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
   1586             break;
   1587          }
   1588          case Iop_CmpwNEZ64: {
   1589             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1590             if (w64 == 0)
   1591                e2 = IRExpr_Const(IRConst_U64( 0 ));
   1592             else
   1593                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
   1594             break;
   1595          }
   1596 
   1597          case Iop_Left32: {
   1598             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1599             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
   1600             s32 = (s32 | (-s32));
   1601             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
   1602             break;
   1603          }
   1604 
   1605          case Iop_Left64: {
   1606             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1607             Long  s64 = (Long)u64;
   1608             s64 = (s64 | (-s64));
   1609             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
   1610             break;
   1611          }
   1612 
   1613          case Iop_Clz32: {
   1614             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1615             if (u32 != 0)
   1616                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
   1617             break;
   1618          }
   1619          case Iop_Clz64: {
   1620             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1621             if (u64 != 0ULL)
   1622                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
   1623             break;
   1624          }
   1625 
   1626          /* For these vector ones, can't fold all cases, but at least
   1627             do the most obvious one.  Could do better here using
   1628             summarise/desummarise of vector constants, but too
   1629             difficult to verify; hence just handle the zero cases. */
   1630          case Iop_32UtoV128: {
   1631             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
   1632             if (0 == u32) {
   1633                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1634             } else {
   1635                goto unhandled;
   1636             }
   1637             break;
   1638          }
   1639          case Iop_V128to64: {
   1640             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1641             if (0 == ((v128 >> 0) & 0xFF)) {
   1642                e2 = IRExpr_Const(IRConst_U64(0));
   1643             } else {
   1644                goto unhandled;
   1645             }
   1646             break;
   1647          }
   1648          case Iop_V128HIto64: {
   1649             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1650             if (0 == ((v128 >> 8) & 0xFF)) {
   1651                e2 = IRExpr_Const(IRConst_U64(0));
   1652             } else {
   1653                goto unhandled;
   1654             }
   1655             break;
   1656          }
   1657          case Iop_64UtoV128: {
   1658             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
   1659             if (0 == u64) {
   1660                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1661             } else {
   1662                goto unhandled;
   1663             }
   1664             break;
   1665          }
   1666 
   1667          /* Even stupider (although still correct ..) */
   1668          case Iop_V256to64_0: case Iop_V256to64_1:
   1669          case Iop_V256to64_2: case Iop_V256to64_3: {
   1670             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
   1671             if (v256 == 0x00000000) {
   1672                e2 = IRExpr_Const(IRConst_U64(0));
   1673             } else {
   1674                goto unhandled;
   1675             }
   1676             break;
   1677          }
   1678 
   1679          case Iop_ZeroHI64ofV128: {
   1680             /* Could do better here -- only need to look at the bottom 64 bits
   1681                of the argument, really. */
   1682             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
   1683             if (v128 == 0x0000) {
   1684                e2 = IRExpr_Const(IRConst_V128(0x0000));
   1685             } else {
   1686                goto unhandled;
   1687             }
   1688             break;
   1689          }
   1690 
   1691          default:
   1692             goto unhandled;
   1693       }
   1694       }
   1695       break;
   1696 
   1697    case Iex_Binop:
   1698       /* BINARY ops */
   1699       if (e->Iex.Binop.arg1->tag == Iex_Const
   1700           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1701          /* cases where both args are consts */
   1702          switch (e->Iex.Binop.op) {
   1703 
   1704             /* -- Or -- */
   1705             case Iop_Or8:
   1706                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1707                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1708                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1709                break;
   1710             case Iop_Or16:
   1711                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1712                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1713                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1714                break;
   1715             case Iop_Or32:
   1716                e2 = IRExpr_Const(IRConst_U32(
   1717                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1718                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1719                break;
   1720             case Iop_Or64:
   1721                e2 = IRExpr_Const(IRConst_U64(
   1722                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1723                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1724                break;
   1725             case Iop_OrV128:
   1726                e2 = IRExpr_Const(IRConst_V128(
   1727                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1728                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1729                break;
   1730 
   1731             /* -- Xor -- */
   1732             case Iop_Xor8:
   1733                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1734                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1735                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1736                break;
   1737             case Iop_Xor16:
   1738                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1739                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1740                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1741                break;
   1742             case Iop_Xor32:
   1743                e2 = IRExpr_Const(IRConst_U32(
   1744                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1745                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1746                break;
   1747             case Iop_Xor64:
   1748                e2 = IRExpr_Const(IRConst_U64(
   1749                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1750                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1751                break;
   1752             case Iop_XorV128:
   1753                e2 = IRExpr_Const(IRConst_V128(
   1754                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1755                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1756                break;
   1757 
   1758             /* -- And -- */
   1759             case Iop_And8:
   1760                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1761                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1762                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1763                break;
   1764             case Iop_And16:
   1765                e2 = IRExpr_Const(IRConst_U16(toUShort(
   1766                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
   1767                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
   1768                break;
   1769             case Iop_And32:
   1770                e2 = IRExpr_Const(IRConst_U32(
   1771                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1772                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1773                break;
   1774             case Iop_And64:
   1775                e2 = IRExpr_Const(IRConst_U64(
   1776                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1777                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1778                break;
   1779             case Iop_AndV128:
   1780                e2 = IRExpr_Const(IRConst_V128(
   1781                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
   1782                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
   1783                break;
   1784 
   1785             /* -- Add -- */
   1786             case Iop_Add8:
   1787                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1788                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1789                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1790                break;
   1791             case Iop_Add32:
   1792                e2 = IRExpr_Const(IRConst_U32(
   1793                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1794                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1795                break;
   1796             case Iop_Add64:
   1797                e2 = IRExpr_Const(IRConst_U64(
   1798                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1799                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1800                break;
   1801 
   1802             /* -- Sub -- */
   1803             case Iop_Sub8:
   1804                e2 = IRExpr_Const(IRConst_U8(toUChar(
   1805                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
   1806                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
   1807                break;
   1808             case Iop_Sub32:
   1809                e2 = IRExpr_Const(IRConst_U32(
   1810                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1811                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1812                break;
   1813             case Iop_Sub64:
   1814                e2 = IRExpr_Const(IRConst_U64(
   1815                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1816                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1817                break;
   1818 
   1819             /* -- Max32U -- */
   1820             case Iop_Max32U: {
   1821                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1822                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1823                UInt res  = u32a > u32b ? u32a : u32b;
   1824                e2 = IRExpr_Const(IRConst_U32(res));
   1825                break;
   1826             }
   1827 
   1828             /* -- Mul -- */
   1829             case Iop_Mul32:
   1830                e2 = IRExpr_Const(IRConst_U32(
   1831                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1832                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
   1833                break;
   1834             case Iop_Mul64:
   1835                e2 = IRExpr_Const(IRConst_U64(
   1836                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1837                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
   1838                break;
   1839 
   1840             case Iop_MullS32: {
   1841                /* very paranoid */
   1842                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   1843                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1844                Int   s32a = (Int)u32a;
   1845                Int   s32b = (Int)u32b;
   1846                Long  s64a = (Long)s32a;
   1847                Long  s64b = (Long)s32b;
   1848                Long  sres = s64a * s64b;
   1849                ULong ures = (ULong)sres;
   1850                e2 = IRExpr_Const(IRConst_U64(ures));
   1851                break;
   1852             }
   1853 
   1854             /* -- Shl -- */
   1855             case Iop_Shl32:
   1856                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1857                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1858                if (shift >= 0 && shift <= 31)
   1859                   e2 = IRExpr_Const(IRConst_U32(
   1860                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1861                            << shift)));
   1862                break;
   1863             case Iop_Shl64:
   1864                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1865                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1866                if (shift >= 0 && shift <= 63)
   1867                   e2 = IRExpr_Const(IRConst_U64(
   1868                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1869                            << shift)));
   1870                break;
   1871 
   1872             /* -- Sar -- */
   1873             case Iop_Sar32: {
   1874                /* paranoid ... */
   1875                /*signed*/ Int s32;
   1876                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1877                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1878                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1879                if (shift >= 0 && shift <= 31) {
   1880                   s32 >>=/*signed*/ shift;
   1881                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
   1882                }
   1883                break;
   1884             }
   1885             case Iop_Sar64: {
   1886                /* paranoid ... */
   1887                /*signed*/ Long s64;
   1888                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1889                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1890                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1891                if (shift >= 0 && shift <= 63) {
   1892                   s64 >>=/*signed*/ shift;
   1893                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
   1894                }
   1895                break;
   1896             }
   1897 
   1898             /* -- Shr -- */
   1899             case Iop_Shr32: {
   1900                /* paranoid ... */
   1901                /*unsigned*/ UInt u32;
   1902                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1903                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
   1904                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1905                if (shift >= 0 && shift <= 31) {
   1906                   u32 >>=/*unsigned*/ shift;
   1907                   e2 = IRExpr_Const(IRConst_U32(u32));
   1908                }
   1909                break;
   1910             }
   1911             case Iop_Shr64: {
   1912                /* paranoid ... */
   1913                /*unsigned*/ ULong u64;
   1914                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1915                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
   1916                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
   1917                if (shift >= 0 && shift <= 63) {
   1918                   u64 >>=/*unsigned*/ shift;
   1919                   e2 = IRExpr_Const(IRConst_U64(u64));
   1920                }
   1921                break;
   1922             }
   1923 
   1924             /* -- CmpEQ -- */
   1925             case Iop_CmpEQ32:
   1926                e2 = IRExpr_Const(IRConst_U1(toBool(
   1927                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1928                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1929                break;
   1930             case Iop_CmpEQ64:
   1931                e2 = IRExpr_Const(IRConst_U1(toBool(
   1932                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1933                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1934                break;
   1935 
   1936             /* -- CmpNE -- */
   1937             case Iop_CmpNE8:
   1938             case Iop_CasCmpNE8:
   1939             case Iop_ExpCmpNE8:
   1940                e2 = IRExpr_Const(IRConst_U1(toBool(
   1941                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
   1942                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
   1943                break;
   1944             case Iop_CmpNE32:
   1945             case Iop_CasCmpNE32:
   1946             case Iop_ExpCmpNE32:
   1947                e2 = IRExpr_Const(IRConst_U1(toBool(
   1948                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
   1949                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
   1950                break;
   1951             case Iop_CmpNE64:
   1952             case Iop_CasCmpNE64:
   1953             case Iop_ExpCmpNE64:
   1954                e2 = IRExpr_Const(IRConst_U1(toBool(
   1955                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
   1956                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
   1957                break;
   1958 
   1959             /* -- CmpLEU -- */
   1960             case Iop_CmpLE32U:
   1961                e2 = IRExpr_Const(IRConst_U1(toBool(
   1962                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1963                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1964                break;
   1965             case Iop_CmpLE64U:
   1966                e2 = IRExpr_Const(IRConst_U1(toBool(
   1967                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1968                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1969                break;
   1970 
   1971             /* -- CmpLES -- */
   1972             case Iop_CmpLE32S:
   1973                e2 = IRExpr_Const(IRConst_U1(toBool(
   1974                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1975                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1976                break;
   1977             case Iop_CmpLE64S:
   1978                e2 = IRExpr_Const(IRConst_U1(toBool(
   1979                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1980                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1981                break;
   1982 
   1983             /* -- CmpLTS -- */
   1984             case Iop_CmpLT32S:
   1985                e2 = IRExpr_Const(IRConst_U1(toBool(
   1986                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1987                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   1988                break;
   1989             case Iop_CmpLT64S:
   1990                e2 = IRExpr_Const(IRConst_U1(toBool(
   1991                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   1992                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   1993                break;
   1994 
   1995             /* -- CmpLTU -- */
   1996             case Iop_CmpLT32U:
   1997                e2 = IRExpr_Const(IRConst_U1(toBool(
   1998                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
   1999                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
   2000                break;
   2001             case Iop_CmpLT64U:
   2002                e2 = IRExpr_Const(IRConst_U1(toBool(
   2003                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
   2004                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
   2005                break;
   2006 
   2007             /* -- CmpORD -- */
   2008             case Iop_CmpORD32S: {
   2009                /* very paranoid */
   2010                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
   2011                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   2012                Int   s32a = (Int)u32a;
   2013                Int   s32b = (Int)u32b;
   2014                Int   r = 0x2; /* EQ */
   2015                if (s32a < s32b) {
   2016                   r = 0x8; /* LT */
   2017                }
   2018                else if (s32a > s32b) {
   2019                   r = 0x4; /* GT */
   2020                }
   2021                e2 = IRExpr_Const(IRConst_U32(r));
   2022                break;
   2023             }
   2024 
   2025             /* -- nHLto2n -- */
   2026             case Iop_32HLto64:
   2027                e2 = IRExpr_Const(IRConst_U64(
   2028                        (((ULong)(e->Iex.Binop.arg1
   2029                                   ->Iex.Const.con->Ico.U32)) << 32)
   2030                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
   2031                     ));
   2032                break;
   2033             case Iop_64HLto128:
   2034                /* We can't fold this, because there is no way to
   2035                   express the result in IR, but at least pretend to
   2036                   handle it, so as to stop getting blasted with
   2037                   no-rule-for-this-primop messages. */
   2038                break;
   2039             /* For this vector one, can't fold all cases, but at
   2040                least do the most obvious one.  Could do better here
   2041                using summarise/desummarise of vector constants, but
   2042                too difficult to verify; hence just handle the zero
   2043                cases. */
   2044             case Iop_64HLtoV128: {
   2045                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
   2046                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   2047                if (0 == argHi && 0 == argLo) {
   2048                   e2 = IRExpr_Const(IRConst_V128(0));
   2049                } else {
   2050                   goto unhandled;
   2051                }
   2052                break;
   2053             }
   2054             /* Same reasoning for the 256-bit version. */
   2055             case Iop_V128HLtoV256: {
   2056                IRExpr* argHi = e->Iex.Binop.arg1;
   2057                IRExpr* argLo = e->Iex.Binop.arg2;
   2058                if (isZeroV128(argHi) && isZeroV128(argLo)) {
   2059                   e2 = IRExpr_Const(IRConst_V256(0));
   2060                } else {
   2061                   goto unhandled;
   2062                }
   2063                break;
   2064             }
   2065 
   2066             /* -- V128 stuff -- */
   2067             case Iop_InterleaveLO8x16: {
   2068                /* This turns up a lot in Memcheck instrumentation of
   2069                   Icc generated code.  I don't know why. */
   2070                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
   2071                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
   2072                if (0 == arg1 && 0 == arg2) {
   2073                   e2 = IRExpr_Const(IRConst_V128(0));
   2074                } else {
   2075                   goto unhandled;
   2076                }
   2077                break;
   2078             }
   2079 
   2080             default:
   2081                goto unhandled;
   2082          }
   2083 
   2084       } else {
   2085 
   2086          /* other cases (identities, etc) */
   2087          switch (e->Iex.Binop.op) {
   2088 
   2089             case Iop_Shl32:
   2090             case Iop_Shl64:
   2091             case Iop_Shr64:
   2092             case Iop_Sar64:
   2093                /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
   2094                if (isZeroU(e->Iex.Binop.arg2)) {
   2095                   e2 = e->Iex.Binop.arg1;
   2096                   break;
   2097                }
   2098                /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
   2099                if (isZeroU(e->Iex.Binop.arg1)) {
   2100                   e2 = e->Iex.Binop.arg1;
   2101                   break;
   2102                }
   2103                break;
   2104 
   2105             case Iop_Sar32:
   2106             case Iop_Shr32:
   2107                /* Shr32/Sar32(x,0) ==> x */
   2108                if (isZeroU(e->Iex.Binop.arg2)) {
   2109                   e2 = e->Iex.Binop.arg1;
   2110                   break;
   2111                }
   2112                break;
   2113 
   2114             case Iop_Or8:
   2115             case Iop_Or16:
   2116             case Iop_Or32:
   2117             case Iop_Or64:
   2118             case Iop_Max32U:
   2119                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
   2120                if (isZeroU(e->Iex.Binop.arg2)) {
   2121                   e2 = e->Iex.Binop.arg1;
   2122                   break;
   2123                }
   2124                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
   2125                if (isZeroU(e->Iex.Binop.arg1)) {
   2126                   e2 = e->Iex.Binop.arg2;
   2127                   break;
   2128                }
   2129                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
   2130                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
   2131                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
   2132                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2133                   break;
   2134                }
   2135                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
   2136                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2137                   e2 = e->Iex.Binop.arg1;
   2138                   break;
   2139                }
   2140                break;
   2141 
   2142             case Iop_Add8:
   2143                /* Add8(t,t) ==> t << 1.
   2144                   Memcheck doesn't understand that
   2145                   x+x produces a defined least significant bit, and it seems
   2146                   simplest just to get rid of the problem by rewriting it
   2147                   out, since the opportunity to do so exists. */
   2148                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2149                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
   2150                                     IRExpr_Const(IRConst_U8(1)));
   2151                   break;
   2152                }
   2153                break;
   2154 
   2155                /* NB no Add16(t,t) case yet as no known test case exists */
   2156 
   2157             case Iop_Add32:
   2158             case Iop_Add64:
   2159                /* Add32/Add64(x,0) ==> x */
   2160                if (isZeroU(e->Iex.Binop.arg2)) {
   2161                   e2 = e->Iex.Binop.arg1;
   2162                   break;
   2163                }
   2164                /* Add32/Add64(0,x) ==> x */
   2165                if (isZeroU(e->Iex.Binop.arg1)) {
   2166                   e2 = e->Iex.Binop.arg2;
   2167                   break;
   2168                }
   2169                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
   2170                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2171                   e2 = IRExpr_Binop(
   2172                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
   2173                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
   2174                   break;
   2175                }
   2176                break;
   2177 
   2178             case Iop_Sub32:
   2179             case Iop_Sub64:
   2180                /* Sub32/Sub64(x,0) ==> x */
   2181                if (isZeroU(e->Iex.Binop.arg2)) {
   2182                   e2 = e->Iex.Binop.arg1;
   2183                   break;
   2184                }
   2185                /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
   2186                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2187                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2188                   break;
   2189                }
   2190                break;
   2191             case Iop_Sub8x16:
   2192                /* Sub8x16(x,0) ==> x */
   2193                if (isZeroV128(e->Iex.Binop.arg2)) {
   2194                   e2 = e->Iex.Binop.arg1;
   2195                   break;
   2196                }
   2197                break;
   2198 
   2199             case Iop_And8:
   2200             case Iop_And16:
   2201             case Iop_And32:
   2202             case Iop_And64:
   2203                /* And8/And16/And32/And64(x,1---1b) ==> x */
   2204                if (isOnesU(e->Iex.Binop.arg2)) {
   2205                   e2 = e->Iex.Binop.arg1;
   2206                   break;
   2207                }
   2208                /* And8/And16/And32/And64(1---1b,x) ==> x */
   2209                if (isOnesU(e->Iex.Binop.arg1)) {
   2210                   e2 = e->Iex.Binop.arg2;
   2211                   break;
   2212                }
   2213                /* And8/And16/And32/And64(x,0) ==> 0 */
   2214                if (isZeroU(e->Iex.Binop.arg2)) {
   2215                   e2 = e->Iex.Binop.arg2;
   2216                   break;
   2217                }
   2218                /* And8/And16/And32/And64(0,x) ==> 0 */
   2219                if (isZeroU(e->Iex.Binop.arg1)) {
   2220                   e2 = e->Iex.Binop.arg1;
   2221                   break;
   2222                }
   2223                /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
   2224                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2225                   e2 = e->Iex.Binop.arg1;
   2226                   break;
   2227                }
   2228                break;
   2229 
   2230             case Iop_AndV128:
   2231             case Iop_AndV256:
    2232                /* AndV128/AndV256(t,t)
    2233                   ==> t, for some IRTemp t */
   2234                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2235                   e2 = e->Iex.Binop.arg1;
   2236                   break;
   2237                }
   2238                /* Deal with either arg zero.  Could handle other And
   2239                   cases here too. */
   2240                if (e->Iex.Binop.op == Iop_AndV256
   2241                    && (isZeroV256(e->Iex.Binop.arg1)
   2242                        || isZeroV256(e->Iex.Binop.arg2))) {
   2243                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2244                   break;
   2245                } else if (e->Iex.Binop.op == Iop_AndV128
   2246                           && (isZeroV128(e->Iex.Binop.arg1)
   2247                               || isZeroV128(e->Iex.Binop.arg2))) {
   2248                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2249                   break;
   2250                }
   2251                break;
   2252 
   2253             case Iop_OrV128:
   2254             case Iop_OrV256:
    2255                /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
   2256                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2257                   e2 = e->Iex.Binop.arg1;
   2258                   break;
   2259                }
   2260                /* OrV128(t,0) ==> t */
   2261                if (e->Iex.Binop.op == Iop_OrV128) {
   2262                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2263                      e2 = e->Iex.Binop.arg1;
   2264                      break;
   2265                   }
   2266                   if (isZeroV128(e->Iex.Binop.arg1)) {
   2267                      e2 = e->Iex.Binop.arg2;
   2268                      break;
   2269                   }
   2270                }
   2271                /* OrV256(t,0) ==> t */
   2272                if (e->Iex.Binop.op == Iop_OrV256) {
   2273                   if (isZeroV256(e->Iex.Binop.arg2)) {
   2274                      e2 = e->Iex.Binop.arg1;
   2275                      break;
   2276                   }
   2277                   //Disabled because there's no known test case right now.
   2278                   //if (isZeroV256(e->Iex.Binop.arg1)) {
   2279                   //   e2 = e->Iex.Binop.arg2;
   2280                   //   break;
   2281                   //}
   2282                }
   2283                break;
   2284 
   2285             case Iop_Xor8:
   2286             case Iop_Xor16:
   2287             case Iop_Xor32:
   2288             case Iop_Xor64:
   2289             case Iop_XorV128:
   2290             case Iop_XorV256:
   2291                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
   2292                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2293                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2294                   break;
   2295                }
   2296                /* XorV128(t,0) ==> t */
   2297                if (e->Iex.Binop.op == Iop_XorV128) {
   2298                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2299                      e2 = e->Iex.Binop.arg1;
   2300                      break;
   2301                   }
   2302                   //Disabled because there's no known test case right now.
   2303                   //if (isZeroV128(e->Iex.Binop.arg1)) {
   2304                   //   e2 = e->Iex.Binop.arg2;
   2305                   //   break;
   2306                   //}
   2307                } else {
   2308                   /* Xor8/16/32/64(0,t) ==> t */
   2309                   if (isZeroU(e->Iex.Binop.arg1)) {
   2310                      e2 = e->Iex.Binop.arg2;
   2311                      break;
   2312                   }
   2313                   /* Xor8/16/32/64(t,0) ==> t */
   2314                   if (isZeroU(e->Iex.Binop.arg2)) {
   2315                      e2 = e->Iex.Binop.arg1;
   2316                      break;
   2317                   }
   2318                }
   2319                break;
   2320 
   2321             case Iop_CmpNE32:
   2322                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
   2323                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2324                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2325                   break;
   2326                }
   2327                /* CmpNE32(1Uto32(b), 0) ==> b */
   2328                if (isZeroU32(e->Iex.Binop.arg2)) {
   2329                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
   2330                   if (a1 && a1->tag == Iex_Unop
   2331                          && a1->Iex.Unop.op == Iop_1Uto32) {
   2332                      e2 = a1->Iex.Unop.arg;
   2333                      break;
   2334                   }
   2335                }
   2336                break;
   2337 
   2338             case Iop_CmpEQ32:
   2339             case Iop_CmpEQ64:
   2340             case Iop_CmpEQ8x8:
   2341             case Iop_CmpEQ8x16:
   2342             case Iop_CmpEQ16x8:
   2343             case Iop_CmpEQ32x4:
   2344                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2345                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2346                   break;
   2347                }
   2348                break;
   2349 
   2350             default:
   2351                break;
   2352          }
   2353       }
   2354       break;
   2355 
   2356    case Iex_ITE:
   2357       /* ITE */
    2358       /* is the discriminant a constant? */
   2359       if (e->Iex.ITE.cond->tag == Iex_Const) {
    2360          /* as assured by the IR type rules */
   2361          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
   2362          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
   2363                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
   2364       }
   2365       else
   2366       /* are the arms identical? (pretty weedy test) */
   2367       if (sameIRExprs(env, e->Iex.ITE.iftrue,
   2368                            e->Iex.ITE.iffalse)) {
   2369          e2 = e->Iex.ITE.iffalse;
   2370       }
   2371       break;
   2372 
   2373    default:
   2374       /* not considered */
   2375       break;
   2376    }
   2377 
   2378    /* Show cases where we've found but not folded 'op(t,t)'.  Be
   2379       careful not to call sameIRExprs with values of different types,
   2380       though, else it will assert (and so it should!).  We can't
   2381       conveniently call typeOfIRExpr on the two args without a whole
   2382       bunch of extra plumbing to pass in a type env, so just use a
   2383       hacky test to check the arguments are not anything that might
   2384       sameIRExprs to assert.  This is only OK because this kludge is
   2385       only used for debug printing, not for "real" operation.  For
   2386       "real" operation (ie, all other calls to sameIRExprs), it is
    2387       essential that the two args have the same type.
   2388 
   2389       The "right" solution is to plumb the containing block's
   2390       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
   2391       that's a bunch of extra parameter passing which will just slow
   2392       down the normal case, for no purpose. */
   2393    if (vex_control.iropt_verbosity > 0
   2394        && e == e2
   2395        && e->tag == Iex_Binop
   2396        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
   2397                                                     e->Iex.Binop.arg2)
   2398        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2399       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
   2400       ppIRExpr(e);
   2401       vex_printf("\n");
   2402    }
   2403 
   2404    /* Show the overall results of folding. */
   2405    if (DEBUG_IROPT && e2 != e) {
   2406       vex_printf("FOLD: ");
   2407       ppIRExpr(e); vex_printf("  ->  ");
   2408       ppIRExpr(e2); vex_printf("\n");
   2409    }
   2410 
   2411    return e2;
   2412 
   2413  unhandled:
   2414 #  if 0
   2415    vex_printf("\n\n");
   2416    ppIRExpr(e);
   2417    vpanic("fold_Expr: no rule for the above");
   2418 #  else
   2419    if (vex_control.iropt_verbosity > 0) {
   2420       vex_printf("vex iropt: fold_Expr: no const rule for: ");
   2421       ppIRExpr(e);
   2422       vex_printf("\n");
   2423    }
   2424    return e2;
   2425 #  endif
   2426 }
   2427 
   2428 
   2429 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   2430    1-level due to previous flattening pass. */
   2431 
   2432 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   2433 {
   2434    switch (ex->tag) {
   2435       case Iex_RdTmp:
   2436          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   2437             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
   2438             if (rhs->tag == Iex_RdTmp)
   2439                return rhs;
   2440             if (rhs->tag == Iex_Const
   2441                 && rhs->Iex.Const.con->tag != Ico_F64i)
   2442                return rhs;
   2443          }
   2444          /* not bound in env */
   2445          return ex;
   2446 
   2447       case Iex_Const:
   2448       case Iex_Get:
   2449          return ex;
   2450 
   2451       case Iex_GetI:
   2452          vassert(isIRAtom(ex->Iex.GetI.ix));
   2453          return IRExpr_GetI(
   2454             ex->Iex.GetI.descr,
   2455             subst_Expr(env, ex->Iex.GetI.ix),
   2456             ex->Iex.GetI.bias
   2457          );
   2458 
   2459       case Iex_Qop: {
   2460          IRQop* qop = ex->Iex.Qop.details;
   2461          vassert(isIRAtom(qop->arg1));
   2462          vassert(isIRAtom(qop->arg2));
   2463          vassert(isIRAtom(qop->arg3));
   2464          vassert(isIRAtom(qop->arg4));
   2465          return IRExpr_Qop(
   2466                    qop->op,
   2467                    subst_Expr(env, qop->arg1),
   2468                    subst_Expr(env, qop->arg2),
   2469                    subst_Expr(env, qop->arg3),
   2470                    subst_Expr(env, qop->arg4)
   2471                 );
   2472       }
   2473 
   2474       case Iex_Triop: {
   2475          IRTriop* triop = ex->Iex.Triop.details;
   2476          vassert(isIRAtom(triop->arg1));
   2477          vassert(isIRAtom(triop->arg2));
   2478          vassert(isIRAtom(triop->arg3));
   2479          return IRExpr_Triop(
   2480                    triop->op,
   2481                    subst_Expr(env, triop->arg1),
   2482                    subst_Expr(env, triop->arg2),
   2483                    subst_Expr(env, triop->arg3)
   2484                 );
   2485       }
   2486 
   2487       case Iex_Binop:
   2488          vassert(isIRAtom(ex->Iex.Binop.arg1));
   2489          vassert(isIRAtom(ex->Iex.Binop.arg2));
   2490          return IRExpr_Binop(
   2491                    ex->Iex.Binop.op,
   2492                    subst_Expr(env, ex->Iex.Binop.arg1),
   2493                    subst_Expr(env, ex->Iex.Binop.arg2)
   2494                 );
   2495 
   2496       case Iex_Unop:
   2497          vassert(isIRAtom(ex->Iex.Unop.arg));
   2498          return IRExpr_Unop(
   2499                    ex->Iex.Unop.op,
   2500                    subst_Expr(env, ex->Iex.Unop.arg)
   2501                 );
   2502 
   2503       case Iex_Load:
   2504          vassert(isIRAtom(ex->Iex.Load.addr));
   2505          return IRExpr_Load(
   2506                    ex->Iex.Load.end,
   2507                    ex->Iex.Load.ty,
   2508                    subst_Expr(env, ex->Iex.Load.addr)
   2509                 );
   2510 
   2511       case Iex_CCall: {
   2512          Int      i;
   2513          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   2514          for (i = 0; args2[i]; i++) {
   2515             vassert(isIRAtom(args2[i]));
   2516             args2[i] = subst_Expr(env, args2[i]);
   2517          }
   2518          return IRExpr_CCall(
   2519                    ex->Iex.CCall.cee,
   2520                    ex->Iex.CCall.retty,
   2521                    args2
   2522                 );
   2523       }
   2524 
   2525       case Iex_ITE:
   2526          vassert(isIRAtom(ex->Iex.ITE.cond));
   2527          vassert(isIRAtom(ex->Iex.ITE.iftrue));
   2528          vassert(isIRAtom(ex->Iex.ITE.iffalse));
   2529          return IRExpr_ITE(
   2530                    subst_Expr(env, ex->Iex.ITE.cond),
   2531                    subst_Expr(env, ex->Iex.ITE.iftrue),
   2532                    subst_Expr(env, ex->Iex.ITE.iffalse)
   2533                 );
   2534 
   2535       default:
   2536          vex_printf("\n\n"); ppIRExpr(ex);
   2537          vpanic("subst_Expr");
   2538 
   2539    }
   2540 }
   2541 
   2542 
/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op:
   an Exit or StoreG whose guard folds to 0:I1 becomes IRStmt_NoOp,
   and a LoadG whose guard folds to 0:I1 becomes 'dst := alt'.
   Returns a newly constructed statement; |st| is not modified.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* Guard is 0:I1 -- the store can never happen. */
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* Guard is 1:I1 -- degrade to an unconditional store. */
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         /* The Hi halves are only present for double-word CAS. */
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* storedata is NULL for a load-linked, non-NULL for a
            store-conditional. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         /* Work on a shallow copy so the original IRDirty (and its
            args vector) is left untouched. */
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/GSPTR markers are placeholders, not real
               expressions; leave them alone. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
   2756 
   2757 
/* Constant propagation and folding over a whole (flat) IRSB.  Builds
   and returns a new IRSB; 'in' is not modified.  Relies on each
   IRTemp being assigned at most once (enforced by the vassert on
   env[] below), so 'in' is presumably in SSA-like form -- TODO
   confirm against callers. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* Single-assignment invariant: this temp must not have
               been bound before. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               /* If the fixup table is full, simply leave the LoadG
                  conditional -- correct, just a missed optimisation. */
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* Also apply the substitution to the block's destination. */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         /* Identity conversions need no IROp at all. */
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
   2919 
   2920 
   2921 /*---------------------------------------------------------------*/
   2922 /*--- Dead code (t = E) removal                               ---*/
   2923 /*---------------------------------------------------------------*/
   2924 
   2925 /* As a side effect, also removes all code following an unconditional
   2926    side exit. */
   2927 
   2928 /* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just representing a set of IRTemps.
   2930 */
   2931 
   2932 inline
   2933 static void addUses_Temp ( Bool* set, IRTemp tmp )
   2934 {
   2935    set[(Int)tmp] = True;
   2936 }
   2937 
   2938 static void addUses_Expr ( Bool* set, IRExpr* e )
   2939 {
   2940    Int i;
   2941    switch (e->tag) {
   2942       case Iex_GetI:
   2943          addUses_Expr(set, e->Iex.GetI.ix);
   2944          return;
   2945       case Iex_ITE:
   2946          addUses_Expr(set, e->Iex.ITE.cond);
   2947          addUses_Expr(set, e->Iex.ITE.iftrue);
   2948          addUses_Expr(set, e->Iex.ITE.iffalse);
   2949          return;
   2950       case Iex_CCall:
   2951          for (i = 0; e->Iex.CCall.args[i]; i++)
   2952             addUses_Expr(set, e->Iex.CCall.args[i]);
   2953          return;
   2954       case Iex_Load:
   2955          addUses_Expr(set, e->Iex.Load.addr);
   2956          return;
   2957       case Iex_Qop:
   2958          addUses_Expr(set, e->Iex.Qop.details->arg1);
   2959          addUses_Expr(set, e->Iex.Qop.details->arg2);
   2960          addUses_Expr(set, e->Iex.Qop.details->arg3);
   2961          addUses_Expr(set, e->Iex.Qop.details->arg4);
   2962          return;
   2963       case Iex_Triop:
   2964          addUses_Expr(set, e->Iex.Triop.details->arg1);
   2965          addUses_Expr(set, e->Iex.Triop.details->arg2);
   2966          addUses_Expr(set, e->Iex.Triop.details->arg3);
   2967          return;
   2968       case Iex_Binop:
   2969          addUses_Expr(set, e->Iex.Binop.arg1);
   2970          addUses_Expr(set, e->Iex.Binop.arg2);
   2971          return;
   2972       case Iex_Unop:
   2973          addUses_Expr(set, e->Iex.Unop.arg);
   2974          return;
   2975       case Iex_RdTmp:
   2976          addUses_Temp(set, e->Iex.RdTmp.tmp);
   2977          return;
   2978       case Iex_Const:
   2979       case Iex_Get:
   2980          return;
   2981       default:
   2982          vex_printf("\n");
   2983          ppIRExpr(e);
   2984          vpanic("addUses_Expr");
   2985    }
   2986 }
   2987 
   2988 static void addUses_Stmt ( Bool* set, IRStmt* st )
   2989 {
   2990    Int      i;
   2991    IRDirty* d;
   2992    IRCAS*   cas;
   2993    switch (st->tag) {
   2994       case Ist_AbiHint:
   2995          addUses_Expr(set, st->Ist.AbiHint.base);
   2996          addUses_Expr(set, st->Ist.AbiHint.nia);
   2997          return;
   2998       case Ist_PutI:
   2999          addUses_Expr(set, st->Ist.PutI.details->ix);
   3000          addUses_Expr(set, st->Ist.PutI.details->data);
   3001          return;
   3002       case Ist_WrTmp:
   3003          addUses_Expr(set, st->Ist.WrTmp.data);
   3004          return;
   3005       case Ist_Put:
   3006          addUses_Expr(set, st->Ist.Put.data);
   3007          return;
   3008       case Ist_Store:
   3009          addUses_Expr(set, st->Ist.Store.addr);
   3010          addUses_Expr(set, st->Ist.Store.data);
   3011          return;
   3012       case Ist_StoreG: {
   3013          IRStoreG* sg = st->Ist.StoreG.details;
   3014          addUses_Expr(set, sg->addr);
   3015          addUses_Expr(set, sg->data);
   3016          addUses_Expr(set, sg->guard);
   3017          return;
   3018       }
   3019       case Ist_LoadG: {
   3020          IRLoadG* lg = st->Ist.LoadG.details;
   3021          addUses_Expr(set, lg->addr);
   3022          addUses_Expr(set, lg->alt);
   3023          addUses_Expr(set, lg->guard);
   3024          return;
   3025       }
   3026       case Ist_CAS:
   3027          cas = st->Ist.CAS.details;
   3028          addUses_Expr(set, cas->addr);
   3029          if (cas->expdHi)
   3030             addUses_Expr(set, cas->expdHi);
   3031          addUses_Expr(set, cas->expdLo);
   3032          if (cas->dataHi)
   3033             addUses_Expr(set, cas->dataHi);
   3034          addUses_Expr(set, cas->dataLo);
   3035          return;
   3036       case Ist_LLSC:
   3037          addUses_Expr(set, st->Ist.LLSC.addr);
   3038          if (st->Ist.LLSC.storedata)
   3039             addUses_Expr(set, st->Ist.LLSC.storedata);
   3040          return;
   3041       case Ist_Dirty:
   3042          d = st->Ist.Dirty.details;
   3043          if (d->mFx != Ifx_None)
   3044             addUses_Expr(set, d->mAddr);
   3045          addUses_Expr(set, d->guard);
   3046          for (i = 0; d->args[i] != NULL; i++) {
   3047             IRExpr* arg = d->args[i];
   3048             if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
   3049                addUses_Expr(set, arg);
   3050          }
   3051          return;
   3052       case Ist_NoOp:
   3053       case Ist_IMark:
   3054       case Ist_MBE:
   3055          return;
   3056       case Ist_Exit:
   3057          addUses_Expr(set, st->Ist.Exit.guard);
   3058          return;
   3059       default:
   3060          vex_printf("\n");
   3061          ppIRStmt(st);
   3062          vpanic("addUses_Stmt");
   3063    }
   3064 }
   3065 
   3066 
   3067 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   3068 static Bool isZeroU1 ( IRExpr* e )
   3069 {
   3070    return toBool( e->tag == Iex_Const
   3071                   && e->Iex.Const.con->tag == Ico_U1
   3072                   && e->Iex.Const.con->Ico.U1 == False );
   3073 }
   3074 
   3075 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   3076 static Bool isOneU1 ( IRExpr* e )
   3077 {
   3078    return toBool( e->tag == Iex_Const
   3079                   && e->Iex.Const.con->tag == Ico_U1
   3080                   && e->Iex.Const.con->Ico.U1 == True );
   3081 }
   3082 
   3083 
   3084 /* Note, this destructively modifies the given IRSB. */
   3085 
   3086 /* Scan backwards through statements, carrying a set of IRTemps which
   3087    are known to be used after the current point.  On encountering 't =
   3088    E', delete the binding if it is not used.  Otherwise, add any temp
   3089    uses to the set and keep on moving backwards.
   3090 
   3091    As an enhancement, the first (backwards) pass searches for IR exits
   3092    with always-taken conditions and notes the location of the earliest
   3093    one in the block.  If any such are found, a second pass copies the
   3094    exit destination and jump kind to the bb-end.  Then, the exit and
   3095    all statements following it are turned into no-ops.
   3096 */
   3097 
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff temp t is known to be used at or after the
      current point of the backwards scan. */
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; scanning backwards
         means the earliest such exit in the block wins. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Copy the exit's destination, jump kind and IP offset to the
         block end, then no-op the exit and everything after it. */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
   3165 
   3166 
   3167 /*---------------------------------------------------------------*/
   3168 /*--- Specialisation of helper function calls, in             ---*/
   3169 /*--- collaboration with the front end                        ---*/
   3170 /*---------------------------------------------------------------*/
   3171 
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   /* Walk the statements backwards; offer each clean-helper call
      (an Iex_CCall bound by an Ist_WrTmp) to |specHelper|, which
      may return an equivalent in-line expression. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      /* |specHelper| also receives the statement array and the
         index |i| of the call being considered. */
      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   /* Replacement expressions need not be flat, so re-flatten if
      anything was installed. */
   if (any)
      bb = flatten_BB(bb);
   return bb;
}
   3215 
   3216 
   3217 /*---------------------------------------------------------------*/
   3218 /*--- Determination of guest state aliasing relationships     ---*/
   3219 /*---------------------------------------------------------------*/
   3220 
   3221 /* These are helper functions for CSE and GetI/PutI transformations.
   3222 
   3223    Determine, to the extent possible, the relationship between two
   3224    guest state accesses.  The possible outcomes are:
   3225 
   3226    * Exact alias.  These two accesses denote precisely the same
   3227      piece of the guest state.
   3228 
   3229    * Definitely no alias.  These two accesses are guaranteed not to
   3230      overlap any part of the guest state.
   3231 
   3232    * Unknown -- if neither of the above can be established.
   3233 
   3234    If in doubt, return Unknown.  */
   3235 
/* The outcome of an aliasing query between two guest state
   accesses; see the discussion above. */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
   3239 
   3240 
   3241 /* Produces the alias relation between an indexed guest
   3242    state access and a non-indexed access. */
   3243 
   3244 static
   3245 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   3246                                     Int offset2, IRType ty2 )
   3247 {
   3248    UInt minoff1, maxoff1, minoff2, maxoff2;
   3249 
   3250    getArrayBounds( descr1, &minoff1, &maxoff1 );
   3251    minoff2 = offset2;
   3252    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   3253 
   3254    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   3255       return NoAlias;
   3256 
   3257    /* Could probably do better here if required.  For the moment
   3258       however just claim not to know anything more. */
   3259    return UnknownAlias;
   3260 }
   3261 
   3262 
   3263 /* Produces the alias relation between two indexed guest state
   3264    accesses. */
   3265 
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   /* Make both biases non-negative by repeatedly adding the array
      size; panic rather than spin on an absurdly negative bias. */
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, bias1 and bias2 are normalised into the range
      0 .. descr1->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
   3328 
   3329 
   3330 /*---------------------------------------------------------------*/
   3331 /*--- Common Subexpression Elimination                        ---*/
   3332 /*---------------------------------------------------------------*/
   3333 
   3334 /* Expensive in time and space. */
   3335 
   3336 /* Uses two environments:
   3337    a IRTemp -> IRTemp mapping
   3338    a mapping from AvailExpr* to IRTemp
   3339 */
   3340 
/* Represents an atomic IRExpr: either a temporary (tag TCt, u.tmp)
   or a constant (tag TCc, u.con).  See irExpr_to_TmpOrConst and
   tmpOrConst_to_IRExpr for the conversions. */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
   3347 
   3348 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
   3349 {
   3350    if (tc1->tag != tc2->tag)
   3351       return False;
   3352    switch (tc1->tag) {
   3353       case TCc:
   3354          return eqIRConst(tc1->u.con, tc2->u.con);
   3355       case TCt:
   3356          return tc1->u.tmp == tc2->u.tmp;
   3357       default:
   3358          vpanic("eqTmpOrConst");
   3359    }
   3360 }
   3361 
   3362 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
   3363 {
   3364    Bool eq = cee1->addr == cee2->addr;
   3365    if (eq) {
   3366       vassert(cee1->regparms == cee2->regparms);
   3367       vassert(cee1->mcx_mask == cee2->mcx_mask);
   3368       /* Names should be the same too, but we don't bother to
   3369          check. */
   3370    }
   3371    return eq;
   3372 }
   3373 
   3374 /* Convert an atomic IRExpr* to a TmpOrConst. */
   3375 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
   3376 {
   3377    switch (e->tag) {
   3378       case Iex_RdTmp:
   3379          tc->tag   = TCt;
   3380          tc->u.tmp = e->Iex.RdTmp.tmp;
   3381          break;
   3382       case Iex_Const:
   3383          tc->tag   = TCc;
   3384          tc->u.con = e->Iex.Const.con;
   3385          break;
   3386       default:
   3387          /* Getting here is a serious error.  It means that the
   3388             presented arg isn't an IR atom, as it should be. */
   3389          vpanic("irExpr_to_TmpOrConst");
   3390    }
   3391 }
   3392 
   3393 /* Convert a TmpOrConst to an atomic IRExpr*. */
   3394 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
   3395 {
   3396    switch (tc->tag) {
   3397       case TCc: return IRExpr_Const(tc->u.con);
   3398       case TCt: return IRExpr_RdTmp(tc->u.tmp);
   3399       default:  vpanic("tmpOrConst_to_IRExpr");
   3400    }
   3401 }
   3402 
   3403 /* Convert a NULL terminated IRExpr* vector to an array of
   3404    TmpOrConsts, and a length. */
   3405 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
   3406                                        /*OUT*/Int* nOuts,
   3407                                        IRExpr** ins )
   3408 {
   3409    Int i, n;
   3410    /* We have to make two passes, one to count, one to copy. */
   3411    for (n = 0; ins[n]; n++)
   3412       ;
   3413    *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
   3414    *nOuts = n;
   3415    /* and now copy .. */
   3416    for (i = 0; i < n; i++) {
   3417       IRExpr*     arg = ins[i];
   3418       TmpOrConst* dst = &(*outs)[i];
   3419       irExpr_to_TmpOrConst(dst, arg);
   3420    }
   3421 }
   3422 
/* An available expression, as tracked by the CSE pass.  The tag
   selects the valid union member.  Tag mnemonics: U = unop,
   B = binop, I..= ITE, with 't' for a tmp operand and 'c' for a
   const operand in position order; Cf64i is an F64i-style constant,
   GetIt an indexed guest-state read, CCall a clean helper call and
   Load a memory load.  Note that constants are stored by value
   (IRConst, not IRConst*); availExpr_to_IRExpr heap-copies them
   when rebuilding IR. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
   3502 
/* Structural equality on AvailExprs.  Expressions with different
   tags are never equal; the tag test is the expected common failure
   point (hence LIKELY). */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         Int  i, n;
         /* Equal iff same callee, same arity ... */
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            /* ... and pairwise-identical args. */
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}
   3576 
/* Rebuild an IRExpr from an AvailExpr.  Constants held by value in
   the AvailExpr are copied into freshly allocated IRConsts, since
   IRExpr_Const takes a pointer. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
   3649 
   3650 inline
   3651 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   3652 {
   3653    HWord res;
   3654    /* env :: IRTemp -> IRTemp */
   3655    if (lookupHHW( env, &res, (HWord)tmp ))
   3656       return (IRTemp)res;
   3657    else
   3658       return tmp;
   3659 }
   3660 
   3661 inline
   3662 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
   3663                                           HashHW* env )
   3664 {
   3665    /* env :: IRTemp -> IRTemp */
   3666    if (tc->tag == TCt) {
   3667       tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   3668    }
   3669 }
   3670 
   3671 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   3672 {
   3673    /* env :: IRTemp -> IRTemp */
   3674    switch (ae->tag) {
   3675       case Ut:
   3676          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   3677          break;
   3678       case Btt:
   3679          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   3680          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   3681          break;
   3682       case Btc:
   3683          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   3684          break;
   3685       case Bct:
   3686          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   3687          break;
   3688       case Cf64i:
   3689          break;
   3690       case Ittt:
   3691          ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
   3692          ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
   3693          ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
   3694          break;
   3695       case Ittc:
   3696          ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
   3697          ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
   3698          break;
   3699       case Itct:
   3700          ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
   3701          ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
   3702          break;
   3703       case Itcc:
   3704          ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
   3705          break;
   3706       case GetIt:
   3707          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   3708          break;
   3709       case CCall: {
   3710          Int i, n = ae->u.CCall.nArgs;;
   3711          for (i = 0; i < n; i++) {
   3712             subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
   3713          }
   3714          break;
   3715       }
   3716       case Load:
   3717          subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
   3718          break;
   3719       default:
   3720          vpanic("subst_AvailExpr");
   3721    }
   3722 }
   3723 
/* Convert IRExpr |e| into an AvailExpr, provided it has one of the
   limited set of shapes the CSE pass tracks: every operand position
   must be a tmp or a constant.  Returns NULL if |e| is not of a
   suitable form, which makes it invisible to CSE.  Loads are only
   converted when |allowLoadsToBeCSEd| is True.  Note that constants
   are copied into the AvailExpr by value, whereas tmps are recorded
   by their IRTemp number. */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         /* op(tmp) */
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            /* op(tmp, tmp) */
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            /* op(tmp, const) */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            /* op(const, tmp) */
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Only F64i constants are tracked here. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* ITE(cond, iftrue, iffalse): cond must be a tmp; the four
            tag variants cover the tmp/const combinations of the two
            branch values (e1 = iftrue, e0 = iffalse). */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               /* ITE(tmp, tmp, tmp) */
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               /* ITE(tmp, const, tmp) */
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               /* ITE(tmp, tmp, const) */
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               /* ITE(tmp, const, const) */
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         /* GetI[descr, tmp, bias] */
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      case Iex_Load:
         /* If the caller of do_cse_BB has requested that loads also
            be CSEd, convert them into AvailExprs.  If not, we'll just
            return NULL here, and the load never becomes considered
            "available", which effectively disables CSEing of them, as
            desired. */
         if (allowLoadsToBeCSEd) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Load;
            ae->u.Load.end = e->Iex.Load.end;
            ae->u.Load.ty  = e->Iex.Load.ty;
            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
            return ae;
         }
         break;

      default:
         /* Any other expression shape is not CSEable. */
         break;
   }

   return NULL;
}
   3865 
   3866 
   3867 /* The BB is modified in-place.  Returns True if any changes were
   3868    made.  The caller can choose whether or not loads should be CSEd.
   3869    In the normal course of things we don't do that, since CSEing loads
   3870    is something of a dodgy proposition if the guest program is doing
   3871    some screwy stuff to do with races and spinloops. */
   3872 
static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   /* Both maps store keys/values as HWords; the vassert below checks
      that an IRTemp fits in a HWord so the casts are safe. */
   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* Only GetI and Load bindings are vulnerable. */
            if (ae->tag != GetIt && ae->tag != Load)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               /* Statement may touch memory/state arbitrarily: dump
                  the binding unconditionally. */
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  /* GetI binding vs non-indexed Put: keep only if
                     proven disjoint. */
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  /* GetI binding vs PutI: keep only if proven
                     disjoint. */
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
   4031 
   4032 
   4033 /*---------------------------------------------------------------*/
   4034 /*--- Add32/Sub32 chain collapsing                            ---*/
   4035 /*---------------------------------------------------------------*/
   4036 
   4037 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   4038 
   4039 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   4040    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   4041    root node is Add32, not Sub32. */
   4042 
   4043 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   4044 {
   4045    if (e->tag != Iex_Binop)
   4046       return False;
   4047    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   4048       return False;
   4049    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   4050       return False;
   4051    if (e->Iex.Binop.arg2->tag != Iex_Const)
   4052       return False;
   4053    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   4054    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   4055    if (e->Iex.Binop.op == Iop_Sub32)
   4056       *i32 = -*i32;
   4057    return True;
   4058 }
   4059 
   4060 
   4061 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   4062    other tmp2.  Scan backwards from the specified start point -- an
   4063    optimisation. */
   4064 
   4065 static Bool collapseChain ( IRSB* bb, Int startHere,
   4066                             IRTemp tmp,
   4067                             IRTemp* tmp2, Int* i32 )
   4068 {
   4069    Int     j, ii;
   4070    IRTemp  vv;
   4071    IRStmt* st;
   4072    IRExpr* e;
   4073 
   4074    /* the (var, con) pair contain the current 'representation' for
   4075       'tmp'.  We start with 'tmp + 0'.  */
   4076    IRTemp var = tmp;
   4077    Int    con = 0;
   4078 
   4079    /* Scan backwards to see if tmp can be replaced by some other tmp
   4080      +/- a constant. */
   4081    for (j = startHere; j >= 0; j--) {
   4082       st = bb->stmts[j];
   4083       if (st->tag != Ist_WrTmp)
   4084          continue;
   4085       if (st->Ist.WrTmp.tmp != var)
   4086          continue;
   4087       e = st->Ist.WrTmp.data;
   4088       if (!isAdd32OrSub32(e, &vv, &ii))
   4089          break;
   4090       var = vv;
   4091       con += ii;
   4092    }
   4093    if (j == -1)
   4094       /* no earlier binding for var .. ill-formed IR */
   4095       vpanic("collapseChain");
   4096 
   4097    /* so, did we find anything interesting? */
   4098    if (var == tmp)
   4099       return False; /* no .. */
   4100 
   4101    *tmp2 = var;
   4102    *i32  = con;
   4103    return True;
   4104 }
   4105 
   4106 
   4107 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   4108 
   4109 static void collapse_AddSub_chains_BB ( IRSB* bb )
   4110 {
   4111    IRStmt *st;
   4112    IRTemp var, var2;
   4113    Int    i, con, con2;
   4114 
   4115    for (i = bb->stmts_used-1; i >= 0; i--) {
   4116       st = bb->stmts[i];
   4117       if (st->tag == Ist_NoOp)
   4118          continue;
   4119 
   4120       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   4121 
   4122       if (st->tag == Ist_WrTmp
   4123           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   4124 
   4125          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   4126             Find out if var can be expressed as var2 + con2. */
   4127          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   4128             if (DEBUG_IROPT) {
   4129                vex_printf("replacing1 ");
   4130                ppIRStmt(st);
   4131                vex_printf(" with ");
   4132             }
   4133             con2 += con;
   4134             bb->stmts[i]
   4135                = IRStmt_WrTmp(
   4136                     st->Ist.WrTmp.tmp,
   4137                     (con2 >= 0)
   4138                       ? IRExpr_Binop(Iop_Add32,
   4139                                      IRExpr_RdTmp(var2),
   4140                                      IRExpr_Const(IRConst_U32(con2)))
   4141                       : IRExpr_Binop(Iop_Sub32,
   4142                                      IRExpr_RdTmp(var2),
   4143                                      IRExpr_Const(IRConst_U32(-con2)))
   4144                  );
   4145             if (DEBUG_IROPT) {
   4146                ppIRStmt(bb->stmts[i]);
   4147                vex_printf("\n");
   4148             }
   4149          }
   4150 
   4151          continue;
   4152       }
   4153 
   4154       /* Try to collapse 't1 = GetI[t2, con]'. */
   4155 
   4156       if (st->tag == Ist_WrTmp
   4157           && st->Ist.WrTmp.data->tag == Iex_GetI
   4158           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   4159           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   4160                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   4161          if (DEBUG_IROPT) {
   4162             vex_printf("replacing3 ");
   4163             ppIRStmt(st);
   4164             vex_printf(" with ");
   4165          }
   4166          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   4167          bb->stmts[i]
   4168             = IRStmt_WrTmp(
   4169                  st->Ist.WrTmp.tmp,
   4170                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   4171                              IRExpr_RdTmp(var2),
   4172                              con2));
   4173          if (DEBUG_IROPT) {
   4174             ppIRStmt(bb->stmts[i]);
   4175             vex_printf("\n");
   4176          }
   4177          continue;
   4178       }
   4179 
   4180       /* Perhaps st is PutI[t, con] ? */
   4181       IRPutI *puti = st->Ist.PutI.details;
   4182       if (st->tag == Ist_PutI
   4183           && puti->ix->tag == Iex_RdTmp
   4184           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
   4185                                &var2, &con2)) {
   4186          if (DEBUG_IROPT) {
   4187             vex_printf("replacing2 ");
   4188             ppIRStmt(st);
   4189             vex_printf(" with ");
   4190          }
   4191          con2 += puti->bias;
   4192          bb->stmts[i]
   4193             = IRStmt_PutI(mkIRPutI(puti->descr,
   4194                                    IRExpr_RdTmp(var2),
   4195                                    con2,
   4196                                    puti->data));
   4197          if (DEBUG_IROPT) {
   4198             ppIRStmt(bb->stmts[i]);
   4199             vex_printf("\n");
   4200          }
   4201          continue;
   4202       }
   4203 
   4204    } /* for */
   4205 }
   4206 
   4207 
   4208 /*---------------------------------------------------------------*/
   4209 /*--- PutI/GetI transformations                               ---*/
   4210 /*---------------------------------------------------------------*/
   4211 
   4212 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   4213    the given starting point to find, if any, a PutI which writes
   4214    exactly the same piece of guest state, and so return the expression
   4215    that the PutI writes.  This is the core of PutI-GetI forwarding. */
   4216 
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any.  Any statement that
      *might* write the same guest state, or whose effect we cannot
      analyse, terminates the search with NULL (give up). */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
   4308 
   4309 
   4310 
   4311 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   4312    that it writes exactly the same piece of guest state) ?  Safe
   4313    answer: False. */
   4314 
   4315 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   4316 {
   4317    vassert(pi->tag == Ist_PutI);
   4318    if (s2->tag != Ist_PutI)
   4319       return False;
   4320 
   4321    IRPutI *p1 = pi->Ist.PutI.details;
   4322    IRPutI *p2 = s2->Ist.PutI.details;
   4323 
   4324    return toBool(
   4325           getAliasingRelation_II(
   4326              p1->descr, p1->ix, p1->bias,
   4327              p2->descr, p2->ix, p2->bias
   4328           )
   4329           == ExactAlias
   4330           );
   4331 }
   4332 
   4333 
   4334 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   4335    overlap it?  Safe answer: True.  Note, we could do a lot better
   4336    than this if needed. */
   4337 
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* Compute pi's guest-state footprint, then classify s2 by tag.
      Cases either answer immediately or compute an aliasing relation
      and jump to the shared join point below. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* No guest-state access at all. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only guest-state reads (Get/GetI) on the rhs matter. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Memory store: cannot touch guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
   4437 
   4438 
   4439 
   4440 /* ---------- PutI/GetI transformations main functions --------- */
   4441 
   4442 /* Remove redundant GetIs, to the extent that they can be detected.
   4443    bb is modified in-place. */
   4444 
   4445 static
   4446 void do_redundant_GetI_elimination ( IRSB* bb )
   4447 {
   4448    Int     i;
   4449    IRStmt* st;
   4450 
   4451    for (i = bb->stmts_used-1; i >= 0; i--) {
   4452       st = bb->stmts[i];
   4453       if (st->tag == Ist_NoOp)
   4454          continue;
   4455 
   4456       if (st->tag == Ist_WrTmp
   4457           && st->Ist.WrTmp.data->tag == Iex_GetI
   4458           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   4459          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   4460          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   4461          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   4462          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   4463          if (replacement
   4464              && isIRAtom(replacement)
   4465              /* Make sure we're doing a type-safe transformation! */
   4466              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   4467             if (DEBUG_IROPT) {
   4468                vex_printf("rGI:  ");
   4469                ppIRExpr(st->Ist.WrTmp.data);
   4470                vex_printf(" -> ");
   4471                ppIRExpr(replacement);
   4472                vex_printf("\n");
   4473             }
   4474             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   4475          }
   4476       }
   4477    }
   4478 
   4479 }
   4480 
   4481 
   4482 /* Remove redundant PutIs, to the extent which they can be detected.
   4483    bb is modified in-place. */
   4484 
   4485 static
   4486 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
   4487 {
   4488    Int    i, j;
   4489    Bool   delete;
   4490    IRStmt *st, *stj;
   4491 
   4492    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
   4493 
   4494    for (i = 0; i < bb->stmts_used; i++) {
   4495       st = bb->stmts[i];
   4496       if (st->tag != Ist_PutI)
   4497          continue;
   4498       /* Ok, search forwards from here to see if we can find another
   4499          PutI which makes this one redundant, and dodging various
   4500          hazards.  Search forwards:
   4501          * If conditional exit, give up (because anything after that
   4502            does not postdominate this put).
   4503          * If a Get which might overlap, give up (because this PutI
   4504            not necessarily dead).
   4505          * If a Put which is identical, stop with success.
   4506          * If a Put which might overlap, but is not identical, give up.
   4507          * If a dirty helper call which might write guest state, give up.
   4508          * If a Put which definitely doesn't overlap, or any other
   4509            kind of stmt, continue.
   4510       */
   4511       delete = False;
   4512       for (j = i+1; j < bb->stmts_used; j++) {
   4513          stj = bb->stmts[j];
   4514          if (stj->tag == Ist_NoOp)
   4515             continue;
   4516          if (identicalPutIs(st, stj)) {
   4517             /* success! */
   4518             delete = True;
   4519             break;
   4520          }
   4521          if (stj->tag == Ist_Exit)
   4522             /* give up */
   4523             break;
   4524          if (st->tag == Ist_Dirty)
   4525             /* give up; could do better here */
   4526             break;
   4527          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   4528             /* give up */
   4529            break;
   4530       }
   4531 
   4532       if (delete) {
   4533          if (DEBUG_IROPT) {
   4534             vex_printf("rPI:  ");
   4535             ppIRStmt(st);
   4536             vex_printf("\n");
   4537          }
   4538          bb->stmts[i] = IRStmt_NoOp();
   4539       }
   4540 
   4541    }
   4542 }
   4543 
   4544 
   4545 /*---------------------------------------------------------------*/
   4546 /*--- Loop unrolling                                          ---*/
   4547 /*---------------------------------------------------------------*/
   4548 
   4549 /* Adjust all tmp values (names) in e by delta.  e is destructively
   4550    modified. */
   4551 
   4552 static void deltaIRExpr ( IRExpr* e, Int delta )
   4553 {
   4554    Int i;
   4555    switch (e->tag) {
   4556       case Iex_RdTmp:
   4557          e->Iex.RdTmp.tmp += delta;
   4558          break;
   4559       case Iex_Get:
   4560       case Iex_Const:
   4561          break;
   4562       case Iex_GetI:
   4563          deltaIRExpr(e->Iex.GetI.ix, delta);
   4564          break;
   4565       case Iex_Qop:
   4566          deltaIRExpr(e->Iex.Qop.details->arg1, delta);
   4567          deltaIRExpr(e->Iex.Qop.details->arg2, delta);
   4568          deltaIRExpr(e->Iex.Qop.details->arg3, delta);
   4569          deltaIRExpr(e->Iex.Qop.details->arg4, delta);
   4570          break;
   4571       case Iex_Triop:
   4572          deltaIRExpr(e->Iex.Triop.details->arg1, delta);
   4573          deltaIRExpr(e->Iex.Triop.details->arg2, delta);
   4574          deltaIRExpr(e->Iex.Triop.details->arg3, delta);
   4575          break;
   4576       case Iex_Binop:
   4577          deltaIRExpr(e->Iex.Binop.arg1, delta);
   4578          deltaIRExpr(e->Iex.Binop.arg2, delta);
   4579          break;
   4580       case Iex_Unop:
   4581          deltaIRExpr(e->Iex.Unop.arg, delta);
   4582          break;
   4583       case Iex_Load:
   4584          deltaIRExpr(e->Iex.Load.addr, delta);
   4585          break;
   4586       case Iex_CCall:
   4587          for (i = 0; e->Iex.CCall.args[i]; i++)
   4588             deltaIRExpr(e->Iex.CCall.args[i], delta);
   4589          break;
   4590       case Iex_ITE:
   4591          deltaIRExpr(e->Iex.ITE.cond, delta);
   4592          deltaIRExpr(e->Iex.ITE.iftrue, delta);
   4593          deltaIRExpr(e->Iex.ITE.iffalse, delta);
   4594          break;
   4595       default:
   4596          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   4597          vpanic("deltaIRExpr");
   4598    }
   4599 }
   4600 
   4601 /* Adjust all tmp values (names) in st by delta.  st is destructively
   4602    modified. */
   4603 
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      /* No embedded tmps or exprs in these. */
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Renumber both the assigned tmp and any tmps in the rhs. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a tmp written by the statement, so it is renumbered
            directly, like WrTmp.tmp above. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi is IRTemp_INVALID for single-width CAS; expdHi/dataHi
            are NULL in that case too. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/GSPTR markers are not real expressions; skip. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
   4686 
   4687 
   4688 /* If possible, return a loop-unrolled version of bb0.  The original
   4689    is changed.  If not possible, return NULL.  */
   4690 
   4691 /* The two schemas considered are:
   4692 
   4693      X: BODY; goto X
   4694 
   4695      which unrolls to (eg)  X: BODY;BODY; goto X
   4696 
   4697    and
   4698 
   4699        X: BODY; if (c) goto X; goto Y
   4700    which trivially transforms to
   4701        X: BODY; if (!c) goto Y; goto X;
   4702    so it falls in the scope of the first case.
   4703 
   4704    X and Y must be literal (guest) addresses.
   4705 */
   4706 
   4707 static Int calc_unroll_factor( IRSB* bb )
   4708 {
   4709    Int n_stmts, i;
   4710 
   4711    n_stmts = 0;
   4712    for (i = 0; i < bb->stmts_used; i++) {
   4713       if (bb->stmts[i]->tag != Ist_NoOp)
   4714          n_stmts++;
   4715    }
   4716 
   4717    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   4718       if (vex_control.iropt_verbosity > 0)
   4719          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   4720                     n_stmts, 8* n_stmts);
   4721       return 8;
   4722    }
   4723    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   4724       if (vex_control.iropt_verbosity > 0)
   4725          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   4726                     n_stmts, 4* n_stmts);
   4727       return 4;
   4728    }
   4729 
   4730    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   4731       if (vex_control.iropt_verbosity > 0)
   4732          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   4733                     n_stmts, 2* n_stmts);
   4734       return 2;
   4735    }
   4736 
   4737    if (vex_control.iropt_verbosity > 0)
   4738       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   4739 
   4740    return 1;
   4741 }
   4742 
   4743 
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Unrolling disabled entirely? */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last statement.  NOTE(review): this tests for a NULL
      stmt slot, not for Ist_NoOp; presumably stmt slots are never
      NULL here, so this just picks the final stmt — confirm. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   /* xxx now becomes the conditional-exit target, i.e. the candidate
      loop header; yyy (saved above) is the fall-through target. */
   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass of this loop doubles bb1 in place, so 1, 2 or 3
      doublings give x2, x4 or x8. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      /* Clone bb1, then extend bb1's type env with a fresh copy of
         each of its tmps, so the clone's tmps can be renumbered into
         a disjoint range before being appended. */
      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
   4914 
   4915 
   4916 /*---------------------------------------------------------------*/
   4917 /*--- The tree builder                                        ---*/
   4918 /*---------------------------------------------------------------*/
   4919 
   4920 /* This isn't part of IR optimisation.  Really it's a pass done prior
   4921    to instruction selection, which improves the code that the
   4922    instruction selector can produce. */
   4923 
   4924 /* --- The 'tmp' environment is the central data structure here --- */
   4925 
   4926 /* The number of outstanding bindings we're prepared to track.
   4927    The number of times the env becomes full and we have to dump
   4928    the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  A size in the region of 8-10
   gives reasonable performance under most circumstances. */
   4931 #define A_NENV 10
   4932 
   4933 /* An interval. Used to record the bytes in the guest state accessed
   4934    by a Put[I] statement or by (one or more) Get[I] expression(s). In
   4935    case of several Get[I] expressions, the lower/upper bounds are recorded.
   4936    This is conservative but cheap.
   4937    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   4938    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   4939    offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;  /* True iff low/high hold a valid interval */
      Int  low;      /* lowest guest-state byte offset, inclusive */
      Int  high;     /* highest guest-state byte offset, inclusive */
   }
   Interval;
   4947 
   4948 static inline Bool
   4949 intervals_overlap(Interval i1, Interval i2)
   4950 {
   4951    return (i1.low >= i2.low && i1.low <= i2.high) ||
   4952           (i2.low >= i1.low && i2.low <= i1.high);
   4953 }
   4954 
   4955 static inline void
   4956 update_interval(Interval *i, Int low, Int high)
   4957 {
   4958    vassert(low <= high);
   4959 
   4960    if (i->present) {
   4961       if (low  < i->low)  i->low  = low;
   4962       if (high > i->high) i->high = high;
   4963    } else {
   4964       i->present = True;
   4965       i->low  = low;
   4966       i->high = high;
   4967    }
   4968 }
   4969 
   4970 
   4971 /* bindee == NULL   ===  slot is not in use
   4972    bindee != NULL   ===  slot is in use
   4973 */
typedef
   struct {
      IRTemp  binder;   /* the tmp this slot binds */
      IRExpr* bindee;   /* bound expression; NULL when slot is unused */
      Bool    doesLoad; /* True iff BINDEE reads memory */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
   4983 
   4984 __attribute__((unused))
   4985 static void ppAEnv ( ATmpInfo* env )
   4986 {
   4987    Int i;
   4988    for (i = 0; i < A_NENV; i++) {
   4989       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   4990       if (env[i].bindee)
   4991          ppIRExpr(env[i].bindee);
   4992       else
   4993          vex_printf("(null)");
   4994       vex_printf("\n");
   4995    }
   4996 }
   4997 
   4998 /* --- Tree-traversal fns --- */
   4999 
   5000 /* Traverse an expr, and detect if any part of it reads memory or does
   5001    a Get.  Be careful ... this really controls how much the
   5002    tree-builder can reorder the code, so getting it right is critical.
   5003 */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* Any memory read anywhere in the tree sets the flag. */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* Exact guest-state bytes read by this Get. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* The index is not known statically, so conservatively
            record the whole array as read. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Leaves: read neither memory nor guest state. */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
   5062 
   5063 
   5064 /* Add a binding to the front of the env and slide all the rest
   5065    backwards.  It should be the case that the last slot is free. */
   5066 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   5067 {
   5068    Int i;
   5069    vassert(env[A_NENV-1].bindee == NULL);
   5070    for (i = A_NENV-1; i >= 1; i--)
   5071       env[i] = env[i-1];
   5072    env[0].binder   = binder;
   5073    env[0].bindee   = bindee;
   5074    env[0].doesLoad = False; /* filled in later */
   5075    env[0].getInterval.present  = False; /* filled in later */
   5076    env[0].getInterval.low  = -1; /* filled in later */
   5077    env[0].getInterval.high = -1; /* filled in later */
   5078 }
   5079 
   5080 /* Given uses :: array of UShort, indexed by IRTemp
   5081    Add the use-occurrences of temps in this expression
   5082    to the env.
   5083 */
   5084 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
   5085 {
   5086    Int i;
   5087 
   5088    switch (e->tag) {
   5089 
   5090       case Iex_RdTmp: /* the only interesting case */
   5091          uses[e->Iex.RdTmp.tmp]++;
   5092          return;
   5093 
   5094       case Iex_ITE:
   5095          aoccCount_Expr(uses, e->Iex.ITE.cond);
   5096          aoccCount_Expr(uses, e->Iex.ITE.iftrue);
   5097          aoccCount_Expr(uses, e->Iex.ITE.iffalse);
   5098          return;
   5099 
   5100       case Iex_Qop:
   5101          aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
   5102          aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
   5103          aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
   5104          aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
   5105          return;
   5106 
   5107       case Iex_Triop:
   5108          aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
   5109          aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
   5110          aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
   5111          return;
   5112 
   5113       case Iex_Binop:
   5114          aoccCount_Expr(uses, e->Iex.Binop.arg1);
   5115          aoccCount_Expr(uses, e->Iex.Binop.arg2);
   5116          return;
   5117 
   5118       case Iex_Unop:
   5119          aoccCount_Expr(uses, e->Iex.Unop.arg);
   5120          return;
   5121 
   5122       case Iex_Load:
   5123          aoccCount_Expr(uses, e->Iex.Load.addr);
   5124          return;
   5125 
   5126       case Iex_CCall:
   5127          for (i = 0; e->Iex.CCall.args[i]; i++)
   5128             aoccCount_Expr(uses, e->Iex.CCall.args[i]);
   5129          return;
   5130 
   5131       case Iex_GetI:
   5132          aoccCount_Expr(uses, e->Iex.GetI.ix);
   5133          return;
   5134 
   5135       case Iex_Const:
   5136       case Iex_Get:
   5137          return;
   5138 
   5139       default:
   5140          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5141          vpanic("aoccCount_Expr");
   5142     }
   5143 }
   5144 
   5145 
   5146 /* Given uses :: array of UShort, indexed by IRTemp
   5147    Add the use-occurrences of temps in this statement
   5148    to the env.
   5149 */
   5150 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
   5151 {
   5152    Int      i;
   5153    IRDirty* d;
   5154    IRCAS*   cas;
   5155    switch (st->tag) {
   5156       case Ist_AbiHint:
   5157          aoccCount_Expr(uses, st->Ist.AbiHint.base);
   5158          aoccCount_Expr(uses, st->Ist.AbiHint.nia);
   5159          return;
   5160       case Ist_WrTmp:
   5161          aoccCount_Expr(uses, st->Ist.WrTmp.data);
   5162          return;
   5163       case Ist_Put:
   5164          aoccCount_Expr(uses, st->Ist.Put.data);
   5165          return;
   5166       case Ist_PutI:
   5167          aoccCount_Expr(uses, st->Ist.PutI.details->ix);
   5168          aoccCount_Expr(uses, st->Ist.PutI.details->data);
   5169          return;
   5170       case Ist_Store:
   5171          aoccCount_Expr(uses, st->Ist.Store.addr);
   5172          aoccCount_Expr(uses, st->Ist.Store.data);
   5173          return;
   5174       case Ist_StoreG: {
   5175          IRStoreG* sg = st->Ist.StoreG.details;
   5176          aoccCount_Expr(uses, sg->addr);
   5177          aoccCount_Expr(uses, sg->data);
   5178          aoccCount_Expr(uses, sg->guard);
   5179          return;
   5180       }
   5181       case Ist_LoadG: {
   5182          IRLoadG* lg = st->Ist.LoadG.details;
   5183          aoccCount_Expr(uses, lg->addr);
   5184          aoccCount_Expr(uses, lg->alt);
   5185          aoccCount_Expr(uses, lg->guard);
   5186          return;
   5187       }
   5188       case Ist_CAS:
   5189          cas = st->Ist.CAS.details;
   5190          aoccCount_Expr(uses, cas->addr);
   5191          if (cas->expdHi)
   5192             aoccCount_Expr(uses, cas->expdHi);
   5193          aoccCount_Expr(uses, cas->expdLo);
   5194          if (cas->dataHi)
   5195             aoccCount_Expr(uses, cas->dataHi);
   5196          aoccCount_Expr(uses, cas->dataLo);
   5197          return;
   5198       case Ist_LLSC:
   5199          aoccCount_Expr(uses, st->Ist.LLSC.addr);
   5200          if (st->Ist.LLSC.storedata)
   5201             aoccCount_Expr(uses, st->Ist.LLSC.storedata);
   5202          return;
   5203       case Ist_Dirty:
   5204          d = st->Ist.Dirty.details;
   5205          if (d->mFx != Ifx_None)
   5206             aoccCount_Expr(uses, d->mAddr);
   5207          aoccCount_Expr(uses, d->guard);
   5208          for (i = 0; d->args[i]; i++) {
   5209             IRExpr* arg = d->args[i];
   5210             if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
   5211                aoccCount_Expr(uses, arg);
   5212          }
   5213          return;
   5214       case Ist_NoOp:
   5215       case Ist_IMark:
   5216       case Ist_MBE:
   5217          return;
   5218       case Ist_Exit:
   5219          aoccCount_Expr(uses, st->Ist.Exit.guard);
   5220          return;
   5221       default:
   5222          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   5223          vpanic("aoccCount_Stmt");
   5224    }
   5225 }
   5226 
   5227 /* Look up a binding for tmp in the env.  If found, return the bound
   5228    expression, and set the env's binding to NULL so it is marked as
   5229    used.  If not found, return NULL. */
   5230 
   5231 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   5232 {
   5233    Int i;
   5234    for (i = 0; i < A_NENV; i++) {
   5235       if (env[i].binder == tmp && env[i].bindee != NULL) {
   5236          IRExpr* bindee = env[i].bindee;
   5237          env[i].bindee = NULL;
   5238          return bindee;
   5239       }
   5240    }
   5241    return NULL;
   5242 }
   5243 
   5244 /* Traverse e, looking for temps.  For each observed temp, see if env
   5245    contains a binding for the temp, and if so return the bound value.
   5246    The env has the property that any binding it holds is
   5247    'single-shot', so once a binding is used, it is marked as no longer
   5248    available, by setting its .bindee field to NULL. */
   5249 
   5250 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   5251    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   5252 }
   5253 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   5254    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   5255 }
   5256 
   5257 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   5258 {
   5259    switch (op) {
   5260    case Iop_Or32:
   5261       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   5262       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   5263          return IRExpr_Unop( Iop_CmpwNEZ32,
   5264                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   5265                                                      a2->Iex.Unop.arg ) );
   5266       break;
   5267 
   5268    case Iop_CmpNE32:
   5269       /* Since X has type Ity_I1 we can simplify:
   5270          CmpNE32(1Uto32(X),0)) ==> X */
   5271       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   5272          return a1->Iex.Unop.arg;
   5273       break;
   5274 
   5275    default:
   5276       break;
   5277    }
   5278    /* no reduction rule applies */
   5279    return IRExpr_Binop( op, a1, a2 );
   5280 }
   5281 
   5282 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   5283 {
   5284    switch (op) {
   5285    case Iop_CmpwNEZ64:
   5286       /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
   5287       if (is_Unop(aa, Iop_CmpwNEZ64))
   5288          return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
   5289       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5290       if (is_Binop(aa, Iop_Or64)
   5291           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   5292          return fold_IRExpr_Unop(
   5293                    Iop_CmpwNEZ64,
   5294                    IRExpr_Binop(Iop_Or64,
   5295                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   5296                                 aa->Iex.Binop.arg2));
   5297       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5298       if (is_Binop(aa, Iop_Or64)
   5299           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   5300          return fold_IRExpr_Unop(
   5301                    Iop_CmpwNEZ64,
   5302                    IRExpr_Binop(Iop_Or64,
   5303                                 aa->Iex.Binop.arg1,
   5304                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   5305       break;
   5306    case Iop_CmpNEZ64:
   5307       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   5308       if (is_Unop(aa, Iop_Left64))
   5309          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   5310       /* CmpNEZ64( 1Uto64(X) ) --> X */
   5311       if (is_Unop(aa, Iop_1Uto64))
   5312          return aa->Iex.Unop.arg;
   5313       break;
   5314    case Iop_CmpwNEZ32:
   5315       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   5316       if (is_Unop(aa, Iop_CmpwNEZ32))
   5317          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   5318       break;
   5319    case Iop_CmpNEZ32:
   5320       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   5321       if (is_Unop(aa, Iop_Left32))
   5322          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   5323       /* CmpNEZ32( 1Uto32(X) ) --> X */
   5324       if (is_Unop(aa, Iop_1Uto32))
   5325          return aa->Iex.Unop.arg;
   5326       /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
   5327       if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
   5328          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
   5329       break;
   5330    case Iop_CmpNEZ8:
   5331       /* CmpNEZ8( 1Uto8(X) ) --> X */
   5332       if (is_Unop(aa, Iop_1Uto8))
   5333          return aa->Iex.Unop.arg;
   5334       break;
   5335    case Iop_Left32:
   5336       /* Left32( Left32(x) ) --> Left32(x) */
   5337       if (is_Unop(aa, Iop_Left32))
   5338          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   5339       break;
   5340    case Iop_Left64:
   5341       /* Left64( Left64(x) ) --> Left64(x) */
   5342       if (is_Unop(aa, Iop_Left64))
   5343          return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
   5344       break;
   5345    case Iop_ZeroHI64ofV128:
   5346       /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
   5347       if (is_Unop(aa, Iop_ZeroHI64ofV128))
   5348          return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
   5349       break;
   5350    case Iop_32to1:
   5351       /* 32to1( 1Uto32 ( x ) ) --> x */
   5352       if (is_Unop(aa, Iop_1Uto32))
   5353          return aa->Iex.Unop.arg;
   5354       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   5355       if (is_Unop(aa, Iop_CmpwNEZ32))
   5356          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   5357       break;
   5358    case Iop_64to1:
   5359       /* 64to1( 1Uto64 ( x ) ) --> x */
   5360       if (is_Unop(aa, Iop_1Uto64))
   5361          return aa->Iex.Unop.arg;
   5362       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   5363       if (is_Unop(aa, Iop_CmpwNEZ64))
   5364          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   5365       break;
   5366    case Iop_64to32:
   5367       /* 64to32( 32Uto64 ( x )) --> x */
   5368       if (is_Unop(aa, Iop_32Uto64))
   5369          return aa->Iex.Unop.arg;
   5370       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   5371       if (is_Unop(aa, Iop_8Uto64))
   5372          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   5373       break;
   5374 
   5375    case Iop_32Uto64:
   5376       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   5377       if (is_Unop(aa, Iop_8Uto32))
   5378          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   5379       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   5380       if (is_Unop(aa, Iop_16Uto32))
   5381          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   5382       /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
   5383                      --> Shr64( 32Uto64(64to32(x)), sh )) */
   5384       if (is_Unop(aa, Iop_64to32)
   5385           && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
   5386           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5387           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5388                      Iop_64to32)) {
   5389          return aa->Iex.Unop.arg;
   5390       }
   5391       /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
   5392          --> 32Uto64(64to32( Shl64(                x,   sh )) */
   5393       if (is_Unop(aa, Iop_64to32)
   5394           && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
   5395           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5396           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5397                      Iop_64to32)) {
   5398          return
   5399             IRExpr_Unop(
   5400                Iop_32Uto64,
   5401                IRExpr_Unop(
   5402                   Iop_64to32,
   5403                   IRExpr_Binop(
   5404                      Iop_Shl64,
   5405                      aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
   5406                      aa->Iex.Unop.arg->Iex.Binop.arg2
   5407             )));
   5408       }
   5409       break;
   5410 
   5411    case Iop_1Sto32:
   5412       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   5413       if (is_Unop(aa, Iop_CmpNEZ8)
   5414           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   5415           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   5416           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   5417                      Iop_CmpNEZ32)) {
   5418          return IRExpr_Unop( Iop_CmpwNEZ32,
   5419                              aa->Iex.Unop.arg->Iex.Unop.arg
   5420                                ->Iex.Unop.arg->Iex.Unop.arg);
   5421       }
   5422       break;
   5423 
   5424    default:
   5425       break;
   5426    }
   5427    /* no reduction rule applies */
   5428    return IRExpr_Unop( op, aa );
   5429 }
   5430 
   5431 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   5432 {
   5433    IRExpr*  e2;
   5434    IRExpr** args2;
   5435    Int      i;
   5436 
   5437    switch (e->tag) {
   5438 
   5439       case Iex_CCall:
   5440          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   5441          for (i = 0; args2[i]; i++)
   5442             args2[i] = atbSubst_Expr(env,args2[i]);
   5443          return IRExpr_CCall(
   5444                    e->Iex.CCall.cee,
   5445                    e->Iex.CCall.retty,
   5446                    args2
   5447                 );
   5448       case Iex_RdTmp:
   5449          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   5450          return e2 ? e2 : e;
   5451       case Iex_ITE:
   5452          return IRExpr_ITE(
   5453                    atbSubst_Expr(env, e->Iex.ITE.cond),
   5454                    atbSubst_Expr(env, e->Iex.ITE.iftrue),
   5455                    atbSubst_Expr(env, e->Iex.ITE.iffalse)
   5456                 );
   5457       case Iex_Qop:
   5458          return IRExpr_Qop(
   5459                    e->Iex.Qop.details->op,
   5460                    atbSubst_Expr(env, e->Iex.Qop.details->arg1),
   5461                    atbSubst_Expr(env, e->Iex.Qop.details->arg2),
   5462                    atbSubst_Expr(env, e->Iex.Qop.details->arg3),
   5463                    atbSubst_Expr(env, e->Iex.Qop.details->arg4)
   5464                 );
   5465       case Iex_Triop:
   5466          return IRExpr_Triop(
   5467                    e->Iex.Triop.details->op,
   5468                    atbSubst_Expr(env, e->Iex.Triop.details->arg1),
   5469                    atbSubst_Expr(env, e->Iex.Triop.details->arg2),
   5470                    atbSubst_Expr(env, e->Iex.Triop.details->arg3)
   5471                 );
   5472       case Iex_Binop:
   5473          return fold_IRExpr_Binop(
   5474                    e->Iex.Binop.op,
   5475                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   5476                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   5477                 );
   5478       case Iex_Unop:
   5479          return fold_IRExpr_Unop(
   5480                    e->Iex.Unop.op,
   5481                    atbSubst_Expr(env, e->Iex.Unop.arg)
   5482                 );
   5483       case Iex_Load:
   5484          return IRExpr_Load(
   5485                    e->Iex.Load.end,
   5486                    e->Iex.Load.ty,
   5487                    atbSubst_Expr(env, e->Iex.Load.addr)
   5488                 );
   5489       case Iex_GetI:
   5490          return IRExpr_GetI(
   5491                    e->Iex.GetI.descr,
   5492                    atbSubst_Expr(env, e->Iex.GetI.ix),
   5493                    e->Iex.GetI.bias
   5494                 );
   5495       case Iex_Const:
   5496       case Iex_Get:
   5497          return e;
   5498       default:
   5499          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5500          vpanic("atbSubst_Expr");
   5501    }
   5502 }
   5503 
   5504 /* Same deal as atbSubst_Expr, except for stmts. */
   5505 
   5506 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
   5507 {
   5508    Int     i;
   5509    IRDirty *d, *d2;
   5510    IRCAS   *cas, *cas2;
   5511    IRPutI  *puti, *puti2;
   5512 
   5513    switch (st->tag) {
   5514       case Ist_AbiHint:
   5515          return IRStmt_AbiHint(
   5516                    atbSubst_Expr(env, st->Ist.AbiHint.base),
   5517                    st->Ist.AbiHint.len,
   5518                    atbSubst_Expr(env, st->Ist.AbiHint.nia)
   5519                 );
   5520       case Ist_Store:
   5521          return IRStmt_Store(
   5522                    st->Ist.Store.end,
   5523                    atbSubst_Expr(env, st->Ist.Store.addr),
   5524                    atbSubst_Expr(env, st->Ist.Store.data)
   5525                 );
   5526       case Ist_StoreG: {
   5527          IRStoreG* sg = st->Ist.StoreG.details;
   5528          return IRStmt_StoreG(sg->end,
   5529                               atbSubst_Expr(env, sg->addr),
   5530                               atbSubst_Expr(env, sg->data),
   5531                               atbSubst_Expr(env, sg->guard));
   5532       }
   5533       case Ist_LoadG: {
   5534          IRLoadG* lg = st->Ist.LoadG.details;
   5535          return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
   5536                              atbSubst_Expr(env, lg->addr),
   5537                              atbSubst_Expr(env, lg->alt),
   5538                              atbSubst_Expr(env, lg->guard));
   5539       }
   5540       case Ist_WrTmp:
   5541          return IRStmt_WrTmp(
   5542                    st->Ist.WrTmp.tmp,
   5543                    atbSubst_Expr(env, st->Ist.WrTmp.data)
   5544                 );
   5545       case Ist_Put:
   5546          return IRStmt_Put(
   5547                    st->Ist.Put.offset,
   5548                    atbSubst_Expr(env, st->Ist.Put.data)
   5549                 );
   5550       case Ist_PutI:
   5551          puti  = st->Ist.PutI.details;
   5552          puti2 = mkIRPutI(puti->descr,
   5553                           atbSubst_Expr(env, puti->ix),
   5554                           puti->bias,
   5555                           atbSubst_Expr(env, puti->data));
   5556          return IRStmt_PutI(puti2);
   5557 
   5558       case Ist_Exit:
   5559          return IRStmt_Exit(
   5560                    atbSubst_Expr(env, st->Ist.Exit.guard),
   5561                    st->Ist.Exit.jk,
   5562                    st->Ist.Exit.dst,
   5563                    st->Ist.Exit.offsIP
   5564                 );
   5565       case Ist_IMark:
   5566          return IRStmt_IMark(st->Ist.IMark.addr,
   5567                              st->Ist.IMark.len,
   5568                              st->Ist.IMark.delta);
   5569       case Ist_NoOp:
   5570          return IRStmt_NoOp();
   5571       case Ist_MBE:
   5572          return IRStmt_MBE(st->Ist.MBE.event);
   5573       case Ist_CAS:
   5574          cas  = st->Ist.CAS.details;
   5575          cas2 = mkIRCAS(
   5576                    cas->oldHi, cas->oldLo, cas->end,
   5577                    atbSubst_Expr(env, cas->addr),
   5578                    cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
   5579                    atbSubst_Expr(env, cas->expdLo),
   5580                    cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
   5581                    atbSubst_Expr(env, cas->dataLo)
   5582                 );
   5583          return IRStmt_CAS(cas2);
   5584       case Ist_LLSC:
   5585          return IRStmt_LLSC(
   5586                    st->Ist.LLSC.end,
   5587                    st->Ist.LLSC.result,
   5588                    atbSubst_Expr(env, st->Ist.LLSC.addr),
   5589                    st->Ist.LLSC.storedata
   5590                       ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
   5591                 );
   5592       case Ist_Dirty:
   5593          d  = st->Ist.Dirty.details;
   5594          d2 = emptyIRDirty();
   5595          *d2 = *d;
   5596          if (d2->mFx != Ifx_None)
   5597             d2->mAddr = atbSubst_Expr(env, d2->mAddr);
   5598          d2->guard = atbSubst_Expr(env, d2->guard);
   5599          for (i = 0; d2->args[i]; i++) {
   5600             IRExpr* arg = d2->args[i];
   5601             if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
   5602                d2->args[i] = atbSubst_Expr(env, arg);
   5603          }
   5604          return IRStmt_Dirty(d2);
   5605       default:
   5606          vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
   5607          vpanic("atbSubst_Stmt");
   5608    }
   5609 }
   5610 
   5611 inline
   5612 static Bool dirty_helper_stores ( const IRDirty *d )
   5613 {
   5614    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
   5615 }
   5616 
   5617 inline
   5618 static Interval dirty_helper_puts (
   5619                    const IRDirty *d,
   5620                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5621                    VexRegisterUpdates pxControl,
   5622                    /*OUT*/Bool *requiresPreciseMemExns
   5623                 )
   5624 {
   5625    Int i;
   5626    Interval interval;
   5627 
   5628    /* Passing the guest state pointer opens the door to modifying the
   5629       guest state under the covers.  It's not allowed, but let's be
   5630       extra conservative and assume the worst. */
   5631    for (i = 0; d->args[i]; i++) {
   5632       if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
   5633          *requiresPreciseMemExns = True;
   5634          /* Assume all guest state is written. */
   5635          interval.present = True;
   5636          interval.low  = 0;
   5637          interval.high = 0x7FFFFFFF;
   5638          return interval;
   5639       }
   5640    }
   5641 
   5642    /* Check the side effects on the guest state */
   5643    interval.present = False;
   5644    interval.low = interval.high = -1;
   5645    *requiresPreciseMemExns = False;
   5646 
   5647    for (i = 0; i < d->nFxState; ++i) {
   5648       if (d->fxState[i].fx != Ifx_Read) {
   5649          Int offset = d->fxState[i].offset;
   5650          Int size = d->fxState[i].size;
   5651          Int nRepeats = d->fxState[i].nRepeats;
   5652          Int repeatLen = d->fxState[i].repeatLen;
   5653 
   5654          if (preciseMemExnsFn(offset,
   5655                               offset + nRepeats * repeatLen + size - 1,
   5656                               pxControl)) {
   5657             *requiresPreciseMemExns = True;
   5658          }
   5659          update_interval(&interval, offset,
   5660                          offset + nRepeats * repeatLen + size - 1);
   5661       }
   5662    }
   5663 
   5664    return interval;
   5665 }
   5666 
   5667 /* Return an interval if st modifies the guest state.  Via
   5668    requiresPreciseMemExns return whether or not that modification
   5669    requires precise exceptions. */
   5670 static Interval stmt_modifies_guest_state (
   5671                    IRSB *bb, const IRStmt *st,
   5672                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5673                    VexRegisterUpdates pxControl,
   5674                    /*OUT*/Bool *requiresPreciseMemExns
   5675                 )
   5676 {
   5677    Interval interval;
   5678 
   5679    switch (st->tag) {
   5680    case Ist_Put: {
   5681       Int offset = st->Ist.Put.offset;
   5682       Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
   5683 
   5684       *requiresPreciseMemExns
   5685          = preciseMemExnsFn(offset, offset + size - 1, pxControl);
   5686       interval.present = True;
   5687       interval.low  = offset;
   5688       interval.high = offset + size - 1;
   5689       return interval;
   5690    }
   5691 
   5692    case Ist_PutI: {
   5693       IRRegArray *descr = st->Ist.PutI.details->descr;
   5694       Int offset = descr->base;
   5695       Int size = sizeofIRType(descr->elemTy);
   5696 
   5697       /* We quietly assume here that all segments are contiguous and there
   5698          are no holes. This is to avoid a loop. The assumption is conservative
   5699          in the sense that we might report that precise memory exceptions are
   5700          needed when in fact they are not. */
   5701       *requiresPreciseMemExns
   5702          = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
   5703                             pxControl);
   5704       interval.present = True;
   5705       interval.low  = offset;
   5706       interval.high = offset + descr->nElems * size - 1;
   5707       return interval;
   5708    }
   5709 
   5710    case Ist_Dirty:
   5711       return dirty_helper_puts(st->Ist.Dirty.details,
   5712                                preciseMemExnsFn, pxControl,
   5713                                requiresPreciseMemExns);
   5714 
   5715    default:
   5716       *requiresPreciseMemExns = False;
   5717       interval.present = False;
   5718       interval.low  = -1;
   5719       interval.high = -1;
   5720       return interval;
   5721    }
   5722 }
   5723 
   5724 /* notstatic */ Addr ado_treebuild_BB (
   5725                         IRSB* bb,
   5726                         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5727                         VexRegisterUpdates pxControl
   5728                      )
   5729 {
   5730    Int      i, j, k, m;
   5731    Bool     stmtStores, invalidateMe;
   5732    Interval putInterval;
   5733    IRStmt*  st;
   5734    IRStmt*  st2;
   5735    ATmpInfo env[A_NENV];
   5736 
   5737    Bool   max_ga_known = False;
   5738    Addr   max_ga       = 0;
   5739 
   5740    Int       n_tmps = bb->tyenv->types_used;
   5741    UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
   5742 
   5743    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
   5744       temp.  Also count occurrences in the bb->next field.  Take the
   5745       opportunity to also find the maximum guest address in the block,
   5746       since that will be needed later for deciding when we can safely
   5747       elide event checks. */
   5748 
   5749    for (i = 0; i < n_tmps; i++)
   5750       uses[i] = 0;
   5751 
   5752    for (i = 0; i < bb->stmts_used; i++) {
   5753       st = bb->stmts[i];
   5754       switch (st->tag) {
   5755          case Ist_NoOp:
   5756             continue;
   5757          case Ist_IMark: {
   5758             UInt len = st->Ist.IMark.len;
   5759             Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
   5760             max_ga_known = True;
   5761             if (mga > max_ga)
   5762                max_ga = mga;
   5763             break;
   5764          }
   5765          default:
   5766             break;
   5767       }
   5768       aoccCount_Stmt( uses, st );
   5769    }
   5770    aoccCount_Expr(uses, bb->next );
   5771 
   5772 #  if 0
   5773    for (i = 0; i < n_tmps; i++) {
   5774       if (uses[i] == 0)
   5775         continue;
   5776       ppIRTemp( (IRTemp)i );
   5777       vex_printf("  used %d\n", (Int)uses[i] );
   5778    }
   5779 #  endif
   5780 
   5781    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
   5782 
   5783          If the env is full, emit the end element.  This guarantees
   5784          there is at least one free slot in the following.
   5785 
   5786          On seeing 't = E', occ(t)==1,
   5787             let E'=env(E)
   5788             delete this stmt
   5789             add t -> E' to the front of the env
   5790             Examine E' and set the hints for E' appropriately
   5791               (doesLoad? doesGet?)
   5792 
   5793          On seeing any other stmt,
   5794             let stmt' = env(stmt)
   5795             remove from env any 't=E' binds invalidated by stmt
   5796                 emit the invalidated stmts
   5797             emit stmt'
   5798             compact any holes in env
   5799               by sliding entries towards the front
   5800 
   5801       Finally, apply env to bb->next.
   5802    */
   5803 
   5804    for (i = 0; i < A_NENV; i++) {
   5805       env[i].bindee = NULL;
   5806       env[i].binder = IRTemp_INVALID;
   5807    }
   5808 
   5809    /* The stmts in bb are being reordered, and we are guaranteed to
   5810       end up with no more than the number we started with.  Use i to
   5811       be the cursor of the current stmt examined and j <= i to be that
   5812       for the current stmt being written.
   5813    */
   5814    j = 0;
   5815    for (i = 0; i < bb->stmts_used; i++) {
   5816 
   5817       st = bb->stmts[i];
   5818       if (st->tag == Ist_NoOp)
   5819          continue;
   5820 
   5821       /* Ensure there's at least one space in the env, by emitting
   5822          the oldest binding if necessary. */
   5823       if (env[A_NENV-1].bindee != NULL) {
   5824          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
   5825                                       env[A_NENV-1].bindee );
   5826          j++;
   5827          vassert(j <= i);
   5828          env[A_NENV-1].bindee = NULL;
   5829       }
   5830 
   5831       /* Consider current stmt. */
   5832       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
   5833          IRExpr *e, *e2;
   5834 
   5835          /* optional extra: dump dead bindings as we find them.
   5836             Removes the need for a prior dead-code removal pass. */
   5837          if (uses[st->Ist.WrTmp.tmp] == 0) {
   5838 	    if (0) vex_printf("DEAD binding\n");
   5839             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5840          }
   5841          vassert(uses[st->Ist.WrTmp.tmp] == 1);
   5842 
   5843          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
   5844             actions. */
   5845          e  = st->Ist.WrTmp.data;
   5846          e2 = atbSubst_Expr(env, e);
   5847          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
   5848          setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
   5849          /* don't advance j, as we are deleting this stmt and instead
   5850             holding it temporarily in the env. */
   5851          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
   5852       }
   5853 
   5854       /* we get here for any other kind of statement. */
   5855       /* 'use up' any bindings required by the current statement. */
   5856       st2 = atbSubst_Stmt(env, st);
   5857 
   5858       /* Now, before this stmt, dump any bindings in env that it
   5859          invalidates.  These need to be dumped in the order in which
   5860          they originally entered env -- that means from oldest to
   5861          youngest. */
   5862 
   5863       /* putInterval/stmtStores characterise what the stmt under
   5864          consideration does, or might do (sidely safe @ True). */
   5865 
   5866       Bool putRequiresPreciseMemExns;
   5867       putInterval = stmt_modifies_guest_state(
   5868                        bb, st, preciseMemExnsFn, pxControl,
   5869                        &putRequiresPreciseMemExns
   5870                     );
   5871 
   5872       /* be True if this stmt writes memory or might do (==> we don't
   5873          want to reorder other loads or stores relative to it).  Also,
   5874          both LL and SC fall under this classification, since we
   5875          really ought to be conservative and not reorder any other
   5876          memory transactions relative to them. */
   5877       stmtStores
   5878          = toBool( st->tag == Ist_Store
   5879                    || (st->tag == Ist_Dirty
   5880                        && dirty_helper_stores(st->Ist.Dirty.details))
   5881                    || st->tag == Ist_LLSC
   5882                    || st->tag == Ist_CAS );
   5883 
   5884       for (k = A_NENV-1; k >= 0; k--) {
   5885          if (env[k].bindee == NULL)
   5886             continue;
   5887          /* Compare the actions of this stmt with the actions of
   5888             binding 'k', to see if they invalidate the binding. */
   5889          invalidateMe
   5890             = toBool(
   5891               /* a store invalidates loaded data */
   5892               (env[k].doesLoad && stmtStores)
   5893               /* a put invalidates get'd data, if they overlap */
   5894               || ((env[k].getInterval.present && putInterval.present) &&
   5895                   intervals_overlap(env[k].getInterval, putInterval))
   5896               /* a put invalidates loaded data. That means, in essense, that
   5897                  a load expression cannot be substituted into a statement
   5898                  that follows the put. But there is nothing wrong doing so
   5899                  except when the put statement requries precise exceptions.
   5900                  Think of a load that is moved past a put where the put
   5901                  updates the IP in the guest state. If the load generates
   5902                  a segfault, the wrong address (line number) would be
   5903                  reported. */
   5904               || (env[k].doesLoad && putInterval.present &&
   5905                   putRequiresPreciseMemExns)
   5906               /* probably overly conservative: a memory bus event
   5907                  invalidates absolutely everything, so that all
   5908                  computation prior to it is forced to complete before
   5909                  proceeding with the event (fence,lock,unlock). */
   5910               || st->tag == Ist_MBE
   5911               /* also be (probably overly) paranoid re AbiHints */
   5912               || st->tag == Ist_AbiHint
   5913               );
   5914          if (invalidateMe) {
   5915             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
   5916             j++;
   5917             vassert(j <= i);
   5918             env[k].bindee = NULL;
   5919          }
   5920       }
   5921 
   5922       /* Slide in-use entries in env up to the front */
   5923       m = 0;
   5924       for (k = 0; k < A_NENV; k++) {
   5925          if (env[k].bindee != NULL) {
   5926             env[m] = env[k];
   5927             m++;
   5928 	 }
   5929       }
   5930       for (m = m; m < A_NENV; m++) {
   5931          env[m].bindee = NULL;
   5932       }
   5933 
   5934       /* finally, emit the substituted statement */
   5935       bb->stmts[j] = st2;
   5936       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
   5937       j++;
   5938 
   5939       vassert(j <= i+1);
   5940    } /* for each stmt in the original bb ... */
   5941 
   5942    /* Finally ... substitute the ->next field as much as possible, and
   5943       dump any left-over bindings.  Hmm.  Perhaps there should be no
   5944       left over bindings?  Or any left-over bindings are
   5945       by definition dead? */
   5946    bb->next = atbSubst_Expr(env, bb->next);
   5947    bb->stmts_used = j;
   5948 
   5949    return max_ga_known ? max_ga : ~(Addr)0;
   5950 }
   5951 
   5952 
   5953 /*---------------------------------------------------------------*/
   5954 /*--- MSVC specific transformation hacks                      ---*/
   5955 /*---------------------------------------------------------------*/
   5956 
   5957 /* The purpose of all this is to find MSVC's idiom for non-constant
   5958    bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   5961    doesn't understand that XORing an undefined bit with itself gives a
   5962    defined result.
   5963 
   5964    This isn't a problem for the simple case "x ^ x", because iropt
   5965    folds it to a constant zero before Memcheck ever sees it.  But in
   5966    this case we have an intervening "& c" which defeats the simple
   5967    case.  So we have to carefully inspect all expressions rooted at an
   5968    XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   5969    7 other variants resulting from swapping the order of arguments to
   5970    the three binary operations.  If we get a match, we then replace
   5971    the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
   5972 
   5973    The key difficulty is to spot the two uses of "a".  To normalise
   5974    the IR to maximise the chances of success, we first do a CSE pass,
   5975    with CSEing of loads enabled, since the two "a" expressions may be
   5976    loads, which need to be commoned up.  Then we do a constant folding
   5977    pass, so as to remove any tmp-to-tmp assignment chains that would
   5978    make matching the original expression more difficult.
   5979 */
   5980 
   5981 
   5982 /* Helper function for debug printing */
   5983 __attribute__((unused))
   5984 static void print_flat_expr ( IRExpr** env, IRExpr* e )
   5985 {
   5986    if (e == NULL) {
   5987       vex_printf("?");
   5988       return;
   5989    }
   5990    switch (e->tag) {
   5991       case Iex_Binop: {
   5992          ppIROp(e->Iex.Binop.op);
   5993          vex_printf("(");
   5994          print_flat_expr(env, e->Iex.Binop.arg1);
   5995          vex_printf(",");
   5996          print_flat_expr(env, e->Iex.Binop.arg2);
   5997          vex_printf(")");
   5998          break;
   5999       }
   6000       case Iex_Unop: {
   6001          ppIROp(e->Iex.Unop.op);
   6002          vex_printf("(");
   6003          print_flat_expr(env, e->Iex.Unop.arg);
   6004          vex_printf(")");
   6005          break;
   6006       }
   6007       case Iex_RdTmp:
   6008          ppIRTemp(e->Iex.RdTmp.tmp);
   6009          vex_printf("=");
   6010          print_flat_expr(env, chase1(env, e));
   6011          break;
   6012       case Iex_Const:
   6013       case Iex_CCall:
   6014       case Iex_Load:
   6015       case Iex_ITE:
   6016       case Iex_Get:
   6017          ppIRExpr(e);
   6018          break;
   6019       default:
   6020          vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
   6021          vassert(0);
   6022    }
   6023 }
   6024 
   6025 /* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
   6026    or any of the other 7 variants generated by switching the order
   6027    of arguments to the outer ^, the inner ^ and the &.
   6028 */
   6029 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
   6030                                      /*OUT*/IRExpr** cc,
   6031                                      IRExpr** env, IRExpr* e,
   6032                                      IROp opAND, IROp opXOR)
   6033 {
   6034 #  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
   6035                               && (_e)->Iex.Binop.op == (_op))
   6036 #  define ISATOM(_e)    isIRAtom(_e)
   6037 #  define STEP(_e)      chase1(env, (_e))
   6038 #  define LL(_e)        ((_e)->Iex.Binop.arg1)
   6039 #  define RR(_e)        ((_e)->Iex.Binop.arg2)
   6040 
   6041    IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
   6042 
   6043    /* This is common to all 8 cases */
   6044    if (!ISBIN(e, opXOR)) goto fail;
   6045 
   6046    /*                        -----and------ */
   6047    /*                         --xor---      */
   6048    /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   6049    /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   6050    a1 = and = xor = c = a2bL = a2bR = NULL;
   6051 
   6052    a1   = LL(e);
   6053    and  = STEP(RR(e));
   6054    if (!ISBIN(and, opAND)) goto v34;
   6055    xor  = STEP(LL(and));
   6056    c    = RR(and);
   6057    if (!ISBIN(xor, opXOR)) goto v34;
   6058    a2bL = LL(xor);
   6059    a2bR = RR(xor);
   6060 
   6061    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6062       *aa = a1;
   6063       *bb = a2bR;
   6064       *cc = c;
   6065       return 1;
   6066    }
   6067    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6068       *aa = a1;
   6069       *bb = a2bL;
   6070       *cc = c;
   6071       return 2;
   6072    }
   6073 
   6074   v34:
   6075    /*                   -----and------      */
   6076    /*                    --xor---           */
   6077    /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   6078    /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   6079    a1 = and = xor = c = a2bL = a2bR = NULL;
   6080 
   6081    a1   = RR(e);
   6082    and  = STEP(LL(e));
   6083    if (!ISBIN(and, opAND)) goto v56;
   6084    xor  = STEP(LL(and));
   6085    c    = RR(and);
   6086    if (!ISBIN(xor, opXOR)) goto v56;
   6087    a2bL = LL(xor);
   6088    a2bR = RR(xor);
   6089 
   6090    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6091       *aa = a1;
   6092       *bb = a2bR;
   6093       *cc = c;
   6094       return 3;
   6095    }
   6096    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6097       *aa = a1;
   6098       *bb = a2bL;
   6099       *cc = c;
   6100       return 4;
   6101    }
   6102 
   6103   v56:
   6104    /*                        -----and------ */
   6105    /*                         --xor---      */
   6106    /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   6107    /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   6108    a1 = and = xor = c = a2bL = a2bR = NULL;
   6109 
   6110    a1   = LL(e);
   6111    and  = STEP(RR(e));
   6112    if (!ISBIN(and, opAND)) goto v78;
   6113    xor  = STEP(RR(and));
   6114    c    = LL(and);
   6115    if (!ISBIN(xor, opXOR)) goto v78;
   6116    a2bL = LL(xor);
   6117    a2bR = RR(xor);
   6118 
   6119    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6120       *aa = a1;
   6121       *bb = a2bR;
   6122       *cc = c;
   6123       vassert(5-5); // ATC
   6124       return 5;
   6125    }
   6126    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6127       *aa = a1;
   6128       *bb = a2bL;
   6129       *cc = c;
   6130       vassert(6-6); // ATC
   6131       return 6;
   6132    }
   6133 
   6134  v78:
   6135    /*                   -----and------      */
   6136    /*                    --xor---           */
   6137    /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   6138    /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   6139    a1 = and = xor = c = a2bL = a2bR = NULL;
   6140 
   6141    a1   = RR(e);
   6142    and  = STEP(LL(e));
   6143    if (!ISBIN(and, opAND)) goto fail;
   6144    xor  = STEP(RR(and));
   6145    c    = LL(and);
   6146    if (!ISBIN(xor, opXOR)) goto fail;
   6147    a2bL = LL(xor);
   6148    a2bR = RR(xor);
   6149 
   6150    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6151       *aa = a1;
   6152       *bb = a2bR;
   6153       *cc = c;
   6154       return 7;
   6155    }
   6156    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6157       *aa = a1;
   6158       *bb = a2bL;
   6159       *cc = c;
   6160       return 8;
   6161    }
   6162 
   6163  fail:
   6164    return 0;
   6165 
   6166 #  undef ISBIN
   6167 #  undef ISATOM
   6168 #  undef STEP
   6169 #  undef LL
   6170 #  undef RR
   6171 }
   6172 
   6173 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   6174    variants thereof generated by switching arguments around), return
   6175    the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
   6176 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
   6177 {
   6178    if (e->tag != Iex_Binop)
   6179       return NULL;
   6180 
   6181    const HChar* tyNm = NULL;
   6182    IROp   opOR  = Iop_INVALID;
   6183    IROp   opAND = Iop_INVALID;
   6184    IROp   opNOT = Iop_INVALID;
   6185    IROp   opXOR = Iop_INVALID;
   6186    switch (e->Iex.Binop.op) {
   6187       case Iop_Xor32:
   6188          tyNm  = "I32";
   6189          opOR  = Iop_Or32;  opAND = Iop_And32;
   6190          opNOT = Iop_Not32; opXOR = Iop_Xor32;
   6191          break;
   6192       case Iop_Xor16:
   6193          tyNm  = "I16";
   6194          opOR  = Iop_Or16;  opAND = Iop_And16;
   6195          opNOT = Iop_Not16; opXOR = Iop_Xor16;
   6196          break;
   6197       case Iop_Xor8:
   6198          tyNm  = "I8";
   6199          opOR  = Iop_Or8;  opAND = Iop_And8;
   6200          opNOT = Iop_Not8; opXOR = Iop_Xor8;
   6201          break;
   6202       default:
   6203          return NULL;
   6204    }
   6205 
   6206    IRExpr* aa = NULL;
   6207    IRExpr* bb = NULL;
   6208    IRExpr* cc = NULL;
   6209    UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   6210    if (variant > 0) {
   6211       static UInt ctr = 0;
   6212       if (0)
   6213          vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
   6214                     "type %s, variant %u\n",
   6215                     ++ctr, tyNm, variant);
   6216       /* It's vitally important that the returned aa, bb and cc are
   6217          atoms -- either constants or tmps.  If it's anything else
   6218          (eg, a GET) then incorporating them in a tree at this point
   6219          in the SB may erroneously pull them forwards (eg of a PUT
   6220          that originally was after the GET) and so transform the IR
   6221          wrongly.  spotBitfieldAssignment should guarantee only to
   6222          give us atoms, but we check here anyway. */
   6223       vassert(aa && isIRAtom(aa));
   6224       vassert(bb && isIRAtom(bb));
   6225       vassert(cc && isIRAtom(cc));
   6226       return IRExpr_Binop(
   6227                 opOR,
   6228                 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
   6229                 IRExpr_Binop(opAND, bb, cc)
   6230              );
   6231    }
   6232    return NULL;
   6233 }
   6234 
   6235 
   6236 /* SB is modified in-place.  Visit all the IRExprs and, for those
   6237    which are allowed to be non-atomic, perform the XOR transform if
   6238    possible.  This makes |sb| be non-flat, but that's ok, the caller
   6239    can re-flatten it.  Returns True iff any changes were made. */
   6240 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
   6241 {
   6242    Int  i;
   6243    Bool changed = False;
   6244 
   6245    /* Make the tmp->expr environment, so we can use it for
   6246       chasing expressions. */
   6247    Int      n_tmps = sb->tyenv->types_used;
   6248    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   6249    for (i = 0; i < n_tmps; i++)
   6250       env[i] = NULL;
   6251 
   6252    for (i = 0; i < sb->stmts_used; i++) {
   6253       IRStmt* st = sb->stmts[i];
   6254       if (st->tag != Ist_WrTmp)
   6255          continue;
   6256       IRTemp t = st->Ist.WrTmp.tmp;
   6257       vassert(t >= 0 && t < n_tmps);
   6258       env[t] = st->Ist.WrTmp.data;
   6259    }
   6260 
   6261    for (i = 0; i < sb->stmts_used; i++) {
   6262       IRStmt* st = sb->stmts[i];
   6263 
   6264       switch (st->tag) {
   6265          case Ist_AbiHint:
   6266             vassert(isIRAtom(st->Ist.AbiHint.base));
   6267             vassert(isIRAtom(st->Ist.AbiHint.nia));
   6268             break;
   6269          case Ist_Put:
   6270             vassert(isIRAtom(st->Ist.Put.data));
   6271             break;
   6272          case Ist_PutI: {
   6273             IRPutI* puti = st->Ist.PutI.details;
   6274             vassert(isIRAtom(puti->ix));
   6275             vassert(isIRAtom(puti->data));
   6276             break;
   6277          }
   6278          case Ist_WrTmp: {
   6279             /* This is the one place where an expr (st->Ist.WrTmp.data) is
   6280                allowed to be more than just a constant or a tmp. */
   6281             IRExpr* mb_new_data
   6282                = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
   6283             if (mb_new_data) {
   6284                //ppIRSB(sb);
   6285                st->Ist.WrTmp.data = mb_new_data;
   6286                //ppIRSB(sb);
   6287                changed = True;
   6288             }
   6289             break;
   6290          }
   6291          case Ist_Store:
   6292             vassert(isIRAtom(st->Ist.Store.addr));
   6293             vassert(isIRAtom(st->Ist.Store.data));
   6294             break;
   6295          case Ist_StoreG: {
   6296             IRStoreG* sg = st->Ist.StoreG.details;
   6297             vassert(isIRAtom(sg->addr));
   6298             vassert(isIRAtom(sg->data));
   6299             vassert(isIRAtom(sg->guard));
   6300             break;
   6301          }
   6302          case Ist_LoadG: {
   6303             IRLoadG* lg = st->Ist.LoadG.details;
   6304             vassert(isIRAtom(lg->addr));
   6305             vassert(isIRAtom(lg->alt));
   6306             vassert(isIRAtom(lg->guard));
   6307             break;
   6308          }
   6309          case Ist_CAS: {
   6310             IRCAS* cas = st->Ist.CAS.details;
   6311             vassert(isIRAtom(cas->addr));
   6312             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   6313             vassert(isIRAtom(cas->expdLo));
   6314             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   6315             vassert(isIRAtom(cas->dataLo));
   6316             break;
   6317          }
   6318          case Ist_LLSC:
   6319             vassert(isIRAtom(st->Ist.LLSC.addr));
   6320             if (st->Ist.LLSC.storedata)
   6321                vassert(isIRAtom(st->Ist.LLSC.storedata));
   6322             break;
   6323          case Ist_Dirty: {
   6324             IRDirty* d = st->Ist.Dirty.details;
   6325             if (d->mFx != Ifx_None) {
   6326                vassert(isIRAtom(d->mAddr));
   6327             }
   6328             vassert(isIRAtom(d->guard));
   6329             for (Int j = 0; d->args[j]; j++) {
   6330                IRExpr* arg = d->args[j];
   6331                if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
   6332                   vassert(isIRAtom(arg));
   6333                }
   6334             }
   6335             break;
   6336          }
   6337          case Ist_IMark:
   6338          case Ist_NoOp:
   6339          case Ist_MBE:
   6340             break;
   6341          case Ist_Exit:
   6342             vassert(isIRAtom(st->Ist.Exit.guard));
   6343             break;
   6344          default:
   6345             vex_printf("\n"); ppIRStmt(st);
   6346             vpanic("do_XOR_TRANSFORMS_IRSB");
   6347       }
   6348    }
   6349 
   6350    vassert(isIRAtom(sb->next));
   6351    return changed;
   6352 }
   6353 
   6354 
   6355 static IRSB* do_MSVC_HACKS ( IRSB* sb )
   6356 {
   6357    // Normalise as much as we can.  This is the one-and-only place
   6358    // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   6359    Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   6360    if (any_cse_changes) {
   6361       // CSEing might have created dead code.  Remove it.
   6362       sb = cprop_BB ( sb );
   6363       do_deadcode_BB(sb);
   6364    }
   6365 
   6366    // Visit all atoms, do the transformation proper.  bb is modified
   6367    // in-place.
   6368    Bool changed = do_XOR_TRANSFORM_IRSB(sb);
   6369 
   6370    if (changed) {
   6371       // The transformation generates non-flat expressions, so we now
   6372       // need to re-flatten the block.
   6373       sb = flatten_BB(sb);
   6374    }
   6375 
   6376    return sb;
   6377 }
   6378 
   6379 
   6380 /*---------------------------------------------------------------*/
   6381 /*--- iropt main                                              ---*/
   6382 /*---------------------------------------------------------------*/
   6383 
/* When True, the passes in cheap_transformations (and do_iropt_BB)
   print the block after each phase; flip to True (as the trailing
   comment hints) for debugging. */
static Bool iropt_verbose = False; /* True; */
   6385 
   6386 
   6387 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   6388    redundant Put removal, constant propagation, dead code removal,
   6389    clean helper specialisation, and dead code removal (again).
   6390 */
   6391 
   6392 
static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl
      )
{
   /* Pass order matters here; each pass may expose work for the next.
      Redundant-Get removal first (modifies bb in place). */
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   /* Redundant-Put removal is only attempted when the register-update
      policy is below VexRegUpdAllregsAtEachInsn -- presumably because
      stricter policies make every Put observable (TODO confirm). */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   /* Constant propagation; note cprop_BB returns a (possibly new)
      block rather than mutating in place. */
   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   /* Dead code removal (in place). */
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   /* Specialise clean-helper calls via the supplied |specHelper|,
      then remove any code that specialisation made dead. */
   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
      ppIRSB(bb);
   }

   return bb;
}
   6436 
   6437 
   6438 /* Do some more expensive transformations on bb, which are aimed at
   6439    optimising as much as possible in the presence of GetI and PutI.  */
   6440 
static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   /* CSE without commoning up loads; the "did anything change" result
      is deliberately ignored here. */
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   /* Collapse chains of Add/Sub operations, then eliminate redundant
      GetI accesses. */
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   /* PutI elimination, like redundant-Put removal elsewhere in this
      file, is skipped under the strictest register-update policy. */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   /* Finally sweep out whatever the passes above made dead. */
   do_deadcode_BB( bb );
   return bb;
}
   6453 
   6454 
   6455 /* Scan a flattened BB to look for signs that more expensive
   6456    optimisations might be useful:
   6457    - find out if there are any GetIs and PutIs
   6458    - find out if there are any floating or vector-typed temporaries
   6459 */
   6460 
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Walk every statement.  Only Ist_PutI and WrTmp-of-GetI set
      hasGetIorPutI, and only WrTmp's tmp type can set hasVorFtemps;
      the remaining cases just vassert that the block is flat (all
      leaf expressions are atoms). */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the written tmp's type: plain integer types are
               "cheap"; float (incl. F16/F128), decimal-float and
               vector types flag the block as V/F-active. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            /* expdHi/dataHi may be NULL for single-width CAS. */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               /* VECRET/GSPTR marker args are not atoms; skip them. */
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            /* Unknown statement tag, or an unexpected tmp type. */
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
   6560 
   6561 
   6562 /* ---------------- The main iropt entry point. ---------------- */
   6563 
   6564 /* exported from this file */
   6565 /* Rules of the game:
   6566 
   6567    - IRExpr/IRStmt trees should be treated as immutable, as they
   6568      may get shared.  So never change a field of such a tree node;
   6569      instead construct and return a new one if needed.
   6570 */
   6571 
   6572 
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Statistics for the DEBUG_IROPT report below; static, so they
      accumulate over every block translated by this process. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   /* Everything below only runs at optimisation level 2 and above. */
   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      // (deliberately disabled via if (0))
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         /* Unrolling succeeded (bb2 != NULL); re-optimise the
            unrolled block, repeating the expensive passes only if
            they were warranted before. */
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
   6684 
   6685 
   6686 /*---------------------------------------------------------------*/
   6687 /*--- end                                            ir_opt.c ---*/
   6688 /*---------------------------------------------------------------*/
   6689