Home | History | Annotate | Download | only in priv
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*---------------------------------------------------------------*/
      4 /*--- begin                                          ir_opt.c ---*/
      5 /*---------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2004-2015 OpenWorks LLP
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 
     31    Neither the names of the U.S. Department of Energy nor the
     32    University of California nor the names of its contributors may be
     33    used to endorse or promote products derived from this software
     34    without prior written permission.
     35 */
     36 
     37 #include "libvex_basictypes.h"
     38 #include "libvex_ir.h"
     39 #include "libvex.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "ir_opt.h"
     44 
     45 
     46 /* Set to 1 for lots of debugging output. */
     47 #define DEBUG_IROPT 0
     48 
     49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
     50 #define STATS_IROPT 0
     51 
     52 
     53 /* What iropt does, 29 Dec 04.
     54 
     55    It takes an IRSB and produces a new one with the same meaning,
     56    defined thus:
     57 
     58    After execution of the new BB, all guest state and guest memory is
     59    the same as after execution of the original.  This is true
     60    regardless of how the block was exited (at the end vs side exit).
     61 
     62    In addition, parts of the guest state will be identical to that
     63    created by execution of the original at the following observation
     64    points:
     65 
     66    * In a dirty helper call, any parts of the guest state that the
     67      helper states that it reads or modifies will be up to date.
     68      Also, guest memory will be up to date.  Parts of the guest state
     69      not marked as being read or modified by the helper cannot be
     70      assumed to be up-to-date at the point where the helper is called.
     71 
     72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     74      (i.e. at SB exits and as specified by dirty helper call).
     75      Also, the stack pointer register is up to date at memory
     76      exception points (as this is needed for the stack extension
     77      logic in m_signals.c).
     78 
     79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     80      Immediately prior to any load or store, those parts of the guest
     81      state marked as requiring precise exceptions will be up to date.
     82      Also, guest memory will be up to date.  Parts of the guest state
     83      not marked as requiring precise exceptions cannot be assumed to
     84      be up-to-date at the point of the load/store.
     85 
     86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     87      Same as minimal, but all the guest state is up to date at memory
     88      exception points.
     89 
     90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     91      Guest state is up to date at each instruction.
     92 
     93    The relative order of loads and stores (including loads/stores of
     94    guest memory done by dirty helpers annotated as such) is not
     95    changed.  However, the relative order of loads with no intervening
     96    stores/modifies may be changed.
     97 
     98    Transformation order
     99    ~~~~~~~~~~~~~~~~~~~~
    100 
    101    There are three levels of optimisation, controlled by
    102    vex_control.iropt_level.  Define first:
    103 
    104    "Cheap transformations" are the following sequence:
    105       * Redundant-Get removal
    106       * Redundant-Put removal
    107       * Constant propagation/folding
    108       * Dead code removal
    109       * Specialisation of clean helper functions
    110       * Dead code removal
    111 
    112    "Expensive transformations" are the following sequence:
    113       * CSE
    114       * Folding of add/sub chains
    115       * Redundant-GetI removal
    116       * Redundant-PutI removal
    117       * Dead code removal
    118 
    119    Then the transformations are as follows, as defined by
    120    vex_control.iropt_level:
    121 
    122    Level 0:
    123       * Flatten into atomic form.
    124 
    125    Level 1: the following sequence:
    126       * Flatten into atomic form.
    127       * Cheap transformations.
    128 
    129    Level 2: the following sequence
    130       * Flatten into atomic form.
    131       * Cheap transformations.
    132       * If block contains any floating or vector types, CSE.
    133       * If block contains GetI or PutI, Expensive transformations.
    134       * Try unrolling loops.  Three possible outcomes:
    135         - No effect: do nothing more.
    136         - Unrolled a loop, and block does not contain GetI or PutI:
    137           Do: * CSE
    138               * Dead code removal
    139         - Unrolled a loop, and block contains GetI or PutI:
    140           Do: * Expensive transformations
    141               * Cheap transformations
    142 */
    143 
    144 /* Implementation notes, 29 Dec 04.
    145 
    146    TODO (important): I think rPutI removal ignores precise exceptions
    147    and is therefore in a sense, wrong.  In the sense that PutIs are
    148    assumed not to write parts of the guest state that we need to have
    149    up-to-date at loads/stores.  So far on x86 guest that has not
    150    mattered since indeed only the x87 FP registers and tags are
    151    accessed using GetI/PutI, and there is no need so far for them to
    152    be up to date at mem exception points.  The rPutI pass should be
    153    fixed.
    154 
    155    TODO: improve pessimistic handling of precise exceptions
    156      in the tree builder.
    157 
    158    TODO: check interaction of rGetI and dirty helpers.
    159 
    160    F64i constants are treated differently from other constants.
    161    They are not regarded as atoms, and instead lifted off and
    162    bound to temps.  This allows them to participate in CSE, which
    163    is important for getting good performance for x86 guest code.
    164 
    165    CSE up F64 literals (already doing F64is)
    166 
    167    CSE: consider carefully the requirement for precise exns
    168         prior to making CSE any more aggressive.  */
    169 
    170 
    171 /*---------------------------------------------------------------*/
    172 /*--- Finite mappery, of a sort                               ---*/
    173 /*---------------------------------------------------------------*/
    174 
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
    178 
/* A small open-addressed-ish map with parallel arrays: entry i is
   live iff inuse[i] is True, and then binds key[i] :-> val[i].
   Entries are only ever appended or tombstoned (inuse := False);
   compaction happens lazily when the arrays are grown. */
typedef
   struct {
      Bool*  inuse;  /* liveness flag per slot */
      HWord* key;    /* keys, valid only where inuse[i] */
      HWord* val;    /* values, parallel to key[] */
      Int    size;   /* allocated capacity of the three arrays */
      Int    used;   /* number of slots handed out (live or dead) */
   }
   HashHW;
    188 
    189 static HashHW* newHHW ( void )
    190 {
    191    HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
    192    h->size   = 8;
    193    h->used   = 0;
    194    h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
    195    h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    196    h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
    197    return h;
    198 }
    199 
    200 
    201 /* Look up key in the map. */
    202 
    203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
    204 {
    205    Int i;
    206    /* vex_printf("lookupHHW(%llx)\n", key ); */
    207    for (i = 0; i < h->used; i++) {
    208       if (h->inuse[i] && h->key[i] == key) {
    209          if (val)
    210             *val = h->val[i];
    211          return True;
    212       }
    213    }
    214    return False;
    215 }
    216 
    217 
    218 /* Add key->val to the map.  Replaces any existing binding for key. */
    219 
    220 static void addToHHW ( HashHW* h, HWord key, HWord val )
    221 {
    222    Int i, j;
    223    /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
    224 
    225    /* Find and replace existing binding, if any. */
    226    for (i = 0; i < h->used; i++) {
    227       if (h->inuse[i] && h->key[i] == key) {
    228          h->val[i] = val;
    229          return;
    230       }
    231    }
    232 
    233    /* Ensure a space is available. */
    234    if (h->used == h->size) {
    235       /* Copy into arrays twice the size. */
    236       Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
    237       HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    238       HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
    239       for (i = j = 0; i < h->size; i++) {
    240          if (!h->inuse[i]) continue;
    241          inuse2[j] = True;
    242          key2[j] = h->key[i];
    243          val2[j] = h->val[i];
    244          j++;
    245       }
    246       h->used = j;
    247       h->size *= 2;
    248       h->inuse = inuse2;
    249       h->key = key2;
    250       h->val = val2;
    251    }
    252 
    253    /* Finally, add it. */
    254    vassert(h->used < h->size);
    255    h->inuse[h->used] = True;
    256    h->key[h->used] = key;
    257    h->val[h->used] = val;
    258    h->used++;
    259 }
    260 
    261 
    262 /*---------------------------------------------------------------*/
    263 /*--- Flattening out a BB into atomic SSA form                ---*/
    264 /*---------------------------------------------------------------*/
    265 
    266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
    267    copies made by flattening.  If in doubt return False. */
    268 
    269 static Bool isFlat ( IRExpr* e )
    270 {
    271    if (e->tag == Iex_Get)
    272       return True;
    273    if (e->tag == Iex_Binop)
    274       return toBool( isIRAtom(e->Iex.Binop.arg1)
    275                      && isIRAtom(e->Iex.Binop.arg2) );
    276    if (e->tag == Iex_Load)
    277       return isIRAtom(e->Iex.Load.addr);
    278    return False;
    279 }
    280 
    281 /* Flatten out 'ex' so it is atomic, returning a new expression with
    282    the same value, after having appended extra IRTemp assignments to
    283    the end of 'bb'. */
    284 
/* Flatten out 'ex' so it is atomic: recursively flatten each
   subexpression, bind the (now atomic-argument) expression to a new
   temp via a WrTmp appended to 'bb', and return RdTmp of that temp.
   Constants (other than F64i) and RdTmps are already atoms and are
   returned unchanged.  Note the recursion happens on the arguments
   *before* the enclosing WrTmp is appended, so argument-defining
   statements appear in 'bb' ahead of their use. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   /* Type of the whole expression; every temp created below for 'ex'
      gets this type. */
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Only the index expression needs flattening; descr and
            bias are not expressions. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* A Get has no subexpressions; just lift it onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* The args vector is NULL-terminated.  Shallow-copy it so
            the original expression is not mutated, then flatten each
            argument in place in the copy. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom. */
         return ex;

      default:
         /* Unknown expression tag: print it and give up. */
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
    396 
    397 
    398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
    399 
/* Append a completely flattened form of 'st' to the end of 'bb'.
   Each statement kind has its constituent expressions flattened via
   flatten_Expr (which appends the argument-defining WrTmps first),
   then a rebuilt statement with atomic operands is appended.  Where
   the statement is already flat, it is appended unchanged to avoid
   allocating copies. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data, then rebuild. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         /* Guarded store: addr, data and guard all need flattening. */
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         /* Guarded load: 'alt' is the value used when the guard is
            false; it is an expression and must be flattened too. */
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* The 'Hi' halves are only present for double-word CAS. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata == NULL means this is a Load-Linked, not a
            Store-Conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Copy the dirty-call descriptor (and its args vector) so
            the original is not mutated, then flatten mAddr, the
            guard, and each ordinary argument.  VECRET/BBPTR marker
            args are not real expressions and are left alone. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; append unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         /* Unknown statement tag: print it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
    519 
    520 
    521 static IRSB* flatten_BB ( IRSB* in )
    522 {
    523    Int   i;
    524    IRSB* out;
    525    out = emptyIRSB();
    526    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
    527    for (i = 0; i < in->stmts_used; i++)
    528       if (in->stmts[i])
    529          flatten_Stmt( out, in->stmts[i] );
    530    out->next     = flatten_Expr( out, in->next );
    531    out->jumpkind = in->jumpkind;
    532    out->offsIP   = in->offsIP;
    533    return out;
    534 }
    535 
    536 
    537 /*---------------------------------------------------------------*/
    538 /*--- In-place removal of redundant GETs                      ---*/
    539 /*---------------------------------------------------------------*/
    540 
    541 /* Scan forwards, building up an environment binding (min offset, max
    542    offset) pairs to values, which will either be temps or constants.
    543 
    544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
    545    env and if it matches, replace the Get with the stored value.  If
    546    there is no match, add a (minoff,maxoff) :-> t binding.
    547 
    548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
    549    any binding which fully or partially overlaps with (minoff,maxoff).
    550    Then add a new (minoff,maxoff) :-> t or c binding.  */
    551 
    552 /* Extract the min/max offsets from a guest state array descriptor. */
    553 
    554 inline
    555 static void getArrayBounds ( IRRegArray* descr,
    556                              UInt* minoff, UInt* maxoff )
    557 {
    558    *minoff = descr->base;
    559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
    560    vassert((*minoff & ~0xFFFF) == 0);
    561    vassert((*maxoff & ~0xFFFF) == 0);
    562    vassert(*minoff <= *maxoff);
    563 }
    564 
    565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
    566 
    567 static UInt mk_key_GetPut ( Int offset, IRType ty )
    568 {
    569    /* offset should fit in 16 bits. */
    570    UInt minoff = offset;
    571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
    572    vassert((minoff & ~0xFFFF) == 0);
    573    vassert((maxoff & ~0xFFFF) == 0);
    574    return (minoff << 16) | maxoff;
    575 }
    576 
    577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
    578 {
    579    UInt minoff, maxoff;
    580    getArrayBounds( descr, &minoff, &maxoff );
    581    vassert((minoff & ~0xFFFF) == 0);
    582    vassert((maxoff & ~0xFFFF) == 0);
    583    return (minoff << 16) | maxoff;
    584 }
    585 
    586 /* Supposing h has keys of the form generated by mk_key_GetPut and
    587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
    588    .. k_hi).
    589 */
    590 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
    591 {
    592    Int  j;
    593    UInt e_lo, e_hi;
    594    vassert(k_lo <= k_hi);
    595    /* invalidate any env entries which in any way overlap (k_lo
    596       .. k_hi) */
    597    /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
    598 
    599    for (j = 0; j < h->used; j++) {
    600       if (!h->inuse[j])
    601          continue;
    602       e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
    603       e_hi = ((UInt)h->key[j]) & 0xFFFF;
    604       vassert(e_lo <= e_hi);
    605       if (e_hi < k_lo || k_hi < e_lo)
    606          continue; /* no overlap possible */
    607       else
    608          /* overlap; invalidate */
    609          h->inuse[j] = False;
    610    }
    611 }
    612 
    613 
/* In-place redundant-Get removal over 'bb'.  Scans forward keeping
   an env mapping packed (minoff,maxoff) keys to the atom (temp or
   const) last known to hold that guest-state range.  A Get whose
   range is in the env is replaced by the stored atom; Puts/PutIs
   invalidate overlapping entries; dirty helpers that write guest
   state flush the whole env. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            /* NB: 'key' computed here is reused by the trailing
               "add this one to the env" step below. */
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate.  'key' still holds
         the packed range computed in the Put branch above, and the
         overlapping entries were just invalidated, so this records
         that the range now holds Put.data. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
    706 
    707 
    708 /*---------------------------------------------------------------*/
    709 /*--- In-place removal of redundant PUTs                      ---*/
    710 /*---------------------------------------------------------------*/
    711 
    712 /* Find any Get uses in st and invalidate any partially or fully
    713    overlapping ranges listed in env.  Due to the flattening phase, the
    714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
    715 
    716 static void handle_gets_Stmt (
    717                HashHW* env,
    718                IRStmt* st,
    719                Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
    720                VexRegisterUpdates pxControl
    721             )
    722 {
    723    Int     j;
    724    UInt    key = 0; /* keep gcc -O happy */
    725    Bool    isGet;
    726    Bool    memRW = False;
    727    IRExpr* e;
    728 
    729    switch (st->tag) {
    730 
    731       /* This is the only interesting case.  Deal with Gets in the RHS
    732          expression. */
    733       case Ist_WrTmp:
    734          e = st->Ist.WrTmp.data;
    735          switch (e->tag) {
    736             case Iex_Get:
    737                isGet = True;
    738                key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
    739                break;
    740             case Iex_GetI:
    741                isGet = True;
    742                key = mk_key_GetIPutI ( e->Iex.GetI.descr );
    743                break;
    744             case Iex_Load:
    745                isGet = False;
    746                memRW = True;
    747                break;
    748             default:
    749                isGet = False;
    750          }
    751          if (isGet) {
    752             UInt k_lo, k_hi;
    753             k_lo = (key >> 16) & 0xFFFF;
    754             k_hi = key & 0xFFFF;
    755             invalidateOverlaps(env, k_lo, k_hi);
    756          }
    757          break;
    758 
    759       /* Be very conservative for dirty helper calls; dump the entire
    760          environment.  The helper might read guest state, in which
    761          case it needs to be flushed first.  Also, the helper might
    762          access guest memory, in which case all parts of the guest
    763          state requiring precise exceptions needs to be flushed.  The
    764          crude solution is just to flush everything; we could easily
    765          enough do a lot better if needed. */
    766       /* Probably also overly-conservative, but also dump everything
    767          if we hit a memory bus event (fence, lock, unlock).  Ditto
    768          AbiHints, CASs, LLs and SCs. */
    769       case Ist_AbiHint:
    770          vassert(isIRAtom(st->Ist.AbiHint.base));
    771          vassert(isIRAtom(st->Ist.AbiHint.nia));
    772          /* fall through */
    773       case Ist_MBE:
    774       case Ist_Dirty:
    775       case Ist_CAS:
    776       case Ist_LLSC:
    777          for (j = 0; j < env->used; j++)
    778             env->inuse[j] = False;
    779          break;
    780 
    781       /* all other cases are boring. */
    782       case Ist_Store:
    783          vassert(isIRAtom(st->Ist.Store.addr));
    784          vassert(isIRAtom(st->Ist.Store.data));
    785          memRW = True;
    786          break;
    787       case Ist_StoreG: {
    788          IRStoreG* sg = st->Ist.StoreG.details;
    789          vassert(isIRAtom(sg->addr));
    790          vassert(isIRAtom(sg->data));
    791          vassert(isIRAtom(sg->guard));
    792          memRW = True;
    793          break;
    794       }
    795       case Ist_LoadG: {
    796          IRLoadG* lg = st->Ist.LoadG.details;
    797          vassert(isIRAtom(lg->addr));
    798          vassert(isIRAtom(lg->alt));
    799          vassert(isIRAtom(lg->guard));
    800          memRW = True;
    801          break;
    802       }
    803       case Ist_Exit:
    804          vassert(isIRAtom(st->Ist.Exit.guard));
    805          break;
    806 
    807       case Ist_Put:
    808          vassert(isIRAtom(st->Ist.Put.data));
    809          break;
    810 
    811       case Ist_PutI:
    812          vassert(isIRAtom(st->Ist.PutI.details->ix));
    813          vassert(isIRAtom(st->Ist.PutI.details->data));
    814          break;
    815 
    816       case Ist_NoOp:
    817       case Ist_IMark:
    818          break;
    819 
    820       default:
    821          vex_printf("\n");
    822          ppIRStmt(st);
    823          vex_printf("\n");
    824          vpanic("handle_gets_Stmt");
    825    }
    826 
    827    if (memRW) {
    828       /* This statement accesses memory.  So we might need to dump all parts
    829          of the environment corresponding to guest state that may not
    830          be reordered with respect to memory references.  That means
    831          at least the stack pointer. */
    832       switch (pxControl) {
    833          case VexRegUpdAllregsAtMemAccess:
    834             /* Precise exceptions required at mem access.
    835                Flush all guest state. */
    836             for (j = 0; j < env->used; j++)
    837                env->inuse[j] = False;
    838             break;
    839          case VexRegUpdSpAtMemAccess:
    840             /* We need to dump the stack pointer
    841                (needed for stack extension in m_signals.c).
    842                preciseMemExnsFn will use vex_control.iropt_register_updates
    843                to verify only the sp is to be checked. */
    844             /* fallthrough */
    845          case VexRegUpdUnwindregsAtMemAccess:
    846             for (j = 0; j < env->used; j++) {
    847                if (!env->inuse[j])
    848                   continue;
    849                /* Just flush the minimal amount required, as computed by
    850                   preciseMemExnsFn. */
    851                HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
    852                HWord k_hi = env->key[j] & 0xFFFF;
    853                if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
    854                   env->inuse[j] = False;
    855             }
    856             break;
    857          case VexRegUpdAllregsAtEachInsn:
    858             // VexRegUpdAllregsAtEachInsn cannot happen here.
    859             // fall through
    860          case VexRegUpd_INVALID:
    861          default:
    862             vassert(0);
    863       }
    864    } /* if (memRW) */
    865 
    866 }
    867 
    868 
    869 /* Scan backwards, building up a set of (min offset, max
    870    offset) pairs, indicating those parts of the guest state
    871    for which the next event is a write.
    872 
    873    On seeing a conditional exit, empty the set.
    874 
    875    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
    876    completely within the set, remove the Put.  Otherwise, add
    877    (minoff,maxoff) to the set.
    878 
    879    On seeing 'Get (minoff,maxoff)', remove any part of the set
    880    overlapping (minoff,maxoff).  The same has to happen for any events
    881    which implicitly read parts of the guest state: dirty helper calls
    882    and loads/stores.
    883 */
    884 
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* This pass requires pxControl to be weaker than
      VexRegUpdAllregsAtEachInsn. */
   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   /* 'env' holds (min,max)-offset keys for those guest state sections
      whose next event, scanning backwards, is known to be a write. */
   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Note the PutI exclusion below: PutIs are neither removed nor
         added to 'env'; they instead fall through to
         handle_gets_Stmt at the bottom of the loop. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
    991 
    992 
    993 /*---------------------------------------------------------------*/
    994 /*--- Constant propagation and folding                        ---*/
    995 /*---------------------------------------------------------------*/
    996 
#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation.
   Reset to zero by sameIRExprs; incremented by sameIRExprs_aux2. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success.  When the budget is exhausted the comparison
   conservatively reports "not the same". */
#define NODE_LIMIT 30
   1024 
   1025 
   1026 /* The env in this section is a map from IRTemp to IRExpr*,
   1027    that is, an array indexed by IRTemp. */
   1028 
   1029 /* Do both expressions compute the same value? The answer is generally
   1030    conservative, i.e. it will report that the expressions do not compute
   1031    the same value when in fact they do. The reason is that we do not
   1032    keep track of changes in the guest state and memory. Thusly, two
   1033    Get's, GetI's or Load's, even when accessing the same location, will be
   1034    assumed to compute different values. After all the accesses may happen
   1035    at different times and the guest state / memory can have changed in
   1036    the meantime.
   1037 
   1038    XXX IMPORTANT XXX the two expressions must have the same IR type.
   1039    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
   1040 
   1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   1042    wrapper that deals with the common tags-don't-match case, and a
   1043    slower out of line general case.  Saves a few insns. */
   1044 
   1045 __attribute__((noinline))
   1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
   1047 
   1048 inline
   1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   1050 {
   1051    if (e1->tag != e2->tag) return False;
   1052    return sameIRExprs_aux2(env, e1, e2);
   1053 }
   1054 
/* Out-of-line general case of sameIRExprs_aux.  The wrapper has
   already established that |e1| and |e2| carry the same tag. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Enforce the per-invocation work budget: once NODE_LIMIT nodes
      have been visited, conservatively report "different". */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different tmps: if both are bound in 'env', compare the
            expressions they are bound to. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions have the same IR type,
            hence the constants must have the same tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
   1132 
/* Entry point: do |e1| and |e2| compute the same value?  Answers
   conservatively (see the big comment above).  Resets the node-visit
   budget before delegating to sameIRExprs_aux. */
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   /* Fresh budget for this invocation; sameIRExprs_aux2 increments
      this and bails out past NODE_LIMIT. */
   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}
   1155 
   1156 
   1157 /* Debugging-only hack (not used in production runs): make a guess
   1158    whether sameIRExprs might assert due to the two args being of
   1159    different types.  If in doubt return False.  Is only used when
   1160    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   1161    Bad because it duplicates functionality from typeOfIRExpr.  See
   1162    comment on the single use point below for rationale. */
   1163 static
   1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   1165 {
   1166    if (e1->tag != e2->tag) return False;
   1167    switch (e1->tag) {
   1168       case Iex_Const: {
   1169          /* The only interesting case */
   1170          IRConst *c1 = e1->Iex.Const.con;
   1171          IRConst *c2 = e2->Iex.Const.con;
   1172          return c1->tag != c2->tag;
   1173       }
   1174       default:
   1175          break;
   1176    }
   1177    return False;
   1178 }
   1179 
   1180 
   1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
   1182 static Bool isZeroU32 ( IRExpr* e )
   1183 {
   1184    return toBool( e->tag == Iex_Const
   1185                   && e->Iex.Const.con->tag == Ico_U32
   1186                   && e->Iex.Const.con->Ico.U32 == 0);
   1187 }
   1188 
   1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   1190    Currently unused; commented out to avoid compiler warning */
   1191 #if 0
   1192 static Bool isZeroU64 ( IRExpr* e )
   1193 {
   1194    return toBool( e->tag == Iex_Const
   1195                   && e->Iex.Const.con->tag == Ico_U64
   1196                   && e->Iex.Const.con->Ico.U64 == 0);
   1197 }
   1198 #endif
   1199 
   1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
   1201 static Bool isZeroV128 ( IRExpr* e )
   1202 {
   1203    return toBool( e->tag == Iex_Const
   1204                   && e->Iex.Const.con->tag == Ico_V128
   1205                   && e->Iex.Const.con->Ico.V128 == 0x0000);
   1206 }
   1207 
   1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
   1209 static Bool isZeroV256 ( IRExpr* e )
   1210 {
   1211    return toBool( e->tag == Iex_Const
   1212                   && e->Iex.Const.con->tag == Ico_V256
   1213                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
   1214 }
   1215 
   1216 /* Is this an integer constant with value 0 ? */
   1217 static Bool isZeroU ( IRExpr* e )
   1218 {
   1219    if (e->tag != Iex_Const) return False;
   1220    switch (e->Iex.Const.con->tag) {
   1221       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
   1222       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
   1223       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
   1224       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
   1225       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
   1226       default: vpanic("isZeroU");
   1227    }
   1228 }
   1229 
   1230 /* Is this an integer constant with value 1---1b ? */
   1231 static Bool isOnesU ( IRExpr* e )
   1232 {
   1233    if (e->tag != Iex_Const) return False;
   1234    switch (e->Iex.Const.con->tag) {
   1235       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
   1236       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
   1237       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
   1238                                      == 0xFFFFFFFF);
   1239       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
   1240                                      == 0xFFFFFFFFFFFFFFFFULL);
   1241       default: ppIRExpr(e); vpanic("isOnesU");
   1242    }
   1243 }
   1244 
   1245 static Bool notBool ( Bool b )
   1246 {
   1247    if (b == True) return False;
   1248    if (b == False) return True;
   1249    vpanic("notBool");
   1250 }
   1251 
   1252 /* Make a zero which has the same type as the result of the given
   1253    primop. */
   1254 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
   1255 {
   1256    switch (op) {
   1257       case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
   1258       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
   1259       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
   1260       case Iop_Sub32:
   1261       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
   1262       case Iop_And64:
   1263       case Iop_Sub64:
   1264       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
   1265       case Iop_XorV128:
   1266       case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
   1267       case Iop_XorV256:
   1268       case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
   1269       default: vpanic("mkZeroOfPrimopResultType: bad primop");
   1270    }
   1271 }
   1272 
   1273 /* Make a value containing all 1-bits, which has the same type as the
   1274    result of the given primop. */
   1275 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
   1276 {
   1277    switch (op) {
   1278       case Iop_CmpEQ32:
   1279       case Iop_CmpEQ64:
   1280          return IRExpr_Const(IRConst_U1(toBool(1)));
   1281       case Iop_Or8:
   1282          return IRExpr_Const(IRConst_U8(0xFF));
   1283       case Iop_Or16:
   1284          return IRExpr_Const(IRConst_U16(0xFFFF));
   1285       case Iop_Or32:
   1286          return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
   1287       case Iop_CmpEQ8x8:
   1288       case Iop_Or64:
   1289          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
   1290       case Iop_CmpEQ8x16:
   1291       case Iop_CmpEQ16x8:
   1292       case Iop_CmpEQ32x4:
   1293          return IRExpr_Const(IRConst_V128(0xFFFF));
   1294       default:
   1295          ppIROp(op);
   1296          vpanic("mkOnesOfPrimopResultType: bad primop");
   1297    }
   1298 }
   1299 
   1300 /* Helpers for folding Clz32/64. */
   1301 static UInt fold_Clz64 ( ULong value )
   1302 {
   1303    UInt i;
   1304    vassert(value != 0ULL); /* no defined semantics for arg==0 */
   1305    for (i = 0; i < 64; ++i) {
   1306       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   1307    }
   1308    vassert(0);
   1309    /*NOTREACHED*/
   1310    return 0;
   1311 }
   1312 
   1313 static UInt fold_Clz32 ( UInt value )
   1314 {
   1315    UInt i;
   1316    vassert(value != 0); /* no defined semantics for arg==0 */
   1317    for (i = 0; i < 32; ++i) {
   1318       if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   1319    }
   1320    vassert(0);
   1321    /*NOTREACHED*/
   1322    return 0;
   1323 }
   1324 
   1325 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   1326    the corresponding real constant. */
   1327 //XXX re-check this before use
   1328 //static ULong de_summarise_V64 ( UChar v64 )
   1329 //{
   1330 //   ULong r = 0;
   1331 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
   1332 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
   1333 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
   1334 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
   1335 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
   1336 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
   1337 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
   1338 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
   1339 //   return r;
   1340 //}
   1341 
   1342 /* Helper for arbitrary expression pattern matching in flat IR.  If
   1343    'e' is a reference to a tmp, look it up in env -- repeatedly, if
   1344    necessary -- until it resolves to a non-tmp.  Note that this can
   1345    return NULL if it can't resolve 'e' to a new expression, which will
   1346    be the case if 'e' is instead defined by an IRStmt (IRDirty or
   1347    LLSC). */
   1348 static IRExpr* chase ( IRExpr** env, IRExpr* e )
   1349 {
   1350    /* Why is this loop guaranteed to terminate?  Because all tmps must
   1351       have definitions before use, hence a tmp cannot be bound
   1352       (directly or indirectly) to itself. */
   1353    while (e->tag == Iex_RdTmp) {
   1354       if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
   1355       e = env[(Int)e->Iex.RdTmp.tmp];
   1356       if (e == NULL) break;
   1357    }
   1358    return e;
   1359 }
   1360 
   1361 /* Similar to |chase|, but follows at most one level of tmp reference. */
   1362 static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
   1363 {
   1364    if (e == NULL || e->tag != Iex_RdTmp)
   1365       return e;
   1366    else
   1367       return env[(Int)e->Iex.RdTmp.tmp];
   1368 }
   1369 
   1370 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
   1371 {
   1372    Int     shift;
   1373    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
   1374 
   1375    switch (e->tag) {
   1376    case Iex_Unop:
   1377       /* UNARY ops */
   1378       if (e->Iex.Unop.arg->tag == Iex_Const) {
   1379          switch (e->Iex.Unop.op) {
   1380          case Iop_1Uto8:
   1381             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1382                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1383                     ? 1 : 0)));
   1384             break;
   1385          case Iop_1Uto32:
   1386             e2 = IRExpr_Const(IRConst_U32(
   1387                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1388                     ? 1 : 0));
   1389             break;
   1390          case Iop_1Uto64:
   1391             e2 = IRExpr_Const(IRConst_U64(
   1392                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1393                     ? 1 : 0));
   1394             break;
   1395 
   1396          case Iop_1Sto8:
   1397             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1398                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1399                     ? 0xFF : 0)));
   1400             break;
   1401          case Iop_1Sto16:
   1402             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1403                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1404                     ? 0xFFFF : 0)));
   1405             break;
   1406          case Iop_1Sto32:
   1407             e2 = IRExpr_Const(IRConst_U32(
   1408                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1409                     ? 0xFFFFFFFF : 0));
   1410             break;
   1411          case Iop_1Sto64:
   1412             e2 = IRExpr_Const(IRConst_U64(
   1413                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
   1414                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
   1415             break;
   1416 
   1417          case Iop_8Sto32: {
   1418             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1419             u32 <<= 24;
   1420             u32 = (Int)u32 >> 24;   /* signed shift */
   1421             e2 = IRExpr_Const(IRConst_U32(u32));
   1422             break;
   1423          }
   1424          case Iop_16Sto32: {
   1425             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
   1426             u32 <<= 16;
   1427             u32 = (Int)u32 >> 16;   /* signed shift */
   1428             e2 = IRExpr_Const(IRConst_U32(u32));
   1429             break;
   1430          }
   1431          case Iop_8Uto64:
   1432             e2 = IRExpr_Const(IRConst_U64(
   1433                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1434             break;
   1435          case Iop_16Uto64:
   1436             e2 = IRExpr_Const(IRConst_U64(
   1437                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1438             break;
   1439          case Iop_8Uto32:
   1440             e2 = IRExpr_Const(IRConst_U32(
   1441                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1442             break;
   1443          case Iop_8Sto16: {
   1444             UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
   1445             u16 <<= 8;
   1446             u16 = (Short)u16 >> 8;  /* signed shift */
   1447             e2 = IRExpr_Const(IRConst_U16(u16));
   1448             break;
   1449          }
   1450          case Iop_8Uto16:
   1451             e2 = IRExpr_Const(IRConst_U16(
   1452                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
   1453             break;
   1454          case Iop_16Uto32:
   1455             e2 = IRExpr_Const(IRConst_U32(
   1456                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
   1457             break;
   1458          case Iop_32to16:
   1459             e2 = IRExpr_Const(IRConst_U16(toUShort(
   1460                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1461             break;
   1462          case Iop_32to8:
   1463             e2 = IRExpr_Const(IRConst_U8(toUChar(
   1464                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
   1465             break;
   1466          case Iop_32to1:
   1467             e2 = IRExpr_Const(IRConst_U1(toBool(
   1468                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
   1469                  )));
   1470             break;
         /* Fold Unop(op, Const) for bit-tests, bitwise Not, and
            integer narrowing / widening.  Each arm rebuilds the
            argument's constant payload at the result width. */
         case Iop_64to1:
            /* The 1-bit result is just bit 0 of the U64. */
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         /* Bitwise complement at each width.  Narrow results are
            re-truncated via toUShort/toUChar since ~ promotes to int. */
         case Iop_NotV128:
            e2 = IRExpr_Const(IRConst_V128(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
            break;
         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            /* 1-bit values are held as Bool, so use notBool, not ~. */
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         /* 64-bit narrowing: mask to the result width, then cast. */
         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            /* Upper 32 bits of the U64. */
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            /* Zero-extend: mask guards against any stray upper bits. */
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         /* Sign-extension is done by shifting the value to the top of
            the 64-bit word and shifting back arithmetically.  The
            right shift is performed on a signed (Long) value; this
            relies on >> of a negative value being an arithmetic
            shift, as the inline comments indicate. */
         case Iop_16Sto64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            u64 <<= 48;
            u64 = (Long)u64 >> 48;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));
            break;
         }
         case Iop_32Sto64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            u64 <<= 32;
            u64 = (Long)u64 >> 32;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));
            break;
         }
   1546 
         /* 16-bit narrowing, analogous to the 64-bit cases above. */
         case Iop_16to8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }
         case Iop_16HIto8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 >>= 8;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }

         /* CmpNEZ: fold "arg != 0" to a 1-bit constant. */
         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         /* CmpwNEZ: word-wide version -- all-zeroes if arg == 0,
            all-ones otherwise. */
         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

         /* Left: computed as x | -x. */
         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);  /* mask is redundant; kept */
            /* NOTE(review): -s32 overflows (ISO C UB) when s32 ==
               INT_MIN, though the intended two's-complement result
               (INT_MIN itself) is what common compilers produce. */
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            /* NOTE(review): same INT_MIN-style caveat as Left32,
               for LONG_MIN. */
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         /* Count-leading-zeroes: only folded for nonzero inputs; a
            zero input deliberately leaves e2 unset (no folding). */
         case Iop_Clz32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (u32 != 0)
               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
            break;
         }
         case Iop_Clz64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (u64 != 0ULL)
               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
            break;
         }
   1624 
         /* For these vector ones, can't fold all cases, but at least
            do the most obvious one.  Could do better here using
            summarise/desummarise of vector constants, but too
            difficult to verify; hence just handle the zero cases.
            (V128 constants are held in a summarised UShort form --
            see the per-8-bit-group tests below -- so only all-zero
            values can be folded exactly.) */
         case Iop_32UtoV128: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (0 == u32) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_V128to64: {
            /* Low byte of the summary covers the low 64 bits. */
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 0) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_V128HIto64: {
            /* High byte of the summary covers the high 64 bits. */
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 8) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_64UtoV128: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (0 == u64) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }

         /* Even stupider (although still correct ..) */
         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            /* Any 64-bit lane of an all-zero V256 is zero. */
            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
            if (v256 == 0x00000000) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }

         case Iop_ZeroHI64ofV128: {
            /* Could do better here -- only need to look at the bottom 64 bits
               of the argument, really. */
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (v128 == 0x0000) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }

         default:
            goto unhandled;
   1692       }
   1693       }
   1694       break;
   1695 
   1696    case Iex_Binop:
   1697       /* BINARY ops */
   1698       if (e->Iex.Binop.arg1->tag == Iex_Const
   1699           && e->Iex.Binop.arg2->tag == Iex_Const) {
   1700          /* cases where both args are consts */
   1701          switch (e->Iex.Binop.op) {
   1702 
            /* Fold Binop(op, Const, Const) for the bitwise and basic
               arithmetic ops.  Arithmetic is done in unsigned C types,
               so Add/Sub/Mul wrap modulo 2^width, matching the IR's
               two's-complement semantics. */

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_OrV128:
               /* Valid on the summarised V128 form: OR of summaries
                  is the summary of the OR. */
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_XorV128:
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_AndV128:
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               /* Unsigned maximum of the two 32-bit args. */
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            case Iop_MullS32: {
               /* very paranoid */
               /* 32x32 -> 64 signed widening multiply.  Each operand
                  is sign-extended to 64 bits first, so the 64-bit
                  product cannot overflow. */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }
   1852 
            /* Shift folds.  The shift amount is always an Ico_U8
               (asserted below); it is range-checked against the data
               width before shifting, both to honour the IR's
               undefined-for-oversize-shift rule and to avoid C-level
               undefined behaviour.  An out-of-range amount simply
               leaves the expression unfolded. */

            /* -- Shl -- */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /* Arithmetic shift: done on a signed value, relying on
                  >> of a negative Int being an arithmetic shift. */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /* Logical shift: done on an unsigned value, so zeroes
                  are shifted in at the top. */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }
   1922 
            /* Comparison folds.  All comparisons produce a 1-bit
               constant.  Signedness is selected purely by the C type
               the payload is cast to (Int/Long vs UInt/ULong). */

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
            case Iop_CasCmpNE8:
            case Iop_ExpCmpNE8:
               /* Mask to 8 bits before comparing, in case the U8
                  payloads carry stray upper bits. */
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
            case Iop_CasCmpNE32:
            case Iop_ExpCmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
            case Iop_CasCmpNE64:
            case Iop_ExpCmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               /* Three-way signed compare with a one-hot result:
                  0x8 = LT, 0x4 = GT, 0x2 = EQ. */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Int   r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               /* arg1 forms the high 32 bits, arg2 the low 32. */
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1
                                  ->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;
            /* For this vector one, can't fold all cases, but at
               least do the most obvious one.  Could do better here
               using summarise/desummarise of vector constants, but
               too difficult to verify; hence just handle the zero
               cases. */
            case Iop_64HLtoV128: {
               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               if (0 == argHi && 0 == argLo) {
                  e2 = IRExpr_Const(IRConst_V128(0));
               } else {
                  goto unhandled;
               }
               break;
            }
            /* Same reasoning for the 256-bit version. */
            case Iop_V128HLtoV256: {
               IRExpr* argHi = e->Iex.Binop.arg1;
               IRExpr* argLo = e->Iex.Binop.arg2;
               if (isZeroV128(argHi) && isZeroV128(argLo)) {
                  e2 = IRExpr_Const(IRConst_V256(0));
               } else {
                  goto unhandled;
               }
               break;
            }

            /* -- V128 stuff -- */
            case Iop_InterleaveLO8x16: {
               /* This turns up a lot in Memcheck instrumentation of
                  Icc generated code.  I don't know why. */
               UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
               UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
               if (0 == arg1 && 0 == arg2) {
                  e2 = IRExpr_Const(IRConst_V128(0));
               } else {
                  goto unhandled;
               }
               break;
            }

            default:
               goto unhandled;
   2081          }
   2082 
   2083       } else {
   2084 
   2085          /* other cases (identities, etc) */
   2086          switch (e->Iex.Binop.op) {
   2087 
            /* Algebraic identities for Binops where at least one arg
               is not a constant.  Each rule either forwards one of
               the existing arg expressions or builds a small
               replacement; when no rule fires, e2 is left unset and
               the expression stays as-is. */

            case Iop_Shl32:
            case Iop_Shl64:
            case Iop_Shr64:
            case Iop_Sar64:
               /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Sar32:
            case Iop_Shr32:
               /* Shr32/Sar32(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Or8:
            case Iop_Or16:
            case Iop_Or32:
            case Iop_Or64:
            case Iop_Max32U:
               /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
               /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
               if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Add8:
               /* Add8(t,t) ==> t << 1.
                  Memcheck doesn't understand that
                  x+x produces a defined least significant bit, and it seems
                  simplest just to get rid of the problem by rewriting it
                  out, since the opportunity to do so exists. */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
                                    IRExpr_Const(IRConst_U8(1)));
                  break;
               }
               break;

               /* NB no Add16(t,t) case yet as no known test case exists */

            case Iop_Add32:
            case Iop_Add64:
               /* Add32/Add64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Add32/Add64(0,x) ==> x */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = IRExpr_Binop(
                          e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
                          e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
                  break;
               }
               break;

            case Iop_Sub32:
            case Iop_Sub64:
               /* Sub32/Sub64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               break;
            case Iop_Sub8x16:
               /* Sub8x16(x,0) ==> x */
               if (isZeroV128(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_And8:
            case Iop_And16:
            case Iop_And32:
            case Iop_And64:
               /* And8/And16/And32/And64(x,1---1b) ==> x */
               if (isOnesU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* And8/And16/And32/And64(1---1b,x) ==> x */
               if (isOnesU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* And8/And16/And32/And64(x,0) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* And8/And16/And32/And64(0,x) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;
   2228 
   2229             case Iop_AndV128:
   2230             case Iop_AndV256:
   2231                /* AndV128/AndV256(t,t)
   2232                   ==> t, for some IRTemp t */
   2233                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2234                   e2 = e->Iex.Binop.arg1;
   2235                   break;
   2236                }
   2237                /* Deal with either arg zero.  Could handle other And
   2238                   cases here too. */
   2239                if (e->Iex.Binop.op == Iop_AndV256
   2240                    && (isZeroV256(e->Iex.Binop.arg1)
   2241                        || isZeroV256(e->Iex.Binop.arg2))) {
   2242                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2243                   break;
   2244                } else if (e->Iex.Binop.op == Iop_AndV128
   2245                           && (isZeroV128(e->Iex.Binop.arg1)
   2246                               || isZeroV128(e->Iex.Binop.arg2))) {
   2247                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2248                   break;
   2249                }
   2250                break;
   2251 
   2252             case Iop_OrV128:
   2253             case Iop_OrV256:
   2254                /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
   2255                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2256                   e2 = e->Iex.Binop.arg1;
   2257                   break;
   2258                }
   2259                /* OrV128(t,0) ==> t */
   2260                if (e->Iex.Binop.op == Iop_OrV128) {
   2261                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2262                      e2 = e->Iex.Binop.arg1;
   2263                      break;
   2264                   }
   2265                   if (isZeroV128(e->Iex.Binop.arg1)) {
   2266                      e2 = e->Iex.Binop.arg2;
   2267                      break;
   2268                   }
   2269                }
   2270                /* OrV256(t,0) ==> t */
   2271                if (e->Iex.Binop.op == Iop_OrV256) {
   2272                   if (isZeroV256(e->Iex.Binop.arg2)) {
   2273                      e2 = e->Iex.Binop.arg1;
   2274                      break;
   2275                   }
   2276                   //Disabled because there's no known test case right now.
   2277                   //if (isZeroV256(e->Iex.Binop.arg1)) {
   2278                   //   e2 = e->Iex.Binop.arg2;
   2279                   //   break;
   2280                   //}
   2281                }
   2282                break;
   2283 
   2284             case Iop_Xor8:
   2285             case Iop_Xor16:
   2286             case Iop_Xor32:
   2287             case Iop_Xor64:
   2288             case Iop_XorV128:
   2289             case Iop_XorV256:
   2290                /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
   2291                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2292                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2293                   break;
   2294                }
   2295                /* XorV128(t,0) ==> t */
   2296                if (e->Iex.Binop.op == Iop_XorV128) {
   2297                   if (isZeroV128(e->Iex.Binop.arg2)) {
   2298                      e2 = e->Iex.Binop.arg1;
   2299                      break;
   2300                   }
   2301                   //Disabled because there's no known test case right now.
   2302                   //if (isZeroV128(e->Iex.Binop.arg1)) {
   2303                   //   e2 = e->Iex.Binop.arg2;
   2304                   //   break;
   2305                   //}
   2306                } else {
   2307                   /* Xor8/16/32/64(0,t) ==> t */
   2308                   if (isZeroU(e->Iex.Binop.arg1)) {
   2309                      e2 = e->Iex.Binop.arg2;
   2310                      break;
   2311                   }
   2312                   /* Xor8/16/32/64(t,0) ==> t */
   2313                   if (isZeroU(e->Iex.Binop.arg2)) {
   2314                      e2 = e->Iex.Binop.arg1;
   2315                      break;
   2316                   }
   2317                }
   2318                break;
   2319 
   2320             case Iop_CmpNE32:
   2321                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
   2322                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2323                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
   2324                   break;
   2325                }
   2326                /* CmpNE32(1Uto32(b), 0) ==> b */
   2327                if (isZeroU32(e->Iex.Binop.arg2)) {
   2328                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
   2329                   if (a1 && a1->tag == Iex_Unop
   2330                          && a1->Iex.Unop.op == Iop_1Uto32) {
   2331                      e2 = a1->Iex.Unop.arg;
   2332                      break;
   2333                   }
   2334                }
   2335                break;
   2336 
   2337             case Iop_CmpEQ32:
   2338             case Iop_CmpEQ64:
   2339             case Iop_CmpEQ8x8:
   2340             case Iop_CmpEQ8x16:
   2341             case Iop_CmpEQ16x8:
   2342             case Iop_CmpEQ32x4:
   2343                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2344                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
   2345                   break;
   2346                }
   2347                break;
   2348 
   2349             default:
   2350                break;
   2351          }
   2352       }
   2353       break;
   2354 
   2355    case Iex_ITE:
   2356       /* ITE */
   2357       /* is the discriminant a constant? */
   2358       if (e->Iex.ITE.cond->tag == Iex_Const) {
   2359          /* assured us by the IR type rules */
   2360          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
   2361          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
   2362                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
   2363       }
   2364       else
   2365       /* are the arms identical? (pretty weedy test) */
   2366       if (sameIRExprs(env, e->Iex.ITE.iftrue,
   2367                            e->Iex.ITE.iffalse)) {
   2368          e2 = e->Iex.ITE.iffalse;
   2369       }
   2370       break;
   2371 
   2372    default:
   2373       /* not considered */
   2374       break;
   2375    }
   2376 
   2377    /* Show cases where we've found but not folded 'op(t,t)'.  Be
   2378       careful not to call sameIRExprs with values of different types,
   2379       though, else it will assert (and so it should!).  We can't
   2380       conveniently call typeOfIRExpr on the two args without a whole
   2381       bunch of extra plumbing to pass in a type env, so just use a
   2382       hacky test to check the arguments are not anything that might
   2383       sameIRExprs to assert.  This is only OK because this kludge is
   2384       only used for debug printing, not for "real" operation.  For
   2385       "real" operation (ie, all other calls to sameIRExprs), it is
   2386       essential that the to args have the same type.
   2387 
   2388       The "right" solution is to plumb the containing block's
   2389       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
   2390       that's a bunch of extra parameter passing which will just slow
   2391       down the normal case, for no purpose. */
   2392    if (vex_control.iropt_verbosity > 0
   2393        && e == e2
   2394        && e->tag == Iex_Binop
   2395        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
   2396                                                     e->Iex.Binop.arg2)
   2397        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
   2398       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
   2399       ppIRExpr(e);
   2400       vex_printf("\n");
   2401    }
   2402 
   2403    /* Show the overall results of folding. */
   2404    if (DEBUG_IROPT && e2 != e) {
   2405       vex_printf("FOLD: ");
   2406       ppIRExpr(e); vex_printf("  ->  ");
   2407       ppIRExpr(e2); vex_printf("\n");
   2408    }
   2409 
   2410    return e2;
   2411 
   2412  unhandled:
   2413 #  if 0
   2414    vex_printf("\n\n");
   2415    ppIRExpr(e);
   2416    vpanic("fold_Expr: no rule for the above");
   2417 #  else
   2418    if (vex_control.iropt_verbosity > 0) {
   2419       vex_printf("vex iropt: fold_Expr: no const rule for: ");
   2420       ppIRExpr(e);
   2421       vex_printf("\n");
   2422    }
   2423    return e2;
   2424 #  endif
   2425 }
   2426 
   2427 
   2428 /* Apply the subst to a simple 1-level expression -- guaranteed to be
   2429    1-level due to previous flattening pass. */
   2430 
   2431 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
   2432 {
   2433    switch (ex->tag) {
   2434       case Iex_RdTmp:
   2435          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
   2436             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
   2437             if (rhs->tag == Iex_RdTmp)
   2438                return rhs;
   2439             if (rhs->tag == Iex_Const
   2440                 && rhs->Iex.Const.con->tag != Ico_F64i)
   2441                return rhs;
   2442          }
   2443          /* not bound in env */
   2444          return ex;
   2445 
   2446       case Iex_Const:
   2447       case Iex_Get:
   2448          return ex;
   2449 
   2450       case Iex_GetI:
   2451          vassert(isIRAtom(ex->Iex.GetI.ix));
   2452          return IRExpr_GetI(
   2453             ex->Iex.GetI.descr,
   2454             subst_Expr(env, ex->Iex.GetI.ix),
   2455             ex->Iex.GetI.bias
   2456          );
   2457 
   2458       case Iex_Qop: {
   2459          IRQop* qop = ex->Iex.Qop.details;
   2460          vassert(isIRAtom(qop->arg1));
   2461          vassert(isIRAtom(qop->arg2));
   2462          vassert(isIRAtom(qop->arg3));
   2463          vassert(isIRAtom(qop->arg4));
   2464          return IRExpr_Qop(
   2465                    qop->op,
   2466                    subst_Expr(env, qop->arg1),
   2467                    subst_Expr(env, qop->arg2),
   2468                    subst_Expr(env, qop->arg3),
   2469                    subst_Expr(env, qop->arg4)
   2470                 );
   2471       }
   2472 
   2473       case Iex_Triop: {
   2474          IRTriop* triop = ex->Iex.Triop.details;
   2475          vassert(isIRAtom(triop->arg1));
   2476          vassert(isIRAtom(triop->arg2));
   2477          vassert(isIRAtom(triop->arg3));
   2478          return IRExpr_Triop(
   2479                    triop->op,
   2480                    subst_Expr(env, triop->arg1),
   2481                    subst_Expr(env, triop->arg2),
   2482                    subst_Expr(env, triop->arg3)
   2483                 );
   2484       }
   2485 
   2486       case Iex_Binop:
   2487          vassert(isIRAtom(ex->Iex.Binop.arg1));
   2488          vassert(isIRAtom(ex->Iex.Binop.arg2));
   2489          return IRExpr_Binop(
   2490                    ex->Iex.Binop.op,
   2491                    subst_Expr(env, ex->Iex.Binop.arg1),
   2492                    subst_Expr(env, ex->Iex.Binop.arg2)
   2493                 );
   2494 
   2495       case Iex_Unop:
   2496          vassert(isIRAtom(ex->Iex.Unop.arg));
   2497          return IRExpr_Unop(
   2498                    ex->Iex.Unop.op,
   2499                    subst_Expr(env, ex->Iex.Unop.arg)
   2500                 );
   2501 
   2502       case Iex_Load:
   2503          vassert(isIRAtom(ex->Iex.Load.addr));
   2504          return IRExpr_Load(
   2505                    ex->Iex.Load.end,
   2506                    ex->Iex.Load.ty,
   2507                    subst_Expr(env, ex->Iex.Load.addr)
   2508                 );
   2509 
   2510       case Iex_CCall: {
   2511          Int      i;
   2512          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
   2513          for (i = 0; args2[i]; i++) {
   2514             vassert(isIRAtom(args2[i]));
   2515             args2[i] = subst_Expr(env, args2[i]);
   2516          }
   2517          return IRExpr_CCall(
   2518                    ex->Iex.CCall.cee,
   2519                    ex->Iex.CCall.retty,
   2520                    args2
   2521                 );
   2522       }
   2523 
   2524       case Iex_ITE:
   2525          vassert(isIRAtom(ex->Iex.ITE.cond));
   2526          vassert(isIRAtom(ex->Iex.ITE.iftrue));
   2527          vassert(isIRAtom(ex->Iex.ITE.iffalse));
   2528          return IRExpr_ITE(
   2529                    subst_Expr(env, ex->Iex.ITE.cond),
   2530                    subst_Expr(env, ex->Iex.ITE.iftrue),
   2531                    subst_Expr(env, ex->Iex.ITE.iffalse)
   2532                 );
   2533 
   2534       default:
   2535          vex_printf("\n\n"); ppIRExpr(ex);
   2536          vpanic("subst_Expr");
   2537 
   2538    }
   2539 }
   2540 
   2541 
/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.

   'env' maps IRTemps to IRExprs; the returned statement is a newly
   constructed copy with the substitution applied and constant
   folding performed on every contained expression.  In particular,
   guarded stores/loads and conditional exits whose guards fold to a
   constant are simplified here.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         /* Both address expressions are atoms; subst+fold each. */
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         /* Write to fixed guest-state offset; only 'data' needs work. */
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         /* Indexed guest-state write: subst+fold both index and data. */
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         /* Unconditional store: subst+fold address and data. */
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         /* Guarded store: if the guard folds to a constant, the store
            becomes either a no-op (guard == 0) or an unconditional
            store (guard == 1). */
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         /* Compare-and-swap: subst+fold address plus the (possibly
            NULL, for single-width CAS) expected/data halves. */
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* Load-linked / store-conditional; 'storedata' is NULL for
            the load-linked form. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         /* Dirty helper call: copy the descriptor, then subst+fold
            the memory address (if any), the guard, and each argument
            that is not the VECRET/BBPTR marker. */
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      /* The next three statement kinds contain no expressions, so
         there is nothing to substitute into; just rebuild them. */
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         /* Conditional side exit: fold the guard; a guard of 0 kills
            the exit entirely, a guard of 1 is left in place for the
            later dead-code pass to truncate the block. */
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
   2755 
   2756 
   2757 IRSB* cprop_BB ( IRSB* in )
   2758 {
   2759    Int      i;
   2760    IRSB*    out;
   2761    IRStmt*  st2;
   2762    Int      n_tmps = in->tyenv->types_used;
   2763    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   2764    /* Keep track of IRStmt_LoadGs that we need to revisit after
   2765       processing all the other statements. */
   2766    const Int N_FIXUPS = 16;
   2767    Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   2768    Int n_fixups = 0;
   2769 
   2770    out = emptyIRSB();
   2771    out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   2772 
   2773    /* Set up the env with which travels forward.  This holds a
   2774       substitution, mapping IRTemps to IRExprs. The environment
   2775       is to be applied as we move along.  Keys are IRTemps.
   2776       Values are IRExpr*s.
   2777    */
   2778    for (i = 0; i < n_tmps; i++)
   2779       env[i] = NULL;
   2780 
   2781    /* For each original SSA-form stmt ... */
   2782    for (i = 0; i < in->stmts_used; i++) {
   2783 
   2784       /* First apply the substitution to the current stmt.  This
   2785          propagates in any constants and tmp-tmp assignments
   2786          accumulated prior to this point.  As part of the subst_Stmt
   2787          call, also then fold any constant expressions resulting. */
   2788 
   2789       st2 = in->stmts[i];
   2790 
   2791       /* perhaps st2 is already a no-op? */
   2792       if (st2->tag == Ist_NoOp) continue;
   2793 
   2794       st2 = subst_and_fold_Stmt( env, st2 );
   2795 
   2796       /* Deal with some post-folding special cases. */
   2797       switch (st2->tag) {
   2798 
   2799          /* If the statement has been folded into a no-op, forget
   2800             it. */
   2801          case Ist_NoOp:
   2802             continue;
   2803 
   2804          /* If the statement assigns to an IRTemp add it to the
   2805             running environment. This is for the benefit of copy
   2806             propagation and to allow sameIRExpr look through
   2807             IRTemps. */
   2808          case Ist_WrTmp: {
   2809             vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
   2810             env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
   2811 
   2812             /* 't1 = t2' -- don't add to BB; will be optimized out */
   2813             if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
   2814                continue;
   2815 
   2816             /* 't = const' && 'const != F64i' -- don't add to BB
   2817                Note, we choose not to propagate const when const is an
   2818                F64i, so that F64i literals can be CSE'd later.  This
   2819                helps x86 floating point code generation. */
   2820             if (st2->Ist.WrTmp.data->tag == Iex_Const
   2821                 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
   2822                continue;
   2823             }
   2824             /* else add it to the output, as normal */
   2825             break;
   2826          }
   2827 
   2828          case Ist_LoadG: {
   2829             IRLoadG* lg    = st2->Ist.LoadG.details;
   2830             IRExpr*  guard = lg->guard;
   2831             if (guard->tag == Iex_Const) {
   2832                /* The guard has folded to a constant, and that
   2833                   constant must be 1:I1, since subst_and_fold_Stmt
   2834                   folds out the case 0:I1 by itself. */
   2835                vassert(guard->Iex.Const.con->tag == Ico_U1);
   2836                vassert(guard->Iex.Const.con->Ico.U1 == True);
   2837                /* Add a NoOp here as a placeholder, and make a note of
   2838                   where it is in the output block.  Afterwards we'll
   2839                   come back here and transform the NoOp and the LoadG
   2840                   into a load-convert pair.  The fixups[] entry
   2841                   refers to the inserted NoOp, and we expect to find
   2842                   the relevant LoadG immediately after it. */
   2843                vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   2844                if (n_fixups < N_FIXUPS) {
   2845                   fixups[n_fixups++] = out->stmts_used;
   2846                   addStmtToIRSB( out, IRStmt_NoOp() );
   2847                }
   2848             }
   2849             /* And always add the LoadG to the output, regardless. */
   2850             break;
   2851          }
   2852 
   2853       default:
   2854          break;
   2855       }
   2856 
   2857       /* Not interesting, copy st2 into the output block. */
   2858       addStmtToIRSB( out, st2 );
   2859    }
   2860 
   2861 #  if STATS_IROPT
   2862    vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
   2863               invocation_count, recursion_count, success_count,
   2864               recursion_success_count, max_nodes_visited);
   2865 #  endif
   2866 
   2867    out->next     = subst_Expr( env, in->next );
   2868    out->jumpkind = in->jumpkind;
   2869    out->offsIP   = in->offsIP;
   2870 
   2871    /* Process any leftover unconditional LoadGs that we noticed
   2872       in the main pass. */
   2873    vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   2874    for (i = 0; i < n_fixups; i++) {
   2875       Int ix = fixups[i];
   2876       /* Carefully verify that the LoadG has the expected form. */
   2877       vassert(ix >= 0 && ix+1 < out->stmts_used);
   2878       IRStmt* nop = out->stmts[ix];
   2879       IRStmt* lgu = out->stmts[ix+1];
   2880       vassert(nop->tag == Ist_NoOp);
   2881       vassert(lgu->tag == Ist_LoadG);
   2882       IRLoadG* lg    = lgu->Ist.LoadG.details;
   2883       IRExpr*  guard = lg->guard;
   2884       vassert(guard->Iex.Const.con->tag == Ico_U1);
   2885       vassert(guard->Iex.Const.con->Ico.U1 == True);
   2886       /* Figure out the load and result types, and the implied
   2887          conversion operation. */
   2888       IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
   2889       typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
   2890       IROp cvtOp = Iop_INVALID;
   2891       switch (lg->cvt) {
   2892          case ILGop_IdentV128:
   2893          case ILGop_Ident64:
   2894          case ILGop_Ident32: break;
   2895          case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
   2896          case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
   2897          case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
   2898          case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
   2899          default: vpanic("cprop_BB: unhandled ILGOp");
   2900       }
   2901       /* Replace the placeholder NoOp by the required unconditional
   2902          load. */
   2903       IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
   2904       out->stmts[ix]
   2905          = IRStmt_WrTmp(tLoaded,
   2906                         IRExpr_Load(lg->end, cvtArg, lg->addr));
   2907       /* Replace the LoadG by a conversion from the loaded value's
   2908          type to the required result type. */
   2909       out->stmts[ix+1]
   2910          = IRStmt_WrTmp(
   2911               lg->dst, cvtOp == Iop_INVALID
   2912                           ? IRExpr_RdTmp(tLoaded)
   2913                           : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   2914    }
   2915 
   2916    return out;
   2917 }
   2918 
   2919 
   2920 /*---------------------------------------------------------------*/
   2921 /*--- Dead code (t = E) removal                               ---*/
   2922 /*---------------------------------------------------------------*/
   2923 
   2924 /* As a side effect, also removes all code following an unconditional
   2925    side exit. */
   2926 
   2927 /* The type of the HashHW map is: a map from IRTemp to nothing
   2928    -- really just operating a set or IRTemps.
   2929 */
   2930 
/* Mark 'tmp' as used, by setting its entry in 'set' (a Bool array
   indexed by IRTemp number). */
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}
   2936 
/* Recursively walk expression 'e' and mark, in 'set', every IRTemp
   that it reads.  Panics if 'e' has an unexpected tag (malformed
   IR). */
static void addUses_Expr ( Bool* set, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_GetI:
         addUses_Expr(set, e->Iex.GetI.ix);
         return;
      case Iex_ITE:
         addUses_Expr(set, e->Iex.ITE.cond);
         addUses_Expr(set, e->Iex.ITE.iftrue);
         addUses_Expr(set, e->Iex.ITE.iffalse);
         return;
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            addUses_Expr(set, e->Iex.CCall.args[i]);
         return;
      case Iex_Load:
         addUses_Expr(set, e->Iex.Load.addr);
         return;
      case Iex_Qop:
         addUses_Expr(set, e->Iex.Qop.details->arg1);
         addUses_Expr(set, e->Iex.Qop.details->arg2);
         addUses_Expr(set, e->Iex.Qop.details->arg3);
         addUses_Expr(set, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         addUses_Expr(set, e->Iex.Triop.details->arg1);
         addUses_Expr(set, e->Iex.Triop.details->arg2);
         addUses_Expr(set, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         addUses_Expr(set, e->Iex.Binop.arg1);
         addUses_Expr(set, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         addUses_Expr(set, e->Iex.Unop.arg);
         return;
      case Iex_RdTmp:
         /* Leaf case: an actual temp use. */
         addUses_Temp(set, e->Iex.RdTmp.tmp);
         return;
      case Iex_Const:
      case Iex_Get:
         /* No temps read here. */
         return;
      default:
         vex_printf("\n");
         ppIRExpr(e);
         vpanic("addUses_Expr");
   }
}
   2986 
/* Mark, in 'set', every IRTemp read by statement 'st', by delegating
   each sub-expression to addUses_Expr.  Panics on an unexpected
   statement tag. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.details->ix);
         addUses_Expr(set, st->Ist.PutI.details->data);
         return;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi are NULL for a single-wide CAS. */
         cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked. */
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR markers are not real expressions; skip. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               addUses_Expr(set, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These read no temps. */
         return;
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
   3064 
   3065 
   3066 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
   3067 static Bool isZeroU1 ( IRExpr* e )
   3068 {
   3069    return toBool( e->tag == Iex_Const
   3070                   && e->Iex.Const.con->tag == Ico_U1
   3071                   && e->Iex.Const.con->Ico.U1 == False );
   3072 }
   3073 
   3074 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
   3075 static Bool isOneU1 ( IRExpr* e )
   3076 {
   3077    return toBool( e->tag == Iex_Const
   3078                   && e->Iex.Const.con->tag == Ico_U1
   3079                   && e->Iex.Const.con->Ico.U1 == True );
   3080 }
   3081 
   3082 
   3083 /* Note, this destructively modifies the given IRSB. */
   3084 
   3085 /* Scan backwards through statements, carrying a set of IRTemps which
   3086    are known to be used after the current point.  On encountering 't =
   3087    E', delete the binding if it is not used.  Otherwise, add any temp
   3088    uses to the set and keep on moving backwards.
   3089 
   3090    As an enhancement, the first (backwards) pass searches for IR exits
   3091    with always-taken conditions and notes the location of the earliest
   3092    one in the block.  If any such are found, a second pass copies the
   3093    exit destination and jump kind to the bb-end.  Then, the exit and
   3094    all statements following it are turned into no-ops.
   3095 */
   3096 
/* Remove dead 't = E' bindings from 'bb', in place.  A single
   backwards pass keeps a per-temp "used later" Bool array, seeded
   from bb->next; a WrTmp whose temp is never used later becomes a
   NoOp, and a Dirty call with a constant-False guard is likewise
   dropped.  The same pass records the earliest always-taken Exit;
   if one exists, a second pass copies its destination/jumpkind/offsIP
   to the block end and NoOps the exit and everything after it. */
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement.
            Only statements that survive contribute uses, which is
            what makes chains of dead bindings collapse in one
            backwards pass. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Promote the exit's target to be the block's fall-through. */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
   3164 
   3165 
   3166 /*---------------------------------------------------------------*/
   3167 /*--- Specialisation of helper function calls, in             ---*/
   3168 /*--- collaboration with the front end                        ---*/
   3169 /*---------------------------------------------------------------*/
   3170 
/* Offer every clean-helper call (WrTmp of a CCall) in 'bb' to the
   front end's 'specHelper' for specialisation into inline IR.  The
   callback also gets the statement vector and the call's index so it
   can inspect preceding statements.  If anything was replaced, the
   block is re-flattened, since the replacement expression need not be
   flat.  Returns the (possibly new) IRSB. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   /* Walk backwards, so specialisations don't see already-modified
      later statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         /* 'st' still points at the old statement, so this prints
            the pre-replacement expression. */
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   if (any)
      bb = flatten_BB(bb);
   return bb;
}
   3214 
   3215 
   3216 /*---------------------------------------------------------------*/
   3217 /*--- Determination of guest state aliasing relationships     ---*/
   3218 /*---------------------------------------------------------------*/
   3219 
   3220 /* These are helper functions for CSE and GetI/PutI transformations.
   3221 
   3222    Determine, to the extent possible, the relationship between two
   3223    guest state accesses.  The possible outcomes are:
   3224 
   3225    * Exact alias.  These two accesses denote precisely the same
   3226      piece of the guest state.
   3227 
   3228    * Definitely no alias.  These two accesses are guaranteed not to
   3229      overlap any part of the guest state.
   3230 
   3231    * Unknown -- if neither of the above can be established.
   3232 
   3233    If in doubt, return Unknown.  */
   3234 
/* Result of an aliasing query between two guest-state accesses. */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
   3238 
   3239 
   3240 /* Produces the alias relation between an indexed guest
   3241    state access and a non-indexed access. */
   3242 
   3243 static
   3244 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
   3245                                     Int offset2, IRType ty2 )
   3246 {
   3247    UInt minoff1, maxoff1, minoff2, maxoff2;
   3248 
   3249    getArrayBounds( descr1, &minoff1, &maxoff1 );
   3250    minoff2 = offset2;
   3251    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
   3252 
   3253    if (maxoff1 < minoff2 || maxoff2 < minoff1)
   3254       return NoAlias;
   3255 
   3256    /* Could probably do better here if required.  For the moment
   3257       however just claim not to know anything more. */
   3258    return UnknownAlias;
   3259 }
   3260 
   3261 
   3262 /* Produces the alias relation between two indexed guest state
   3263    accesses. */
   3264 
/* Produces the alias relation between two indexed guest state
   accesses.  Returns NoAlias only when provably disjoint, ExactAlias
   only when provably identical, else UnknownAlias. */
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   /* Shift both biases into non-negative territory by whole array
      lengths; the 10-iteration cap guards against absurd inputs. */
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
   3327 
   3328 
   3329 /*---------------------------------------------------------------*/
   3330 /*--- Common Subexpression Elimination                        ---*/
   3331 /*---------------------------------------------------------------*/
   3332 
   3333 /* Expensive in time and space. */
   3334 
   3335 /* Uses two environments:
   3336    a IRTemp -> IRTemp mapping
   3337    a mapping from AvailExpr* to IRTemp
   3338 */
   3339 
/* An IR atom: either a temp (TCt) or a constant (TCc).  Used to
   represent CSE-able expression operands compactly. */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
   3346 
   3347 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
   3348 {
   3349    if (tc1->tag != tc2->tag)
   3350       return False;
   3351    switch (tc1->tag) {
   3352       case TCc:
   3353          return eqIRConst(tc1->u.con, tc2->u.con);
   3354       case TCt:
   3355          return tc1->u.tmp == tc2->u.tmp;
   3356       default:
   3357          vpanic("eqTmpOrConst");
   3358    }
   3359 }
   3360 
   3361 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
   3362 {
   3363    Bool eq = cee1->addr == cee2->addr;
   3364    if (eq) {
   3365       vassert(cee1->regparms == cee2->regparms);
   3366       vassert(cee1->mcx_mask == cee2->mcx_mask);
   3367       /* Names should be the same too, but we don't bother to
   3368          check. */
   3369    }
   3370    return eq;
   3371 }
   3372 
   3373 /* Convert an atomic IRExpr* to a TmpOrConst. */
   3374 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
   3375 {
   3376    switch (e->tag) {
   3377       case Iex_RdTmp:
   3378          tc->tag   = TCt;
   3379          tc->u.tmp = e->Iex.RdTmp.tmp;
   3380          break;
   3381       case Iex_Const:
   3382          tc->tag   = TCc;
   3383          tc->u.con = e->Iex.Const.con;
   3384          break;
   3385       default:
   3386          /* Getting here is a serious error.  It means that the
   3387             presented arg isn't an IR atom, as it should be. */
   3388          vpanic("irExpr_to_TmpOrConst");
   3389    }
   3390 }
   3391 
   3392 /* Convert a TmpOrConst to an atomic IRExpr*. */
   3393 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
   3394 {
   3395    switch (tc->tag) {
   3396       case TCc: return IRExpr_Const(tc->u.con);
   3397       case TCt: return IRExpr_RdTmp(tc->u.tmp);
   3398       default:  vpanic("tmpOrConst_to_IRExpr");
   3399    }
   3400 }
   3401 
   3402 /* Convert a NULL terminated IRExpr* vector to an array of
   3403    TmpOrConsts, and a length. */
   3404 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
   3405                                        /*OUT*/Int* nOuts,
   3406                                        IRExpr** ins )
   3407 {
   3408    Int i, n;
   3409    /* We have to make two passes, one to count, one to copy. */
   3410    for (n = 0; ins[n]; n++)
   3411       ;
   3412    *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
   3413    *nOuts = n;
   3414    /* and now copy .. */
   3415    for (i = 0; i < n; i++) {
   3416       IRExpr*     arg = ins[i];
   3417       TmpOrConst* dst = &(*outs)[i];
   3418       irExpr_to_TmpOrConst(dst, arg);
   3419    }
   3420 }
   3421 
/* A canonicalised "available expression" for CSE: one variant per
   expression shape the CSE pass is willing to common up.  The tag
   names encode operand kinds: t = tmp, c = const (e.g. Btc =
   binop(tmp, const)). */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
   3501 
/* Structural equality on AvailExprs; one case per variant.  Used as
   the key-comparison for the CSE hash table. */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   /* Most lookups compare expressions of different shapes, hence
      LIKELY on the mismatch path. */
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Compare callee and arity first, then each argument. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}
   3575 
/* Reconstruct a fresh IRExpr tree from an AvailExpr.  Constants held
   by value inside the AvailExpr are copied into newly allocated
   IRConsts, since IRExpr_Const takes a pointer. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
   3648 
   3649 inline
   3650 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
   3651 {
   3652    HWord res;
   3653    /* env :: IRTemp -> IRTemp */
   3654    if (lookupHHW( env, &res, (HWord)tmp ))
   3655       return (IRTemp)res;
   3656    else
   3657       return tmp;
   3658 }
   3659 
   3660 inline
   3661 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
   3662                                           HashHW* env )
   3663 {
   3664    /* env :: IRTemp -> IRTemp */
   3665    if (tc->tag == TCt) {
   3666       tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   3667    }
   3668 }
   3669 
   3670 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
   3671 {
   3672    /* env :: IRTemp -> IRTemp */
   3673    switch (ae->tag) {
   3674       case Ut:
   3675          ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
   3676          break;
   3677       case Btt:
   3678          ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
   3679          ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
   3680          break;
   3681       case Btc:
   3682          ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
   3683          break;
   3684       case Bct:
   3685          ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
   3686          break;
   3687       case Cf64i:
   3688          break;
   3689       case Ittt:
   3690          ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
   3691          ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
   3692          ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
   3693          break;
   3694       case Ittc:
   3695          ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
   3696          ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
   3697          break;
   3698       case Itct:
   3699          ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
   3700          ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
   3701          break;
   3702       case Itcc:
   3703          ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
   3704          break;
   3705       case GetIt:
   3706          ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
   3707          break;
   3708       case CCall: {
   3709          Int i, n = ae->u.CCall.nArgs;;
   3710          for (i = 0; i < n; i++) {
   3711             subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
   3712          }
   3713          break;
   3714       }
   3715       case Load:
   3716          subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
   3717          break;
   3718       default:
   3719          vpanic("subst_AvailExpr");
   3720    }
   3721 }
   3722 
/* Convert expression |e| into an AvailExpr -- the compact key form
   used by the CSE pass -- if it matches one of the shapes we track.
   Returns NULL for anything else, in which case the expression never
   becomes "available" and hence is never CSEd.  The incoming IR is
   flat, so all leaf positions are expected to be tmps or constants.
   Loads are converted only when |allowLoadsToBeCSEd| is True. */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         /* unop(t) */
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            /* binop(t1, t2) */
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            /* binop(t, c) -- note the constant is copied by value */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            /* binop(c, t) */
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Of all constant kinds, only F64i constants are tracked. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* ITE(cond, iftrue, iffalse): four tmp/const combinations
            for the two value operands, named by the suffix letters
            (t = tmp, c = const; order is iftrue then iffalse). */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               /* ITE(t, t, t) */
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               /* ITE(t, c, t) */
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               /* ITE(t, t, c) */
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               /* ITE(t, c, c) */
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         /* GetI[descr](t, bias); only tmp-indexed GetIs are tracked */
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      case Iex_Load:
         /* If the caller of do_cse_BB has requested that loads also
            be CSEd, convert them into AvailExprs.  If not, we'll just
            return NULL here, and the load never becomes considered
            "available", which effectively disables CSEing of them, as
            desired. */
         if (allowLoadsToBeCSEd) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Load;
            ae->u.Load.end = e->Iex.Load.end;
            ae->u.Load.ty  = e->Iex.Load.ty;
            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
            return ae;
         }
         break;

      default:
         /* Any other expression shape is not CSEable. */
         break;
   }

   return NULL;
}
   3864 
   3865 
   3866 /* The BB is modified in-place.  Returns True if any changes were
   3867    made.  The caller can choose whether or not loads should be CSEd.
   3868    In the normal course of things we don't do that, since CSEing loads
   3869    is something of a dodgy proposition if the guest program is doing
   3870    some screwy stuff to do with races and spinloops. */
   3871 
static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored directly in HWord-sized hash slots below. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      /* Classify how dangerous this stmt is to the existing
         GetIt/Load bindings: 2 = dump all, 1 = dump unless proven
         non-overlapping, 0 = harmless. */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* Only GetIt and Load bindings are vulnerable. */
            if (ae->tag != GetIt && ae->tag != Load)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  /* Keep the GetIt binding only if this Put provably
                     writes non-overlapping guest state. */
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  /* Ditto, for an indexed Put. */
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
   4030 
   4031 
   4032 /*---------------------------------------------------------------*/
   4033 /*--- Add32/Sub32 chain collapsing                            ---*/
   4034 /*---------------------------------------------------------------*/
   4035 
   4036 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
   4037 
   4038 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   4039    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   4040    root node is Add32, not Sub32. */
   4041 
   4042 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
   4043 {
   4044    if (e->tag != Iex_Binop)
   4045       return False;
   4046    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
   4047       return False;
   4048    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
   4049       return False;
   4050    if (e->Iex.Binop.arg2->tag != Iex_Const)
   4051       return False;
   4052    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   4053    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   4054    if (e->Iex.Binop.op == Iop_Sub32)
   4055       *i32 = -*i32;
   4056    return True;
   4057 }
   4058 
   4059 
   4060 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
   4061    other tmp2.  Scan backwards from the specified start point -- an
   4062    optimisation. */
   4063 
   4064 static Bool collapseChain ( IRSB* bb, Int startHere,
   4065                             IRTemp tmp,
   4066                             IRTemp* tmp2, Int* i32 )
   4067 {
   4068    Int     j, ii;
   4069    IRTemp  vv;
   4070    IRStmt* st;
   4071    IRExpr* e;
   4072 
   4073    /* the (var, con) pair contain the current 'representation' for
   4074       'tmp'.  We start with 'tmp + 0'.  */
   4075    IRTemp var = tmp;
   4076    Int    con = 0;
   4077 
   4078    /* Scan backwards to see if tmp can be replaced by some other tmp
   4079      +/- a constant. */
   4080    for (j = startHere; j >= 0; j--) {
   4081       st = bb->stmts[j];
   4082       if (st->tag != Ist_WrTmp)
   4083          continue;
   4084       if (st->Ist.WrTmp.tmp != var)
   4085          continue;
   4086       e = st->Ist.WrTmp.data;
   4087       if (!isAdd32OrSub32(e, &vv, &ii))
   4088          break;
   4089       var = vv;
   4090       con += ii;
   4091    }
   4092    if (j == -1)
   4093       /* no earlier binding for var .. ill-formed IR */
   4094       vpanic("collapseChain");
   4095 
   4096    /* so, did we find anything interesting? */
   4097    if (var == tmp)
   4098       return False; /* no .. */
   4099 
   4100    *tmp2 = var;
   4101    *i32  = con;
   4102    return True;
   4103 }
   4104 
   4105 
   4106 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
   4107 
   4108 static void collapse_AddSub_chains_BB ( IRSB* bb )
   4109 {
   4110    IRStmt *st;
   4111    IRTemp var, var2;
   4112    Int    i, con, con2;
   4113 
   4114    for (i = bb->stmts_used-1; i >= 0; i--) {
   4115       st = bb->stmts[i];
   4116       if (st->tag == Ist_NoOp)
   4117          continue;
   4118 
   4119       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
   4120 
   4121       if (st->tag == Ist_WrTmp
   4122           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
   4123 
   4124          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
   4125             Find out if var can be expressed as var2 + con2. */
   4126          if (collapseChain(bb, i-1, var, &var2, &con2)) {
   4127             if (DEBUG_IROPT) {
   4128                vex_printf("replacing1 ");
   4129                ppIRStmt(st);
   4130                vex_printf(" with ");
   4131             }
   4132             con2 += con;
   4133             bb->stmts[i]
   4134                = IRStmt_WrTmp(
   4135                     st->Ist.WrTmp.tmp,
   4136                     (con2 >= 0)
   4137                       ? IRExpr_Binop(Iop_Add32,
   4138                                      IRExpr_RdTmp(var2),
   4139                                      IRExpr_Const(IRConst_U32(con2)))
   4140                       : IRExpr_Binop(Iop_Sub32,
   4141                                      IRExpr_RdTmp(var2),
   4142                                      IRExpr_Const(IRConst_U32(-con2)))
   4143                  );
   4144             if (DEBUG_IROPT) {
   4145                ppIRStmt(bb->stmts[i]);
   4146                vex_printf("\n");
   4147             }
   4148          }
   4149 
   4150          continue;
   4151       }
   4152 
   4153       /* Try to collapse 't1 = GetI[t2, con]'. */
   4154 
   4155       if (st->tag == Ist_WrTmp
   4156           && st->Ist.WrTmp.data->tag == Iex_GetI
   4157           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
   4158           && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
   4159                                       ->Iex.RdTmp.tmp, &var2, &con2)) {
   4160          if (DEBUG_IROPT) {
   4161             vex_printf("replacing3 ");
   4162             ppIRStmt(st);
   4163             vex_printf(" with ");
   4164          }
   4165          con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
   4166          bb->stmts[i]
   4167             = IRStmt_WrTmp(
   4168                  st->Ist.WrTmp.tmp,
   4169                  IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
   4170                              IRExpr_RdTmp(var2),
   4171                              con2));
   4172          if (DEBUG_IROPT) {
   4173             ppIRStmt(bb->stmts[i]);
   4174             vex_printf("\n");
   4175          }
   4176          continue;
   4177       }
   4178 
   4179       /* Perhaps st is PutI[t, con] ? */
   4180       IRPutI *puti = st->Ist.PutI.details;
   4181       if (st->tag == Ist_PutI
   4182           && puti->ix->tag == Iex_RdTmp
   4183           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
   4184                                &var2, &con2)) {
   4185          if (DEBUG_IROPT) {
   4186             vex_printf("replacing2 ");
   4187             ppIRStmt(st);
   4188             vex_printf(" with ");
   4189          }
   4190          con2 += puti->bias;
   4191          bb->stmts[i]
   4192             = IRStmt_PutI(mkIRPutI(puti->descr,
   4193                                    IRExpr_RdTmp(var2),
   4194                                    con2,
   4195                                    puti->data));
   4196          if (DEBUG_IROPT) {
   4197             ppIRStmt(bb->stmts[i]);
   4198             vex_printf("\n");
   4199          }
   4200          continue;
   4201       }
   4202 
   4203    } /* for */
   4204 }
   4205 
   4206 
   4207 /*---------------------------------------------------------------*/
   4208 /*--- PutI/GetI transformations                               ---*/
   4209 /*---------------------------------------------------------------*/
   4210 
   4211 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   4212    the given starting point to find, if any, a PutI which writes
   4213    exactly the same piece of guest state, and so return the expression
   4214    that the PutI writes.  This is the core of PutI-GetI forwarding. */
   4215 
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

      /* Any other statement kind can't touch guest state; keep
         scanning. */

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
   4307 
   4308 
   4309 
   4310 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   4311    that it writes exactly the same piece of guest state) ?  Safe
   4312    answer: False. */
   4313 
   4314 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
   4315 {
   4316    vassert(pi->tag == Ist_PutI);
   4317    if (s2->tag != Ist_PutI)
   4318       return False;
   4319 
   4320    IRPutI *p1 = pi->Ist.PutI.details;
   4321    IRPutI *p2 = s2->Ist.PutI.details;
   4322 
   4323    return toBool(
   4324           getAliasingRelation_II(
   4325              p1->descr, p1->ix, p1->bias,
   4326              p2->descr, p2->ix, p2->bias
   4327           )
   4328           == ExactAlias
   4329           );
   4330 }
   4331 
   4332 
   4333 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   4334    overlap it?  Safe answer: True.  Note, we could do a lot better
   4335    than this if needed. */
   4336 
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed but not otherwise
      used in the visible code below -- presumably a remnant of a
      finer-grained overlap test. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* Definitely don't touch guest state. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         /* Indexed-vs-constant guest state overlap check. */
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         /* Indexed-vs-indexed guest state overlap check. */
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only GetI and Get reads of guest state matter here. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Stores touch memory, not guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
   4436 
   4437 
   4438 
   4439 /* ---------- PutI/GetI transformations main functions --------- */
   4440 
   4441 /* Remove redundant GetIs, to the extent that they can be detected.
   4442    bb is modified in-place. */
   4443 
   4444 static
   4445 void do_redundant_GetI_elimination ( IRSB* bb )
   4446 {
   4447    Int     i;
   4448    IRStmt* st;
   4449 
   4450    for (i = bb->stmts_used-1; i >= 0; i--) {
   4451       st = bb->stmts[i];
   4452       if (st->tag == Ist_NoOp)
   4453          continue;
   4454 
   4455       if (st->tag == Ist_WrTmp
   4456           && st->Ist.WrTmp.data->tag == Iex_GetI
   4457           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
   4458          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
   4459          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
   4460          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
   4461          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
   4462          if (replacement
   4463              && isIRAtom(replacement)
   4464              /* Make sure we're doing a type-safe transformation! */
   4465              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
   4466             if (DEBUG_IROPT) {
   4467                vex_printf("rGI:  ");
   4468                ppIRExpr(st->Ist.WrTmp.data);
   4469                vex_printf(" -> ");
   4470                ppIRExpr(replacement);
   4471                vex_printf("\n");
   4472             }
   4473             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
   4474          }
   4475       }
   4476    }
   4477 
   4478 }
   4479 
   4480 
   4481 /* Remove redundant PutIs, to the extent which they can be detected.
   4482    bb is modified in-place. */
   4483 
   4484 static
   4485 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
   4486 {
   4487    Int    i, j;
   4488    Bool   delete;
   4489    IRStmt *st, *stj;
   4490 
   4491    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
   4492 
   4493    for (i = 0; i < bb->stmts_used; i++) {
   4494       st = bb->stmts[i];
   4495       if (st->tag != Ist_PutI)
   4496          continue;
   4497       /* Ok, search forwards from here to see if we can find another
   4498          PutI which makes this one redundant, and dodging various
   4499          hazards.  Search forwards:
   4500          * If conditional exit, give up (because anything after that
   4501            does not postdominate this put).
   4502          * If a Get which might overlap, give up (because this PutI
   4503            not necessarily dead).
   4504          * If a Put which is identical, stop with success.
   4505          * If a Put which might overlap, but is not identical, give up.
   4506          * If a dirty helper call which might write guest state, give up.
   4507          * If a Put which definitely doesn't overlap, or any other
   4508            kind of stmt, continue.
   4509       */
   4510       delete = False;
   4511       for (j = i+1; j < bb->stmts_used; j++) {
   4512          stj = bb->stmts[j];
   4513          if (stj->tag == Ist_NoOp)
   4514             continue;
   4515          if (identicalPutIs(st, stj)) {
   4516             /* success! */
   4517             delete = True;
   4518             break;
   4519          }
   4520          if (stj->tag == Ist_Exit)
   4521             /* give up */
   4522             break;
   4523          if (st->tag == Ist_Dirty)
   4524             /* give up; could do better here */
   4525             break;
   4526          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
   4527             /* give up */
   4528            break;
   4529       }
   4530 
   4531       if (delete) {
   4532          if (DEBUG_IROPT) {
   4533             vex_printf("rPI:  ");
   4534             ppIRStmt(st);
   4535             vex_printf("\n");
   4536          }
   4537          bb->stmts[i] = IRStmt_NoOp();
   4538       }
   4539 
   4540    }
   4541 }
   4542 
   4543 
   4544 /*---------------------------------------------------------------*/
   4545 /*--- Loop unrolling                                          ---*/
   4546 /*---------------------------------------------------------------*/
   4547 
   4548 /* Adjust all tmp values (names) in e by delta.  e is destructively
   4549    modified. */
   4550 
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         /* The only expression kind that directly names a tmp:
            renumber it. */
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         /* Leaves; no tmps to adjust. */
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated array. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         /* Unhandled expression kind: dump it and die. */
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
   4599 
   4600 /* Adjust all tmp values (names) in st by delta.  st is destructively
   4601    modified. */
   4602 
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No tmps in these statement kinds. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Renumber the tmp being written as well as any read in the
            rhs. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a tmp, not an expression: renumber directly. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi is only used by double-width CAS; it may be
            IRTemp_INVALID, in which case it must not be adjusted. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR are marker nodes, not real expressions;
               they contain no tmps. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         /* d->tmp is the (optional) destination of the call. */
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         /* Unhandled statement kind: dump it and die. */
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
   4685 
   4686 
   4687 /* If possible, return a loop-unrolled version of bb0.  The original
   4688    is changed.  If not possible, return NULL.  */
   4689 
   4690 /* The two schemas considered are:
   4691 
   4692      X: BODY; goto X
   4693 
   4694      which unrolls to (eg)  X: BODY;BODY; goto X
   4695 
   4696    and
   4697 
   4698        X: BODY; if (c) goto X; goto Y
   4699    which trivially transforms to
   4700        X: BODY; if (!c) goto Y; goto X;
   4701    so it falls in the scope of the first case.
   4702 
   4703    X and Y must be literal (guest) addresses.
   4704 */
   4705 
   4706 static Int calc_unroll_factor( IRSB* bb )
   4707 {
   4708    Int n_stmts, i;
   4709 
   4710    n_stmts = 0;
   4711    for (i = 0; i < bb->stmts_used; i++) {
   4712       if (bb->stmts[i]->tag != Ist_NoOp)
   4713          n_stmts++;
   4714    }
   4715 
   4716    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
   4717       if (vex_control.iropt_verbosity > 0)
   4718          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
   4719                     n_stmts, 8* n_stmts);
   4720       return 8;
   4721    }
   4722    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
   4723       if (vex_control.iropt_verbosity > 0)
   4724          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
   4725                     n_stmts, 4* n_stmts);
   4726       return 4;
   4727    }
   4728 
   4729    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
   4730       if (vex_control.iropt_verbosity > 0)
   4731          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
   4732                     n_stmts, 2* n_stmts);
   4733       return 2;
   4734    }
   4735 
   4736    if (vex_control.iropt_verbosity > 0)
   4737       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
   4738 
   4739    return 1;
   4740 }
   4741 
   4742 
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* A non-positive threshold disables unrolling entirely. */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   /* Only plain fall-through-style jumps can form the loop back
      edge we handle. */
   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      /* Widen a 32-bit target to Addr64 so both cases compare
         uniformly against my_addr. */
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-NULL stmt.  NOTE(review): this scans for NULL
      slots rather than Ist_NoOp -- presumably stmts[] entries are
      never NULL here; confirm against IRSB invariants. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Double the block jmax times: 1, 2 or 3 doublings give factors
      2, 4 or 8 respectively. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      /* Clone the current (partially unrolled) block, allocate fresh
         tmps in bb1 for every tmp the clone uses ... */
      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
   4913 
   4914 
   4915 /*---------------------------------------------------------------*/
   4916 /*--- The tree builder                                        ---*/
   4917 /*---------------------------------------------------------------*/
   4918 
   4919 /* This isn't part of IR optimisation.  Really it's a pass done prior
   4920    to instruction selection, which improves the code that the
   4921    instruction selector can produce. */
   4922 
   4923 /* --- The 'tmp' environment is the central data structure here --- */
   4924 
   4925 /* The number of outstanding bindings we're prepared to track.
   4926    The number of times the env becomes full and we have to dump
   4927    the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  10 gives reasonable performance
   under most circumstances. */
   4930 #define A_NENV 10
   4931 
   4932 /* An interval. Used to record the bytes in the guest state accessed
   4933    by a Put[I] statement or by (one or more) Get[I] expression(s). In
   4934    case of several Get[I] expressions, the lower/upper bounds are recorded.
   4935    This is conservative but cheap.
   4936    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   4937    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   4938    offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;  /* True iff low/high hold a recorded interval */
      Int  low;      /* lowest guest-state byte offset, inclusive */
      Int  high;     /* highest guest-state byte offset, inclusive */
   }
   Interval;
   4946 
   4947 static inline Bool
   4948 intervals_overlap(Interval i1, Interval i2)
   4949 {
   4950    return (i1.low >= i2.low && i1.low <= i2.high) ||
   4951           (i2.low >= i1.low && i2.low <= i1.high);
   4952 }
   4953 
   4954 static inline void
   4955 update_interval(Interval *i, Int low, Int high)
   4956 {
   4957    vassert(low <= high);
   4958 
   4959    if (i->present) {
   4960       if (low  < i->low)  i->low  = low;
   4961       if (high > i->high) i->high = high;
   4962    } else {
   4963       i->present = True;
   4964       i->low  = low;
   4965       i->high = high;
   4966    }
   4967 }
   4968 
   4969 
   4970 /* bindee == NULL   ===  slot is not in use
   4971    bindee != NULL   ===  slot is in use
   4972 */
typedef
   struct {
      IRTemp  binder;   /* the tmp this slot binds */
      IRExpr* bindee;   /* bound expression, or NULL if slot unused */
      Bool    doesLoad; /* does BINDEE read memory? */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
   4982 
   4983 __attribute__((unused))
   4984 static void ppAEnv ( ATmpInfo* env )
   4985 {
   4986    Int i;
   4987    for (i = 0; i < A_NENV; i++) {
   4988       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
   4989       if (env[i].bindee)
   4990          ppIRExpr(env[i].bindee);
   4991       else
   4992          vex_printf("(null)");
   4993       vex_printf("\n");
   4994    }
   4995 }
   4996 
   4997 /* --- Tree-traversal fns --- */
   4998 
   4999 /* Traverse an expr, and detect if any part of it reads memory or does
   5000    a Get.  Be careful ... this really controls how much the
   5001    tree-builder can reorder the code, so getting it right is critical.
   5002 */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* args is a NULL-terminated array. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* A memory read anywhere in the tree sets the flag. */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* Record the exact byte range this Get reads. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* The index is not known statically, so conservatively
            record the entire array as read. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Leaves; neither loads nor reads guest state. */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
   5061 
   5062 
   5063 /* Add a binding to the front of the env and slide all the rest
   5064    backwards.  It should be the case that the last slot is free. */
   5065 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
   5066 {
   5067    Int i;
   5068    vassert(env[A_NENV-1].bindee == NULL);
   5069    for (i = A_NENV-1; i >= 1; i--)
   5070       env[i] = env[i-1];
   5071    env[0].binder   = binder;
   5072    env[0].bindee   = bindee;
   5073    env[0].doesLoad = False; /* filled in later */
   5074    env[0].getInterval.present  = False; /* filled in later */
   5075    env[0].getInterval.low  = -1; /* filled in later */
   5076    env[0].getInterval.high = -1; /* filled in later */
   5077 }
   5078 
   5079 /* Given uses :: array of UShort, indexed by IRTemp
   5080    Add the use-occurrences of temps in this expression
   5081    to the env.
   5082 */
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   /* uses[] is indexed by IRTemp; it must be sized to cover every
      tmp in the enclosing IRSB's type env -- the caller guarantees
      this. */
   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         /* args is a NULL-terminated array. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         /* Leaves; no tmps used. */
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
    }
}
   5143 
   5144 
   5145 /* Given uses :: array of UShort, indexed by IRTemp
   5146    Add the use-occurrences of temps in this statement
   5147    to the env.
   5148 */
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Note: only the rhs is counted; writing a tmp is a
            definition, not a use. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi are NULL for a single-width CAS. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         /* mAddr is only meaningful when the call touches memory. */
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR marker nodes contain no tmps. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No expressions, hence no tmp uses. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
   5225 
   5226 /* Look up a binding for tmp in the env.  If found, return the bound
   5227    expression, and set the env's binding to NULL so it is marked as
   5228    used.  If not found, return NULL. */
   5229 
   5230 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
   5231 {
   5232    Int i;
   5233    for (i = 0; i < A_NENV; i++) {
   5234       if (env[i].binder == tmp && env[i].bindee != NULL) {
   5235          IRExpr* bindee = env[i].bindee;
   5236          env[i].bindee = NULL;
   5237          return bindee;
   5238       }
   5239    }
   5240    return NULL;
   5241 }
   5242 
   5243 /* Traverse e, looking for temps.  For each observed temp, see if env
   5244    contains a binding for the temp, and if so return the bound value.
   5245    The env has the property that any binding it holds is
   5246    'single-shot', so once a binding is used, it is marked as no longer
   5247    available, by setting its .bindee field to NULL. */
   5248 
   5249 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   5250    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
   5251 }
   5252 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   5253    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
   5254 }
   5255 
   5256 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
   5257 {
   5258    switch (op) {
   5259    case Iop_Or32:
   5260       /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
   5261       if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
   5262          return IRExpr_Unop( Iop_CmpwNEZ32,
   5263                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
   5264                                                      a2->Iex.Unop.arg ) );
   5265       break;
   5266 
   5267    case Iop_CmpNE32:
   5268       /* Since X has type Ity_I1 we can simplify:
   5269          CmpNE32(1Uto32(X),0)) ==> X */
   5270       if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
   5271          return a1->Iex.Unop.arg;
   5272       break;
   5273 
   5274    default:
   5275       break;
   5276    }
   5277    /* no reduction rule applies */
   5278    return IRExpr_Binop( op, a1, a2 );
   5279 }
   5280 
   5281 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
   5282 {
   5283    switch (op) {
   5284    case Iop_CmpwNEZ64:
   5285       /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
   5286       if (is_Unop(aa, Iop_CmpwNEZ64))
   5287          return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
   5288       /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5289       if (is_Binop(aa, Iop_Or64)
   5290           && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
   5291          return fold_IRExpr_Unop(
   5292                    Iop_CmpwNEZ64,
   5293                    IRExpr_Binop(Iop_Or64,
   5294                                 aa->Iex.Binop.arg1->Iex.Unop.arg,
   5295                                 aa->Iex.Binop.arg2));
   5296       /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
   5297       if (is_Binop(aa, Iop_Or64)
   5298           && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
   5299          return fold_IRExpr_Unop(
   5300                    Iop_CmpwNEZ64,
   5301                    IRExpr_Binop(Iop_Or64,
   5302                                 aa->Iex.Binop.arg1,
   5303                                 aa->Iex.Binop.arg2->Iex.Unop.arg));
   5304       break;
   5305    case Iop_CmpNEZ64:
   5306       /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
   5307       if (is_Unop(aa, Iop_Left64))
   5308          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
   5309       /* CmpNEZ64( 1Uto64(X) ) --> X */
   5310       if (is_Unop(aa, Iop_1Uto64))
   5311          return aa->Iex.Unop.arg;
   5312       break;
   5313    case Iop_CmpwNEZ32:
   5314       /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
   5315       if (is_Unop(aa, Iop_CmpwNEZ32))
   5316          return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
   5317       break;
   5318    case Iop_CmpNEZ32:
   5319       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
   5320       if (is_Unop(aa, Iop_Left32))
   5321          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
   5322       /* CmpNEZ32( 1Uto32(X) ) --> X */
   5323       if (is_Unop(aa, Iop_1Uto32))
   5324          return aa->Iex.Unop.arg;
   5325       /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
   5326       if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
   5327          return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
   5328       break;
   5329    case Iop_CmpNEZ8:
   5330       /* CmpNEZ8( 1Uto8(X) ) --> X */
   5331       if (is_Unop(aa, Iop_1Uto8))
   5332          return aa->Iex.Unop.arg;
   5333       break;
   5334    case Iop_Left32:
   5335       /* Left32( Left32(x) ) --> Left32(x) */
   5336       if (is_Unop(aa, Iop_Left32))
   5337          return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
   5338       break;
   5339    case Iop_Left64:
   5340       /* Left64( Left64(x) ) --> Left64(x) */
   5341       if (is_Unop(aa, Iop_Left64))
   5342          return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
   5343       break;
   5344    case Iop_ZeroHI64ofV128:
   5345       /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
   5346       if (is_Unop(aa, Iop_ZeroHI64ofV128))
   5347          return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
   5348       break;
   5349    case Iop_32to1:
   5350       /* 32to1( 1Uto32 ( x ) ) --> x */
   5351       if (is_Unop(aa, Iop_1Uto32))
   5352          return aa->Iex.Unop.arg;
   5353       /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
   5354       if (is_Unop(aa, Iop_CmpwNEZ32))
   5355          return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
   5356       break;
   5357    case Iop_64to1:
   5358       /* 64to1( 1Uto64 ( x ) ) --> x */
   5359       if (is_Unop(aa, Iop_1Uto64))
   5360          return aa->Iex.Unop.arg;
   5361       /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
   5362       if (is_Unop(aa, Iop_CmpwNEZ64))
   5363          return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
   5364       break;
   5365    case Iop_64to32:
   5366       /* 64to32( 32Uto64 ( x )) --> x */
   5367       if (is_Unop(aa, Iop_32Uto64))
   5368          return aa->Iex.Unop.arg;
   5369       /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
   5370       if (is_Unop(aa, Iop_8Uto64))
   5371          return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
   5372       break;
   5373 
   5374    case Iop_32Uto64:
   5375       /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
   5376       if (is_Unop(aa, Iop_8Uto32))
   5377          return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
   5378       /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
   5379       if (is_Unop(aa, Iop_16Uto32))
   5380          return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
   5381       /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
   5382                      --> Shr64( 32Uto64(64to32(x)), sh )) */
   5383       if (is_Unop(aa, Iop_64to32)
   5384           && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
   5385           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5386           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5387                      Iop_64to32)) {
   5388          return aa->Iex.Unop.arg;
   5389       }
   5390       /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
   5391          --> 32Uto64(64to32( Shl64(                x,   sh )) */
   5392       if (is_Unop(aa, Iop_64to32)
   5393           && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
   5394           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
   5395           && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
   5396                      Iop_64to32)) {
   5397          return
   5398             IRExpr_Unop(
   5399                Iop_32Uto64,
   5400                IRExpr_Unop(
   5401                   Iop_64to32,
   5402                   IRExpr_Binop(
   5403                      Iop_Shl64,
   5404                      aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
   5405                      aa->Iex.Unop.arg->Iex.Binop.arg2
   5406             )));
   5407       }
   5408       break;
   5409 
   5410    case Iop_1Sto32:
   5411       /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
   5412       if (is_Unop(aa, Iop_CmpNEZ8)
   5413           && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
   5414           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
   5415           && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
   5416                      Iop_CmpNEZ32)) {
   5417          return IRExpr_Unop( Iop_CmpwNEZ32,
   5418                              aa->Iex.Unop.arg->Iex.Unop.arg
   5419                                ->Iex.Unop.arg->Iex.Unop.arg);
   5420       }
   5421       break;
   5422 
   5423    default:
   5424       break;
   5425    }
   5426    /* no reduction rule applies */
   5427    return IRExpr_Unop( op, aa );
   5428 }
   5429 
   5430 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
   5431 {
   5432    IRExpr*  e2;
   5433    IRExpr** args2;
   5434    Int      i;
   5435 
   5436    switch (e->tag) {
   5437 
   5438       case Iex_CCall:
   5439          args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
   5440          for (i = 0; args2[i]; i++)
   5441             args2[i] = atbSubst_Expr(env,args2[i]);
   5442          return IRExpr_CCall(
   5443                    e->Iex.CCall.cee,
   5444                    e->Iex.CCall.retty,
   5445                    args2
   5446                 );
   5447       case Iex_RdTmp:
   5448          e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
   5449          return e2 ? e2 : e;
   5450       case Iex_ITE:
   5451          return IRExpr_ITE(
   5452                    atbSubst_Expr(env, e->Iex.ITE.cond),
   5453                    atbSubst_Expr(env, e->Iex.ITE.iftrue),
   5454                    atbSubst_Expr(env, e->Iex.ITE.iffalse)
   5455                 );
   5456       case Iex_Qop:
   5457          return IRExpr_Qop(
   5458                    e->Iex.Qop.details->op,
   5459                    atbSubst_Expr(env, e->Iex.Qop.details->arg1),
   5460                    atbSubst_Expr(env, e->Iex.Qop.details->arg2),
   5461                    atbSubst_Expr(env, e->Iex.Qop.details->arg3),
   5462                    atbSubst_Expr(env, e->Iex.Qop.details->arg4)
   5463                 );
   5464       case Iex_Triop:
   5465          return IRExpr_Triop(
   5466                    e->Iex.Triop.details->op,
   5467                    atbSubst_Expr(env, e->Iex.Triop.details->arg1),
   5468                    atbSubst_Expr(env, e->Iex.Triop.details->arg2),
   5469                    atbSubst_Expr(env, e->Iex.Triop.details->arg3)
   5470                 );
   5471       case Iex_Binop:
   5472          return fold_IRExpr_Binop(
   5473                    e->Iex.Binop.op,
   5474                    atbSubst_Expr(env, e->Iex.Binop.arg1),
   5475                    atbSubst_Expr(env, e->Iex.Binop.arg2)
   5476                 );
   5477       case Iex_Unop:
   5478          return fold_IRExpr_Unop(
   5479                    e->Iex.Unop.op,
   5480                    atbSubst_Expr(env, e->Iex.Unop.arg)
   5481                 );
   5482       case Iex_Load:
   5483          return IRExpr_Load(
   5484                    e->Iex.Load.end,
   5485                    e->Iex.Load.ty,
   5486                    atbSubst_Expr(env, e->Iex.Load.addr)
   5487                 );
   5488       case Iex_GetI:
   5489          return IRExpr_GetI(
   5490                    e->Iex.GetI.descr,
   5491                    atbSubst_Expr(env, e->Iex.GetI.ix),
   5492                    e->Iex.GetI.bias
   5493                 );
   5494       case Iex_Const:
   5495       case Iex_Get:
   5496          return e;
   5497       default:
   5498          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5499          vpanic("atbSubst_Expr");
   5500    }
   5501 }
   5502 
/* Same deal as atbSubst_Expr, except for stmts: rebuild 'st' with all
   embedded expressions run through atbSubst_Expr.  Statement-level
   fields that are not expressions (offsets, jump kinds, temp numbers,
   endianness tags, etc.) are carried over unchanged. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* The Hi halves are NULL for a single (non-double) CAS, so
            only substitute them when present. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked (as opposed to a
            Store-Conditional). */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Shallow-copy the IRDirty, then substitute in place on the
            copy.  mAddr is only meaningful when the helper touches
            memory (mFx != Ifx_None). */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/BBPTR marker args are not real expressions; leave
               them untouched. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
   5609 
   5610 inline
   5611 static Bool dirty_helper_stores ( const IRDirty *d )
   5612 {
   5613    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
   5614 }
   5615 
   5616 inline
   5617 static Interval dirty_helper_puts (
   5618                    const IRDirty *d,
   5619                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5620                    VexRegisterUpdates pxControl,
   5621                    /*OUT*/Bool *requiresPreciseMemExns
   5622                 )
   5623 {
   5624    Int i;
   5625    Interval interval;
   5626 
   5627    /* Passing the guest state pointer opens the door to modifying the
   5628       guest state under the covers.  It's not allowed, but let's be
   5629       extra conservative and assume the worst. */
   5630    for (i = 0; d->args[i]; i++) {
   5631       if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
   5632          *requiresPreciseMemExns = True;
   5633          /* Assume all guest state is written. */
   5634          interval.present = True;
   5635          interval.low  = 0;
   5636          interval.high = 0x7FFFFFFF;
   5637          return interval;
   5638       }
   5639    }
   5640 
   5641    /* Check the side effects on the guest state */
   5642    interval.present = False;
   5643    interval.low = interval.high = -1;
   5644    *requiresPreciseMemExns = False;
   5645 
   5646    for (i = 0; i < d->nFxState; ++i) {
   5647       if (d->fxState[i].fx != Ifx_Read) {
   5648          Int offset = d->fxState[i].offset;
   5649          Int size = d->fxState[i].size;
   5650          Int nRepeats = d->fxState[i].nRepeats;
   5651          Int repeatLen = d->fxState[i].repeatLen;
   5652 
   5653          if (preciseMemExnsFn(offset,
   5654                               offset + nRepeats * repeatLen + size - 1,
   5655                               pxControl)) {
   5656             *requiresPreciseMemExns = True;
   5657          }
   5658          update_interval(&interval, offset,
   5659                          offset + nRepeats * repeatLen + size - 1);
   5660       }
   5661    }
   5662 
   5663    return interval;
   5664 }
   5665 
   5666 /* Return an interval if st modifies the guest state.  Via
   5667    requiresPreciseMemExns return whether or not that modification
   5668    requires precise exceptions. */
   5669 static Interval stmt_modifies_guest_state (
   5670                    IRSB *bb, const IRStmt *st,
   5671                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   5672                    VexRegisterUpdates pxControl,
   5673                    /*OUT*/Bool *requiresPreciseMemExns
   5674                 )
   5675 {
   5676    Interval interval;
   5677 
   5678    switch (st->tag) {
   5679    case Ist_Put: {
   5680       Int offset = st->Ist.Put.offset;
   5681       Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
   5682 
   5683       *requiresPreciseMemExns
   5684          = preciseMemExnsFn(offset, offset + size - 1, pxControl);
   5685       interval.present = True;
   5686       interval.low  = offset;
   5687       interval.high = offset + size - 1;
   5688       return interval;
   5689    }
   5690 
   5691    case Ist_PutI: {
   5692       IRRegArray *descr = st->Ist.PutI.details->descr;
   5693       Int offset = descr->base;
   5694       Int size = sizeofIRType(descr->elemTy);
   5695 
   5696       /* We quietly assume here that all segments are contiguous and there
   5697          are no holes. This is to avoid a loop. The assumption is conservative
   5698          in the sense that we might report that precise memory exceptions are
   5699          needed when in fact they are not. */
   5700       *requiresPreciseMemExns
   5701          = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
   5702                             pxControl);
   5703       interval.present = True;
   5704       interval.low  = offset;
   5705       interval.high = offset + descr->nElems * size - 1;
   5706       return interval;
   5707    }
   5708 
   5709    case Ist_Dirty:
   5710       return dirty_helper_puts(st->Ist.Dirty.details,
   5711                                preciseMemExnsFn, pxControl,
   5712                                requiresPreciseMemExns);
   5713 
   5714    default:
   5715       *requiresPreciseMemExns = False;
   5716       interval.present = False;
   5717       interval.low  = -1;
   5718       interval.high = -1;
   5719       return interval;
   5720    }
   5721 }
   5722 
/* Tree-builder: convert the flat IRSB 'bb' back into expression trees
   by substituting single-use temporaries directly into their use
   sites, where that is safe with respect to loads, stores and guest
   state updates.  Returns the maximum guest address covered by the
   block's IMarks, or ~(Addr)0 if the block contained no IMark (used
   later to decide when event checks can be elided). */
/* notstatic */ Addr ado_treebuild_BB (
                        IRSB* bb,
                        Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                        VexRegisterUpdates pxControl
                     )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];   /* newest binding at index 0, oldest at the end */

   Bool   max_ga_known = False;
   Addr   max_ga       = 0;

   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* len==0 IMarks are treated as covering one byte. */
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   /* Debug: dump per-temp use counts. */
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
	    if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (conservatively
         over-approximated as True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essence, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requires precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
	 }
      }
      /* ... and clear the vacated slots at the back. */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}
   5950 
   5951 
   5952 /*---------------------------------------------------------------*/
   5953 /*--- MSVC specific transformation hacks                      ---*/
   5954 /*---------------------------------------------------------------*/
   5955 
   5956 /* The purpose of all this is to find MSVC's idiom for non-constant
   5957    bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   5960    doesn't understand that XORing an undefined bit with itself gives a
   5961    defined result.
   5962 
   5963    This isn't a problem for the simple case "x ^ x", because iropt
   5964    folds it to a constant zero before Memcheck ever sees it.  But in
   5965    this case we have an intervening "& c" which defeats the simple
   5966    case.  So we have to carefully inspect all expressions rooted at an
   5967    XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   5968    7 other variants resulting from swapping the order of arguments to
   5969    the three binary operations.  If we get a match, we then replace
   5970    the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
   5971 
   5972    The key difficulty is to spot the two uses of "a".  To normalise
   5973    the IR to maximise the chances of success, we first do a CSE pass,
   5974    with CSEing of loads enabled, since the two "a" expressions may be
   5975    loads, which need to be commoned up.  Then we do a constant folding
   5976    pass, so as to remove any tmp-to-tmp assignment chains that would
   5977    make matching the original expression more difficult.
   5978 */
   5979 
   5980 
   5981 /* Helper function for debug printing */
   5982 __attribute__((unused))
   5983 static void print_flat_expr ( IRExpr** env, IRExpr* e )
   5984 {
   5985    if (e == NULL) {
   5986       vex_printf("?");
   5987       return;
   5988    }
   5989    switch (e->tag) {
   5990       case Iex_Binop: {
   5991          ppIROp(e->Iex.Binop.op);
   5992          vex_printf("(");
   5993          print_flat_expr(env, e->Iex.Binop.arg1);
   5994          vex_printf(",");
   5995          print_flat_expr(env, e->Iex.Binop.arg2);
   5996          vex_printf(")");
   5997          break;
   5998       }
   5999       case Iex_Unop: {
   6000          ppIROp(e->Iex.Unop.op);
   6001          vex_printf("(");
   6002          print_flat_expr(env, e->Iex.Unop.arg);
   6003          vex_printf(")");
   6004          break;
   6005       }
   6006       case Iex_RdTmp:
   6007          ppIRTemp(e->Iex.RdTmp.tmp);
   6008          vex_printf("=");
   6009          print_flat_expr(env, chase(env, e));
   6010          break;
   6011       case Iex_Const:
   6012       case Iex_CCall:
   6013       case Iex_Load:
   6014       case Iex_ITE:
   6015       case Iex_Get:
   6016          ppIRExpr(e);
   6017          break;
   6018       default:
   6019          vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
   6020          vassert(0);
   6021    }
   6022 }
   6023 
   6024 /* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
   6025    or any of the other 7 variants generated by switching the order
   6026    of arguments to the outer ^, the inner ^ and the &.
   6027 */
   6028 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
   6029                                      /*OUT*/IRExpr** cc,
   6030                                      IRExpr** env, IRExpr* e,
   6031                                      IROp opAND, IROp opXOR)
   6032 {
   6033 #  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
   6034                               && (_e)->Iex.Binop.op == (_op))
   6035 #  define ISATOM(_e)    isIRAtom(_e)
   6036 #  define STEP(_e)      chase1(env, (_e))
   6037 #  define LL(_e)        ((_e)->Iex.Binop.arg1)
   6038 #  define RR(_e)        ((_e)->Iex.Binop.arg2)
   6039 
   6040    IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
   6041 
   6042    /* This is common to all 8 cases */
   6043    if (!ISBIN(e, opXOR)) goto fail;
   6044 
   6045    /*                        -----and------ */
   6046    /*                         --xor---      */
   6047    /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   6048    /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   6049    a1 = and = xor = c = a2bL = a2bR = NULL;
   6050 
   6051    a1   = LL(e);
   6052    and  = STEP(RR(e));
   6053    if (!ISBIN(and, opAND)) goto v34;
   6054    xor  = STEP(LL(and));
   6055    c    = RR(and);
   6056    if (!ISBIN(xor, opXOR)) goto v34;
   6057    a2bL = LL(xor);
   6058    a2bR = RR(xor);
   6059 
   6060    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6061       *aa = a1;
   6062       *bb = a2bR;
   6063       *cc = c;
   6064       return 1;
   6065    }
   6066    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6067       *aa = a1;
   6068       *bb = a2bL;
   6069       *cc = c;
   6070       return 2;
   6071    }
   6072 
   6073   v34:
   6074    /*                   -----and------      */
   6075    /*                    --xor---           */
   6076    /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   6077    /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   6078    a1 = and = xor = c = a2bL = a2bR = NULL;
   6079 
   6080    a1   = RR(e);
   6081    and  = STEP(LL(e));
   6082    if (!ISBIN(and, opAND)) goto v56;
   6083    xor  = STEP(LL(and));
   6084    c    = RR(and);
   6085    if (!ISBIN(xor, opXOR)) goto v56;
   6086    a2bL = LL(xor);
   6087    a2bR = RR(xor);
   6088 
   6089    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6090       *aa = a1;
   6091       *bb = a2bR;
   6092       *cc = c;
   6093       return 3;
   6094    }
   6095    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6096       *aa = a1;
   6097       *bb = a2bL;
   6098       *cc = c;
   6099       return 4;
   6100    }
   6101 
   6102   v56:
   6103    /*                        -----and------ */
   6104    /*                         --xor---      */
   6105    /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   6106    /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   6107    a1 = and = xor = c = a2bL = a2bR = NULL;
   6108 
   6109    a1   = LL(e);
   6110    and  = STEP(RR(e));
   6111    if (!ISBIN(and, opAND)) goto v78;
   6112    xor  = STEP(RR(and));
   6113    c    = LL(and);
   6114    if (!ISBIN(xor, opXOR)) goto v78;
   6115    a2bL = LL(xor);
   6116    a2bR = RR(xor);
   6117 
   6118    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6119       *aa = a1;
   6120       *bb = a2bR;
   6121       *cc = c;
   6122       vassert(5-5); // ATC
   6123       return 5;
   6124    }
   6125    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6126       *aa = a1;
   6127       *bb = a2bL;
   6128       *cc = c;
   6129       vassert(6-6); // ATC
   6130       return 6;
   6131    }
   6132 
   6133  v78:
   6134    /*                   -----and------      */
   6135    /*                    --xor---           */
   6136    /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   6137    /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   6138    a1 = and = xor = c = a2bL = a2bR = NULL;
   6139 
   6140    a1   = RR(e);
   6141    and  = STEP(LL(e));
   6142    if (!ISBIN(and, opAND)) goto fail;
   6143    xor  = STEP(RR(and));
   6144    c    = LL(and);
   6145    if (!ISBIN(xor, opXOR)) goto fail;
   6146    a2bL = LL(xor);
   6147    a2bR = RR(xor);
   6148 
   6149    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   6150       *aa = a1;
   6151       *bb = a2bR;
   6152       *cc = c;
   6153       return 7;
   6154    }
   6155    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
   6156       *aa = a1;
   6157       *bb = a2bL;
   6158       *cc = c;
   6159       return 8;
   6160    }
   6161 
   6162  fail:
   6163    return 0;
   6164 
   6165 #  undef ISBIN
   6166 #  undef ISATOM
   6167 #  undef STEP
   6168 #  undef LL
   6169 #  undef RR
   6170 }
   6171 
   6172 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   6173    variants thereof generated by switching arguments around), return
   6174    the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
   6175 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
   6176 {
   6177    if (e->tag != Iex_Binop)
   6178       return NULL;
   6179 
   6180    const HChar* tyNm = NULL;
   6181    IROp   opOR  = Iop_INVALID;
   6182    IROp   opAND = Iop_INVALID;
   6183    IROp   opNOT = Iop_INVALID;
   6184    IROp   opXOR = Iop_INVALID;
   6185    switch (e->Iex.Binop.op) {
   6186       case Iop_Xor32:
   6187          tyNm  = "I32";
   6188          opOR  = Iop_Or32;  opAND = Iop_And32;
   6189          opNOT = Iop_Not32; opXOR = Iop_Xor32;
   6190          break;
   6191       case Iop_Xor16:
   6192          tyNm  = "I16";
   6193          opOR  = Iop_Or16;  opAND = Iop_And16;
   6194          opNOT = Iop_Not16; opXOR = Iop_Xor16;
   6195          break;
   6196       case Iop_Xor8:
   6197          tyNm  = "I8";
   6198          opOR  = Iop_Or8;  opAND = Iop_And8;
   6199          opNOT = Iop_Not8; opXOR = Iop_Xor8;
   6200          break;
   6201       default:
   6202          return NULL;
   6203    }
   6204 
   6205    IRExpr* aa = NULL;
   6206    IRExpr* bb = NULL;
   6207    IRExpr* cc = NULL;
   6208    UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   6209    if (variant > 0) {
   6210       static UInt ctr = 0;
   6211       if (0)
   6212          vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
   6213                     "type %s, variant %u\n",
   6214                     ++ctr, tyNm, variant);
   6215       /* It's vitally important that the returned aa, bb and cc are
   6216          atoms -- either constants or tmps.  If it's anything else
   6217          (eg, a GET) then incorporating them in a tree at this point
   6218          in the SB may erroneously pull them forwards (eg of a PUT
   6219          that originally was after the GET) and so transform the IR
   6220          wrongly.  spotBitfieldAssignment should guarantee only to
   6221          give us atoms, but we check here anyway. */
   6222       vassert(aa && isIRAtom(aa));
   6223       vassert(bb && isIRAtom(bb));
   6224       vassert(cc && isIRAtom(cc));
   6225       return IRExpr_Binop(
   6226                 opOR,
   6227                 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
   6228                 IRExpr_Binop(opAND, bb, cc)
   6229              );
   6230    }
   6231    return NULL;
   6232 }
   6233 
   6234 
   6235 /* SB is modified in-place.  Visit all the IRExprs and, for those
   6236    which are allowed to be non-atomic, perform the XOR transform if
   6237    possible.  This makes |sb| be non-flat, but that's ok, the caller
   6238    can re-flatten it.  Returns True iff any changes were made. */
   6239 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
   6240 {
   6241    Int  i;
   6242    Bool changed = False;
   6243 
   6244    /* Make the tmp->expr environment, so we can use it for
   6245       chasing expressions. */
   6246    Int      n_tmps = sb->tyenv->types_used;
   6247    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   6248    for (i = 0; i < n_tmps; i++)
   6249       env[i] = NULL;
   6250 
   6251    for (i = 0; i < sb->stmts_used; i++) {
   6252       IRStmt* st = sb->stmts[i];
   6253       if (st->tag != Ist_WrTmp)
   6254          continue;
   6255       IRTemp t = st->Ist.WrTmp.tmp;
   6256       vassert(t >= 0 && t < n_tmps);
   6257       env[t] = st->Ist.WrTmp.data;
   6258    }
   6259 
   6260    for (i = 0; i < sb->stmts_used; i++) {
   6261       IRStmt* st = sb->stmts[i];
   6262 
   6263       switch (st->tag) {
   6264          case Ist_AbiHint:
   6265             vassert(isIRAtom(st->Ist.AbiHint.base));
   6266             vassert(isIRAtom(st->Ist.AbiHint.nia));
   6267             break;
   6268          case Ist_Put:
   6269             vassert(isIRAtom(st->Ist.Put.data));
   6270             break;
   6271          case Ist_PutI: {
   6272             IRPutI* puti = st->Ist.PutI.details;
   6273             vassert(isIRAtom(puti->ix));
   6274             vassert(isIRAtom(puti->data));
   6275             break;
   6276          }
   6277          case Ist_WrTmp: {
   6278             /* This is the one place where an expr (st->Ist.WrTmp.data) is
   6279                allowed to be more than just a constant or a tmp. */
   6280             IRExpr* mb_new_data
   6281                = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
   6282             if (mb_new_data) {
   6283                //ppIRSB(sb);
   6284                st->Ist.WrTmp.data = mb_new_data;
   6285                //ppIRSB(sb);
   6286                changed = True;
   6287             }
   6288             break;
   6289          }
   6290          case Ist_Store:
   6291             vassert(isIRAtom(st->Ist.Store.addr));
   6292             vassert(isIRAtom(st->Ist.Store.data));
   6293             break;
   6294          case Ist_StoreG: {
   6295             IRStoreG* sg = st->Ist.StoreG.details;
   6296             vassert(isIRAtom(sg->addr));
   6297             vassert(isIRAtom(sg->data));
   6298             vassert(isIRAtom(sg->guard));
   6299             break;
   6300          }
   6301          case Ist_LoadG: {
   6302             IRLoadG* lg = st->Ist.LoadG.details;
   6303             vassert(isIRAtom(lg->addr));
   6304             vassert(isIRAtom(lg->alt));
   6305             vassert(isIRAtom(lg->guard));
   6306             break;
   6307          }
   6308          case Ist_CAS: {
   6309             IRCAS* cas = st->Ist.CAS.details;
   6310             vassert(isIRAtom(cas->addr));
   6311             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
   6312             vassert(isIRAtom(cas->expdLo));
   6313             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
   6314             vassert(isIRAtom(cas->dataLo));
   6315             break;
   6316          }
   6317          case Ist_LLSC:
   6318             vassert(isIRAtom(st->Ist.LLSC.addr));
   6319             if (st->Ist.LLSC.storedata)
   6320                vassert(isIRAtom(st->Ist.LLSC.storedata));
   6321             break;
   6322          case Ist_Dirty: {
   6323             IRDirty* d = st->Ist.Dirty.details;
   6324             if (d->mFx != Ifx_None) {
   6325                vassert(isIRAtom(d->mAddr));
   6326             }
   6327             vassert(isIRAtom(d->guard));
   6328             for (Int j = 0; d->args[j]; j++) {
   6329                IRExpr* arg = d->args[j];
   6330                if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
   6331                   vassert(isIRAtom(arg));
   6332                }
   6333             }
   6334             break;
   6335          }
   6336          case Ist_IMark:
   6337          case Ist_NoOp:
   6338          case Ist_MBE:
   6339             break;
   6340          case Ist_Exit:
   6341             vassert(isIRAtom(st->Ist.Exit.guard));
   6342             break;
   6343          default:
   6344             vex_printf("\n"); ppIRStmt(st);
   6345             vpanic("do_XOR_TRANSFORMS_IRSB");
   6346       }
   6347    }
   6348 
   6349    vassert(isIRAtom(sb->next));
   6350    return changed;
   6351 }
   6352 
   6353 
   6354 static IRSB* do_MSVC_HACKS ( IRSB* sb )
   6355 {
   6356    // Normalise as much as we can.  This is the one-and-only place
   6357    // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   6358    Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   6359    if (any_cse_changes) {
   6360       // CSEing might have created dead code.  Remove it.
   6361       sb = cprop_BB ( sb );
   6362       do_deadcode_BB(sb);
   6363    }
   6364 
   6365    // Visit all atoms, do the transformation proper.  bb is modified
   6366    // in-place.
   6367    Bool changed = do_XOR_TRANSFORM_IRSB(sb);
   6368 
   6369    if (changed) {
   6370       // The transformation generates non-flat expressions, so we now
   6371       // need to re-flatten the block.
   6372       sb = flatten_BB(sb);
   6373    }
   6374 
   6375    return sb;
   6376 }
   6377 
   6378 
   6379 /*---------------------------------------------------------------*/
   6380 /*--- iropt main                                              ---*/
   6381 /*---------------------------------------------------------------*/
   6382 
   6383 static Bool iropt_verbose = False; /* True; */
   6384 
   6385 
   6386 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   6387    redundant Put removal, constant propagation, dead code removal,
   6388    clean helper specialisation, and dead code removal (again).
   6389 */
   6390 
   6391 
   6392 static
   6393 IRSB* cheap_transformations (
   6394          IRSB* bb,
   6395          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   6396          Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   6397          VexRegisterUpdates pxControl
   6398       )
   6399 {
   6400    redundant_get_removal_BB ( bb );
   6401    if (iropt_verbose) {
   6402       vex_printf("\n========= REDUNDANT GET\n\n" );
   6403       ppIRSB(bb);
   6404    }
   6405 
   6406    if (pxControl < VexRegUpdAllregsAtEachInsn) {
   6407       redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   6408    }
   6409    if (iropt_verbose) {
   6410       vex_printf("\n========= REDUNDANT PUT\n\n" );
   6411       ppIRSB(bb);
   6412    }
   6413 
   6414    bb = cprop_BB ( bb );
   6415    if (iropt_verbose) {
   6416       vex_printf("\n========= CPROPD\n\n" );
   6417       ppIRSB(bb);
   6418    }
   6419 
   6420    do_deadcode_BB ( bb );
   6421    if (iropt_verbose) {
   6422       vex_printf("\n========= DEAD\n\n" );
   6423       ppIRSB(bb);
   6424    }
   6425 
   6426    bb = spec_helpers_BB ( bb, specHelper );
   6427    do_deadcode_BB ( bb );
   6428    if (iropt_verbose) {
   6429       vex_printf("\n========= SPECd \n\n" );
   6430       ppIRSB(bb);
   6431    }
   6432 
   6433    return bb;
   6434 }
   6435 
   6436 
   6437 /* Do some more expensive transformations on bb, which are aimed at
   6438    optimising as much as possible in the presence of GetI and PutI.  */
   6439 
static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   /* Common up duplicated expressions first; loads are not CSEd
      here, and the "did anything change" result is not needed. */
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   /* Normalise Add/Sub chains -- presumably so that GetI/PutI index
      expressions become directly comparable; TODO confirm. */
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   /* Redundant-PutI removal is skipped when every register must be
      up to date at each instruction (same guard as redundant Put
      removal in cheap_transformations). */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   /* The passes above can orphan assignments; sweep them up. */
   do_deadcode_BB( bb );
   return bb;
}
   6452 
   6453 
   6454 /* Scan a flattened BB to look for signs that more expensive
   6455    optimisations might be useful:
   6456    - find out if there are any GetIs and PutIs
   6457    - find out if there are any floating or vector-typed temporaries
   6458 */
   6459 
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   /* Start from "nothing interesting found". */
   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Single pass over all statements.  While scanning, also
      sanity-check (via vassert) that the block is flat, i.e. that
      every position which must hold an atom really does. */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Integer tmps are uninteresting; float, decimal-float
               and vector tmps flag the block for extra work. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            /* expdHi/dataHi may legitimately be NULL (presumably the
               single-element CAS case). */
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               /* VECRET/BBPTR marker args are not atoms. */
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
   6559 
   6560 
   6561 /* ---------------- The main iropt entry point. ---------------- */
   6562 
   6563 /* exported from this file */
   6564 /* Rules of the game:
   6565 
   6566    - IRExpr/IRStmt trees should be treated as immutable, as they
   6567      may get shared.  So never change a field of such a tree node;
   6568      instead construct and return a new one if needed.
   6569 */
   6570 
   6571 
/* Optimise |bb0| and return the result (a different IRSB* -- the
   input is first flattened into a fresh block).  |specHelper| is the
   front end's clean-helper specialiser; |preciseMemExnsFn| and
   |pxControl| control how aggressively guest-state Puts may be
   removed; |guest_addr| is the guest address of the block, used for
   loop unrolling; |guest_arch| enables arch-specific extra passes
   (currently only ARM). */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Counters for the DEBUG_IROPT statistics printed below.  Shared
      across all invocations; debug-only, not thread-safe. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      // (disabled: the if (0) keeps do_MSVC_HACKS compiled but
      // never invoked)
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
   6683 
   6684 
   6685 /*---------------------------------------------------------------*/
   6686 /*--- end                                            ir_opt.c ---*/
   6687 /*---------------------------------------------------------------*/
   6688