Home | History | Annotate | Download | only in pub
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                       libvex_ir.h ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #ifndef __LIBVEX_IR_H
     37 #define __LIBVEX_IR_H
     38 
     39 #include "libvex_basictypes.h"
     40 
     41 
     42 /*---------------------------------------------------------------*/
     43 /*--- High-level IR description                               ---*/
     44 /*---------------------------------------------------------------*/
     45 
     46 /* Vex IR is an architecture-neutral intermediate representation.
     47    Unlike some IRs in systems similar to Vex, it is not like assembly
     48    language (ie. a list of instructions).  Rather, it is more like the
     49    IR that might be used in a compiler.
     50 
     51    Code blocks
     52    ~~~~~~~~~~~
     53    The code is broken into small code blocks ("superblocks", type:
     54    'IRSB').  Each code block typically represents from 1 to perhaps 50
     55    instructions.  IRSBs are single-entry, multiple-exit code blocks.
     56    Each IRSB contains three things:
     57    - a type environment, which indicates the type of each temporary
     58      value present in the IRSB
     59    - a list of statements, which represent code
     60    - a jump that exits from the end the IRSB
     61    Because the blocks are multiple-exit, there can be additional
     62    conditional exit statements that cause control to leave the IRSB
     63    before the final exit.  Also because of this, IRSBs can cover
     64    multiple non-consecutive sequences of code (up to 3).  These are
     65    recorded in the type VexGuestExtents (see libvex.h).
     66 
     67    Statements and expressions
     68    ~~~~~~~~~~~~~~~~~~~~~~~~~~
     69    Statements (type 'IRStmt') represent operations with side-effects,
     70    eg.  guest register writes, stores, and assignments to temporaries.
     71    Expressions (type 'IRExpr') represent operations without
     72    side-effects, eg. arithmetic operations, loads, constants.
     73    Expressions can contain sub-expressions, forming expression trees,
      74    eg. (3 + (4 * load(addr1))).
     75 
     76    Storage of guest state
     77    ~~~~~~~~~~~~~~~~~~~~~~
     78    The "guest state" contains the guest registers of the guest machine
     79    (ie.  the machine that we are simulating).  It is stored by default
     80    in a block of memory supplied by the user of the VEX library,
     81    generally referred to as the guest state (area).  To operate on
     82    these registers, one must first read ("Get") them from the guest
     83    state into a temporary value.  Afterwards, one can write ("Put")
     84    them back into the guest state.
     85 
     86    Get and Put are characterised by a byte offset into the guest
     87    state, a small integer which effectively gives the identity of the
     88    referenced guest register, and a type, which indicates the size of
     89    the value to be transferred.
     90 
     91    The basic "Get" and "Put" operations are sufficient to model normal
     92    fixed registers on the guest.  Selected areas of the guest state
     93    can be treated as a circular array of registers (type:
     94    'IRRegArray'), which can be indexed at run-time.  This is done with
     95    the "GetI" and "PutI" primitives.  This is necessary to describe
     96    rotating register files, for example the x87 FPU stack, SPARC
     97    register windows, and the Itanium register files.
     98 
     99    Examples, and flattened vs. unflattened code
    100    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    101    For example, consider this x86 instruction:
    102 
    103      addl %eax, %ebx
    104 
    105    One Vex IR translation for this code would be this:
    106 
    107      ------ IMark(0x24F275, 7, 0) ------
    108      t3 = GET:I32(0)             # get %eax, a 32-bit integer
    109      t2 = GET:I32(12)            # get %ebx, a 32-bit integer
    110      t1 = Add32(t3,t2)           # addl
    111      PUT(0) = t1                 # put %eax
    112 
    113    (For simplicity, this ignores the effects on the condition codes, and
    114    the update of the instruction pointer.)
    115 
    116    The "IMark" is an IR statement that doesn't represent actual code.
    117    Instead it indicates the address and length of the original
    118    instruction.  The numbers 0 and 12 are offsets into the guest state
    119    for %eax and %ebx.  The full list of offsets for an architecture
    120    <ARCH> can be found in the type VexGuest<ARCH>State in the file
    121    VEX/pub/libvex_guest_<ARCH>.h.
    122 
    123    The five statements in this example are:
    124    - the IMark
    125    - three assignments to temporaries
    126    - one register write (put)
    127 
    128    The six expressions in this example are:
    129    - two register reads (gets)
    130    - one arithmetic (add) operation
    131    - three temporaries (two nested within the Add32, one in the PUT)
    132 
    133    The above IR is "flattened", ie. all sub-expressions are "atoms",
    134    either constants or temporaries.  An equivalent, unflattened version
    135    would be:
    136 
    137      PUT(0) = Add32(GET:I32(0), GET:I32(12))
    138 
    139    IR is guaranteed to be flattened at instrumentation-time.  This makes
    140    instrumentation easier.  Equivalent flattened and unflattened IR
    141    typically results in the same generated code.
    142 
    143    Another example, this one showing loads and stores:
    144 
    145      addl %edx,4(%eax)
    146 
    147    This becomes (again ignoring condition code and instruction pointer
    148    updates):
    149 
    150      ------ IMark(0x4000ABA, 3, 0) ------
    151      t3 = Add32(GET:I32(0),0x4:I32)
    152      t2 = LDle:I32(t3)
    153      t1 = GET:I32(8)
    154      t0 = Add32(t2,t1)
    155      STle(t3) = t0
    156 
    157    The "le" in "LDle" and "STle" is short for "little-endian".
    158 
    159    No need for deallocations
    160    ~~~~~~~~~~~~~~~~~~~~~~~~~
    161    Although there are allocation functions for various data structures
    162    in this file, there are no deallocation functions.  This is because
    163    Vex uses a memory allocation scheme that automatically reclaims the
    164    memory used by allocated structures once translation is completed.
     165    This makes things easier for tools that instrument/transform code
    166    blocks.
    167 
    168    SSAness and typing
    169    ~~~~~~~~~~~~~~~~~~
    170    The IR is fully typed.  For every IRSB (IR block) it is possible to
    171    say unambiguously whether or not it is correctly typed.
     172    Incorrectly typed IR has no meaning and VEX will refuse to
    173    process it.  At various points during processing VEX typechecks the
    174    IR and aborts if any violations are found.  This seems overkill but
    175    makes it a great deal easier to build a reliable JIT.
    176 
    177    IR also has the SSA property.  SSA stands for Static Single
    178    Assignment, and what it means is that each IR temporary may be
    179    assigned to only once.  This idea became widely used in compiler
    180    construction in the mid to late 90s.  It makes many IR-level
    181    transformations/code improvements easier, simpler and faster.
    182    Whenever it typechecks an IR block, VEX also checks the SSA
    183    property holds, and will abort if not so.  So SSAness is
    184    mechanically and rigidly enforced.
    185 */
    186 
    187 /*---------------------------------------------------------------*/
    188 /*--- Type definitions for the IR                             ---*/
    189 /*---------------------------------------------------------------*/
    190 
    191 /* General comments about naming schemes:
    192 
    193    All publically visible functions contain the name of the primary
    194    type on which they operate (IRFoo, IRBar, etc).  Hence you should
    195    be able to identify these functions by grepping for "IR[A-Z]".
    196 
    197    For some type 'IRFoo':
    198 
    199    - ppIRFoo is the printing method for IRFoo, printing it to the
    200      output channel specified in the LibVEX_Initialise call.
    201 
    202    - eqIRFoo is a structural equality predicate for IRFoos.
    203 
    204    - deepCopyIRFoo is a deep copy constructor for IRFoos.
    205      It recursively traverses the entire argument tree and
    206      produces a complete new tree.  All types have a deep copy
    207      constructor.
    208 
    209    - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
    210      It creates a new top-level copy of the supplied object,
    211      but does not copy any sub-objects.  Only some types have a
    212      shallow copy constructor.
    213 */
    214 
    215 /* ------------------ Types ------------------ */
    216 
     217 /* A type indicates the size of a value, and whether it's an integer, a
     218    float, or a vector (SIMD) value. */
     219 typedef
     220    enum {
      /* NOTE(review): each IR enum in this file starts at a distinct base
         constant (IRType 0x11000, IREndness 0x12000, IRConstTag 0x13000,
         IROp 0x14000) -- presumably so that a value of one enum mistakenly
         used as another is easy to spot when debugging. */
     221       Ity_INVALID=0x11000,  /* no/unknown type marker */
     222       Ity_I1,    /* 1-bit integer */
     223       Ity_I8,    /* 8-bit integer */
     224       Ity_I16,   /* 16-bit integer */
     225       Ity_I32,   /* 32-bit integer */
     226       Ity_I64,   /* 64-bit integer */
     227       Ity_I128,  /* 128-bit scalar */
     228       Ity_F32,   /* IEEE 754 float */
     229       Ity_F64,   /* IEEE 754 double */
     230       Ity_F128,  /* 128-bit floating point; implementation defined */
     231       Ity_V128   /* 128-bit SIMD */
     232    }
     233    IRType;
    234 
     235 /* Pretty-print an IRType, to the output channel specified in the
        LibVEX_Initialise call (the ppIRFoo convention described above). */
     236 extern void ppIRType ( IRType );
     237 
     238 /* Get the size (in bytes) of an IRType */
     239 extern Int sizeofIRType ( IRType );
    240 
    241 
    242 /* ------------------ Endianness ------------------ */
    243 
     244 /* IREndness is used in load IRExprs and store IRStmts, to say whether
        the memory access is little-endian (the "le" in "LDle"/"STle" in the
        examples above) or big-endian. */
     245 typedef
     246    enum {
     247       Iend_LE=0x12000, /* little endian */
     248       Iend_BE          /* big endian */
     249    }
     250    IREndness;
    251 
    252 
    253 /* ------------------ Constants ------------------ */
    254 
    255 /* IRConsts are used within 'Const' and 'Exit' IRExprs. */
    256 
     257 /* The various kinds of constant. */
     258 typedef
     259    enum {
     260       Ico_U1=0x13000,  /* 1-bit constant */
     261       Ico_U8,    /* 8-bit unsigned constant */
     262       Ico_U16,   /* 16-bit unsigned constant */
     263       Ico_U32,   /* 32-bit unsigned constant */
     264       Ico_U64,   /* 64-bit unsigned constant */
     265       Ico_F32,   /* 32-bit IEEE754 floating */
     266       Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
     267                     as an IEEE754 single value. */
     268       Ico_F64,   /* 64-bit IEEE754 floating */
     269       Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
     270                     as an IEEE754 double value. */
     271       Ico_V128   /* 128-bit restricted vector constant, with 1 bit
     272                     (repeated 8 times) for each of the 16 x 1-byte lanes */
     273    }
     274    IRConstTag;
    275 
     276 /* A constant.  Stored as a tagged union.  'tag' indicates what kind of
     277    constant this is.  'Ico' is the union that holds the fields.  If an
     278    IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
     279    and its value can be accessed with 'c.Ico.U32'. */
     280 typedef
     281    struct _IRConst {
     282       IRConstTag tag;     /* selects which union arm below is valid */
     283       union {
     284          Bool   U1;
     285          UChar  U8;
     286          UShort U16;
     287          UInt   U32;
     288          ULong  U64;
     289          Float  F32;
     290          UInt   F32i;     /* bit pattern of an IEEE754 single (Ico_F32i) */
     291          Double F64;
     292          ULong  F64i;     /* bit pattern of an IEEE754 double (Ico_F64i) */
     293          UShort V128;   /* 16-bit value; see Ico_V128 comment above */
     294       } Ico;
     295    }
     296    IRConst;
    297 
     298 /* IRConst constructors.  One per IRConstTag; results are allocated by
        VEX's automatic allocation scheme, so there are no matching
        deallocators (see "No need for deallocations" above). */
     299 extern IRConst* IRConst_U1   ( Bool );
     300 extern IRConst* IRConst_U8   ( UChar );
     301 extern IRConst* IRConst_U16  ( UShort );
     302 extern IRConst* IRConst_U32  ( UInt );
     303 extern IRConst* IRConst_U64  ( ULong );
     304 extern IRConst* IRConst_F32  ( Float );
     305 extern IRConst* IRConst_F32i ( UInt );
     306 extern IRConst* IRConst_F64  ( Double );
     307 extern IRConst* IRConst_F64i ( ULong );
     308 extern IRConst* IRConst_V128 ( UShort );
     309 
     310 /* Deep-copy an IRConst */
     311 extern IRConst* deepCopyIRConst ( IRConst* );
     312 
     313 /* Pretty-print an IRConst */
     314 extern void ppIRConst ( IRConst* );
     315 
     316 /* Compare two IRConsts for (structural) equality */
     317 extern Bool eqIRConst ( IRConst*, IRConst* );
    318 
    319 
    320 /* ------------------ Call targets ------------------ */
    321 
    322 /* Describes a helper function to call.  The name part is purely for
    323    pretty printing and not actually used.  regparms=n tells the back
    324    end that the callee has been declared
    325    "__attribute__((regparm(n)))", although indirectly using the
    326    VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
    327    to construct a non-standard sequence to call a function declared
    328    like this.
    329 
    330    mcx_mask is a sop to Memcheck.  It indicates which args should be
    331    considered 'always defined' when lazily computing definedness of
    332    the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
    333    args[1], etc.  If a bit is set, the corresponding arg is excluded
    334    (hence "x" in "mcx") from definedness checking.
    335 */
    336 
     337 typedef
     338    struct {
     339       Int    regparms;  /* the 'n' of __attribute__((regparm(n))) on
                                   the callee -- see comment above */
     340       HChar* name;      /* helper's name; for pretty printing only */
     341       void*  addr;      /* address of the helper function to call */
     342       UInt   mcx_mask;  /* Memcheck only: bit i set means args[i] is
                                   excluded from definedness checking (see
                                   comment above) */
     343    }
     344    IRCallee;
    345 
     346 /* Create an IRCallee.  Note it takes no mcx_mask argument; presumably
        the field is given a default value and callers set it afterwards if
        needed -- TODO(review): confirm against the implementation. */
     347 extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );
     348 
     349 /* Deep-copy an IRCallee. */
     350 extern IRCallee* deepCopyIRCallee ( IRCallee* );
     351 
     352 /* Pretty-print an IRCallee. */
     353 extern void ppIRCallee ( IRCallee* );
    354 
    355 
    356 /* ------------------ Guest state arrays ------------------ */
    357 
     358 /* This describes a section of the guest state that we want to
     359    be able to index at run time, so as to be able to describe
     360    indexed or rotating register files on the guest.  Used by the
        "GetI"/"PutI" primitives described above. */
     361 typedef
     362    struct {
     363       Int    base;   /* guest state offset of start of indexed area */
     364       IRType elemTy; /* type of each element in the indexed area */
     365       Int    nElems; /* number of elements in the indexed area */
     366    }
     367    IRRegArray;
    368 
/* Create an IRRegArray (arguments are: base, elemTy, nElems). */
     369 extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
     370 
/* Deep-copy an IRRegArray. */
     371 extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
     372 
/* Pretty-print an IRRegArray / compare two for structural equality. */
     373 extern void ppIRRegArray ( IRRegArray* );
     374 extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
    375 
    376 
    377 /* ------------------ Temporaries ------------------ */
    378 
     379 /* This represents a temporary, eg. t1.  The IR optimiser relies on the
     380    fact that IRTemps are 32-bit ints.  Do not change them to be ints of
     381    any other size. */
     382 typedef UInt IRTemp;
     383 
     384 /* Pretty-print an IRTemp. */
     385 extern void ppIRTemp ( IRTemp );
     386 
/* Distinguished value meaning "no temporary". */
     387 #define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
    388 
    389 
    390 /* --------------- Primops (arity 1,2,3 and 4) --------------- */
    391 
    392 /* Primitive operations that are used in Unop, Binop, Triop and Qop
    393    IRExprs.  Once we take into account integer, floating point and SIMD
    394    operations of all the different sizes, there are quite a lot of them.
    395    Most instructions supported by the architectures that Vex supports
    396    (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
    397    are not;  they are instead handled with dirty helpers that emulate
    398    their functionality.  Such obscure ones are thus not directly visible
    399    in the IR, but their effects on guest state (memory and registers)
    400    are made visible via the annotations in IRDirty structures.
    401 */
    402 typedef
    403    enum {
    404       /* -- Do not change this ordering.  The IR generators rely on
     405             (eg) Iop_Add64 == Iop_Add8 + 3. -- */
    406 
    407       Iop_INVALID=0x14000,
    408       Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
    409       Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
    410       /* Signless mul.  MullS/MullU is elsewhere. */
    411       Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
    412       Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
    413       Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
    414       Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
    415       Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
    416       Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
    417       Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
    418       /* Integer comparisons. */
    419       Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
    420       Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
    421       /* Tags for unary ops */
    422       Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
    423 
    424       /* Exactly like CmpEQ8/16/32/64, but carrying the additional
    425          hint that these compute the success/failure of a CAS
    426          operation, and hence are almost certainly applied to two
    427          copies of the same value, which in turn has implications for
    428          Memcheck's instrumentation. */
    429       Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
    430       Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
    431 
    432       /* -- Ordering not important after here. -- */
    433 
    434       /* Widening multiplies */
    435       Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
    436       Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
    437 
     438       /* Weirdo integer stuff */
    439       Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
    440       Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
    441       /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
    442          zero.  You must ensure they are never given a zero argument.
    443       */
    444 
    445       /* Standard integer comparisons */
    446       Iop_CmpLT32S, Iop_CmpLT64S,
    447       Iop_CmpLE32S, Iop_CmpLE64S,
    448       Iop_CmpLT32U, Iop_CmpLT64U,
    449       Iop_CmpLE32U, Iop_CmpLE64U,
    450 
    451       /* As a sop to Valgrind-Memcheck, the following are useful. */
    452       Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
     453       Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
    454       Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
    455       Iop_Max32U, /* unsigned max */
    456 
    457       /* PowerPC-style 3-way integer comparisons.  Without them it is
    458          difficult to simulate PPC efficiently.
    459          op(x,y) | x < y  = 0x8 else
    460                  | x > y  = 0x4 else
    461                  | x == y = 0x2
    462       */
    463       Iop_CmpORD32U, Iop_CmpORD64U,
    464       Iop_CmpORD32S, Iop_CmpORD64S,
    465 
    466       /* Division */
    467       /* TODO: clarify semantics wrt rounding, negative values, whatever */
    468       Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
    469       Iop_DivS32,   // ditto, signed
    470       Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
    471       Iop_DivS64,   // ditto, signed
    472       Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
    473       Iop_DivS64E,  // ditto, signed
    474       Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
    475       Iop_DivS32E,  // ditto, signed
    476 
    477       Iop_DivModU64to32, // :: I64,I32 -> I64
    478                          // of which lo half is div and hi half is mod
    479       Iop_DivModS64to32, // ditto, signed
    480 
    481       Iop_DivModU128to64, // :: V128,I64 -> V128
    482                           // of which lo half is div and hi half is mod
    483       Iop_DivModS128to64, // ditto, signed
    484 
    485       Iop_DivModS64to64, // :: I64,I64 -> I128
    486                          // of which lo half is div and hi half is mod
    487 
    488       /* Integer conversions.  Some of these are redundant (eg
    489          Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
    490          having a complete set reduces the typical dynamic size of IR
    491          and makes the instruction selectors easier to write. */
    492 
    493       /* Widening conversions */
    494       Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
    495                   Iop_16Uto32, Iop_16Uto64,
    496                                Iop_32Uto64,
    497       Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
    498                   Iop_16Sto32, Iop_16Sto64,
    499                                Iop_32Sto64,
    500 
    501       /* Narrowing conversions */
    502       Iop_64to8, Iop_32to8, Iop_64to16,
    503       /* 8 <-> 16 bit conversions */
    504       Iop_16to8,      // :: I16 -> I8, low half
    505       Iop_16HIto8,    // :: I16 -> I8, high half
    506       Iop_8HLto16,    // :: (I8,I8) -> I16
    507       /* 16 <-> 32 bit conversions */
    508       Iop_32to16,     // :: I32 -> I16, low half
    509       Iop_32HIto16,   // :: I32 -> I16, high half
    510       Iop_16HLto32,   // :: (I16,I16) -> I32
    511       /* 32 <-> 64 bit conversions */
    512       Iop_64to32,     // :: I64 -> I32, low half
    513       Iop_64HIto32,   // :: I64 -> I32, high half
    514       Iop_32HLto64,   // :: (I32,I32) -> I64
    515       /* 64 <-> 128 bit conversions */
    516       Iop_128to64,    // :: I128 -> I64, low half
    517       Iop_128HIto64,  // :: I128 -> I64, high half
    518       Iop_64HLto128,  // :: (I64,I64) -> I128
    519       /* 1-bit stuff */
    520       Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
    521       Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
    522       Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
    523       Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
    524       Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
    525       Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
    526       Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
    527       Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
    528       Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
    529       Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
    530 
    531       /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
    532 
    533       /* --- Simple stuff as mandated by 754. --- */
    534 
    535       /* Binary operations, with rounding. */
    536       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    537       Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
    538 
    539       /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
    540       Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
    541 
    542       /* Variants of the above which produce a 64-bit result but which
    543          round their result to a IEEE float range first. */
    544       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    545       Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
    546 
    547       /* Unary operations, without rounding. */
    548       /* :: F64 -> F64 */
    549       Iop_NegF64, Iop_AbsF64,
    550 
    551       /* :: F32 -> F32 */
    552       Iop_NegF32, Iop_AbsF32,
    553 
    554       /* Unary operations, with rounding. */
    555       /* :: IRRoundingMode(I32) x F64 -> F64 */
    556       Iop_SqrtF64, Iop_SqrtF64r32,
    557 
    558       /* :: IRRoundingMode(I32) x F32 -> F32 */
    559       Iop_SqrtF32,
    560 
    561       /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
    562             0x45 Unordered
    563             0x01 LT
    564             0x00 GT
    565             0x40 EQ
    566          This just happens to be the Intel encoding.  The values
    567          are recorded in the type IRCmpF64Result.
    568       */
    569       /* :: F64 x F64 -> IRCmpF64Result(I32) */
    570       Iop_CmpF64,
    571       Iop_CmpF32,
    572       Iop_CmpF128,
    573 
    574       /* --- Int to/from FP conversions. --- */
    575 
    576       /* For the most part, these take a first argument :: Ity_I32 (as
    577          IRRoundingMode) which is an indication of the rounding mode
    578          to use, as per the following encoding ("the standard
    579          encoding"):
    580             00b  to nearest (the default)
    581             01b  to -infinity
    582             10b  to +infinity
    583             11b  to zero
    584          This just happens to be the Intel encoding.  For reference only,
    585          the PPC encoding is:
    586             00b  to nearest (the default)
    587             01b  to zero
    588             10b  to +infinity
    589             11b  to -infinity
    590          Any PPC -> IR front end will have to translate these PPC
    591          encodings, as encoded in the guest state, to the standard
    592          encodings, to pass to the primops.
    593          For reference only, the ARM VFP encoding is:
    594             00b  to nearest
    595             01b  to +infinity
    596             10b  to -infinity
    597             11b  to zero
    598          Again, this will have to be converted to the standard encoding
    599          to pass to primops.
    600 
    601          If one of these conversions gets an out-of-range condition,
    602          or a NaN, as an argument, the result is host-defined.  On x86
    603          the "integer indefinite" value 0x80..00 is produced.  On PPC
    604          it is either 0x80..00 or 0x7F..FF depending on the sign of
    605          the argument.
    606 
    607          On ARMvfp, when converting to a signed integer result, the
    608          overflow result is 0x80..00 for negative args and 0x7F..FF
    609          for positive args.  For unsigned integer results it is
    610          0x00..00 and 0xFF..FF respectively.
    611 
    612          Rounding is required whenever the destination type cannot
    613          represent exactly all values of the source type.
    614       */
    615       Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
    616       Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
    617       Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
    618       Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
    619 
    620       Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
    621 
    622       Iop_I16StoF64, /*                       signed I16 -> F64 */
    623       Iop_I32StoF64, /*                       signed I32 -> F64 */
    624       Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
    625       Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
    626       Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
    627 
    628       Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
    629 
    630       Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
    631       Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
    632       Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
    633 
    634       Iop_I16StoF32, /*                       signed I16 -> F32 */
    635       Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
    636       Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
    637 
    638       /* Conversion between floating point formats */
    639       Iop_F32toF64,  /*                       F32 -> F64 */
    640       Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
    641 
    642       /* Reinterpretation.  Take an F64 and produce an I64 with
    643          the same bit pattern, or vice versa. */
    644       Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
    645       Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
    646 
    647       /* Support for 128-bit floating point */
    648       Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
    649       Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
    650       Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
    651 
    652       /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
    653       Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
    654 
    655       /* :: F128 -> F128 */
    656       Iop_NegF128, Iop_AbsF128,
    657 
    658       /* :: IRRoundingMode(I32) x F128 -> F128 */
    659       Iop_SqrtF128,
    660 
    661       Iop_I32StoF128, /*                signed I32  -> F128 */
    662       Iop_I64StoF128, /*                signed I64  -> F128 */
    663       Iop_F32toF128,  /*                       F32  -> F128 */
    664       Iop_F64toF128,  /*                       F64  -> F128 */
    665 
    666       Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
    667       Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
    668       Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
    669       Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
    670 
    671       /* --- guest x86/amd64 specifics, not mandated by 754. --- */
    672 
    673       /* Binary ops, with rounding. */
    674       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    675       Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
    676       Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
    677       Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
    678       Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
    679       Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
    680       Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
    681       Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
    682       Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
    683       /* Note that on x86 guest, PRem1{C3210} has the same behaviour
    684          as the IEEE mandated RemF64, except it is limited in the
    685          range of its operand.  Hence the partialness. */
    686 
    687       /* Unary ops, with rounding. */
    688       /* :: IRRoundingMode(I32) x F64 -> F64 */
    689       Iop_SinF64,    /* FSIN */
    690       Iop_CosF64,    /* FCOS */
    691       Iop_TanF64,    /* FTAN */
    692       Iop_2xm1F64,   /* (2^arg - 1.0) */
    693       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
    694                             as F64) */
    695       Iop_RoundF32toInt, /* F32 value to nearest integral value (still
    696                             as F32) */
    697 
    698       /* --- guest s390 specifics, not mandated by 754. --- */
    699 
    700       /* Fused multiply-add/sub */
    701       /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
    702             (computes op3 * op2 +/- op1) */
    703       Iop_MAddF32, Iop_MSubF32,
    704 
    705       /* --- guest ppc32/64 specifics, not mandated by 754. --- */
    706 
    707       /* Ternary operations, with rounding. */
    708       /* Fused multiply-add/sub, with 112-bit intermediate
    709          precision for ppc.
    710          Also used to implement fused multiply-add/sub for s390. */
    711       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
    712             (computes arg2 * arg3 +/- arg4) */
    713       Iop_MAddF64, Iop_MSubF64,
    714 
    715       /* Variants of the above which produce a 64-bit result but which
    716          round their result to a IEEE float range first. */
    717       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
    718       Iop_MAddF64r32, Iop_MSubF64r32,
    719 
    720       /* :: F64 -> F64 */
    721       Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
    722       Iop_RoundF64toF64_NEAREST, /* frin */
    723       Iop_RoundF64toF64_NegINF,  /* frim */
    724       Iop_RoundF64toF64_PosINF,  /* frip */
    725       Iop_RoundF64toF64_ZERO,    /* friz */
    726 
    727       /* :: F64 -> F32 */
    728       Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
    729 
    730       /* :: IRRoundingMode(I32) x F64 -> F64 */
    731       Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
    732       /* NB: pretty much the same as Iop_F64toF32, except no change
    733          of type. */
    734 
    735       /* :: F64 -> I32 */
    736       Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
    737                        from FP result */
    738 
    739       /* ------------------ 32-bit SIMD Integer ------------------ */
    740 
    741       /* 16x2 add/sub, also signed/unsigned saturating variants */
    742       Iop_Add16x2, Iop_Sub16x2,
    743       Iop_QAdd16Sx2, Iop_QAdd16Ux2,
    744       Iop_QSub16Sx2, Iop_QSub16Ux2,
    745 
    746       /* 16x2 signed/unsigned halving add/sub.  For each lane, these
    747          compute bits 16:1 of (eg) sx(argL) + sx(argR),
    748          or zx(argL) - zx(argR) etc. */
    749       Iop_HAdd16Ux2, Iop_HAdd16Sx2,
    750       Iop_HSub16Ux2, Iop_HSub16Sx2,
    751 
    752       /* 8x4 add/sub, also signed/unsigned saturating variants */
    753       Iop_Add8x4, Iop_Sub8x4,
    754       Iop_QAdd8Sx4, Iop_QAdd8Ux4,
    755       Iop_QSub8Sx4, Iop_QSub8Ux4,
    756 
    757       /* 8x4 signed/unsigned halving add/sub.  For each lane, these
    758          compute bits 8:1 of (eg) sx(argL) + sx(argR),
    759          or zx(argL) - zx(argR) etc. */
    760       Iop_HAdd8Ux4, Iop_HAdd8Sx4,
    761       Iop_HSub8Ux4, Iop_HSub8Sx4,
    762 
    763       /* 8x4 sum of absolute unsigned differences. */
    764       Iop_Sad8Ux4,
    765 
    766       /* MISC (vector integer cmp != 0) */
    767       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
    768 
    769       /* ------------------ 64-bit SIMD FP ------------------------ */
    770 
    771       /* Conversion to/from int */
    772       Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x4 -> F32x4 */
    773       Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x4 -> I32x4 */
    774       /* Fixed32 format is floating-point number with fixed number of fraction
    775          bits. The number of fraction bits is passed as a second argument of
    776          type I8. */
    777       Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
    778       Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
    779 
    780       /* Binary operations */
    781       Iop_Max32Fx2,      Iop_Min32Fx2,
    782       /* Pairwise Min and Max. See integer pairwise operations for more
    783          details. */
    784       Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
    785       /* Note: For the following compares, the arm front-end assumes a
    786          nan in a lane of either argument returns zero for that lane. */
    787       Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
    788 
    789       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
    790       element in the operand vector, and places the results in the destination
    791       vector.  */
    792       Iop_Recip32Fx2,
    793 
    794       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
    795          Note, that if one of the arguments is zero and another one is infinity
    796          of arbitrary sign the result of the operation is 2.0. */
    797       Iop_Recps32Fx2,
    798 
    799       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
    800          square root of each element in the operand vector. */
    801       Iop_Rsqrte32Fx2,
    802 
    803       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
    804          Note that if one of the arguments is zero and another one is infinity
    805          of arbitrary sign the result of the operation is 1.5. */
    806       Iop_Rsqrts32Fx2,
    807 
    808       /* Unary */
    809       Iop_Neg32Fx2, Iop_Abs32Fx2,
    810 
    811       /* ------------------ 64-bit SIMD Integer. ------------------ */
    812 
    813       /* MISC (vector integer cmp != 0) */
    814       Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
    815 
    816       /* ADDITION (normal / unsigned sat / signed sat) */
    817       Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
    818       Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
    819       Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
    820 
    821       /* PAIRWISE operations */
    822       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
    823             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
    824       Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
    825       Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
    826       Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
    827       Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
    828       Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
    829       /* The lengthening variant is unary.  The resulting vector contains
    830          half as many elements as the operand, but each is twice as wide.
    831          Example:
    832             Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
    833                where a+b and c+d are unsigned 32-bit values. */
    834       Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
    835       Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
    836 
    837       /* SUBTRACTION (normal / unsigned sat / signed sat) */
    838       Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
    839       Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
    840       Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
    841 
    842       /* ABSOLUTE VALUE */
    843       Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
    844 
    845       /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
    846       Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
    847       Iop_Mul32Fx2,
    848       Iop_MulHi16Ux4,
    849       Iop_MulHi16Sx4,
    850       /* Polynomial multiplication treats its arguments as coefficients of
    851          polynomials over {0, 1}. */
    852       Iop_PolynomialMul8x8,
    853 
    854       /* Vector Saturating Doubling Multiply Returning High Half and
    855          Vector Saturating Rounding Doubling Multiply Returning High Half */
    856       /* These IROp's multiply corresponding elements in two vectors, double
    857          the results, and place the most significant half of the final results
    858          in the destination vector. The results are truncated or rounded. If
    859          any of the results overflow, they are saturated. */
    860       Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
    861       Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
    862 
    863       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
    864       Iop_Avg8Ux8,
    865       Iop_Avg16Ux4,
    866 
    867       /* MIN/MAX */
    868       Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
    869       Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
    870       Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
    871       Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
    872 
    873       /* COMPARISON */
    874       Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
    875       Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
    876       Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
    877 
    878       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
    879          bit) */
    880       Iop_Cnt8x8,
    881       Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
    882       Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
    883 
    884       /* VECTOR x VECTOR SHIFT / ROTATE */
    885       Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
    886       Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
    887       Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
    888       Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
    889 
    890       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
    891       Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
    892       Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
    893       Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
    894 
    895       /* VECTOR x VECTOR SATURATING SHIFT */
    896       Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
    897       Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
    898       /* VECTOR x INTEGER SATURATING SHIFT */
    899       Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
    900       Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
    901       Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
    902 
    903       /* NARROWING (binary)
    904          -- narrow 2xI64 into 1xI64, hi half from left arg */
    905       /* For saturated narrowing, I believe there are 4 variants of
    906          the basic arithmetic operation, depending on the signedness
    907          of argument and result.  Here are examples that exemplify
    908          what I mean:
    909 
    910          QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
    911                                       return x[7:0];
    912 
    913          QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
    914                                       if (x >s  127) x = 127;
    915                                       return x[7:0];
    916 
    917          QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
    918                                       return x[7:0];
    919 
    920          QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
    921                                       if (x >s 255) x = 255;
    922                                       return x[7:0];
    923       */
    924       Iop_QNarrowBin16Sto8Ux8,
    925       Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
    926       Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
    927 
    928       /* INTERLEAVING */
    929       /* Interleave lanes from low or high halves of
    930          operands.  Most-significant result lane is from the left
    931          arg. */
    932       Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
    933       Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
    934       /* Interleave odd/even lanes of operands.  Most-significant result lane
    935          is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
    936          identical to Interleave{HI,LO}32x2 and so are omitted.*/
    937       Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
    938       Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
    939 
    940 
    941       /* CONCATENATION -- build a new value by concatenating either
    942          the even or odd lanes of both operands.  Note that
    943          Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
    944          and so are omitted. */
    945       Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
    946       Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
    947 
    948       /* GET / SET elements of VECTOR
    949          GET is binop (I64, I8) -> I<elem_size>
    950          SET is triop (I64, I8, I<elem_size>) -> I64 */
    951       /* Note: the arm back-end handles only constant second argument */
    952       Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
    953       Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
    954 
    955       /* DUPLICATING -- copy value to all lanes */
    956       Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
    957 
    958       /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
    959          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
    960          result.
    961          It is a triop: (I64, I64, I8) -> I64 */
    962       /* Note: the arm back-end handles only constant third argument. */
    963       Iop_Extract64,
    964 
    965       /* REVERSE the order of elements in each Half-words, Words,
    966          Double-words */
    967       /* Examples:
    968             Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
    969             Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
    970             Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
    971       Iop_Reverse16_8x8,
    972       Iop_Reverse32_8x8, Iop_Reverse32_16x4,
    973       Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
    974 
    975       /* PERMUTING -- copy src bytes to dst,
    976          as indexed by control vector bytes:
    977             for i in 0 .. 7 . result[i] = argL[ argR[i] ]
    978          argR[i] values may only be in the range 0 .. 7, else behaviour
    979          is undefined. */
    980       Iop_Perm8x8,
    981 
    982       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
    983          See floating-point equivalents for details. */
    984       Iop_Recip32x2, Iop_Rsqrte32x2,
    985 
    986       /* ------------------ 128-bit SIMD FP. ------------------ */
    987 
    988       /* --- 32x4 vector FP --- */
    989 
    990       /* binary */
    991       Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
    992       Iop_Max32Fx4, Iop_Min32Fx4,
    993       Iop_Add32Fx2, Iop_Sub32Fx2,
    994       /* Note: For the following compares, the ppc and arm front-ends assume a
    995          nan in a lane of either argument returns zero for that lane. */
    996       Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
    997       Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
    998 
    999       /* Vector Absolute */
   1000       Iop_Abs32Fx4,
   1001 
   1002       /* Pairwise Max and Min. See integer pairwise operations for details. */
   1003       Iop_PwMax32Fx4, Iop_PwMin32Fx4,
   1004 
   1005       /* unary */
   1006       Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
   1007       Iop_Neg32Fx4,
   1008 
   1009       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
   1010       element in the operand vector, and places the results in the destination
   1011       vector.  */
   1012       Iop_Recip32Fx4,
   1013 
   1014       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
   1015          Note, that if one of the arguments is zero and another one is infinity
   1016          of arbitrary sign the result of the operation is 2.0. */
   1017       Iop_Recps32Fx4,
   1018 
   1019       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
   1020          square root of each element in the operand vector. */
   1021       Iop_Rsqrte32Fx4,
   1022 
   1023       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
   1024          Note that if one of the arguments is zero and another one is infinity
   1025          of arbitrary sign the result of the operation is 1.5. */
   1026       Iop_Rsqrts32Fx4,
   1027 
   1028 
   1029       /* --- Int to/from FP conversion --- */
   1030       /* Unlike the standard fp conversions, these irops take no
   1031          rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
   1032          indicate the mode: {-inf, +inf, nearest, zero} respectively. */
   1033       Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
   1034       Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
   1035       Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
   1036       Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
   1037       Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
   1038       /* Fixed32 format is floating-point number with fixed number of fraction
   1039          bits. The number of fraction bits is passed as a second argument of
   1040          type I8. */
   1041       Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
   1042       Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
   1043 
   1044       /* --- Single to/from half conversion --- */
   1045       /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
   1046       Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
   1047 
   1048       /* --- 32x4 lowest-lane-only scalar FP --- */
   1049 
   1050       /* In binary cases, upper 3/4 is copied from first operand.  In
   1051          unary cases, upper 3/4 is copied from the operand. */
   1052 
   1053       /* binary */
   1054       Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
   1055       Iop_Max32F0x4, Iop_Min32F0x4,
   1056       Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
   1057 
   1058       /* unary */
   1059       Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
   1060 
   1061       /* --- 64x2 vector FP --- */
   1062 
   1063       /* binary */
   1064       Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
   1065       Iop_Max64Fx2, Iop_Min64Fx2,
   1066       Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
   1067 
   1068       /* unary */
   1069       Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
   1070 
   1071       /* --- 64x2 lowest-lane-only scalar FP --- */
   1072 
   1073       /* In binary cases, upper half is copied from first operand.  In
   1074          unary cases, upper half is copied from the operand. */
   1075 
   1076       /* binary */
   1077       Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
   1078       Iop_Max64F0x2, Iop_Min64F0x2,
   1079       Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
   1080 
   1081       /* unary */
   1082       Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
   1083 
   1084       /* --- pack / unpack --- */
   1085 
   1086       /* 64 <-> 128 bit vector */
   1087       Iop_V128to64,     // :: V128 -> I64, low half
   1088       Iop_V128HIto64,   // :: V128 -> I64, high half
   1089       Iop_64HLtoV128,   // :: (I64,I64) -> V128
   1090 
   1091       Iop_64UtoV128,
   1092       Iop_SetV128lo64,
   1093 
   1094       /* 32 <-> 128 bit vector */
   1095       Iop_32UtoV128,
   1096       Iop_V128to32,     // :: V128 -> I32, lowest lane
   1097       Iop_SetV128lo32,  // :: (V128,I32) -> V128
   1098 
   1099       /* ------------------ 128-bit SIMD Integer. ------------------ */
   1100 
   1101       /* BITWISE OPS */
   1102       Iop_NotV128,
   1103       Iop_AndV128, Iop_OrV128, Iop_XorV128,
   1104 
   1105       /* VECTOR SHIFT (shift amt :: Ity_I8) */
   1106       Iop_ShlV128, Iop_ShrV128,
   1107 
   1108       /* MISC (vector integer cmp != 0) */
   1109       Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
   1110 
   1111       /* ADDITION (normal / unsigned sat / signed sat) */
   1112       Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
   1113       Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
   1114       Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
   1115 
   1116       /* SUBTRACTION (normal / unsigned sat / signed sat) */
   1117       Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
   1118       Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
   1119       Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
   1120 
   1121       /* MULTIPLICATION (normal / high half of signed/unsigned) */
   1122       Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
   1123                     Iop_MulHi16Ux8, Iop_MulHi32Ux4,
   1124                     Iop_MulHi16Sx8, Iop_MulHi32Sx4,
   1125       /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
   1126       Iop_MullEven8Ux16, Iop_MullEven16Ux8,
   1127       Iop_MullEven8Sx16, Iop_MullEven16Sx8,
   1128       /* FIXME: document these */
   1129       Iop_Mull8Ux8, Iop_Mull8Sx8,
   1130       Iop_Mull16Ux4, Iop_Mull16Sx4,
   1131       Iop_Mull32Ux2, Iop_Mull32Sx2,
   1132       /* Vector Saturating Doubling Multiply Returning High Half and
   1133          Vector Saturating Rounding Doubling Multiply Returning High Half */
   1134       /* These IROp's multiply corresponding elements in two vectors, double
   1135          the results, and place the most significant half of the final results
   1136          in the destination vector. The results are truncated or rounded. If
   1137          any of the results overflow, they are saturated. */
   1138       Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
   1139       Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
   1140       /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
   1141       Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
   1142       /* Polynomial multiplication treats its arguments as coefficients of
   1143          polynomials over {0, 1}. */
   1144       Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
   1145       Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
   1146 
   1147       /* PAIRWISE operations */
   1148       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
   1149             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
   1150       Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
   1151       Iop_PwAdd32Fx2,
   1152       /* The lengthening variant is unary.  The resulting vector contains
   1153          half as many elements as the operand, but each is twice as wide.
   1154          Example:
   1155             Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
   1156                where a+b and c+d are unsigned 32-bit values. */
   1157       Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
   1158       Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
   1159 
   1160       /* ABSOLUTE VALUE */
   1161       Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
   1162 
   1163       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
   1164       Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
   1165       Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
   1166 
   1167       /* MIN/MAX */
   1168       Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
   1169       Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
   1170       Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
   1171       Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
   1172 
   1173       /* COMPARISON */
   1174       Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
   1175       Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
   1176       Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
   1177 
   1178       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
   1179          bit) */
   1180       Iop_Cnt8x16,
   1181       Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
   1182       Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
   1183 
   1184       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
   1185       Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
   1186       Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
   1187       Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
   1188 
   1189       /* VECTOR x VECTOR SHIFT / ROTATE */
   1190       Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
   1191       Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
   1192       Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
   1193       Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
   1194       Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
   1195 
   1196       /* VECTOR x VECTOR SATURATING SHIFT */
   1197       Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
   1198       Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
   1199       /* VECTOR x INTEGER SATURATING SHIFT */
   1200       Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
   1201       Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
   1202       Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
   1203 
   1204       /* NARROWING (binary)
   1205          -- narrow 2xV128 into 1xV128, hi half from left arg */
   1206       /* See comments above w.r.t. U vs S issues in saturated narrowing. */
   1207       Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
   1208       Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
   1209       Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
   1210       Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
   1211 
   1212       /* NARROWING (unary) -- narrow V128 into I64 */
   1213       Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
   1214       /* Saturating narrowing from signed source to signed/unsigned destination */
   1215       Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
   1216       Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
   1217       /* Saturating narrowing from unsigned source to unsigned destination */
   1218       Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
   1219 
   1220       /* WIDENING -- sign or zero extend each element of the argument
   1221          vector to the twice original size.  The resulting vector consists of
   1222          the same number of elements but each element and the vector itself
   1223          are twice as wide.
   1224          All operations are I64->V128.
   1225          Example
   1226             Iop_Widen32Sto64x2( [a, b] ) = [c, d]
   1227                where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
   1228       Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
   1229       Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
   1230 
   1231       /* INTERLEAVING */
   1232       /* Interleave lanes from low or high halves of
   1233          operands.  Most-significant result lane is from the left
   1234          arg. */
   1235       Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
   1236       Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
   1237       Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
   1238       Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
   1239       /* Interleave odd/even lanes of operands.  Most-significant result lane
   1240          is from the left arg. */
   1241       Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
   1242       Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
   1243       Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
   1244 
   1245       /* CONCATENATION -- build a new value by concatenating either
   1246          the even or odd lanes of both operands. */
   1247       Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
   1248       Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
   1249 
   1250       /* GET elements of VECTOR
   1251          GET is binop (V128, I8) -> I<elem_size> */
   1252       /* Note: the arm back-end handles only constant second argument. */
   1253       Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
   1254 
   1255       /* DUPLICATING -- copy value to all lanes */
   1256       Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
   1257 
   1258       /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
   1259          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
   1260          result.
   1261          It is a triop: (V128, V128, I8) -> V128 */
   1262       /* Note: the ARM back end handles only constant arg3 in this operation. */
   1263       Iop_ExtractV128,
   1264 
   1265       /* REVERSE the order of elements in each Half-words, Words,
   1266          Double-words */
   1267       /* Examples:
   1268             Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
   1269             Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
   1270       Iop_Reverse16_8x16,
   1271       Iop_Reverse32_8x16, Iop_Reverse32_16x8,
   1272       Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
   1273 
   1274       /* PERMUTING -- copy src bytes to dst,
   1275          as indexed by control vector bytes:
   1276             for i in 0 .. 15 . result[i] = argL[ argR[i] ]
   1277          argR[i] values may only be in the range 0 .. 15, else behaviour
   1278          is undefined. */
   1279       Iop_Perm8x16,
   1280 
   1281       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
   1282          See floating-point equivalents for details. */
   1283       Iop_Recip32x4, Iop_Rsqrte32x4
   1284    }
   1285    IROp;
   1286 
   1287 /* Pretty-print an op. */
   1288 extern void ppIROp ( IROp );
   1289 
   1290 
   1291 /* Encoding of IEEE754-specified rounding modes.  This is the same as
   1292    the encoding used by Intel IA32 to indicate x87 rounding mode.
   1293    Note, various front and back ends rely on the actual numerical
   1294    values of these, so do not change them. */
   1295 typedef
   1296    enum {
   1297       Irrm_NEAREST = 0,  /* round to nearest */
   1298       Irrm_NegINF  = 1,  /* round towards minus infinity */
   1299       Irrm_PosINF  = 2,  /* round towards plus infinity */
   1300       Irrm_ZERO    = 3   /* round towards zero (truncate) */
   1301    }
   1302    IRRoundingMode;
   1303 
   1304 /* Floating point comparison result values, as created by Iop_CmpF64.
   1305    This is also derived from what IA32 does. */
   1306 typedef
   1307    enum {
        /* NOTE(review): the values look like the x86 EFLAGS bits
           {ZF=0x40, PF=0x04, CF=0x01} produced by FCOMI-style compares,
           consistent with the "derived from what IA32 does" remark above
           -- confirm against the x86 front end before relying on this. */
   1308       Ircr_UN = 0x45,   /* unordered: at least one operand is a NaN */
   1309       Ircr_LT = 0x01,   /* arg1 <  arg2 */
   1310       Ircr_GT = 0x00,   /* arg1 >  arg2 */
   1311       Ircr_EQ = 0x40    /* arg1 == arg2 */
   1312    }
   1313    IRCmpF64Result;
   1314 
        /* F32 and F128 comparisons produce results with the same
           encoding as F64 comparisons. */
   1315 typedef IRCmpF64Result IRCmpF32Result;
   1316 typedef IRCmpF64Result IRCmpF128Result;
   1317 
/* ------------------ Expressions ------------------ */

/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum {
      Iex_Binder=0x15000, /* Pattern-matching binder; Vex-internal only */
      Iex_Get,            /* Read guest register at a fixed offset */
      Iex_GetI,           /* Read guest register at a non-fixed offset */
      Iex_RdTmp,          /* Read an IR temporary */
      Iex_Qop,            /* Quaternary (4-operand) operation */
      Iex_Triop,          /* Ternary (3-operand) operation */
      Iex_Binop,          /* Binary (2-operand) operation */
      Iex_Unop,           /* Unary (1-operand) operation */
      Iex_Load,           /* Normal (non-load-linked) load from memory */
      Iex_Const,          /* Constant-valued expression */
      Iex_Mux0X,          /* Strict if-then-else */
      Iex_CCall           /* Call to a pure helper C function */
   }
   IRExprTag;
   1338 
/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.  If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;       /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
         IRExpr* arg4;     /* operand 4 */
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp    op;       /* op-code */
         IRExpr* arg;      /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;    /* Endian-ness of the load */
         IRType    ty;     /* Type of the loaded value */
         IRExpr*   addr;   /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;     /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      }  CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                         eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Result when cond is zero */
         IRExpr* exprX;    /* Result when cond is nonzero */
      } Mux0X;
   } Iex;
};
   1536 
/* Expression constructors.  Each builds a fresh IRExpr node carrying
   the correspondingly-named tag (see IRExprTag above). */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
                                        IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
                                        IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const  ( IRConst* con );
extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );

/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( IRExpr* );
   1558 
/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls.  Each takes the
   stated number of expressions and appends the terminating NULL. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);

/* IRExpr vector copiers:
   - shallowCopy: shallow-copy (ie. create a new vector that shares the
     elements with the original).
   - deepCopy: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr** );

/* Make a constant expression from the given host word taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls.  Builds a
   suitable IRCallee from regparms/name/addr and wraps it, with 'args',
   in an Iex_CCall of return type 'retty'. */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, HChar* name, void* addr,
                        IRExpr** args );
   1591 
   1592 
   1593 /* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
   1594  * Iex_Const). */
   1595 static inline Bool isIRAtom ( IRExpr* e ) {
   1596    return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
   1597 }
   1598 
/* Are these two IR atoms identical?  Causes an assertion
   failure if they are passed non-atoms (see isIRAtom). */
extern Bool eqIRAtom ( IRExpr*, IRExpr* );
   1602 
   1603 
   1604 /* ------------------ Jump kinds ------------------ */
   1605 
   1606 /* This describes hints which can be passed to the dispatcher at guest
   1607    control-flow transfer points.
   1608 
   1609    Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
   1610    guest_TISTART and guest_TILEN, which specify the start and length
   1611    of the region to be invalidated.  These are both the size of a
   1612    guest word.  It is the responsibility of the relevant toIR.c to
   1613    ensure that these are filled in with suitable values before issuing
   1614    a jump of kind Ijk_TInval.
   1615 
   1616    Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   1617    pseudo-register guest_EMWARN, which is 32-bits regardless of the
   1618    host or guest word size.  That register should be made to hold an
   1619    EmWarn_* value to indicate the reason for the exit.
   1620 
   1621    In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   1622    cannot continue) and so the jump destination can be anything.
   1623 
   1624    Re Ijk_Sys_ (syscall jumps): the guest state must have a
   1625    pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   1626    word.  Front ends should set this to be the IP at the most recently
   1627    executed kernel-entering (system call) instruction.  This makes it
   1628    very much easier (viz, actually possible at all) to back up the
   1629    guest to restart a syscall that has been interrupted by a signal.
   1630 */
typedef
   enum {
      Ijk_Boring=0x16000, /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_YieldNoRedir,   /* client is yielding to thread scheduler AND jump to
                             un-redirected guest addr */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* next instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_TInval,         /* Invalidate translations before continuing. */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter   /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;

/* Pretty-print an IRJumpKind. */
extern void ppIRJumpKind ( IRJumpKind );
   1662 
   1663 
   1664 /* ------------------ Dirty helper calls ------------------ */
   1665 
   1666 /* A dirty call is a flexible mechanism for calling (possibly
   1667    conditionally) a helper function or procedure.  The helper function
   1668    may read, write or modify client memory, and may read, write or
   1669    modify client state.  It can take arguments and optionally return a
   1670    value.  It may return different results and/or do different things
   1671    when called repeatedly with the same arguments, by means of storing
   1672    private state.
   1673 
   1674    If a value is returned, it is assigned to the nominated return
   1675    temporary.
   1676 
   1677    Dirty calls are statements rather than expressions for obvious
   1678    reasons.  If a dirty call is marked as writing guest state, any
   1679    values derived from the written parts of the guest state are
   1680    invalid.  Similarly, if the dirty call is stated as writing
   1681    memory, any loaded values are invalidated by it.
   1682 
   1683    In order that instrumentation is possible, the call must state, and
   1684    state correctly:
   1685 
   1686    * whether it reads, writes or modifies memory, and if so where
   1687      (only one chunk can be stated)
   1688 
   1689    * whether it reads, writes or modifies guest state, and if so which
   1690      pieces (several pieces may be stated, and currently their extents
   1691      must be known at translation-time).
   1692 
   1693    Normally, code is generated to pass just the args to the helper.
   1694    However, if .needsBBP is set, then an extra first argument is
   1695    passed, which is the baseblock pointer, so that the callee can
   1696    access the guest state.  It is invalid for .nFxState to be zero
   1697    but .needsBBP to be True, since .nFxState==0 is a claim that the
   1698    call does not access guest state.
   1699 
   1700    IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
   1701    arguments are evaluated REGARDLESS of the guard value.  It is
   1702    unspecified the relative order of arg evaluation and guard
   1703    evaluation.
   1704 */
   1705 
/* Maximum number of guest-state sections a dirty call can declare
   effects on (size of the fxState array in IRDirty below). */
#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
   1707 
   1708 /* Effects on resources (eg. registers, memory locations) */
   1709 typedef
   1710    enum {
   1711       Ifx_None = 0x17000,   /* no effect */
   1712       Ifx_Read,             /* reads the resource */
   1713       Ifx_Write,            /* writes the resource */
   1714       Ifx_Modify,           /* modifies the resource */
   1715    }
   1716    IREffect;
   1717 
   1718 /* Pretty-print an IREffect */
   1719 extern void ppIREffect ( IREffect );
   1720 
   1721 
typedef
   struct {
      /* What to call, and details of args/results */
      IRCallee* cee;    /* where to call */
      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
      IRExpr**  args;   /* arg list, ends in NULL */
      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect  mFx;    /* indicates memory effects, if any */
      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
      Int       mSize;  /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to VEX_N_FXSTATE allowed */
      Bool needsBBP; /* True => also pass guest state ptr to callee */
      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx;   /* read, write or modify?  Ifx_None is invalid. */
         Int      offset;  /* guest state offset of the affected section */
         Int      size;    /* size of the affected section */
      } fxState[VEX_N_FXSTATE];
   }
   IRDirty;

/* Pretty-print a dirty call */
extern void     ppIRDirty ( IRDirty* );

/* Allocate an uninitialised dirty call */
extern IRDirty* emptyIRDirty ( void );

/* Deep-copy a dirty call */
extern IRDirty* deepCopyIRDirty ( IRDirty* );
   1754 
/* A handy function which takes some of the tedium out of constructing
   dirty helper calls.  The called function impliedly does not return
   any value and has a constant-True guard.  The call is marked as
   accessing neither guest state nor memory (hence the "unsafe"
   designation) -- you can change this marking later if need be.  A
   suitable IRCallee is constructed from the supplied bits. */
extern
IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
                             IRExpr** args );

/* Similarly, make a zero-annotation dirty call which returns a value,
   and assign that to the given temp 'dst'. */
extern
IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
                             Int regparms, HChar* name, void* addr,
                             IRExpr** args );
   1771 
   1772 
/* --------------- Memory Bus Events --------------- */

typedef
   enum {
      Imbe_Fence=0x18000,     /* memory fence */
      /* Needed only on ARM.  It cancels a reservation made by a
         preceding Linked-Load, and needs to be handed through to the
         back end, just as LL and SC themselves are. */
      Imbe_CancelReservation
   }
   IRMBusEvent;

/* Pretty-print an IRMBusEvent. */
extern void ppIRMBusEvent ( IRMBusEvent );
   1786 
   1787 
   1788 /* --------------- Compare and Swap --------------- */
   1789 
   1790 /* This denotes an atomic compare and swap operation, either
   1791    a single-element one or a double-element one.
   1792 
   1793    In the single-element case:
   1794 
   1795      .addr is the memory address.
   1796      .end  is the endianness with which memory is accessed
   1797 
   1798      If .addr contains the same value as .expdLo, then .dataLo is
   1799      written there, else there is no write.  In both cases, the
   1800      original value at .addr is copied into .oldLo.
   1801 
   1802      Types: .expdLo, .dataLo and .oldLo must all have the same type.
   1803      It may be any integral type, viz: I8, I16, I32 or, for 64-bit
   1804      guests, I64.
   1805 
   1806      .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
   1807      be NULL.
   1808 
   1809    In the double-element case:
   1810 
   1811      .addr is the memory address.
   1812      .end  is the endianness with which memory is accessed
   1813 
   1814      The operation is the same:
   1815 
   1816      If .addr contains the same value as .expdHi:.expdLo, then
   1817      .dataHi:.dataLo is written there, else there is no write.  In
   1818      both cases the original value at .addr is copied into
   1819      .oldHi:.oldLo.
   1820 
   1821      Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
   1822      all have the same type, which may be any integral type, viz: I8,
   1823      I16, I32 or, for 64-bit guests, I64.
   1824 
   1825      The double-element case is complicated by the issue of
   1826      endianness.  In all cases, the two elements are understood to be
   1827      located adjacently in memory, starting at the address .addr.
   1828 
   1829        If .end is Iend_LE, then the .xxxLo component is at the lower
   1830        address and the .xxxHi component is at the higher address, and
   1831        each component is itself stored little-endianly.
   1832 
   1833        If .end is Iend_BE, then the .xxxHi component is at the lower
   1834        address and the .xxxLo component is at the higher address, and
   1835        each component is itself stored big-endianly.
   1836 
   1837    This allows representing more cases than most architectures can
   1838    handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
   1839 
   1840    How to know if the CAS succeeded?
   1841 
   1842    * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
   1843      then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
   1844      stored at .addr, and the original value there was .oldLo (resp
   1845      .oldHi:.oldLo).
   1846 
   1847    * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
   1848      then the CAS failed, and the original value at .addr was .oldLo
   1849      (resp. .oldHi:.oldLo).
   1850 
   1851    Hence it is easy to know whether or not the CAS succeeded.
   1852 */
typedef
   struct {
      IRTemp    oldHi;  /* old value of *addr is written here; oldHi is
                           IRTemp_INVALID for a single-element CAS */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr; Hi is NULL
                           for a single-element CAS */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr; Hi is NULL for a
                           single-element CAS */
      IRExpr*   dataLo;
   }
   IRCAS;

/* Pretty-print an IRCAS. */
extern void ppIRCAS ( IRCAS* cas );

/* Construct an IRCAS from the given fields (see semantics above). */
extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
                        IREndness end, IRExpr* addr,
                        IRExpr* expdHi, IRExpr* expdLo,
                        IRExpr* dataHi, IRExpr* dataLo );

/* Deep-copy an IRCAS. */
extern IRCAS* deepCopyIRCAS ( IRCAS* );
   1874 
   1875 /* ------------------ Statements ------------------ */
   1876 
   1877 /* The different kinds of statements.  Their meaning is explained
   1878    below in the comments for IRStmt.
   1879 
   1880    Those marked META do not represent code, but rather extra
   1881    information about the code.  These statements can be removed
   1882    without affecting the functional behaviour of the code, however
   1883    they are required by some IR consumers such as tools that
   1884    instrument the code.
   1885 */
   1886 
typedef
   enum {
      Ist_NoOp=0x19000, /* A no-op; can be removed without effect */
      Ist_IMark,     /* META: marks the start of one guest instruction */
      Ist_AbiHint,   /* META: platform ABI hint */
      Ist_Put,       /* Write guest register at a fixed offset */
      Ist_PutI,      /* Write guest register at a non-fixed offset */
      Ist_WrTmp,     /* Assign a value to a temporary */
      Ist_Store,     /* Normal store to memory */
      Ist_CAS,       /* Atomic compare-and-swap (see IRCAS) */
      Ist_LLSC,      /* Load-Linked or Store-Conditional */
      Ist_Dirty,     /* Dirty helper call */
      Ist_MBE,       /* META (maybe): memory bus event */
      Ist_Exit       /* Exit from the IR block */
   }
   IRStmtTag;
   1903 
/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of statement this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   statement, and the fields can be accessed with
   'st.Ist.Store.<fieldname>'.
   1909 
   1910    For each kind of statement, we show what it looks like when
   1911    pretty-printed with ppIRStmt().
   1912 */
   1913 typedef
   1914    struct _IRStmt {
   1915       IRStmtTag tag;
   1916       union {
   1917          /* A no-op (usually resulting from IR optimisation).  Can be
   1918             omitted without any effect.
   1919 
   1920             ppIRStmt output: IR-NoOp
   1921          */
   1922          struct {
   1923 	 } NoOp;
   1924 
   1925          /* META: instruction mark.  Marks the start of the statements
   1926             that represent a single machine instruction (the end of
   1927             those statements is marked by the next IMark or the end of
   1928             the IRSB).  Contains the address and length of the
   1929             instruction.
   1930 
   1931             It also contains a delta value.  The delta must be
   1932             subtracted from a guest program counter value before
   1933             attempting to establish, by comparison with the address
   1934             and length values, whether or not that program counter
   1935             value refers to this instruction.  For x86, amd64, ppc32,
   1936             ppc64 and arm, the delta value is zero.  For Thumb
   1937             instructions, the delta value is one.  This is because, on
   1938             Thumb, guest PC values (guest_R15T) are encoded using the
   1939             top 31 bits of the instruction address and a 1 in the lsb;
   1940             hence they appear to be (numerically) 1 past the start of
   1941             the instruction they refer to.  IOW, guest_R15T on ARM
   1942             holds a standard ARM interworking address.
   1943 
   1944             ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
   1945                          eg. ------ IMark(0x4000792, 5, 0) ------,
   1946          */
   1947          struct {
   1948             Addr64 addr;   /* instruction address */
   1949             Int    len;    /* instruction length */
   1950             UChar  delta;  /* addr = program counter as encoded in guest state
   1951                                      - delta */
   1952          } IMark;
   1953 
   1954          /* META: An ABI hint, which says something about this
   1955             platform's ABI.
   1956 
   1957             At the moment, the only AbiHint is one which indicates
   1958             that a given chunk of address space, [base .. base+len-1],
   1959             has become undefined.  This is used on amd64-linux and
   1960             some ppc variants to pass stack-redzoning hints to whoever
   1961             wants to see them.  It also indicates the address of the
   1962             next (dynamic) instruction that will be executed.  This is
            to help Memcheck with origin tracking.
   1964 
   1965             ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
   1966                          eg. ====== AbiHint(t1, 16, t2) ======
   1967          */
   1968          struct {
   1969             IRExpr* base;     /* Start  of undefined chunk */
   1970             Int     len;      /* Length of undefined chunk */
   1971             IRExpr* nia;      /* Address of next (guest) insn */
   1972          } AbiHint;
   1973 
   1974          /* Write a guest register, at a fixed offset in the guest state.
   1975             ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
   1976          */
   1977          struct {
   1978             Int     offset;   /* Offset into the guest state */
   1979             IRExpr* data;     /* The value to write */
   1980          } Put;
   1981 
   1982          /* Write a guest register, at a non-fixed offset in the guest
   1983             state.  See the comment for GetI expressions for more
   1984             information.
   1985 
   1986             ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
   1987                          eg. PUTI(64:8xF64)[t5,0] = t1
   1988          */
   1989          struct {
   1990             IRRegArray* descr; /* Part of guest state treated as circular */
   1991             IRExpr*     ix;    /* Variable part of index into array */
   1992             Int         bias;  /* Constant offset part of index into array */
   1993             IRExpr*     data;  /* The value to write */
   1994          } PutI;
   1995 
   1996          /* Assign a value to a temporary.  Note that SSA rules require
   1997             each tmp is only assigned to once.  IR sanity checking will
   1998             reject any block containing a temporary which is not assigned
   1999             to exactly once.
   2000 
   2001             ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
   2002          */
   2003          struct {
   2004             IRTemp  tmp;   /* Temporary  (LHS of assignment) */
   2005             IRExpr* data;  /* Expression (RHS of assignment) */
   2006          } WrTmp;
   2007 
   2008          /* Write a value to memory.  This is a normal store, not a
   2009             Store-Conditional.  To represent a Store-Conditional,
   2010             instead use IRStmt.LLSC.
   2011             ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
   2012          */
   2013          struct {
   2014             IREndness end;    /* Endianness of the store */
   2015             IRExpr*   addr;   /* store address */
   2016             IRExpr*   data;   /* value to write */
   2017          } Store;
   2018 
   2019          /* Do an atomic compare-and-swap operation.  Semantics are
   2020             described above on a comment at the definition of IRCAS.
   2021 
   2022             ppIRStmt output:
   2023                t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
   2024             eg
   2025                t1 = CASle(t2 :: t3->Add32(t3,1))
   2026                which denotes a 32-bit atomic increment
   2027                of a value at address t2
   2028 
   2029             A double-element CAS may also be denoted, in which case <tmp>,
   2030             <expected> and <new> are all pairs of items, separated by
   2031             commas.
   2032          */
   2033          struct {
   2034             IRCAS* details;
   2035          } CAS;
   2036 
   2037          /* Either Load-Linked or Store-Conditional, depending on
   2038             STOREDATA.
   2039 
   2040             If STOREDATA is NULL then this is a Load-Linked, meaning
   2041             that data is loaded from memory as normal, but a
   2042             'reservation' for the address is also lodged in the
   2043             hardware.
   2044 
   2045                result = Load-Linked(addr, end)
   2046 
   2047             The data transfer type is the type of RESULT (I32, I64,
   2048             etc).  ppIRStmt output:
   2049 
   2050                result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
   2051 
   2052             If STOREDATA is not NULL then this is a Store-Conditional,
   2053             hence:
   2054 
   2055                result = Store-Conditional(addr, storedata, end)
   2056 
   2057             The data transfer type is the type of STOREDATA and RESULT
   2058             has type Ity_I1. The store may fail or succeed depending
   2059             on the state of a previously lodged reservation on this
   2060             address.  RESULT is written 1 if the store succeeds and 0
   2061             if it fails.  eg ppIRStmt output:
   2062 
   2063                result = ( ST<end>-Cond(<addr>) = <storedata> )
   2064                eg t3 = ( STbe-Cond(t1) = t2 )
   2065 
   2066             In all cases, the address must be naturally aligned for
   2067             the transfer type -- any misaligned addresses should be
   2068             caught by a dominating IR check and side exit.  This
   2069             alignment restriction exists because on at least some
   2070             LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
   2071             misaligned addresses, and we have to actually generate
   2072             stwcx. on the host, and we don't want it trapping on the
   2073             host.
   2074 
   2075             Summary of rules for transfer type:
   2076               STOREDATA == NULL (LL):
   2077                 transfer type = type of RESULT
   2078               STOREDATA != NULL (SC):
   2079                 transfer type = type of STOREDATA, and RESULT :: Ity_I1
   2080          */
   2081          struct {
   2082             IREndness end;
   2083             IRTemp    result;
   2084             IRExpr*   addr;
   2085             IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
   2086          } LLSC;
   2087 
   2088          /* Call (possibly conditionally) a C function that has side
   2089             effects (ie. is "dirty").  See the comments above the
   2090             IRDirty type declaration for more information.
   2091 
   2092             ppIRStmt output:
   2093                t<tmp> = DIRTY <guard> <effects>
   2094                   ::: <callee>(<args>)
   2095             eg.
   2096                t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
   2097                      ::: foo{0x380035f4}(t2)
   2098          */
   2099          struct {
   2100             IRDirty* details;
   2101          } Dirty;
   2102 
   2103          /* A memory bus event - a fence, or acquisition/release of the
   2104             hardware bus lock.  IR optimisation treats all these as fences
   2105             across which no memory references may be moved.
   2106             ppIRStmt output: MBusEvent-Fence,
   2107                              MBusEvent-BusLock, MBusEvent-BusUnlock.
   2108          */
   2109          struct {
   2110             IRMBusEvent event;
   2111          } MBE;
   2112 
   2113          /* Conditional exit from the middle of an IRSB.
   2114             ppIRStmt output: if (<guard>) goto {<jk>} <dst>
   2115                          eg. if (t69) goto {Boring} 0x4000AAA:I32
   2116          */
   2117          struct {
   2118             IRExpr*    guard;    /* Conditional expression */
   2119             IRJumpKind jk;       /* Jump kind */
   2120             IRConst*   dst;      /* Jump target (constant only) */
   2121          } Exit;
   2122       } Ist;
   2123    }
   2124    IRStmt;
   2125 
/* Statement constructors.  Each one allocates and returns a fresh
   IRStmt of the corresponding kind, with the given fields filled in.
   See the comments on the IRStmt union members above for the meaning
   of each argument. */
extern IRStmt* IRStmt_NoOp    ( void );
extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
extern IRStmt* IRStmt_PutI    ( IRRegArray* descr, IRExpr* ix, Int bias,
                                IRExpr* data );
extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_CAS     ( IRCAS* details );
/* For LLSC: storedata == NULL makes a Load-Linked, non-NULL makes a
   Store-Conditional -- see the IRStmt.LLSC comment above. */
extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
                                IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst );

/* Deep-copy an IRStmt (and, recursively, everything it refers to). */
extern IRStmt* deepCopyIRStmt ( IRStmt* );

/* Pretty-print an IRStmt. */
extern void ppIRStmt ( IRStmt* );
   2147 
   2148 
/* ------------------ Basic Blocks ------------------ */

/* Type environments: a bunch of statements, expressions, etc, are
   incomplete without an environment indicating the type of each
   IRTemp.  So this provides one.  IR temporaries are really just
   unsigned ints and so this provides an array, 0 .. n_types_used-1 of
   them.
*/
typedef
   struct {
      IRType* types;       /* Array of types, indexed by IRTemp */
      Int     types_size;  /* Allocated length of 'types' (capacity;
                              presumably grown on demand -- see
                              newIRTemp) */
      Int     types_used;  /* Entries in use, i.e. number of temps */
   }
   IRTypeEnv;
   2164 
/* Obtain a new IRTemp of the given type, recording it in the
   environment. */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment. */
extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );

/* Pretty-print a type environment. */
extern void ppIRTypeEnv ( IRTypeEnv* );
   2173 
   2174 
/* Code blocks, which in proper compiler terminology are superblocks
   (single entry, multiple exit code sequences) contain:

   - A table giving a type for each temp (the "type environment")
   - An expandable array of statements
   - An expression of type 32 or 64 bits, depending on the
     guest's word size, indicating the next destination if the block
     executes all the way to the end, without a side exit
   - An indication of any special actions (JumpKind) needed
     for this final jump.

   "IRSB" stands for "IR Super Block".
*/
typedef
   struct {
      IRTypeEnv* tyenv;      /* Types of all temps used in the block */
      IRStmt**   stmts;      /* Expandable array of statement pointers */
      Int        stmts_size; /* Allocated length of 'stmts' (capacity) */
      Int        stmts_used; /* Entries of 'stmts' in use */
      IRExpr*    next;       /* Fall-through destination (guest word
                                sized, per the comment above) */
      IRJumpKind jumpkind;   /* Special action, if any, for final jump */
   }
   IRSB;
   2198 
/* Allocate a new, uninitialised IRSB. */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB, including its type environment and statements. */
extern IRSB* deepCopyIRSB ( IRSB* );

/* Deep-copy an IRSB, except for the statements list, which set to be
   a new, empty, list of statements.  Useful when rebuilding a block
   statement-by-statement (e.g. during instrumentation). */
extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );

/* Pretty-print an IRSB. */
extern void ppIRSB ( IRSB* );

/* Append an IRStmt to an IRSB (growing the statement array as
   needed). */
extern void addStmtToIRSB ( IRSB*, IRStmt* );
   2214 
   2215 
   2216 /*---------------------------------------------------------------*/
   2217 /*--- Helper functions for the IR                             ---*/
   2218 /*---------------------------------------------------------------*/
   2219 
   2220 /* For messing with IR type environments */
   2221 extern IRTypeEnv* emptyIRTypeEnv  ( void );
   2222 
   2223 /* What is the type of this expression? */
   2224 extern IRType typeOfIRConst ( IRConst* );
   2225 extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
   2226 extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
   2227 
   2228 /* Sanity check a BB of IR */
   2229 extern void sanityCheckIRSB ( IRSB*  bb,
   2230                               HChar* caller,
   2231                               Bool   require_flatness,
   2232                               IRType guest_word_size );
   2233 extern Bool isFlatIRStmt ( IRStmt* );
   2234 
   2235 /* Is this any value actually in the enumeration 'IRType' ? */
   2236 extern Bool isPlausibleIRType ( IRType ty );
   2237 
   2238 #endif /* ndef __LIBVEX_IR_H */
   2239 
   2240 
   2241 /*---------------------------------------------------------------*/
   2242 /*---                                             libvex_ir.h ---*/
   2243 /*---------------------------------------------------------------*/
   2244