      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                       libvex_ir.h ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2010 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #ifndef __LIBVEX_IR_H
     37 #define __LIBVEX_IR_H
     38 
     39 #include "libvex_basictypes.h"
     40 
     41 
     42 /*---------------------------------------------------------------*/
     43 /*--- High-level IR description                               ---*/
     44 /*---------------------------------------------------------------*/
     45 
     46 /* Vex IR is an architecture-neutral intermediate representation.
     47    Unlike some IRs in systems similar to Vex, it is not like assembly
     48    language (ie. a list of instructions).  Rather, it is more like the
     49    IR that might be used in a compiler.
     50 
     51    Code blocks
     52    ~~~~~~~~~~~
     53    The code is broken into small code blocks ("superblocks", type:
     54    'IRSB').  Each code block typically represents from 1 to perhaps 50
     55    instructions.  IRSBs are single-entry, multiple-exit code blocks.
     56    Each IRSB contains three things:
     57    - a type environment, which indicates the type of each temporary
     58      value present in the IRSB
     59    - a list of statements, which represent code
      60    - a jump that exits from the end of the IRSB
     61    Because the blocks are multiple-exit, there can be additional
     62    conditional exit statements that cause control to leave the IRSB
     63    before the final exit.  Also because of this, IRSBs can cover
     64    multiple non-consecutive sequences of code (up to 3).  These are
     65    recorded in the type VexGuestExtents (see libvex.h).
     66 
     67    Statements and expressions
     68    ~~~~~~~~~~~~~~~~~~~~~~~~~~
     69    Statements (type 'IRStmt') represent operations with side-effects,
     70    eg.  guest register writes, stores, and assignments to temporaries.
     71    Expressions (type 'IRExpr') represent operations without
     72    side-effects, eg. arithmetic operations, loads, constants.
     73    Expressions can contain sub-expressions, forming expression trees,
      74    eg. (3 + (4 * load(addr1))).
     75 
     76    Storage of guest state
     77    ~~~~~~~~~~~~~~~~~~~~~~
     78    The "guest state" contains the guest registers of the guest machine
     79    (ie.  the machine that we are simulating).  It is stored by default
     80    in a block of memory supplied by the user of the VEX library,
     81    generally referred to as the guest state (area).  To operate on
     82    these registers, one must first read ("Get") them from the guest
     83    state into a temporary value.  Afterwards, one can write ("Put")
     84    them back into the guest state.
     85 
     86    Get and Put are characterised by a byte offset into the guest
     87    state, a small integer which effectively gives the identity of the
     88    referenced guest register, and a type, which indicates the size of
     89    the value to be transferred.
     90 
     91    The basic "Get" and "Put" operations are sufficient to model normal
     92    fixed registers on the guest.  Selected areas of the guest state
     93    can be treated as a circular array of registers (type:
     94    'IRRegArray'), which can be indexed at run-time.  This is done with
     95    the "GetI" and "PutI" primitives.  This is necessary to describe
     96    rotating register files, for example the x87 FPU stack, SPARC
     97    register windows, and the Itanium register files.
     98 
     99    Examples, and flattened vs. unflattened code
    100    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    101    For example, consider this x86 instruction:
    102 
    103      addl %eax, %ebx
    104 
    105    One Vex IR translation for this code would be this:
    106 
    107      ------ IMark(0x24F275, 7) ------
    108      t3 = GET:I32(0)             # get %eax, a 32-bit integer
    109      t2 = GET:I32(12)            # get %ebx, a 32-bit integer
    110      t1 = Add32(t3,t2)           # addl
    111      PUT(0) = t1                 # put %eax
    112 
    113    (For simplicity, this ignores the effects on the condition codes, and
    114    the update of the instruction pointer.)
    115 
    116    The "IMark" is an IR statement that doesn't represent actual code.
    117    Instead it indicates the address and length of the original
    118    instruction.  The numbers 0 and 12 are offsets into the guest state
    119    for %eax and %ebx.  The full list of offsets for an architecture
    120    <ARCH> can be found in the type VexGuest<ARCH>State in the file
    121    VEX/pub/libvex_guest_<ARCH>.h.
    122 
    123    The five statements in this example are:
    124    - the IMark
    125    - three assignments to temporaries
    126    - one register write (put)
    127 
    128    The six expressions in this example are:
    129    - two register reads (gets)
    130    - one arithmetic (add) operation
    131    - three temporaries (two nested within the Add32, one in the PUT)
    132 
    133    The above IR is "flattened", ie. all sub-expressions are "atoms",
    134    either constants or temporaries.  An equivalent, unflattened version
    135    would be:
    136 
    137      PUT(0) = Add32(GET:I32(0), GET:I32(12))
    138 
    139    IR is guaranteed to be flattened at instrumentation-time.  This makes
    140    instrumentation easier.  Equivalent flattened and unflattened IR
    141    typically results in the same generated code.
    142 
    143    Another example, this one showing loads and stores:
    144 
    145      addl %edx,4(%eax)
    146 
    147    This becomes (again ignoring condition code and instruction pointer
    148    updates):
    149 
    150      ------ IMark(0x4000ABA, 3) ------
    151      t3 = Add32(GET:I32(0),0x4:I32)
    152      t2 = LDle:I32(t3)
    153      t1 = GET:I32(8)
    154      t0 = Add32(t2,t1)
    155      STle(t3) = t0
    156 
    157    The "le" in "LDle" and "STle" is short for "little-endian".
    158 
    159    No need for deallocations
    160    ~~~~~~~~~~~~~~~~~~~~~~~~~
    161    Although there are allocation functions for various data structures
    162    in this file, there are no deallocation functions.  This is because
    163    Vex uses a memory allocation scheme that automatically reclaims the
    164    memory used by allocated structures once translation is completed.
     165    This makes things easier for tools that instrument/transform code
    166    blocks.
    167 
    168    SSAness and typing
    169    ~~~~~~~~~~~~~~~~~~
    170    The IR is fully typed.  For every IRSB (IR block) it is possible to
    171    say unambiguously whether or not it is correctly typed.
     172    Incorrectly typed IR has no meaning and VEX will refuse to
    173    process it.  At various points during processing VEX typechecks the
    174    IR and aborts if any violations are found.  This seems overkill but
    175    makes it a great deal easier to build a reliable JIT.
    176 
    177    IR also has the SSA property.  SSA stands for Static Single
    178    Assignment, and what it means is that each IR temporary may be
    179    assigned to only once.  This idea became widely used in compiler
    180    construction in the mid to late 90s.  It makes many IR-level
    181    transformations/code improvements easier, simpler and faster.
     182    Whenever it typechecks an IR block, VEX also checks that the SSA
     183    property holds, and aborts if it does not.  So SSAness is
    184    mechanically and rigidly enforced.
    185 */
    186 
    187 /*---------------------------------------------------------------*/
    188 /*--- Type definitions for the IR                             ---*/
    189 /*---------------------------------------------------------------*/
    190 
    191 /* General comments about naming schemes:
    192 
     193    All publicly visible functions contain the name of the primary
    194    type on which they operate (IRFoo, IRBar, etc).  Hence you should
    195    be able to identify these functions by grepping for "IR[A-Z]".
    196 
    197    For some type 'IRFoo':
    198 
    199    - ppIRFoo is the printing method for IRFoo, printing it to the
    200      output channel specified in the LibVEX_Initialise call.
    201 
    202    - eqIRFoo is a structural equality predicate for IRFoos.
    203 
    204    - deepCopyIRFoo is a deep copy constructor for IRFoos.
    205      It recursively traverses the entire argument tree and
    206      produces a complete new tree.  All types have a deep copy
    207      constructor.
    208 
    209    - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
    210      It creates a new top-level copy of the supplied object,
    211      but does not copy any sub-objects.  Only some types have a
    212      shallow copy constructor.
    213 */
    214 
    215 /* ------------------ Types ------------------ */
    216 
    217 /* A type indicates the size of a value, and whether it's an integer, a
    218    float, or a vector (SIMD) value. */
    219 typedef
    220    enum {
    221       Ity_INVALID=0x11000,
    222       Ity_I1,
    223       Ity_I8,
    224       Ity_I16,
    225       Ity_I32,
    226       Ity_I64,
    227       Ity_I128,  /* 128-bit scalar */
    228       Ity_F32,   /* IEEE 754 float */
    229       Ity_F64,   /* IEEE 754 double */
    230       Ity_V128   /* 128-bit SIMD */
    231    }
    232    IRType;
    233 
    234 /* Pretty-print an IRType */
    235 extern void ppIRType ( IRType );
    236 
    237 /* Get the size (in bytes) of an IRType */
    238 extern Int sizeofIRType ( IRType );
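
/* A minimal usage sketch (illustrative; the printed string is only
   indicative, and the byte sizes follow directly from the definitions
   above):

      Int szw = sizeofIRType(Ity_I32);    // 4
      Int szv = sizeofIRType(Ity_V128);   // 16
      ppIRType(Ity_F64);                  // prints something like "F64"
*/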
    239 
    240 
    241 /* ------------------ Endianness ------------------ */
    242 
    243 /* IREndness is used in load IRExprs and store IRStmts. */
    244 typedef
    245    enum {
    246       Iend_LE=0x12000, /* little endian */
    247       Iend_BE          /* big endian */
    248    }
    249    IREndness;
    250 
    251 
    252 /* ------------------ Constants ------------------ */
    253 
    254 /* IRConsts are used within 'Const' and 'Exit' IRExprs. */
    255 
    256 /* The various kinds of constant. */
    257 typedef
    258    enum {
    259       Ico_U1=0x13000,
    260       Ico_U8,
    261       Ico_U16,
    262       Ico_U32,
    263       Ico_U64,
    264       Ico_F64,   /* 64-bit IEEE754 floating */
    265       Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
     266                     as an IEEE754 double value. */
    267       Ico_V128   /* 128-bit restricted vector constant, with 1 bit
    268                     (repeated 8 times) for each of the 16 x 1-byte lanes */
    269    }
    270    IRConstTag;
    271 
    272 /* A constant.  Stored as a tagged union.  'tag' indicates what kind of
    273    constant this is.  'Ico' is the union that holds the fields.  If an
    274    IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
    275    and its value can be accessed with 'c.Ico.U32'. */
    276 typedef
    277    struct _IRConst {
    278       IRConstTag tag;
    279       union {
    280          Bool   U1;
    281          UChar  U8;
    282          UShort U16;
    283          UInt   U32;
    284          ULong  U64;
    285          Double F64;
    286          ULong  F64i;
    287          UShort V128;   /* 16-bit value; see Ico_V128 comment above */
    288       } Ico;
    289    }
    290    IRConst;
    291 
    292 /* IRConst constructors */
    293 extern IRConst* IRConst_U1   ( Bool );
    294 extern IRConst* IRConst_U8   ( UChar );
    295 extern IRConst* IRConst_U16  ( UShort );
    296 extern IRConst* IRConst_U32  ( UInt );
    297 extern IRConst* IRConst_U64  ( ULong );
    298 extern IRConst* IRConst_F64  ( Double );
    299 extern IRConst* IRConst_F64i ( ULong );
    300 extern IRConst* IRConst_V128 ( UShort );
    301 
    302 /* Deep-copy an IRConst */
    303 extern IRConst* deepCopyIRConst ( IRConst* );
    304 
    305 /* Pretty-print an IRConst */
    306 extern void ppIRConst ( IRConst* );
    307 
    308 /* Compare two IRConsts for equality */
    309 extern Bool eqIRConst ( IRConst*, IRConst* );
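
/* A minimal usage sketch (illustrative only):

      IRConst* c1 = IRConst_U32(42);
      // c1->tag is Ico_U32 and c1->Ico.U32 is 42, as described above.
      IRConst* c2 = deepCopyIRConst(c1);
      // eqIRConst(c1, c2) is True: equality is structural, not pointer
      // identity.

   There is no matching free function; as noted in the overview, VEX
   reclaims this memory automatically once translation completes. */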
    310 
    311 
    312 /* ------------------ Call targets ------------------ */
    313 
    314 /* Describes a helper function to call.  The name part is purely for
    315    pretty printing and not actually used.  regparms=n tells the back
    316    end that the callee has been declared
    317    "__attribute__((regparm(n)))".  On some targets (x86) the back end
    318    will need to construct a non-standard sequence to call a function
    319    declared like this.
    320 
    321    mcx_mask is a sop to Memcheck.  It indicates which args should be
    322    considered 'always defined' when lazily computing definedness of
    323    the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
    324    args[1], etc.  If a bit is set, the corresponding arg is excluded
    325    (hence "x" in "mcx") from definedness checking.
    326 */
    327 
    328 typedef
    329    struct {
    330       Int    regparms;
    331       HChar* name;
    332       void*  addr;
    333       UInt   mcx_mask;
    334    }
    335    IRCallee;
    336 
    337 /* Create an IRCallee. */
    338 extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );
    339 
    340 /* Deep-copy an IRCallee. */
    341 extern IRCallee* deepCopyIRCallee ( IRCallee* );
    342 
    343 /* Pretty-print an IRCallee. */
    344 extern void ppIRCallee ( IRCallee* );
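
/* A minimal usage sketch.  The helper function itself is hypothetical;
   only mkIRCallee and the mcx_mask convention come from this file:

      // UInt my_helper ( UInt arg0, UInt arg1 );   -- hypothetical helper
      IRCallee* cee = mkIRCallee(0, "my_helper", (void*)&my_helper);
      cee->mcx_mask = 1 << 1;   // arg[1] is always defined (see above)
*/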
    345 
    346 
    347 /* ------------------ Guest state arrays ------------------ */
    348 
    349 /* This describes a section of the guest state that we want to
    350    be able to index at run time, so as to be able to describe
    351    indexed or rotating register files on the guest. */
    352 typedef
    353    struct {
    354       Int    base;   /* guest state offset of start of indexed area */
    355       IRType elemTy; /* type of each element in the indexed area */
    356       Int    nElems; /* number of elements in the indexed area */
    357    }
    358    IRRegArray;
    359 
    360 extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
    361 
    362 extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
    363 
    364 extern void ppIRRegArray ( IRRegArray* );
    365 extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
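
/* A minimal usage sketch describing an 8-entry rotating file of F64s
   (for example the x87 stack).  The base offset 64 is illustrative;
   real offsets come from VexGuest<ARCH>State in libvex_guest_<ARCH>.h:

      IRRegArray* descr = mkIRRegArray(64, Ity_F64, 8);
      ppIRRegArray(descr);    // prints something like "(64:8xF64)"

   GetI/PutI (see the overview above) then index into this area at
   run time. */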
    366 
    367 
    368 /* ------------------ Temporaries ------------------ */
    369 
    370 /* This represents a temporary, eg. t1.  The IR optimiser relies on the
    371    fact that IRTemps are 32-bit ints.  Do not change them to be ints of
    372    any other size. */
    373 typedef UInt IRTemp;
    374 
    375 /* Pretty-print an IRTemp. */
    376 extern void ppIRTemp ( IRTemp );
    377 
    378 #define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
    379 
    380 
    381 /* --------------- Primops (arity 1,2,3 and 4) --------------- */
    382 
    383 /* Primitive operations that are used in Unop, Binop, Triop and Qop
    384    IRExprs.  Once we take into account integer, floating point and SIMD
    385    operations of all the different sizes, there are quite a lot of them.
    386    Most instructions supported by the architectures that Vex supports
    387    (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
    388    are not;  they are instead handled with dirty helpers that emulate
    389    their functionality.  Such obscure ones are thus not directly visible
    390    in the IR, but their effects on guest state (memory and registers)
    391    are made visible via the annotations in IRDirty structures.
    392 */
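
/* As a concrete, sketch-only illustration: the flattened fragment
   "t1 = Add32(t3,t2)" from the overview is built by a front end
   roughly as follows, using the IRExpr/IRStmt constructors declared
   later in this file:

      IRExpr* sum = IRExpr_Binop(Iop_Add32,
                                 IRExpr_RdTmp(t3), IRExpr_RdTmp(t2));
      addStmtToIRSB(irsb, IRStmt_WrTmp(t1, sum));
*/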
    393 typedef
    394    enum {
    395       /* -- Do not change this ordering.  The IR generators rely on
     396             (eg) Iop_Add64 == Iop_Add8 + 3. -- */
    397 
    398       Iop_INVALID=0x14000,
    399       Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
    400       Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
     401       /* Signless mul.  MullS/MullU are elsewhere. */
    402       Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
    403       Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
    404       Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
    405       Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
    406       Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
    407       Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
    408       Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
    409       /* Integer comparisons. */
    410       Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
    411       Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
    412       /* Tags for unary ops */
    413       Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
    414 
    415       /* Exactly like CmpEQ8/16/32/64, but carrying the additional
    416          hint that these compute the success/failure of a CAS
    417          operation, and hence are almost certainly applied to two
    418          copies of the same value, which in turn has implications for
    419          Memcheck's instrumentation. */
    420       Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
    421       Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
    422 
    423       /* -- Ordering not important after here. -- */
    424 
    425       /* Widening multiplies */
    426       Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
    427       Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
    428 
     429       /* Weirdo integer stuff */
    430       Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
    431       Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
    432       /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
    433          zero.  You must ensure they are never given a zero argument.
    434       */
    435 
    436       /* Standard integer comparisons */
    437       Iop_CmpLT32S, Iop_CmpLT64S,
    438       Iop_CmpLE32S, Iop_CmpLE64S,
    439       Iop_CmpLT32U, Iop_CmpLT64U,
    440       Iop_CmpLE32U, Iop_CmpLE64U,
    441 
    442       /* As a sop to Valgrind-Memcheck, the following are useful. */
    443       Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
     444       Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
    445       Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
    446       Iop_Max32U, /* unsigned max */
    447 
    448       /* PowerPC-style 3-way integer comparisons.  Without them it is
    449          difficult to simulate PPC efficiently.
    450          op(x,y) | x < y  = 0x8 else
    451                  | x > y  = 0x4 else
    452                  | x == y = 0x2
    453       */
    454       Iop_CmpORD32U, Iop_CmpORD64U,
    455       Iop_CmpORD32S, Iop_CmpORD64S,
    456 
    457       /* Division */
    458       /* TODO: clarify semantics wrt rounding, negative values, whatever */
    459       Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
    460       Iop_DivS32,   // ditto, signed
    461       Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
    462       Iop_DivS64,   // ditto, signed
    463 
    464       Iop_DivModU64to32, // :: I64,I32 -> I64
    465                          // of which lo half is div and hi half is mod
    466       Iop_DivModS64to32, // ditto, signed
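      /* Worked example (illustrative): DivModU64to32(100:I64, 7:I32)
         = 0x000000020000000E:I64, i.e. remainder 2 in the high 32 bits
         and quotient 14 in the low 32 bits. */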
    467 
    468       Iop_DivModU128to64, // :: V128,I64 -> V128
    469                           // of which lo half is div and hi half is mod
    470       Iop_DivModS128to64, // ditto, signed
    471 
    472       /* Integer conversions.  Some of these are redundant (eg
    473          Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
    474          having a complete set reduces the typical dynamic size of IR
    475          and makes the instruction selectors easier to write. */
    476 
    477       /* Widening conversions */
    478       Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
    479                   Iop_16Uto32, Iop_16Uto64,
    480                                Iop_32Uto64,
    481       Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
    482                   Iop_16Sto32, Iop_16Sto64,
    483                                Iop_32Sto64,
    484 
    485       /* Narrowing conversions */
    486       Iop_64to8, Iop_32to8, Iop_64to16,
    487       /* 8 <-> 16 bit conversions */
    488       Iop_16to8,      // :: I16 -> I8, low half
    489       Iop_16HIto8,    // :: I16 -> I8, high half
    490       Iop_8HLto16,    // :: (I8,I8) -> I16
    491       /* 16 <-> 32 bit conversions */
    492       Iop_32to16,     // :: I32 -> I16, low half
    493       Iop_32HIto16,   // :: I32 -> I16, high half
    494       Iop_16HLto32,   // :: (I16,I16) -> I32
    495       /* 32 <-> 64 bit conversions */
    496       Iop_64to32,     // :: I64 -> I32, low half
    497       Iop_64HIto32,   // :: I64 -> I32, high half
    498       Iop_32HLto64,   // :: (I32,I32) -> I64
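      /* Worked example (illustrative):
            32HLto64(0xDEADBEEF:I32, 0xCAFEBABE:I32) = 0xDEADBEEFCAFEBABE:I64
         and 64HIto32 / 64to32 applied to that value recover the
         original high and low halves respectively. */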
    499       /* 64 <-> 128 bit conversions */
    500       Iop_128to64,    // :: I128 -> I64, low half
    501       Iop_128HIto64,  // :: I128 -> I64, high half
    502       Iop_64HLto128,  // :: (I64,I64) -> I128
    503       /* 1-bit stuff */
    504       Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
    505       Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
    506       Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
    507       Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
    508       Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
    509       Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
    510       Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
    511       Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
    512       Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
    513       Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
    514 
    515       /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
    516 
    517       /* --- Simple stuff as mandated by 754. --- */
    518 
    519       /* Binary operations, with rounding. */
    520       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    521       Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
    522 
    523       /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
    524       Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
    525 
    526       /* Variants of the above which produce a 64-bit result but which
     527          round their result to an IEEE float range first. */
    528       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    529       Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
    530 
    531       /* Unary operations, without rounding. */
    532       /* :: F64 -> F64 */
    533       Iop_NegF64, Iop_AbsF64,
    534 
    535       /* :: F32 -> F32 */
    536       Iop_NegF32, Iop_AbsF32,
    537 
    538       /* Unary operations, with rounding. */
    539       /* :: IRRoundingMode(I32) x F64 -> F64 */
    540       Iop_SqrtF64, Iop_SqrtF64r32,
    541 
    542       /* :: IRRoundingMode(I32) x F32 -> F32 */
    543       Iop_SqrtF32,
    544 
    545       /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
    546             0x45 Unordered
    547             0x01 LT
    548             0x00 GT
    549             0x40 EQ
    550          This just happens to be the Intel encoding.  The values
    551          are recorded in the type IRCmpF64Result.
    552       */
    553       /* :: F64 x F64 -> IRCmpF64Result(I32) */
    554       Iop_CmpF64,
    555 
    556       /* --- Int to/from FP conversions. --- */
    557 
    558       /* For the most part, these take a first argument :: Ity_I32 (as
    559          IRRoundingMode) which is an indication of the rounding mode
    560          to use, as per the following encoding ("the standard
    561          encoding"):
    562             00b  to nearest (the default)
    563             01b  to -infinity
    564             10b  to +infinity
    565             11b  to zero
    566          This just happens to be the Intel encoding.  For reference only,
    567          the PPC encoding is:
    568             00b  to nearest (the default)
    569             01b  to zero
    570             10b  to +infinity
    571             11b  to -infinity
    572          Any PPC -> IR front end will have to translate these PPC
    573          encodings, as encoded in the guest state, to the standard
    574          encodings, to pass to the primops.
    575          For reference only, the ARM VFP encoding is:
    576             00b  to nearest
    577             01b  to +infinity
    578             10b  to -infinity
    579             11b  to zero
    580          Again, this will have to be converted to the standard encoding
    581          to pass to primops.
    582 
    583          If one of these conversions gets an out-of-range condition,
    584          or a NaN, as an argument, the result is host-defined.  On x86
    585          the "integer indefinite" value 0x80..00 is produced.  On PPC
    586          it is either 0x80..00 or 0x7F..FF depending on the sign of
    587          the argument.
    588 
    589          On ARMvfp, when converting to a signed integer result, the
    590          overflow result is 0x80..00 for negative args and 0x7F..FF
    591          for positive args.  For unsigned integer results it is
    592          0x00..00 and 0xFF..FF respectively.
    593 
    594          Rounding is required whenever the destination type cannot
    595          represent exactly all values of the source type.
    596       */
    597       Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
    598       Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
    599       Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
    600 
    601       Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
    602 
    603       Iop_I16StoF64, /*                       signed I16 -> F64 */
    604       Iop_I32StoF64, /*                       signed I32 -> F64 */
    605       Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
    606 
    607       Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
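      /* Worked example of the standard rounding-mode encoding described
         above (illustrative): with rounding mode 11b (to zero),
         F64toI32S(0x3:I32, 3.7) yields 3; with mode 00b (to nearest),
         F64toI32S(0x0:I32, 3.7) yields 4; and with mode 01b (to
         -infinity), F64toI32S(0x1:I32, -3.2) yields -4. */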
    608 
    609       /* Conversion between floating point formats */
    610       Iop_F32toF64,  /*                       F32 -> F64 */
    611       Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
    612 
    613       /* Reinterpretation.  Take an F64 and produce an I64 with
    614          the same bit pattern, or vice versa. */
    615       Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
    616       Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
    617 
    618       /* --- guest x86/amd64 specifics, not mandated by 754. --- */
    619 
    620       /* Binary ops, with rounding. */
    621       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
    622       Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
    623       Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
    624       Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
    625       Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
    626       Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
    627       Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
    628       Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
    629       Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
    630       /* Note that on x86 guest, PRem1{C3210} has the same behaviour
    631          as the IEEE mandated RemF64, except it is limited in the
    632          range of its operand.  Hence the partialness. */
    633 
    634       /* Unary ops, with rounding. */
    635       /* :: IRRoundingMode(I32) x F64 -> F64 */
    636       Iop_SinF64,    /* FSIN */
    637       Iop_CosF64,    /* FCOS */
    638       Iop_TanF64,    /* FTAN */
    639       Iop_2xm1F64,   /* (2^arg - 1.0) */
    640       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
    641                             as F64) */
    642       Iop_RoundF32toInt, /* F32 value to nearest integral value (still
    643                             as F32) */
    644 
    645       /* --- guest ppc32/64 specifics, not mandated by 754. --- */
    646 
    647       /* Ternary operations, with rounding. */
    648       /* Fused multiply-add/sub, with 112-bit intermediate
     649          precision */
    650       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
    651             (computes arg2 * arg3 +/- arg4) */
    652       Iop_MAddF64, Iop_MSubF64,
    653 
    654       /* Variants of the above which produce a 64-bit result but which
     655          round their result to an IEEE float range first. */
    656       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
    657       Iop_MAddF64r32, Iop_MSubF64r32,
    658 
    659       /* :: F64 -> F64 */
    660       Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
    661       Iop_RoundF64toF64_NEAREST, /* frin */
    662       Iop_RoundF64toF64_NegINF,  /* frim */
    663       Iop_RoundF64toF64_PosINF,  /* frip */
    664       Iop_RoundF64toF64_ZERO,    /* friz */
    665 
    666       /* :: F64 -> F32 */
    667       Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
    668 
    669       /* :: IRRoundingMode(I32) x F64 -> F64 */
    670       Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
    671       /* NB: pretty much the same as Iop_F64toF32, except no change
    672          of type. */
    673 
    674       /* :: F64 -> I32 */
    675       Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
    676                        from FP result */
    677 
    678       /* ------------------ 32-bit SIMD Integer ------------------ */
    679 
    680       /* 16x2 add/sub, also signed/unsigned saturating variants */
    681       Iop_Add16x2, Iop_Sub16x2,
    682       Iop_QAdd16Sx2, Iop_QAdd16Ux2,
    683       Iop_QSub16Sx2, Iop_QSub16Ux2,
    684 
    685       /* 16x2 signed/unsigned halving add/sub.  For each lane, these
    686          compute bits 16:1 of (eg) sx(argL) + sx(argR),
    687          or zx(argL) - zx(argR) etc. */
    688       Iop_HAdd16Ux2, Iop_HAdd16Sx2,
    689       Iop_HSub16Ux2, Iop_HSub16Sx2,
    690 
    691       /* 8x4 add/sub, also signed/unsigned saturating variants */
    692       Iop_Add8x4, Iop_Sub8x4,
    693       Iop_QAdd8Sx4, Iop_QAdd8Ux4,
    694       Iop_QSub8Sx4, Iop_QSub8Ux4,
    695 
    696       /* 8x4 signed/unsigned halving add/sub.  For each lane, these
    697          compute bits 8:1 of (eg) sx(argL) + sx(argR),
    698          or zx(argL) - zx(argR) etc. */
    699       Iop_HAdd8Ux4, Iop_HAdd8Sx4,
    700       Iop_HSub8Ux4, Iop_HSub8Sx4,
    701 
    702       /* 8x4 sum of absolute unsigned differences. */
    703       Iop_Sad8Ux4,
    704 
    705       /* MISC (vector integer cmp != 0) */
    706       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
    707 
    708       /* ------------------ 64-bit SIMD FP ------------------------ */
    709 
     710       /* Conversion to/from int */
     711       Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x2 -> F32x2 */
     712       Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x2 -> I32x2 */
     713       /* The Fixed32 format is a floating-point number with a fixed number
     714          of fraction bits.  The number of fraction bits is passed as a
     715          second argument of type I8. */
    716       Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
    717       Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
    718 
    719       /* Binary operations */
    720       Iop_Max32Fx2,      Iop_Min32Fx2,
    721       /* Pairwise Min and Max. See integer pairwise operations for more
    722          details. */
    723       Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
    724       /* Note: For the following compares, the arm front-end assumes a
    725          nan in a lane of either argument returns zero for that lane. */
    726       Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
    727 
    728       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
     729          element in the operand vector, and places the results in the
     730          destination vector. */
    731       Iop_Recip32Fx2,
    732 
    733       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
     734          Note that if one of the arguments is zero and the other is infinity
     735          of arbitrary sign, the result of the operation is 2.0. */
    736       Iop_Recps32Fx2,
    737 
    738       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
    739          square root of each element in the operand vector. */
    740       Iop_Rsqrte32Fx2,
    741 
    742       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
     743          Note that if one of the arguments is zero and the other is infinity
     744          of arbitrary sign, the result of the operation is 1.5. */
    745       Iop_Rsqrts32Fx2,
    746 
    747       /* Unary */
    748       Iop_Neg32Fx2, Iop_Abs32Fx2,
    749 
    750       /* ------------------ 64-bit SIMD Integer. ------------------ */
    751 
    752       /* MISC (vector integer cmp != 0) */
    753       Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
    754 
    755       /* ADDITION (normal / unsigned sat / signed sat) */
    756       Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
    757       Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
    758       Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
    759 
    760       /* PAIRWISE operations */
    761       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
    762             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
    763       Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
    764       Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
    765       Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
    766       Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
    767       Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
     768       /* The longening variant is unary.  The resulting vector contains
     769          half as many elements as the operand, but each is twice as wide.
     770          Example:
     771             Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
    772                where a+b and c+d are unsigned 32-bit values. */
    773       Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
    774       Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
    775 
    776       /* SUBTRACTION (normal / unsigned sat / signed sat) */
    777       Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
    778       Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
    779       Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
    780 
    781       /* ABSOLUTE VALUE */
    782       Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
    783 
     784       /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial) */
    785       Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
    786       Iop_Mul32Fx2,
    787       Iop_MulHi16Ux4,
    788       Iop_MulHi16Sx4,
     789       /* Polynomial multiplication treats its arguments as coefficients of
     790          polynomials over {0, 1}. */
    791       Iop_PolynomialMul8x8,
    792 
    793       /* Vector Saturating Doubling Multiply Returning High Half and
    794          Vector Saturating Rounding Doubling Multiply Returning High Half */
    795       /* These IROp's multiply corresponding elements in two vectors, double
    796          the results, and place the most significant half of the final results
    797          in the destination vector. The results are truncated or rounded. If
    798          any of the results overflow, they are saturated. */
    799       Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
    800       Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
    801 
    802       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
    803       Iop_Avg8Ux8,
    804       Iop_Avg16Ux4,
    805 
    806       /* MIN/MAX */
    807       Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
    808       Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
    809       Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
    810       Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
    811 
    812       /* COMPARISON */
    813       Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
    814       Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
    815       Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
    816 
    817       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
    818          bit) */
    819       Iop_Cnt8x8,
    820       Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
    821       Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
    822 
    823       /* VECTOR x VECTOR SHIFT / ROTATE */
    824       Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
    825       Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
    826       Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
    827       Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
    828 
    829       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
    830       Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
    831       Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
    832       Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
    833 
    834       /* VECTOR x VECTOR SATURATING SHIFT */
    835       Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
    836       Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
    837       /* VECTOR x INTEGER SATURATING SHIFT */
    838       Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
    839       Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
    840       Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
    841 
    842       /* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */
    843       Iop_QNarrow16Ux4,
    844       Iop_QNarrow16Sx4,
    845       Iop_QNarrow32Sx2,
    846 
    847       /* INTERLEAVING */
    848       /* Interleave lanes from low or high halves of
    849          operands.  Most-significant result lane is from the left
    850          arg. */
    851       Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
    852       Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
    853       /* Interleave odd/even lanes of operands.  Most-significant result lane
    854          is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
     855          identical to Interleave{HI,LO}32x2 and so are omitted. */
    856       Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
    857       Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
    858 
    859 
    860       /* CONCATENATION -- build a new value by concatenating either
    861          the even or odd lanes of both operands.  Note that
    862          Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
    863          and so are omitted. */
    864       Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
    865       Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
    866 
    867       /* GET / SET elements of VECTOR
    868          GET is binop (I64, I8) -> I<elem_size>
    869          SET is triop (I64, I8, I<elem_size>) -> I64 */
    870       /* Note: the arm back-end handles only constant second argument */
    871       Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
    872       Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
    873 
    874       /* DUPLICATING -- copy value to all lanes */
    875       Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
    876 
    877       /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
    878          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
    879          result.
    880          It is a triop: (I64, I64, I8) -> I64 */
     881       /* Note: the arm back-end handles only a constant third argument. */
    882       Iop_Extract64,
    883 
     884       /* REVERSE the order of elements within each half-word, word,
     885          or double-word */
    886       /* Examples:
    887             Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
    888             Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
    889             Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
    890       Iop_Reverse16_8x8,
    891       Iop_Reverse32_8x8, Iop_Reverse32_16x4,
    892       Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
    893 
    894       /* PERMUTING -- copy src bytes to dst,
    895          as indexed by control vector bytes:
    896             for i in 0 .. 7 . result[i] = argL[ argR[i] ]
    897          argR[i] values may only be in the range 0 .. 7, else behaviour
    898          is undefined. */
    899       Iop_Perm8x8,
    900 
    901       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
     902          See floating-point equivalents for details. */
    903       Iop_Recip32x2, Iop_Rsqrte32x2,
    904 
    905       /* ------------------ 128-bit SIMD FP. ------------------ */
    906 
    907       /* --- 32x4 vector FP --- */
    908 
    909       /* binary */
    910       Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
    911       Iop_Max32Fx4, Iop_Min32Fx4,
    912       Iop_Add32Fx2, Iop_Sub32Fx2,
    913       /* Note: For the following compares, the ppc and arm front-ends assume a
    914          nan in a lane of either argument returns zero for that lane. */
    915       Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
    916       Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
    917 
    918       /* Vector Absolute */
    919       Iop_Abs32Fx4,
    920 
    921       /* Pairwise Max and Min. See integer pairwise operations for details. */
    922       Iop_PwMax32Fx4, Iop_PwMin32Fx4,
    923 
    924       /* unary */
    925       Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
    926       Iop_Neg32Fx4,
    927 
    928       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
     929          element in the operand vector, and places the results in the
     930          destination vector. */
    931       Iop_Recip32Fx4,
    932 
    933       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
     934          Note that if one of the arguments is zero and the other is infinity
     935          of arbitrary sign, the result of the operation is 2.0. */
    936       Iop_Recps32Fx4,
    937 
    938       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
    939          square root of each element in the operand vector. */
    940       Iop_Rsqrte32Fx4,
    941 
    942       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
     943          Note that if one of the arguments is zero and the other is infinity
     944          of arbitrary sign, the result of the operation is 1.5. */
    945       Iop_Rsqrts32Fx4,
    946 
    947 
    948       /* --- Int to/from FP conversion --- */
    949       /* Unlike the standard fp conversions, these irops take no
    950          rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
    951          indicate the mode: {-inf, +inf, nearest, zero} respectively. */
    952       Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
    953       Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
    954       Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
    955       Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
    956       Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
     957       /* The Fixed32 format is a floating-point number with a fixed number
     958          of fraction bits.  The number of fraction bits is passed as a
     959          second argument of type I8. */
    960       Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
    961       Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
    962 
    963       /* --- Single to/from half conversion --- */
    964       Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
    965 
    966       /* --- 32x4 lowest-lane-only scalar FP --- */
    967 
    968       /* In binary cases, upper 3/4 is copied from first operand.  In
    969          unary cases, upper 3/4 is copied from the operand. */
    970 
    971       /* binary */
    972       Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
    973       Iop_Max32F0x4, Iop_Min32F0x4,
    974       Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
    975 
    976       /* unary */
    977       Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
    978 
    979       /* --- 64x2 vector FP --- */
    980 
    981       /* binary */
    982       Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
    983       Iop_Max64Fx2, Iop_Min64Fx2,
    984       Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
    985 
    986       /* unary */
    987       Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
    988 
    989       /* --- 64x2 lowest-lane-only scalar FP --- */
    990 
    991       /* In binary cases, upper half is copied from first operand.  In
    992          unary cases, upper half is copied from the operand. */
    993 
    994       /* binary */
    995       Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
    996       Iop_Max64F0x2, Iop_Min64F0x2,
    997       Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
    998 
    999       /* unary */
   1000       Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
   1001 
   1002       /* --- pack / unpack --- */
   1003 
   1004       /* 64 <-> 128 bit vector */
   1005       Iop_V128to64,     // :: V128 -> I64, low half
   1006       Iop_V128HIto64,   // :: V128 -> I64, high half
   1007       Iop_64HLtoV128,   // :: (I64,I64) -> V128
   1008 
   1009       Iop_64UtoV128,
   1010       Iop_SetV128lo64,
   1011 
   1012       /* 32 <-> 128 bit vector */
   1013       Iop_32UtoV128,
   1014       Iop_V128to32,     // :: V128 -> I32, lowest lane
   1015       Iop_SetV128lo32,  // :: (V128,I32) -> V128
   1016 
   1017       /* ------------------ 128-bit SIMD Integer. ------------------ */
   1018 
   1019       /* BITWISE OPS */
   1020       Iop_NotV128,
   1021       Iop_AndV128, Iop_OrV128, Iop_XorV128,
   1022 
   1023       /* VECTOR SHIFT (shift amt :: Ity_I8) */
   1024       Iop_ShlV128, Iop_ShrV128,
   1025 
   1026       /* MISC (vector integer cmp != 0) */
   1027       Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
   1028 
   1029       /* ADDITION (normal / unsigned sat / signed sat) */
   1030       Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
   1031       Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
   1032       Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
   1033 
   1034       /* SUBTRACTION (normal / unsigned sat / signed sat) */
   1035       Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
   1036       Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
   1037       Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
   1038 
   1039       /* MULTIPLICATION (normal / high half of signed/unsigned) */
   1040       Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
   1041                     Iop_MulHi16Ux8, Iop_MulHi32Ux4,
   1042                     Iop_MulHi16Sx8, Iop_MulHi32Sx4,
   1043       /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
   1044       Iop_MullEven8Ux16, Iop_MullEven16Ux8,
   1045       Iop_MullEven8Sx16, Iop_MullEven16Sx8,
   1046       /* FIXME: document these */
   1047       Iop_Mull8Ux8, Iop_Mull8Sx8,
   1048       Iop_Mull16Ux4, Iop_Mull16Sx4,
   1049       Iop_Mull32Ux2, Iop_Mull32Sx2,
   1050       /* Vector Saturating Doubling Multiply Returning High Half and
   1051          Vector Saturating Rounding Doubling Multiply Returning High Half */
   1052       /* These IROp's multiply corresponding elements in two vectors, double
   1053          the results, and place the most significant half of the final results
   1054          in the destination vector. The results are truncated or rounded. If
   1055          any of the results overflow, they are saturated. */
   1056       Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
   1057       Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
   1058       /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
   1059       Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
    1060       /* Polynomial multiplication treats its arguments as coefficients of
    1061          polynomials over {0, 1}. */
   1062       Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
   1063       Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
   1064 
   1065       /* PAIRWISE operations */
   1066       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
   1067             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
   1068       Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
   1069       Iop_PwAdd32Fx2,
    1070       /* The longening variant is unary.  The resulting vector contains
    1071          half as many elements as the operand, but each is twice as wide.
   1072          Example:
   1073             Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
   1074                where a+b and c+d are unsigned 32-bit values. */
   1075       Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
   1076       Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
   1077 
   1078       /* ABSOLUTE VALUE */
   1079       Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
   1080 
   1081       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
   1082       Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
   1083       Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
   1084 
   1085       /* MIN/MAX */
   1086       Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
   1087       Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
   1088       Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
   1089       Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
   1090 
   1091       /* COMPARISON */
   1092       Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,
   1093       Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
   1094       Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
   1095 
   1096       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
   1097          bit) */
   1098       Iop_Cnt8x16,
   1099       Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
   1100       Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
   1101 
   1102       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
   1103       Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
   1104       Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
   1105       Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
   1106 
   1107       /* VECTOR x VECTOR SHIFT / ROTATE */
   1108       Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
   1109       Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
   1110       Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
   1111       Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
   1112       Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
   1113 
   1114       /* VECTOR x VECTOR SATURATING SHIFT */
   1115       Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
   1116       Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
   1117       /* VECTOR x INTEGER SATURATING SHIFT */
   1118       Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
   1119       Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
   1120       Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
   1121 
   1122       /* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
   1123       /* Note: the 16{U,S} and 32{U,S} are the pre-narrow lane widths. */
   1124       Iop_QNarrow16Ux8, Iop_QNarrow32Ux4,
   1125       Iop_QNarrow16Sx8, Iop_QNarrow32Sx4,
   1126       Iop_Narrow16x8, Iop_Narrow32x4,
   1127       /* Shortening V128->I64, lo half from each element */
   1128       Iop_Shorten16x8, Iop_Shorten32x4, Iop_Shorten64x2,
   1129       /* Saturating shortening from signed source to signed/unsigned destination */
   1130       Iop_QShortenS16Sx8, Iop_QShortenS32Sx4, Iop_QShortenS64Sx2,
   1131       Iop_QShortenU16Sx8, Iop_QShortenU32Sx4, Iop_QShortenU64Sx2,
   1132       /* Saturating shortening from unsigned source to unsigned destination */
   1133       Iop_QShortenU16Ux8, Iop_QShortenU32Ux4, Iop_QShortenU64Ux2,
   1134 
   1135       /* WIDENING */
    1136       /* Longening --- sign- or zero-extends each element of the argument
    1137          vector to twice its original size.  The resulting vector consists
    1138          of the same number of elements, but each element (and the vector
    1139          itself) is twice as wide.
   1140          All operations are I64->V128.
   1141          Example
   1142             Iop_Longen32Sx2( [a, b] ) = [c, d]
   1143                where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
   1144       Iop_Longen8Ux8, Iop_Longen16Ux4, Iop_Longen32Ux2,
   1145       Iop_Longen8Sx8, Iop_Longen16Sx4, Iop_Longen32Sx2,
   1146 
   1147       /* INTERLEAVING */
   1148       /* Interleave lanes from low or high halves of
   1149          operands.  Most-significant result lane is from the left
   1150          arg. */
   1151       Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
   1152       Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
   1153       Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
   1154       Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
   1155       /* Interleave odd/even lanes of operands.  Most-significant result lane
   1156          is from the left arg. */
   1157       Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
   1158       Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
   1159       Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
   1160 
   1161       /* CONCATENATION -- build a new value by concatenating either
   1162          the even or odd lanes of both operands. */
   1163       Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
   1164       Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
   1165 
   1166       /* GET elements of VECTOR
   1167          GET is binop (V128, I8) -> I<elem_size> */
   1168       /* Note: the arm back-end handles only constant second argument. */
   1169       Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
   1170 
   1171       /* DUPLICATING -- copy value to all lanes */
   1172       Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
   1173 
   1174       /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
   1175          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
   1176          result.
   1177          It is a triop: (V128, V128, I8) -> V128 */
   1178       /* Note: the ARM back end handles only a constant arg3 in this operation. */
   1179       Iop_ExtractV128,
   1180 
   1181       /* REVERSE the order of elements within each half-word, word, or
   1182          double-word */
   1183       /* Examples:
   1184             Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
   1185             Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
   1186       Iop_Reverse16_8x16,
   1187       Iop_Reverse32_8x16, Iop_Reverse32_16x8,
   1188       Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
   1189 
   1190       /* PERMUTING -- copy src bytes to dst,
   1191          as indexed by control vector bytes:
   1192             for i in 0 .. 15 . result[i] = argL[ argR[i] ]
   1193          argR[i] values may only be in the range 0 .. 15, else behaviour
   1194          is undefined. */
   1195       Iop_Perm8x16,
   1196 
   1197       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate.
   1198          See the floating-point equivalents for details. */
   1199       Iop_Recip32x4, Iop_Rsqrte32x4
   1200    }
   1201    IROp;
   1202 
   1203 /* Pretty-print an op. */
   1204 extern void ppIROp ( IROp );
   1205 
   1206 
   1207 /* Encoding of IEEE754-specified rounding modes.  This is the same as
   1208    the encoding used by Intel IA32 to indicate x87 rounding mode.
   1209    Note, various front and back ends rely on the actual numerical
   1210    values of these, so do not change them. */
   1211 typedef
   1212    enum {
   1213       Irrm_NEAREST = 0,
   1214       Irrm_NegINF  = 1,
   1215       Irrm_PosINF  = 2,
   1216       Irrm_ZERO    = 3
   1217    }
   1218    IRRoundingMode;
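
        /* Illustrative sketch, not part of the API: a rounding mode is
           typically passed to the floating-point Iop_* operations (see their
           comments above) as an ordinary I32-typed first operand holding one
           of the values above.  Assuming the IRConst_U32 constructor declared
           earlier in this header, the IRExpr_* constructors declared below,
           and 'a' and 'b' being F64-typed IRExpr* values:

              IRExpr* rm  = IRExpr_Const(IRConst_U32(Irrm_NEAREST));
              IRExpr* sum = IRExpr_Triop(Iop_AddF64, rm, a, b);
        */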
   1219 
   1220 /* Floating point comparison result values, as created by Iop_CmpF64.
   1221    This is also derived from what IA32 does. */
   1222 typedef
   1223    enum {
   1224       Ircr_UN = 0x45,
   1225       Ircr_LT = 0x01,
   1226       Ircr_GT = 0x00,
   1227       Ircr_EQ = 0x40
   1228    }
   1229    IRCmpF64Result;
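
        /* For example (an illustrative sketch only), a front end might test
           for an unordered comparison by comparing the Iop_CmpF64 result
           against Ircr_UN.  'a' and 'b' are assumed to be F64-typed IRExpr*
           values; IRConst_U32 is declared earlier in this header and the
           IRExpr_* constructors below; see the Iop_CmpF64 comment above for
           its exact result type.

              IRExpr* cmp  = IRExpr_Binop(Iop_CmpF64, a, b);
              IRExpr* isUN = IRExpr_Binop(Iop_CmpEQ32, cmp,
                                          IRExpr_Const(IRConst_U32(Ircr_UN)));
        */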
   1230 
   1231 
   1232 /* ------------------ Expressions ------------------ */
   1233 
   1234 /* The different kinds of expressions.  Their meaning is explained below
   1235    in the comments for IRExpr. */
   1236 typedef
   1237    enum {
   1238       Iex_Binder=0x15000,
   1239       Iex_Get,
   1240       Iex_GetI,
   1241       Iex_RdTmp,
   1242       Iex_Qop,
   1243       Iex_Triop,
   1244       Iex_Binop,
   1245       Iex_Unop,
   1246       Iex_Load,
   1247       Iex_Const,
   1248       Iex_Mux0X,
   1249       Iex_CCall
   1250    }
   1251    IRExprTag;
   1252 
   1253 /* An expression.  Stored as a tagged union.  'tag' indicates what kind
   1254    of expression this is.  'Iex' is the union that holds the fields.  If
   1255    an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   1256    expression, and the fields can be accessed with
   1257    'e.Iex.Load.<fieldname>'.
   1258 
   1259    For each kind of expression, we show what it looks like when
   1260    pretty-printed with ppIRExpr().
   1261 */
   1262 typedef
   1263    struct _IRExpr
   1264    IRExpr;
   1265 
   1266 struct _IRExpr {
   1267    IRExprTag tag;
   1268    union {
   1269       /* Used only in pattern matching within Vex.  Should not be seen
   1270          outside of Vex. */
   1271       struct {
   1272          Int binder;
   1273       } Binder;
   1274 
   1275       /* Read a guest register, at a fixed offset in the guest state.
   1276          ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
   1277       */
   1278       struct {
   1279          Int    offset;    /* Offset into the guest state */
   1280          IRType ty;        /* Type of the value being read */
   1281       } Get;
   1282 
   1283       /* Read a guest register at a non-fixed offset in the guest
   1284          state.  This allows circular indexing into parts of the guest
   1285          state, which is essential for modelling situations where the
   1286          identity of guest registers is not known until run time.  One
   1287          example is the x87 FP register stack.
   1288 
   1289          The part of the guest state to be treated as a circular array
   1290          is described in the IRRegArray 'descr' field.  It holds the
   1291          offset of the first element in the array, the type of each
   1292          element, and the number of elements.
   1293 
   1294          The array index is indicated rather indirectly, in a way
   1295          which makes optimisation easy: as the sum of a variable part
   1296          (the 'ix' field) and a constant offset (the 'bias' field).
   1297 
   1298          Since the indexing is circular, the actual array index to use
   1299          is computed as (ix + bias) % num-of-elems-in-the-array.
   1300 
   1301          Here's an example.  The description
   1302 
   1303             (96:8xF64)[t39,-7]
   1304 
   1305          describes an array of 8 F64-typed values, the
   1306          guest-state-offset of the first being 96.  This array is
   1307          being indexed at (t39 - 7) % 8.
   1308 
   1309          It is important to get the array size/type exactly correct
   1310          since IR optimisation looks closely at such info in order to
   1311          establish aliasing/non-aliasing between separate GetI and
   1312          PutI events, which is used to establish when they can be
   1313          reordered, etc.  Putting incorrect info in will lead to
   1314          obscure IR optimisation bugs.
   1315 
   1316             ppIRExpr output: GETI<descr>[<ix>,<bias>]
   1317                          eg. GETI(128:8xI8)[t1,0]
   1318       */
   1319       struct {
   1320          IRRegArray* descr; /* Part of guest state treated as circular */
   1321          IRExpr*     ix;    /* Variable part of index into array */
   1322          Int         bias;  /* Constant offset part of index into array */
   1323       } GetI;
   1324 
   1325       /* The value held by a temporary.
   1326          ppIRExpr output: t<tmp>, eg. t1
   1327       */
   1328       struct {
   1329          IRTemp tmp;       /* The temporary number */
   1330       } RdTmp;
   1331 
   1332       /* A quaternary operation.
   1333          ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
   1334                       eg. MAddF64r32(t1, t2, t3, t4)
   1335       */
   1336       struct {
   1337          IROp op;          /* op-code   */
   1338          IRExpr* arg1;     /* operand 1 */
   1339          IRExpr* arg2;     /* operand 2 */
   1340          IRExpr* arg3;     /* operand 3 */
   1341          IRExpr* arg4;     /* operand 4 */
   1342       } Qop;
   1343 
   1344       /* A ternary operation.
   1345          ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
   1346                       eg. MulF64(1, 2.0, 3.0)
   1347       */
   1348       struct {
   1349          IROp op;          /* op-code   */
   1350          IRExpr* arg1;     /* operand 1 */
   1351          IRExpr* arg2;     /* operand 2 */
   1352          IRExpr* arg3;     /* operand 3 */
   1353       } Triop;
   1354 
   1355       /* A binary operation.
   1356          ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
   1357       */
   1358       struct {
   1359          IROp op;          /* op-code   */
   1360          IRExpr* arg1;     /* operand 1 */
   1361          IRExpr* arg2;     /* operand 2 */
   1362       } Binop;
   1363 
   1364       /* A unary operation.
   1365          ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
   1366       */
   1367       struct {
   1368          IROp    op;       /* op-code */
   1369          IRExpr* arg;      /* operand */
   1370       } Unop;
   1371 
   1372       /* A load from memory -- a normal load, not a load-linked.
   1373          Load-Linkeds (and Store-Conditionals) are instead represented
   1374          by IRStmt.LLSC since Load-Linkeds have side effects and so
   1375          are not semantically valid IRExprs.
   1376          ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
   1377       */
   1378       struct {
   1379          IREndness end;    /* Endian-ness of the load */
   1380          IRType    ty;     /* Type of the loaded value */
   1381          IRExpr*   addr;   /* Address being loaded from */
   1382       } Load;
   1383 
   1384       /* A constant-valued expression.
   1385          ppIRExpr output: <con>, eg. 0x4:I32
   1386       */
   1387       struct {
   1388          IRConst* con;     /* The constant itself */
   1389       } Const;
   1390 
   1391       /* A call to a pure (no side-effects) helper C function.
   1392 
   1393          Within the 'cee' field, 'name' is the function's name.  It is
   1394          used only for pretty-printing purposes.  The address to call
   1395          (host address, of course) is stored in the 'addr' field
   1396          inside 'cee'.
   1397 
   1398          The 'args' field is a NULL-terminated array of arguments.
   1399          The stated return IRType, and the implied argument types,
   1400          must match those of the function being called well enough so
   1401          that the back end can actually generate correct code for the
   1402          call.
   1403 
   1404          The called function **must** satisfy the following:
   1405 
   1406          * no side effects -- must be a pure function, the result of
   1407            which depends only on the passed parameters.
   1408 
   1409          * it may not look at, nor modify, any of the guest state
   1410            since that would hide guest state transitions from
   1411            instrumenters
   1412 
   1413          * it may not access guest memory, since that would hide
   1414            guest memory transactions from the instrumenters
   1415 
   1416          This is restrictive, but makes the semantics clean, and does
   1417          not interfere with IR optimisation.
   1418 
   1419          If you want to call a helper which can mess with guest state
   1420          and/or memory, instead use Ist_Dirty.  This is a lot more
   1421          flexible, but you have to give a bunch of details about what
   1422          the helper does (and you had better be telling the truth,
   1423          otherwise any derived instrumentation will be wrong).  Also
   1424          Ist_Dirty inhibits various IR optimisations and so can cause
   1425          quite poor code to be generated.  Try to avoid it.
   1426 
   1427          ppIRExpr output: <cee>(<args>):<retty>
   1428                       eg. foo{0x80489304}(t1, t2):I32
   1429       */
   1430       struct {
   1431          IRCallee* cee;    /* Function to call. */
   1432          IRType    retty;  /* Type of return value. */
   1433          IRExpr**  args;   /* Vector of argument expressions. */
   1434       }  CCall;
   1435 
   1436       /* A ternary if-then-else operator.  It returns expr0 if cond is
   1437          zero, exprX otherwise.  Note that it is STRICT, ie. both
   1438          expr0 and exprX are evaluated in all cases.
   1439 
   1440          ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
   1441                          eg. Mux0X(t6,t7,t8)
   1442       */
   1443       struct {
   1444          IRExpr* cond;     /* Condition */
   1445          IRExpr* expr0;    /* Value when cond is zero */
   1446          IRExpr* exprX;    /* Value when cond is nonzero */
   1447       } Mux0X;
   1448    } Iex;
   1449 };
   1450 
   1451 /* Expression constructors. */
   1452 extern IRExpr* IRExpr_Binder ( Int binder );
   1453 extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
   1454 extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
   1455 extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
   1456 extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
   1457                                         IRExpr* arg3, IRExpr* arg4 );
   1458 extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
   1459                                         IRExpr* arg2, IRExpr* arg3 );
   1460 extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
   1461 extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
   1462 extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
   1463 extern IRExpr* IRExpr_Const  ( IRConst* con );
   1464 extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
   1465 extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
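
        /* A small sketch of how these constructors compose (illustrative
           only; IRConst_U32 is the constant constructor declared earlier in
           this header, and 't4' is assumed to be an I32-typed temporary):

              // t4 + 0x10, as a two-level expression tree
              IRExpr* e = IRExpr_Binop(Iop_Add32,
                                       IRExpr_RdTmp(t4),
                                       IRExpr_Const(IRConst_U32(0x10)));

           ppIRExpr(e) would print something like Add32(t4,0x10:I32). */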
   1466 
   1467 /* Deep-copy an IRExpr. */
   1468 extern IRExpr* deepCopyIRExpr ( IRExpr* );
   1469 
   1470 /* Pretty-print an IRExpr. */
   1471 extern void ppIRExpr ( IRExpr* );
   1472 
   1473 /* NULL-terminated IRExpr vector constructors, suitable for
   1474    use as arg lists in clean/dirty helper calls. */
   1475 extern IRExpr** mkIRExprVec_0 ( void );
   1476 extern IRExpr** mkIRExprVec_1 ( IRExpr* );
   1477 extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
   1478 extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
   1479 extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
   1480 extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
   1481                                 IRExpr* );
   1482 extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
   1483                                 IRExpr*, IRExpr* );
   1484 extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
   1485                                 IRExpr*, IRExpr*, IRExpr* );
   1486 extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
   1487                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*);
   1488 
   1489 /* IRExpr copiers:
   1490    - shallowCopy: shallow-copy (ie. create a new vector that shares the
   1491      elements with the original).
   1492    - deepCopy: deep-copy (ie. create a completely new vector). */
   1493 extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
   1494 extern IRExpr** deepCopyIRExprVec ( IRExpr** );
   1495 
   1496 /* Make a constant expression from the given host word taking into
   1497    account (of course) the host word size. */
   1498 extern IRExpr* mkIRExpr_HWord ( HWord );
   1499 
   1500 /* Convenience function for constructing clean helper calls. */
   1501 extern
   1502 IRExpr* mkIRExprCCall ( IRType retty,
   1503                         Int regparms, HChar* name, void* addr,
   1504                         IRExpr** args );
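
        /* Example (a sketch only; 'my_helper' is a hypothetical pure function
           supplied by the instrumenter, not something defined by VEX): build
           a clean call computing an I32 result from two I32-typed argument
           expressions 'argA' and 'argB':

              IRExpr* call
                 = mkIRExprCCall ( Ity_I32, 0,   // regparms
                                   "my_helper", &my_helper,
                                   mkIRExprVec_2 ( argA, argB ) );

           Remember the restrictions stated for Iex.CCall above: the helper
           must be pure and may not touch guest state or guest memory. */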
   1505 
   1506 
   1507 /* Convenience functions for atoms (IRExprs which are either Iex_RdTmp or
   1508  * Iex_Const). */
   1509 static inline Bool isIRAtom ( IRExpr* e ) {
   1510    return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
   1511 }
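
        /* For example, an instrumentation pass that requires flat input (see
           sanityCheckIRSB below) might assert, before handling a Binop, that
           both operands are atoms -- a sketch only, using the standard C
           'assert' and an IRExpr* 'e' assumed to have tag Iex_Binop:

              assert(isIRAtom(e->Iex.Binop.arg1));
              assert(isIRAtom(e->Iex.Binop.arg2));
        */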
   1512 
   1513 /* Are these two IR atoms identical?  Causes an assertion
   1514    failure if they are passed non-atoms. */
   1515 extern Bool eqIRAtom ( IRExpr*, IRExpr* );
   1516 
   1517 
   1518 /* ------------------ Jump kinds ------------------ */
   1519 
   1520 /* This describes hints which can be passed to the dispatcher at guest
   1521    control-flow transfer points.
   1522 
   1523    Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
   1524    guest_TISTART and guest_TILEN, which specify the start and length
   1525    of the region to be invalidated.  These are both the size of a
   1526    guest word.  It is the responsibility of the relevant toIR.c to
   1527    ensure that these are filled in with suitable values before issuing
   1528    a jump of kind Ijk_TInval.
   1529 
   1530    Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   1531    pseudo-register guest_EMWARN, which is 32-bits regardless of the
   1532    host or guest word size.  That register should be made to hold an
   1533    EmWarn_* value to indicate the reason for the exit.
   1534 
   1535    In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   1536    cannot continue) and so the jump destination can be anything.
   1537 
   1538    Re Ijk_Sys_ (syscall jumps): the guest state must have a
   1539    pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   1540    word.  Front ends should set this to be the IP at the most recently
   1541    executed kernel-entering (system call) instruction.  This makes it
   1542    very much easier (viz, actually possible at all) to back up the
   1543    guest to restart a syscall that has been interrupted by a signal.
   1544 */
   1545 typedef
   1546    enum {
   1547       Ijk_Boring=0x16000, /* not interesting; just goto next */
   1548       Ijk_Call,           /* guest is doing a call */
   1549       Ijk_Ret,            /* guest is doing a return */
   1550       Ijk_ClientReq,      /* do guest client req before continuing */
   1551       Ijk_Yield,          /* client is yielding to thread scheduler */
   1552       Ijk_YieldNoRedir,   /* client is yielding to thread scheduler AND jumping
   1553                              to the un-redirected guest addr */
   1554       Ijk_EmWarn,         /* report emulation warning before continuing */
   1555       Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
   1556       Ijk_NoDecode,       /* next instruction cannot be decoded */
   1557       Ijk_MapFail,        /* Vex-provided address translation failed */
   1558       Ijk_TInval,         /* Invalidate translations before continuing. */
   1559       Ijk_NoRedir,        /* Jump to un-redirected guest addr */
   1560       Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
   1561       Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
   1562       Ijk_SigBUS,         /* current instruction synths SIGBUS */
   1563       /* Unfortunately, various guest-dependent syscall kinds.  They
   1564          all mean: do a syscall before continuing. */
   1565       Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
   1566       Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
   1567       Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
   1568       Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
   1569       Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
   1570       Ijk_Sys_sysenter   /* x86 'sysenter'.  guest_EIP becomes
   1571                              invalid at the point this happens. */
   1572    }
   1573    IRJumpKind;
   1574 
   1575 extern void ppIRJumpKind ( IRJumpKind );
   1576 
   1577 
   1578 /* ------------------ Dirty helper calls ------------------ */
   1579 
   1580 /* A dirty call is a flexible mechanism for calling (possibly
   1581    conditionally) a helper function or procedure.  The helper function
   1582    may read, write or modify client memory, and may read, write or
   1583    modify client state.  It can take arguments and optionally return a
   1584    value.  It may return different results and/or do different things
   1585    when called repeatedly with the same arguments, by means of storing
   1586    private state.
   1587 
   1588    If a value is returned, it is assigned to the nominated return
   1589    temporary.
   1590 
   1591    Dirty calls are statements rather than expressions for obvious
   1592    reasons.  If a dirty call is marked as writing guest state, any
   1593    values derived from the written parts of the guest state are
   1594    invalid.  Similarly, if the dirty call is stated as writing
   1595    memory, any loaded values are invalidated by it.
   1596 
   1597    In order that instrumentation is possible, the call must state, and
   1598    state correctly:
   1599 
   1600    * whether it reads, writes or modifies memory, and if so where
   1601      (only one chunk can be stated)
   1602 
   1603    * whether it reads, writes or modifies guest state, and if so which
   1604      pieces (several pieces may be stated, and currently their extents
   1605      must be known at translation-time).
   1606 
   1607    Normally, code is generated to pass just the args to the helper.
   1608    However, if .needsBBP is set, then an extra first argument is
   1609    passed, which is the baseblock pointer, so that the callee can
   1610    access the guest state.  It is invalid for .nFxState to be zero
   1611    but .needsBBP to be True, since .nFxState==0 is a claim that the
   1612    call does not access guest state.
   1613 
   1614    IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
   1615    arguments are evaluated REGARDLESS of the guard value.  The
   1616    relative order of argument evaluation and guard evaluation is
   1617    unspecified.
   1618 */
   1619 
   1620 #define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
   1621 
   1622 /* Effects on resources (eg. registers, memory locations) */
   1623 typedef
   1624    enum {
   1625       Ifx_None = 0x17000,   /* no effect */
   1626       Ifx_Read,             /* reads the resource */
   1627       Ifx_Write,            /* writes the resource */
   1628       Ifx_Modify,           /* modifies the resource */
   1629    }
   1630    IREffect;
   1631 
   1632 /* Pretty-print an IREffect */
   1633 extern void ppIREffect ( IREffect );
   1634 
   1635 
   1636 typedef
   1637    struct {
   1638       /* What to call, and details of args/results */
   1639       IRCallee* cee;    /* where to call */
   1640       IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
   1641       IRExpr**  args;   /* arg list, ends in NULL */
   1642       IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */
   1643 
   1644       /* Mem effects; we allow only one R/W/M region to be stated */
   1645       IREffect  mFx;    /* indicates memory effects, if any */
   1646       IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
   1647       Int       mSize;  /* of access, or zero if mFx==Ifx_None */
   1648 
   1649       /* Guest state effects; up to N allowed */
   1650       Bool needsBBP; /* True => also pass guest state ptr to callee */
   1651       Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
   1652       struct {
   1653          IREffect fx;   /* read, write or modify?  Ifx_None is invalid. */
   1654          Int      offset;
   1655          Int      size;
   1656       } fxState[VEX_N_FXSTATE];
   1657    }
   1658    IRDirty;
   1659 
   1660 /* Pretty-print a dirty call */
   1661 extern void     ppIRDirty ( IRDirty* );
   1662 
   1663 /* Allocate an uninitialised dirty call */
   1664 extern IRDirty* emptyIRDirty ( void );
   1665 
   1666 /* Deep-copy a dirty call */
   1667 extern IRDirty* deepCopyIRDirty ( IRDirty* );
   1668 
   1669 /* A handy function which takes some of the tedium out of constructing
   1670    dirty helper calls.  The called function is assumed to return no
   1671    value and to have a constant-True guard.  The call is marked as
   1672    accessing neither guest state nor memory (hence the "unsafe"
   1673    designation) -- you can change this marking later if need be.  A
   1674    suitable IRCallee is constructed from the supplied bits. */
   1675 extern
   1676 IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
   1677                              IRExpr** args );
   1678 
   1679 /* Similarly, make a zero-annotation dirty call which returns a value,
   1680    and assign that to the given temp. */
   1681 extern
   1682 IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
   1683                              Int regparms, HChar* name, void* addr,
   1684                              IRExpr** args );
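
        /* A sketch of typical usage (illustrative only; 'my_dirty_helper' is
           a hypothetical helper supplied by the instrumenter, and 'bb' and
           't_arg' are an IRSB* and an I32-typed IRTemp assumed to exist in
           the surrounding code):

              IRDirty* d
                 = unsafeIRDirty_0_N ( 0,   // regparms
                                       "my_dirty_helper", &my_dirty_helper,
                                       mkIRExprVec_1 ( IRExpr_RdTmp(t_arg) ) );
              // Then annotate the effects truthfully, eg. "reads 4 bytes of
              // guest state at offset 16 and needs the baseblock pointer":
              d->needsBBP = True;
              d->nFxState = 1;
              d->fxState[0].fx     = Ifx_Read;
              d->fxState[0].offset = 16;
              d->fxState[0].size   = 4;
              addStmtToIRSB ( bb, IRStmt_Dirty(d) );
        */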
   1685 
   1686 
   1687 /* --------------- Memory Bus Events --------------- */
   1688 
   1689 typedef
   1690    enum {
   1691       Imbe_Fence=0x18000,
   1692    }
   1693    IRMBusEvent;
   1694 
   1695 extern void ppIRMBusEvent ( IRMBusEvent );
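
        /* For example, a front end or instrumenter can express a memory fence
           with (a sketch; 'bb' is assumed to be the IRSB being built):

              addStmtToIRSB ( bb, IRStmt_MBE(Imbe_Fence) );
        */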
   1696 
   1697 
   1698 /* --------------- Compare and Swap --------------- */
   1699 
   1700 /* This denotes an atomic compare and swap operation, either
   1701    a single-element one or a double-element one.
   1702 
   1703    In the single-element case:
   1704 
   1705      .addr is the memory address.
   1706      .end  is the endianness with which memory is accessed
   1707 
   1708      If the location at .addr contains the same value as .expdLo,
   1709      then .dataLo is written there, else there is no write.  In both
   1710      cases, the original value at .addr is copied into .oldLo.
   1711 
   1712      Types: .expdLo, .dataLo and .oldLo must all have the same type.
   1713      It may be any integral type, viz: I8, I16, I32 or, for 64-bit
   1714      guests, I64.
   1715 
   1716      .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
   1717      be NULL.
   1718 
   1719    In the double-element case:
   1720 
   1721      .addr is the memory address.
   1722      .end  is the endianness with which memory is accessed
   1723 
   1724      The operation is the same:
   1725 
   1726      If the location at .addr contains the same value as
   1727      .expdHi:.expdLo, then .dataHi:.dataLo is written there, else
   1728      there is no write.  In both cases the original value at .addr
   1729      is copied into .oldHi:.oldLo.
   1730 
   1731      Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
   1732      all have the same type, which may be any integral type, viz: I8,
   1733      I16, I32 or, for 64-bit guests, I64.
   1734 
   1735      The double-element case is complicated by the issue of
   1736      endianness.  In all cases, the two elements are understood to be
   1737      located adjacently in memory, starting at the address .addr.
   1738 
   1739        If .end is Iend_LE, then the .xxxLo component is at the lower
   1740        address and the .xxxHi component is at the higher address, and
   1741        each component is itself stored little-endianly.
   1742 
   1743        If .end is Iend_BE, then the .xxxHi component is at the lower
   1744        address and the .xxxLo component is at the higher address, and
   1745        each component is itself stored big-endianly.
   1746 
   1747    This allows representing more cases than most architectures can
   1748    handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
   1749 
   1750    How to know if the CAS succeeded?
   1751 
   1752    * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
   1753      then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
   1754      stored at .addr, and the original value there was .oldLo (resp
   1755      .oldHi:.oldLo).
   1756 
   1757    * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
   1758      then the CAS failed, and the original value at .addr was .oldLo
   1759      (resp. .oldHi:.oldLo).
   1760 
   1761    Hence it is easy to know whether or not the CAS succeeded.
   1762 */
   1763 typedef
   1764    struct {
   1765       IRTemp    oldHi;  /* old value of *addr is written here */
   1766       IRTemp    oldLo;
   1767       IREndness end;    /* endianness of the data in memory */
   1768       IRExpr*   addr;   /* store address */
   1769       IRExpr*   expdHi; /* expected old value at *addr */
   1770       IRExpr*   expdLo;
   1771       IRExpr*   dataHi; /* new value for *addr */
   1772       IRExpr*   dataLo;
   1773    }
   1774    IRCAS;
   1775 
   1776 extern void ppIRCAS ( IRCAS* cas );
   1777 
   1778 extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
   1779                         IREndness end, IRExpr* addr,
   1780                         IRExpr* expdHi, IRExpr* expdLo,
   1781                         IRExpr* dataHi, IRExpr* dataLo );
   1782 
   1783 extern IRCAS* deepCopyIRCAS ( IRCAS* );
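
        /* A sketch of a single-element CAS, corresponding to the example in
           the Ist_CAS comment below ("a 32-bit atomic increment of a value at
           address t2").  't1', 't2', 't3' and 'bb' are assumed to exist with
           suitable types in the surrounding code; IRConst_U32 is declared
           earlier in this header:

              IRCAS* cas
                 = mkIRCAS ( IRTemp_INVALID, t1,      // no .oldHi; old value -> t1
                             Iend_LE, IRExpr_RdTmp(t2),
                             NULL, IRExpr_RdTmp(t3),  // expected value
                             NULL, IRExpr_Binop(Iop_Add32,
                                                IRExpr_RdTmp(t3),
                                                IRExpr_Const(IRConst_U32(1))) );
              addStmtToIRSB ( bb, IRStmt_CAS(cas) );
        */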
   1784 
   1785 /* ------------------ Statements ------------------ */
   1786 
   1787 /* The different kinds of statements.  Their meaning is explained
   1788    below in the comments for IRStmt.
   1789 
   1790    Those marked META do not represent code, but rather extra
   1791    information about the code.  These statements can be removed
   1792    without affecting the functional behaviour of the code; however,
   1793    they are required by some IR consumers such as tools that
   1794    instrument the code.
   1795 */
   1796 
   1797 typedef
   1798    enum {
   1799       Ist_NoOp=0x19000,
   1800       Ist_IMark,     /* META */
   1801       Ist_AbiHint,   /* META */
   1802       Ist_Put,
   1803       Ist_PutI,
   1804       Ist_WrTmp,
   1805       Ist_Store,
   1806       Ist_CAS,
   1807       Ist_LLSC,
   1808       Ist_Dirty,
   1809       Ist_MBE,       /* META (maybe) */
   1810       Ist_Exit
   1811    }
   1812    IRStmtTag;
   1813 
   1814 /* A statement.  Stored as a tagged union.  'tag' indicates what kind
   1815    of statement this is.  'Ist' is the union that holds the fields.
   1816    If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   1817    statement, and the fields can be accessed with
   1818    'st.Ist.Store.<fieldname>'.
   1819 
   1820    For each kind of statement, we show what it looks like when
   1821    pretty-printed with ppIRStmt().
   1822 */
   1823 typedef
   1824    struct _IRStmt {
   1825       IRStmtTag tag;
   1826       union {
   1827          /* A no-op (usually resulting from IR optimisation).  Can be
   1828             omitted without any effect.
   1829 
   1830             ppIRStmt output: IR-NoOp
   1831          */
   1832          struct {
   1833          } NoOp;
   1834 
   1835          /* META: instruction mark.  Marks the start of the statements
   1836             that represent a single machine instruction (the end of
   1837             those statements is marked by the next IMark or the end of
   1838             the IRSB).  Contains the address and length of the
   1839             instruction.
   1840 
   1841             ppIRStmt output: ------ IMark(<addr>, <len>) ------,
   1842                          eg. ------ IMark(0x4000792, 5) ------,
   1843          */
   1844          struct {
   1845             Addr64 addr;   /* instruction address */
   1846             Int    len;    /* instruction length */
   1847          } IMark;
   1848 
   1849          /* META: An ABI hint, which says something about this
   1850             platform's ABI.
   1851 
   1852             At the moment, the only AbiHint is one which indicates
   1853             that a given chunk of address space, [base .. base+len-1],
   1854             has become undefined.  This is used on amd64-linux and
   1855             some ppc variants to pass stack-redzoning hints to whoever
   1856             wants to see them.  It also indicates the address of the
   1857             next (dynamic) instruction that will be executed.  This is
   1858             to help Memcheck with origin tracking.
   1859 
   1860             ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
   1861                          eg. ====== AbiHint(t1, 16, t2) ======
   1862          */
   1863          struct {
   1864             IRExpr* base;     /* Start  of undefined chunk */
   1865             Int     len;      /* Length of undefined chunk */
   1866             IRExpr* nia;      /* Address of next (guest) insn */
   1867          } AbiHint;
   1868 
   1869          /* Write a guest register, at a fixed offset in the guest state.
   1870             ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
   1871          */
   1872          struct {
   1873             Int     offset;   /* Offset into the guest state */
   1874             IRExpr* data;     /* The value to write */
   1875          } Put;
   1876 
   1877          /* Write a guest register, at a non-fixed offset in the guest
   1878             state.  See the comment for GetI expressions for more
   1879             information.
   1880 
   1881             ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
   1882                          eg. PUTI(64:8xF64)[t5,0] = t1
   1883          */
   1884          struct {
   1885             IRRegArray* descr; /* Part of guest state treated as circular */
   1886             IRExpr*     ix;    /* Variable part of index into array */
   1887             Int         bias;  /* Constant offset part of index into array */
   1888             IRExpr*     data;  /* The value to write */
   1889          } PutI;
   1890 
   1891          /* Assign a value to a temporary.  Note that SSA rules require
   1892             that each tmp be assigned to only once.  IR sanity checking will
   1893             reject any block containing a temporary which is not assigned
   1894             to exactly once.
   1895 
   1896             ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
   1897          */
   1898          struct {
   1899             IRTemp  tmp;   /* Temporary  (LHS of assignment) */
   1900             IRExpr* data;  /* Expression (RHS of assignment) */
   1901          } WrTmp;
   1902 
   1903          /* Write a value to memory.  This is a normal store, not a
   1904             Store-Conditional.  To represent a Store-Conditional,
   1905             instead use IRStmt.LLSC.
   1906             ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
   1907          */
   1908          struct {
   1909             IREndness end;    /* Endianness of the store */
   1910             IRExpr*   addr;   /* store address */
   1911             IRExpr*   data;   /* value to write */
   1912          } Store;
   1913 
   1914          /* Do an atomic compare-and-swap operation.  Semantics are
   1915             described above on a comment at the definition of IRCAS.
   1916 
   1917             ppIRStmt output:
   1918                t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
   1919             eg
   1920                t1 = CASle(t2 :: t3->Add32(t3,1))
   1921                which denotes a 32-bit atomic increment
   1922                of a value at address t2
   1923 
   1924             A double-element CAS may also be denoted, in which case <tmp>,
   1925             <expected> and <new> are all pairs of items, separated by
   1926             commas.
   1927          */
   1928          struct {
   1929             IRCAS* details;
   1930          } CAS;
   1931 
   1932          /* Either Load-Linked or Store-Conditional, depending on
   1933             STOREDATA.
   1934 
   1935             If STOREDATA is NULL then this is a Load-Linked, meaning
   1936             that data is loaded from memory as normal, but a
   1937             'reservation' for the address is also lodged in the
   1938             hardware.
   1939 
   1940                result = Load-Linked(addr, end)
   1941 
   1942             The data transfer type is the type of RESULT (I32, I64,
   1943             etc).  ppIRStmt output:
   1944 
   1945                result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
   1946 
   1947             If STOREDATA is not NULL then this is a Store-Conditional,
   1948             hence:
   1949 
   1950                result = Store-Conditional(addr, storedata, end)
   1951 
   1952             The data transfer type is the type of STOREDATA and RESULT
   1953             has type Ity_I1. The store may fail or succeed depending
   1954             on the state of a previously lodged reservation on this
   1955             address.  RESULT is written 1 if the store succeeds and 0
   1956             if it fails.  eg ppIRStmt output:
   1957 
   1958                result = ( ST<end>-Cond(<addr>) = <storedata> )
   1959                eg. t3 = ( STbe-Cond(t1) = t2 )
   1960 
   1961             In all cases, the address must be naturally aligned for
   1962             the transfer type -- any misaligned addresses should be
   1963             caught by a dominating IR check and side exit.  This
   1964             alignment restriction exists because on at least some
   1965             LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
   1966             misaligned addresses, and we have to actually generate
   1967             stwcx. on the host, and we don't want it trapping on the
   1968             host.
   1969 
   1970             Summary of rules for transfer type:
   1971               STOREDATA == NULL (LL):
   1972                 transfer type = type of RESULT
   1973               STOREDATA != NULL (SC):
   1974                 transfer type = type of STOREDATA, and RESULT :: Ity_I1
   1975          */
   1976          struct {
   1977             IREndness end;
   1978             IRTemp    result;
   1979             IRExpr*   addr;
   1980             IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
   1981          } LLSC;
   1982 
   1983          /* Call (possibly conditionally) a C function that has side
   1984             effects (ie. is "dirty").  See the comments above the
   1985             IRDirty type declaration for more information.
   1986 
   1987             ppIRStmt output:
   1988                t<tmp> = DIRTY <guard> <effects>
   1989                   ::: <callee>(<args>)
   1990             eg.
   1991                t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
   1992                      ::: foo{0x380035f4}(t2)
   1993          */
   1994          struct {
   1995             IRDirty* details;
   1996          } Dirty;
   1997 
   1998          /* A memory bus event.  Note that IRMBusEvent above currently
   1999             defines only Imbe_Fence, so a fence is the only event that
   2000             can be expressed.  IR optimisation treats it as a barrier
   2001             across which no memory references may be moved.
   2002             ppIRStmt output: MBusEvent-Fence
   2003          */
   2004          struct {
   2005             IRMBusEvent event;
   2006          } MBE;
   2007 
   2008          /* Conditional exit from the middle of an IRSB.
   2009             ppIRStmt output: if (<guard>) goto {<jk>} <dst>
   2010                          eg. if (t69) goto {Boring} 0x4000AAA:I32
   2011          */
   2012          struct {
   2013             IRExpr*    guard;    /* Conditional expression */
   2014             IRJumpKind jk;       /* Jump kind */
   2015             IRConst*   dst;      /* Jump target (constant only) */
   2016          } Exit;
   2017       } Ist;
   2018    }
   2019    IRStmt;
   2020 
   2021 /* Statement constructors. */
   2022 extern IRStmt* IRStmt_NoOp    ( void );
   2023 extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len );
   2024 extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
   2025 extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
   2026 extern IRStmt* IRStmt_PutI    ( IRRegArray* descr, IRExpr* ix, Int bias,
   2027                                 IRExpr* data );
   2028 extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
   2029 extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
   2030 extern IRStmt* IRStmt_CAS     ( IRCAS* details );
   2031 extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
   2032                                 IRExpr* addr, IRExpr* storedata );
   2033 extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
   2034 extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
   2035 extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
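
        /* Two small construction sketches (illustrative only; 't_addr' and
           't_guard' are temporaries assumed to hold a 64-bit guest address
           and a 1-bit guard, and IRConst_U32/IRConst_U64 are the constant
           constructors declared earlier in this header):

              // little-endian 32-bit store of a constant
              IRStmt* st = IRStmt_Store ( Iend_LE, IRExpr_RdTmp(t_addr),
                                          IRExpr_Const(IRConst_U32(0x2A)) );

              // guarded side exit to a constant guest address
              IRStmt* ex = IRStmt_Exit ( IRExpr_RdTmp(t_guard), Ijk_Boring,
                                         IRConst_U64(0x400000ULL) );
        */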
   2036 
   2037 /* Deep-copy an IRStmt. */
   2038 extern IRStmt* deepCopyIRStmt ( IRStmt* );
   2039 
   2040 /* Pretty-print an IRStmt. */
   2041 extern void ppIRStmt ( IRStmt* );
   2042 
   2043 
   2044 /* ------------------ Basic Blocks ------------------ */
   2045 
   2046 /* Type environments: a bunch of statements, expressions, etc, are
   2047    incomplete without an environment indicating the type of each
   2048    IRTemp.  So this provides one.  IR temporaries are really just
   2049    unsigned ints, so this provides an array, indexed 0 .. types_used-1,
   2050    giving the type of each one.
   2051 */
   2052 typedef
   2053    struct {
   2054       IRType* types;
   2055       Int     types_size;
   2056       Int     types_used;
   2057    }
   2058    IRTypeEnv;
   2059 
   2060 /* Obtain a new IRTemp */
   2061 extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
   2062 
   2063 /* Deep-copy a type environment */
   2064 extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
   2065 
   2066 /* Pretty-print a type environment */
   2067 extern void ppIRTypeEnv ( IRTypeEnv* );
   2068 
   2069 
   2070 /* Code blocks, which in proper compiler terminology are superblocks
   2071    (single entry, multiple exit code sequences) contain:
   2072 
   2073    - A table giving a type for each temp (the "type environment")
   2074    - An expandable array of statements
   2075    - An expression whose type matches the guest's word size (32 or 64
   2076      bits), indicating the next destination if the block
   2077      executes all the way to the end, without a side exit
   2078    - An indication of any special actions (JumpKind) needed
   2079      for this final jump.
   2080 
   2081    "IRSB" stands for "IR Super Block".
   2082 */
   2083 typedef
   2084    struct {
   2085       IRTypeEnv* tyenv;
   2086       IRStmt**   stmts;
   2087       Int        stmts_size;
   2088       Int        stmts_used;
   2089       IRExpr*    next;
   2090       IRJumpKind jumpkind;
   2091    }
   2092    IRSB;
   2093 
   2094 /* Allocate a new, uninitialised IRSB */
   2095 extern IRSB* emptyIRSB ( void );
   2096 
   2097 /* Deep-copy an IRSB */
   2098 extern IRSB* deepCopyIRSB ( IRSB* );
   2099 
   2100 /* Deep-copy an IRSB, except for the statements list, which is set to
   2101    be a new, empty list of statements. */
   2102 extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
   2103 
   2104 /* Pretty-print an IRSB */
   2105 extern void ppIRSB ( IRSB* );
   2106 
   2107 /* Append an IRStmt to an IRSB */
   2108 extern void addStmtToIRSB ( IRSB*, IRStmt* );
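
        /* Putting it together: the usual shape of an instrumentation pass (a
           minimal sketch, not prescriptive).  It copies the incoming
           superblock minus its statements, then walks the original
           statements, emitting each one -- plus, in a real tool, whatever
           analysis statements are wanted -- into the copy:

              IRSB* instrument_sb ( IRSB* sbIn )
              {
                 Int   i;
                 IRSB* sbOut = deepCopyIRSBExceptStmts(sbIn);
                 for (i = 0; i < sbIn->stmts_used; i++) {
                    IRStmt* st = sbIn->stmts[i];
                    // ... add analysis statements here as required ...
                    addStmtToIRSB(sbOut, st);
                 }
                 return sbOut;
              }

           The copied block carries a copy of the type environment, so new
           temporaries can be made with newIRTemp(sbOut->tyenv, ...) and used
           in the added statements. */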
   2109 
   2110 
   2111 /*---------------------------------------------------------------*/
   2112 /*--- Helper functions for the IR                             ---*/
   2113 /*---------------------------------------------------------------*/
   2114 
   2115 /* For messing with IR type environments */
   2116 extern IRTypeEnv* emptyIRTypeEnv  ( void );
   2117 
   2118 /* What is the type of this expression? */
   2119 extern IRType typeOfIRConst ( IRConst* );
   2120 extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
   2121 extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
   2122 
   2123 /* Sanity check a BB of IR */
   2124 extern void sanityCheckIRSB ( IRSB*  bb,
   2125                               HChar* caller,
   2126                               Bool   require_flatness,
   2127                               IRType guest_word_size );
   2128 extern Bool isFlatIRStmt ( IRStmt* );
   2129 
   2130 /* Is this value actually a member of the enumeration 'IRType'? */
   2131 extern Bool isPlausibleIRType ( IRType ty );
   2132 
   2133 #endif /* ndef __LIBVEX_IR_H */
   2134 
   2135 
   2136 /*---------------------------------------------------------------*/
   2137 /*---                                             libvex_ir.h ---*/
   2138 /*---------------------------------------------------------------*/
   2139