/*---------------------------------------------------------------*/
/*--- begin                                       libvex_ir.h ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#ifndef __LIBVEX_IR_H
#define __LIBVEX_IR_H

#include "libvex_basictypes.h"


/*---------------------------------------------------------------*/
/*--- High-level IR description                               ---*/
/*---------------------------------------------------------------*/

/* Vex IR is an architecture-neutral intermediate representation.
   Unlike some IRs in systems similar to Vex, it is not like assembly
   language (ie. a list of instructions).  Rather, it is more like the
   IR that might be used in a compiler.

   Code blocks
   ~~~~~~~~~~~
   The code is broken into small code blocks ("superblocks", type:
   'IRSB').  Each code block typically represents from 1 to perhaps 50
   instructions.  IRSBs are single-entry, multiple-exit code blocks.
   Each IRSB contains three things:
   - a type environment, which indicates the type of each temporary
     value present in the IRSB
   - a list of statements, which represent code
   - a jump that exits from the end of the IRSB
   Because the blocks are multiple-exit, there can be additional
   conditional exit statements that cause control to leave the IRSB
   before the final exit.  Also because of this, IRSBs can cover
   multiple non-consecutive sequences of code (up to 3).  These are
   recorded in the type VexGuestExtents (see libvex.h).

   Statements and expressions
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
   Statements (type 'IRStmt') represent operations with side-effects,
   eg. guest register writes, stores, and assignments to temporaries.
   Expressions (type 'IRExpr') represent operations without
   side-effects, eg. arithmetic operations, loads, constants.
   Expressions can contain sub-expressions, forming expression trees,
   eg. (3 + (4 * load(addr1))).

   Storage of guest state
   ~~~~~~~~~~~~~~~~~~~~~~
   The "guest state" contains the guest registers of the guest machine
   (ie. the machine that we are simulating).  It is stored by default
   in a block of memory supplied by the user of the VEX library,
   generally referred to as the guest state (area).  To operate on
   these registers, one must first read ("Get") them from the guest
   state into a temporary value.  Afterwards, one can write ("Put")
   them back into the guest state.
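
   For instance (an illustrative sketch, written in the concrete IR
   syntax shown in the examples below), incrementing a 32-bit guest
   register stored at guest state offset 0 is a Get, an Add32 and a
   Put:

      t0 = GET:I32(0)
      t1 = Add32(t0,0x1:I32)
      PUT(0) = t1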

   Get and Put are characterised by a byte offset into the guest
   state, a small integer which effectively gives the identity of the
   referenced guest register, and a type, which indicates the size of
   the value to be transferred.

   The basic "Get" and "Put" operations are sufficient to model normal
   fixed registers on the guest.  Selected areas of the guest state
   can be treated as a circular array of registers (type:
   'IRRegArray'), which can be indexed at run-time.  This is done with
   the "GetI" and "PutI" primitives.  This is necessary to describe
   rotating register files, for example the x87 FPU stack, SPARC
   register windows, and the Itanium register files.

   Examples, and flattened vs. unflattened code
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   For example, consider this x86 instruction:

     addl %eax, %ebx

   One Vex IR translation for this code would be this:

     ------ IMark(0x24F275, 7, 0) ------
     t3 = GET:I32(0)             # get %eax, a 32-bit integer
     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
     t1 = Add32(t3,t2)           # addl
     PUT(12) = t1                # put %ebx, the destination register

   (For simplicity, this ignores the effects on the condition codes, and
   the update of the instruction pointer.)

   The "IMark" is an IR statement that doesn't represent actual code.
   Instead it indicates the address and length of the original
   instruction.  The numbers 0 and 12 are offsets into the guest state
   for %eax and %ebx.  The full list of offsets for an architecture
   <ARCH> can be found in the type VexGuest<ARCH>State in the file
   VEX/pub/libvex_guest_<ARCH>.h.

   The five statements in this example are:
   - the IMark
   - three assignments to temporaries
   - one register write (put)

   The six expressions in this example are:
   - two register reads (gets)
   - one arithmetic (add) operation
   - three temporaries (two nested within the Add32, one in the PUT)

   The above IR is "flattened", ie. all sub-expressions are "atoms",
   either constants or temporaries.  An equivalent, unflattened version
   would be:

     PUT(12) = Add32(GET:I32(0), GET:I32(12))

   IR is guaranteed to be flattened at instrumentation-time.  This makes
   instrumentation easier.  Equivalent flattened and unflattened IR
   typically results in the same generated code.

   Another example, this one showing loads and stores:

     addl %edx,4(%eax)

   This becomes (again ignoring condition code and instruction pointer
   updates):

     ------ IMark(0x4000ABA, 3, 0) ------
     t3 = Add32(GET:I32(0),0x4:I32)
     t2 = LDle:I32(t3)
     t1 = GET:I32(8)
     t0 = Add32(t2,t1)
     STle(t3) = t0

   The "le" in "LDle" and "STle" is short for "little-endian".

   No need for deallocations
   ~~~~~~~~~~~~~~~~~~~~~~~~~
   Although there are allocation functions for various data structures
   in this file, there are no deallocation functions.  This is because
   Vex uses a memory allocation scheme that automatically reclaims the
   memory used by allocated structures once translation is completed.
   This makes things easier for tools that instrument/transform code
   blocks.

   SSAness and typing
   ~~~~~~~~~~~~~~~~~~
   The IR is fully typed.  For every IRSB (IR block) it is possible to
   say unambiguously whether or not it is correctly typed.
   Incorrectly typed IR has no meaning and VEX will refuse to process
   it.  At various points during processing VEX typechecks the IR and
   aborts if any violations are found.  This seems overkill but makes
   it a great deal easier to build a reliable JIT.
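
   For example (an illustrative sketch): if t0 has type Ity_I64 in the
   block's type environment, then

      t1 = Add32(t0,t2)

   is ill-typed, since Add32 requires both arguments to be I32, and
   the typechecker will reject the enclosing IRSB.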

   IR also has the SSA property.  SSA stands for Static Single
   Assignment, and what it means is that each IR temporary may be
   assigned to only once.  This idea became widely used in compiler
   construction in the mid to late 90s.  It makes many IR-level
   transformations/code improvements easier, simpler and faster.
   Whenever it typechecks an IR block, VEX also checks that the SSA
   property holds, and will abort if not so.  So SSAness is
   mechanically and rigidly enforced.
*/

/*---------------------------------------------------------------*/
/*--- Type definitions for the IR                             ---*/
/*---------------------------------------------------------------*/

/* General comments about naming schemes:

   All publicly visible functions contain the name of the primary
   type on which they operate (IRFoo, IRBar, etc).  Hence you should
   be able to identify these functions by grepping for "IR[A-Z]".

   For some type 'IRFoo':

   - ppIRFoo is the printing method for IRFoo, printing it to the
     output channel specified in the LibVEX_Initialise call.

   - eqIRFoo is a structural equality predicate for IRFoos.

   - deepCopyIRFoo is a deep copy constructor for IRFoos.
     It recursively traverses the entire argument tree and
     produces a complete new tree.  All types have a deep copy
     constructor.

   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
     It creates a new top-level copy of the supplied object,
     but does not copy any sub-objects.  Only some types have a
     shallow copy constructor.
*/

/* ------------------ Types ------------------ */

/* A type indicates the size of a value, and whether it's an integer, a
   float, or a vector (SIMD) value. */
typedef
   enum {
      Ity_INVALID=0x11000,
      Ity_I1,
      Ity_I8,
      Ity_I16,
      Ity_I32,
      Ity_I64,
      Ity_I128,  /* 128-bit scalar */
      Ity_F32,   /* IEEE 754 float */
      Ity_F64,   /* IEEE 754 double */
      Ity_D32,   /* 32-bit Decimal floating point */
      Ity_D64,   /* 64-bit Decimal floating point */
      Ity_D128,  /* 128-bit Decimal floating point */
      Ity_F128,  /* 128-bit floating point; implementation defined */
      Ity_V128,  /* 128-bit SIMD */
      Ity_V256   /* 256-bit SIMD */
   }
   IRType;

/* Pretty-print an IRType */
extern void ppIRType ( IRType );

/* Get the size (in bytes) of an IRType */
extern Int sizeofIRType ( IRType );
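
/* For instance (illustrative values, following the sizes implied by
   the type names above):

      sizeofIRType(Ity_I32)  == 4
      sizeofIRType(Ity_F64)  == 8
      sizeofIRType(Ity_V128) == 16
*/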

/* ------------------ Endianness ------------------ */

/* IREndness is used in load IRExprs and store IRStmts. */
typedef
   enum {
      Iend_LE=0x12000, /* little endian */
      Iend_BE          /* big endian */
   }
   IREndness;


/* ------------------ Constants ------------------ */

/* IRConsts are used within 'Const' and 'Exit' IRExprs. */

/* The various kinds of constant. */
typedef
   enum {
      Ico_U1=0x13000,
      Ico_U8,
      Ico_U16,
      Ico_U32,
      Ico_U64,
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as an IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as an IEEE754 double value. */
      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 32 x 1-byte lanes */
   }
   IRConstTag;

/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
   constant this is.  'Ico' is the union that holds the fields.  If an
   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
   and its value can be accessed with 'c.Ico.U32'. */
typedef
   struct _IRConst {
      IRConstTag tag;
      union {
         Bool   U1;
         UChar  U8;
         UShort U16;
         UInt   U32;
         ULong  U64;
         Float  F32;
         UInt   F32i;
         Double F64;
         ULong  F64i;
         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
      } Ico;
   }
   IRConst;

/* IRConst constructors */
extern IRConst* IRConst_U1   ( Bool );
extern IRConst* IRConst_U8   ( UChar );
extern IRConst* IRConst_U16  ( UShort );
extern IRConst* IRConst_U32  ( UInt );
extern IRConst* IRConst_U64  ( ULong );
extern IRConst* IRConst_F32  ( Float );
extern IRConst* IRConst_F32i ( UInt );
extern IRConst* IRConst_F64  ( Double );
extern IRConst* IRConst_F64i ( ULong );
extern IRConst* IRConst_V128 ( UShort );
extern IRConst* IRConst_V256 ( UInt );

/* Deep-copy an IRConst */
extern IRConst* deepCopyIRConst ( IRConst* );

/* Pretty-print an IRConst */
extern void ppIRConst ( IRConst* );

/* Compare two IRConsts for equality */
extern Bool eqIRConst ( IRConst*, IRConst* );


/* ------------------ Call targets ------------------ */

/* Describes a helper function to call.  The name part is purely for
   pretty printing and not actually used.  regparms=n tells the back
   end that the callee has been declared
   "__attribute__((regparm(n)))", although indirectly using the
   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
   to construct a non-standard sequence to call a function declared
   like this.

   mcx_mask is a sop to Memcheck.  It indicates which args should be
   considered 'always defined' when lazily computing definedness of
   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
   args[1], etc.  If a bit is set, the corresponding arg is excluded
   (hence "x" in "mcx") from definedness checking.
*/

typedef
   struct {
      Int    regparms;
      HChar* name;
      void*  addr;
      UInt   mcx_mask;
   }
   IRCallee;

/* Create an IRCallee. */
extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );

/* Deep-copy an IRCallee. */
extern IRCallee* deepCopyIRCallee ( IRCallee* );

/* Pretty-print an IRCallee. */
extern void ppIRCallee ( IRCallee* );
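
/* Example (an illustrative sketch; 'my_helper' is a hypothetical
   function, not part of VEX): describing a two-argument helper whose
   args[1] should be considered 'always defined' by Memcheck:

      IRCallee* cee = mkIRCallee(0, "my_helper", (void*)&my_helper);
      cee->mcx_mask = 1 << 1;   // exclude args[1] from definedness
                                // checking; args[0] is still checked
*/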

/* ------------------ Guest state arrays ------------------ */

/* This describes a section of the guest state that we want to
   be able to index at run time, so as to be able to describe
   indexed or rotating register files on the guest. */
typedef
   struct {
      Int    base;   /* guest state offset of start of indexed area */
      IRType elemTy; /* type of each element in the indexed area */
      Int    nElems; /* number of elements in the indexed area */
   }
   IRRegArray;

extern IRRegArray* mkIRRegArray ( Int, IRType, Int );

extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );

extern void ppIRRegArray ( IRRegArray* );
extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
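
/* For example (an illustrative sketch): an x87-style stack of eight
   F64 registers starting at guest state offset 96 -- matching the
   GetI example shown later in this file -- would be described as:

      IRRegArray* descr = mkIRRegArray(96, Ity_F64, 8);
*/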

/* ------------------ Temporaries ------------------ */

/* This represents a temporary, eg. t1.  The IR optimiser relies on the
   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
   any other size. */
typedef UInt IRTemp;

/* Pretty-print an IRTemp. */
extern void ppIRTemp ( IRTemp );

#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)


/* --------------- Primops (arity 1,2,3 and 4) --------------- */

/* Primitive operations that are used in Unop, Binop, Triop and Qop
   IRExprs.  Once we take into account integer, floating point and SIMD
   operations of all the different sizes, there are quite a lot of them.
   Most instructions supported by the architectures that Vex supports
   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
   are not; they are instead handled with dirty helpers that emulate
   their functionality.  Such obscure ones are thus not directly visible
   in the IR, but their effects on guest state (memory and registers)
   are made visible via the annotations in IRDirty structures.
*/
typedef
   enum {
      /* -- Do not change this ordering.  The IR generators rely on
            (eg) Iop_Add64 == Iop_Add8 + 3. -- */

      Iop_INVALID=0x14000,
      Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64,
      Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64,
      /* Signless mul.  MullS/MullU is elsewhere. */
      Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64,
      Iop_Or8,  Iop_Or16,  Iop_Or32,  Iop_Or64,
      Iop_And8, Iop_And16, Iop_And32, Iop_And64,
      Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64,
      Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64,
      Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64,
      Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64,
      /* Integer comparisons. */
      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64,
      Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64,
      /* Tags for unary ops */
      Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64,

      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
         hint that these compute the success/failure of a CAS
         operation, and hence are almost certainly applied to two
         copies of the same value, which in turn has implications for
         Memcheck's instrumentation. */
      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,

      /* -- Ordering not important after here. -- */

      /* Widening multiplies */
      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,

      /* Weirdo integer stuff */
      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
         zero.  You must ensure they are never given a zero argument. */

      /* Standard integer comparisons */
      Iop_CmpLT32S, Iop_CmpLT64S,
      Iop_CmpLE32S, Iop_CmpLE64S,
      Iop_CmpLT32U, Iop_CmpLT64U,
      Iop_CmpLE32U, Iop_CmpLE64U,

      /* As a sop to Valgrind-Memcheck, the following are useful. */
      Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64,
      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */
      Iop_Max32U, /* unsigned max */

      /* PowerPC-style 3-way integer comparisons.  Without them it is
         difficult to simulate PPC efficiently.
         op(x,y) | x < y  = 0x8 else
                 | x > y  = 0x4 else
                 | x == y = 0x2
      */
      Iop_CmpORD32U, Iop_CmpORD64U,
      Iop_CmpORD32S, Iop_CmpORD64S,

      /* Division */
      /* TODO: clarify semantics wrt rounding, negative values, whatever */
      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
      Iop_DivS32,   // ditto, signed
      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
      Iop_DivS64,   // ditto, signed
      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi)
                    //    concat with 64 0's (low))
      Iop_DivS64E,  // ditto, signed
      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi)
                    //    concat with 32 0's (low))
      Iop_DivS32E,  // ditto, signed

      Iop_DivModU64to32, // :: I64,I32 -> I64
                         // of which lo half is div and hi half is mod
      Iop_DivModS64to32, // ditto, signed

      Iop_DivModU128to64, // :: I128,I64 -> I128
                          // of which lo half is div and hi half is mod
      Iop_DivModS128to64, // ditto, signed

      Iop_DivModS64to64, // :: I64,I64 -> I128
                         // of which lo half is div and hi half is mod

      /* Integer conversions.  Some of these are redundant (eg
         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
         having a complete set reduces the typical dynamic size of IR
         and makes the instruction selectors easier to write. */

      /* Widening conversions */
      Iop_8Uto16, Iop_8Uto32, Iop_8Uto64,
      Iop_16Uto32, Iop_16Uto64,
      Iop_32Uto64,
      Iop_8Sto16, Iop_8Sto32, Iop_8Sto64,
      Iop_16Sto32, Iop_16Sto64,
      Iop_32Sto64,

      /* Narrowing conversions */
      Iop_64to8, Iop_32to8, Iop_64to16,
      /* 8 <-> 16 bit conversions */
      Iop_16to8,      // :: I16 -> I8, low half
      Iop_16HIto8,    // :: I16 -> I8, high half
      Iop_8HLto16,    // :: (I8,I8) -> I16
      /* 16 <-> 32 bit conversions */
      Iop_32to16,     // :: I32 -> I16, low half
      Iop_32HIto16,   // :: I32 -> I16, high half
      Iop_16HLto32,   // :: (I16,I16) -> I32
      /* 32 <-> 64 bit conversions */
      Iop_64to32,     // :: I64 -> I32, low half
      Iop_64HIto32,   // :: I64 -> I32, high half
      Iop_32HLto64,   // :: (I32,I32) -> I64
      /* 64 <-> 128 bit conversions */
      Iop_128to64,    // :: I128 -> I64, low half
      Iop_128HIto64,  // :: I128 -> I64, high half
      Iop_64HLto128,  // :: (I64,I64) -> I128
      /* 1-bit stuff */
      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
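
      /* For example (illustrative): Iop_32HLto64(hi,lo) produces the
         I64 value (hi << 32) | lo; Iop_64HIto32 and Iop_64to32 then
         recover the two halves again. */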

      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */

      /* --- Simple stuff as mandated by 754. --- */

      /* Binary operations, with rounding. */
      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,

      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,

      /* Variants of the above which produce a 64-bit result but which
         round their result to an IEEE float range first. */
      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,

      /* Unary operations, without rounding. */
      /* :: F64 -> F64 */
      Iop_NegF64, Iop_AbsF64,

      /* :: F32 -> F32 */
      Iop_NegF32, Iop_AbsF32,

      /* Unary operations, with rounding. */
      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_SqrtF64, Iop_SqrtF64r32,

      /* :: IRRoundingMode(I32) x F32 -> F32 */
      Iop_SqrtF32,

      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
            0x45 Unordered
            0x01 LT
            0x00 GT
            0x40 EQ
         This just happens to be the Intel encoding.  The values
         are recorded in the type IRCmpF64Result.
      */
      /* :: F64 x F64 -> IRCmpF64Result(I32) */
      Iop_CmpF64,
      Iop_CmpF32,
      Iop_CmpF128,

      /* --- Int to/from FP conversions. --- */

      /* For the most part, these take a first argument :: Ity_I32 (as
         IRRoundingMode) which is an indication of the rounding mode
         to use, as per the following encoding ("the standard
         encoding"):
            00b  to nearest (the default)
            01b  to -infinity
            10b  to +infinity
            11b  to zero
         This just happens to be the Intel encoding.  For reference only,
         the PPC encoding is:
            00b  to nearest (the default)
            01b  to zero
            10b  to +infinity
            11b  to -infinity
         Any PPC -> IR front end will have to translate these PPC
         encodings, as encoded in the guest state, to the standard
         encodings, to pass to the primops.
         For reference only, the ARM VFP encoding is:
            00b  to nearest
            01b  to +infinity
            10b  to -infinity
            11b  to zero
         Again, this will have to be converted to the standard encoding
         to pass to primops.

         If one of these conversions gets an out-of-range condition,
         or a NaN, as an argument, the result is host-defined.  On x86
         the "integer indefinite" value 0x80..00 is produced.  On PPC
         it is either 0x80..00 or 0x7F..FF depending on the sign of
         the argument.

         On ARMvfp, when converting to a signed integer result, the
         overflow result is 0x80..00 for negative args and 0x7F..FF
         for positive args.  For unsigned integer results it is
         0x00..00 and 0xFF..FF respectively.

         Rounding is required whenever the destination type cannot
         represent exactly all values of the source type.
      */
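
      /* For example (illustrative): a truncating conversion of t5
         :: F64 to a signed I32 is written with the round-to-zero mode
         constant as the first argument: F64toI32S(0x3:I32,t5). */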
622 */ 623 Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */ 624 Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */ 625 Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */ 626 Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */ 627 628 Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */ 629 630 Iop_I16StoF64, /* signed I16 -> F64 */ 631 Iop_I32StoF64, /* signed I32 -> F64 */ 632 Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */ 633 Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */ 634 Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */ 635 636 Iop_I32UtoF64, /* unsigned I32 -> F64 */ 637 638 Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */ 639 Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */ 640 Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */ 641 642 Iop_I16StoF32, /* signed I16 -> F32 */ 643 Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */ 644 Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */ 645 646 /* Conversion between floating point formats */ 647 Iop_F32toF64, /* F32 -> F64 */ 648 Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */ 649 650 /* Reinterpretation. Take an F64 and produce an I64 with 651 the same bit pattern, or vice versa. */ 652 Iop_ReinterpF64asI64, Iop_ReinterpI64asF64, 653 Iop_ReinterpF32asI32, Iop_ReinterpI32asF32, 654 655 /* Support for 128-bit floating point */ 656 Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */ 657 Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */ 658 Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */ 659 660 /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */ 661 Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128, 662 663 /* :: F128 -> F128 */ 664 Iop_NegF128, Iop_AbsF128, 665 666 /* :: IRRoundingMode(I32) x F128 -> F128 */ 667 Iop_SqrtF128, 668 669 Iop_I32StoF128, /* signed I32 -> F128 */ 670 Iop_I64StoF128, /* signed I64 -> F128 */ 671 Iop_F32toF128, /* F32 -> F128 */ 672 Iop_F64toF128, /* F64 -> F128 */ 673 674 Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */ 675 Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */ 676 Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */ 677 Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */ 678 679 /* --- guest x86/amd64 specifics, not mandated by 754. --- */ 680 681 /* Binary ops, with rounding. */ 682 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 683 Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */ 684 Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */ 685 Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */ 686 Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */ 687 Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */ 688 Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */ 689 Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */ 690 Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */ 691 /* Note that on x86 guest, PRem1{C3210} has the same behaviour 692 as the IEEE mandated RemF64, except it is limited in the 693 range of its operand. Hence the partialness. */ 694 695 /* Unary ops, with rounding. 
      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_SinF64,    /* FSIN */
      Iop_CosF64,    /* FCOS */
      Iop_TanF64,    /* FTAN */
      Iop_2xm1F64,   /* (2^arg - 1.0) */
      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
                            as F64) */
      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
                            as F32) */

      /* --- guest s390 specifics, not mandated by 754. --- */

      /* Fused multiply-add/sub */
      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
            (computes op3 * op2 +/- op1) */
      Iop_MAddF32, Iop_MSubF32,

      /* --- guest ppc32/64 specifics, not mandated by 754. --- */

      /* Ternary operations, with rounding. */
      /* Fused multiply-add/sub, with 112-bit intermediate
         precision for ppc.
         Also used to implement fused multiply-add/sub for s390. */
      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
            (computes arg2 * arg3 +/- arg4) */
      Iop_MAddF64, Iop_MSubF64,

      /* Variants of the above which produce a 64-bit result but which
         round their result to an IEEE float range first. */
      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
      Iop_MAddF64r32, Iop_MSubF64r32,

      /* :: F64 -> F64 */
      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
      Iop_RoundF64toF64_NEAREST, /* frin */
      Iop_RoundF64toF64_NegINF,  /* frim */
      Iop_RoundF64toF64_PosINF,  /* frip */
      Iop_RoundF64toF64_ZERO,    /* friz */

      /* :: F64 -> F32 */
      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */

      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
      /* NB: pretty much the same as Iop_F64toF32, except no change
         of type. */

      /* :: F64 -> I32 */
      Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
                       from FP result */

      /* ------------------ 32-bit SIMD Integer ------------------ */

      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
      Iop_QAdd32S,
      Iop_QSub32S,

      /* 16x2 add/sub, also signed/unsigned saturating variants */
      Iop_Add16x2, Iop_Sub16x2,
      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
      Iop_QSub16Sx2, Iop_QSub16Ux2,

      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
         compute bits 16:1 of (eg) sx(argL) + sx(argR),
         or zx(argL) - zx(argR) etc. */
      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
      Iop_HSub16Ux2, Iop_HSub16Sx2,

      /* 8x4 add/sub, also signed/unsigned saturating variants */
      Iop_Add8x4, Iop_Sub8x4,
      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
      Iop_QSub8Sx4, Iop_QSub8Ux4,

      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
         compute bits 8:1 of (eg) sx(argL) + sx(argR),
         or zx(argL) - zx(argR) etc. */
      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
      Iop_HSub8Ux4, Iop_HSub8Sx4,

      /* 8x4 sum of absolute unsigned differences. */
      Iop_Sad8Ux4,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,

      /* ------------------ 64-bit SIMD FP ------------------------ */

      /* Conversion to/from int */
      Iop_I32UtoFx2, Iop_I32StoFx2,          /* I32x2 -> F32x2 */
      Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ,    /* F32x2 -> I32x2 */
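
      /* For example (illustrative): the _RZ trailer means the
         conversion truncates towards zero, so per lane 2.7 becomes 2
         and -2.7 becomes -2. */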

      /* Fixed32 format is floating-point number with fixed number of
         fraction bits.  The number of fraction bits is passed as a
         second argument of type I8. */
      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */

      /* Binary operations */
      Iop_Max32Fx2, Iop_Min32Fx2,
      /* Pairwise Min and Max.  See integer pairwise operations for more
         details. */
      Iop_PwMax32Fx2, Iop_PwMin32Fx2,
      /* Note: For the following compares, the arm front-end assumes a
         nan in a lane of either argument returns zero for that lane. */
      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,

      /* Vector Reciprocal Estimate finds an approximate reciprocal of
         each element in the operand vector, and places the results in
         the destination vector. */
      Iop_Recip32Fx2,

      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
         Note that if one of the arguments is zero and the other one is
         an infinity of arbitrary sign, the result of the operation is
         2.0. */
      Iop_Recps32Fx2,

      /* Vector Reciprocal Square Root Estimate finds an approximate
         reciprocal square root of each element in the operand vector. */
      Iop_Rsqrte32Fx2,

      /* Vector Reciprocal Square Root Step computes
         (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is
         an infinity of arbitrary sign, the result of the operation is
         1.5. */
      Iop_Rsqrts32Fx2,

      /* Unary */
      Iop_Neg32Fx2, Iop_Abs32Fx2,

      /* ------------------ 64-bit SIMD Integer. ------------------ */

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,

      /* ADDITION (normal / unsigned sat / signed sat) */
      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,

      /* PAIRWISE operations */
      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
      /* The lengthening variant is unary.  The resulting vector contains
         half as many elements as the operand, but each is twice as wide.
         Example:
            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
         where a+b and c+d are unsigned 32-bit values. */
      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,

      /* SUBTRACTION (normal / unsigned sat / signed sat) */
      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,

      /* ABSOLUTE VALUE */
      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,

      /* MULTIPLICATION (normal / high half of signed/unsigned /
         polynomial) */
      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
      Iop_Mul32Fx2,
      Iop_MulHi16Ux4,
      Iop_MulHi16Sx4,
      /* Polynomial multiplication treats its arguments as coefficients
         of polynomials over {0, 1}. */
      Iop_PolynomialMul8x8,

      /* Vector Saturating Doubling Multiply Returning High Half and
         Vector Saturating Rounding Doubling Multiply Returning High
         Half */
      /* These IROps multiply corresponding elements in two vectors,
         double the results, and place the most significant half of the
         final results in the destination vector.  The results are
         truncated or rounded.  If any of the results overflow, they
         are saturated. */
      Iop_QDMulHi16Sx4,  Iop_QDMulHi32Sx2,
      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
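
      /* For example (illustrative): for Iop_QDMulHi16Sx4, each result
         lane is sat( (2 * a * b) >> 16 ) for the corresponding signed
         16-bit lanes a and b of the two operands. */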

      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
      Iop_Avg8Ux8,
      Iop_Avg16Ux4,

      /* MIN/MAX */
      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,

      /* COMPARISON */
      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,

      /* COUNT ones / leading zeroes / leading sign bits (not including
         topmost bit) */
      Iop_Cnt8x8,
      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,

      /* VECTOR x VECTOR SHIFT / ROTATE */
      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,

      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,

      /* VECTOR x VECTOR SATURATING SHIFT */
      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
      /* VECTOR x INTEGER SATURATING SHIFT */
      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
      Iop_QShlN8x8,  Iop_QShlN16x4,  Iop_QShlN32x2,  Iop_QShlN64x1,
      Iop_QSalN8x8,  Iop_QSalN16x4,  Iop_QSalN32x2,  Iop_QSalN64x1,

      /* NARROWING (binary)
         -- narrow 2xI64 into 1xI64, hi half from left arg */
      /* For saturated narrowing, I believe there are 4 variants of
         the basic arithmetic operation, depending on the signedness
         of argument and result.  Here are examples that exemplify
         what I mean:

         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
                                      return x[7:0];

         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
                                      if (x >s  127) x = 127;
                                      return x[7:0];

         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
                                      return x[7:0];

         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
                                      if (x >s 255) x = 255;
                                      return x[7:0];
      */
      Iop_QNarrowBin16Sto8Ux8,
      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,

      /* INTERLEAVING */
      /* Interleave lanes from low or high halves of
         operands.  Most-significant result lane is from the left
         arg. */
      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
      /* Interleave odd/even lanes of operands.  Most-significant result
         lane is from the left arg.  Note that
         Interleave{Odd,Even}Lanes32x2 are identical to
         Interleave{HI,LO}32x2 and so are omitted. */
      Iop_InterleaveOddLanes8x8,  Iop_InterleaveEvenLanes8x8,
      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,


      /* CONCATENATION -- build a new value by concatenating either
         the even or odd lanes of both operands.  Note that
         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
         and so are omitted. */
      Iop_CatOddLanes8x8,  Iop_CatOddLanes16x4,
      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,

      /* GET / SET elements of VECTOR
         GET is binop (I64, I8) -> I<elem_size>
         SET is triop (I64, I8, I<elem_size>) -> I64 */
      /* Note: the arm back-end handles only a constant second argument */
      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,

      /* DUPLICATING -- copy value to all lanes */
      Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2,

      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest
         bytes of result and arg3 lowest bytes of arg2 to arg3 highest
         bytes of result.
         It is a triop: (I64, I64, I8) -> I64 */
      /* Note: the arm back-end handles only a constant third argument. */
      Iop_Extract64,

      /* REVERSE the order of elements within each half-word, word, or
         double-word */
      /* Examples:
            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
      Iop_Reverse16_8x8,
      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,

      /* PERMUTING -- copy src bytes to dst,
         as indexed by control vector bytes:
            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
         argR[i] values may only be in the range 0 .. 7, else behaviour
         is undefined. */
      Iop_Perm8x8,

      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root
         Estimate.  See floating-point equivalents for details. */
      Iop_Recip32x2, Iop_Rsqrte32x2,

      /* ------------------ Decimal Floating Point ------------------ */

      /* ARITHMETIC INSTRUCTIONS   64-bit
         ----------------------------------
         IRRoundingModeDFP(I32) X D64 X D64 -> D64
      */
      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,

      /* ARITHMETIC INSTRUCTIONS  128-bit
         ----------------------------------
         IRRoundingModeDFP(I32) X D128 X D128 -> D128
      */
      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,

      /* SHIFT SIGNIFICAND INSTRUCTIONS
       * The DFP significand is shifted by the number of digits specified
       * by the U8 operand.  Digits shifted out of the leftmost digit are
       * lost.  Zeros are supplied to the vacated positions on the right.
       * The sign of the result is the same as the sign of the original
       * operand.
       *
       * D64 x U8 -> D64    left shift and right shift respectively */
      Iop_ShlD64, Iop_ShrD64,

      /* D128 x U8 -> D128  left shift and right shift respectively */
      Iop_ShlD128, Iop_ShrD128,


      /* FORMAT CONVERSION INSTRUCTIONS
       * D32 -> D64
       */
      Iop_D32toD64,

      /* D64 -> D128 */
      Iop_D64toD128,

      /* I64S -> D128 */
      Iop_I64StoD128,

      /* IRRoundingModeDFP(I32) x D64 -> D32 */
      Iop_D64toD32,

      /* IRRoundingModeDFP(I32) x D128 -> D64 */
      Iop_D128toD64,

      /* IRRoundingModeDFP(I32) x I64 -> D64 */
      Iop_I64StoD64,

      /* IRRoundingModeDFP(I32) x D64 -> I64 */
      Iop_D64toI64S,

      /* IRRoundingModeDFP(I32) x D128 -> I64 */
      Iop_D128toI64S,

      /* ROUNDING INSTRUCTIONS
       * IRRoundingMode(I32) x D64 -> D64
       * The D64 operand, if a finite number, is rounded to an integer
       * value. */
      Iop_RoundD64toInt,

      /* IRRoundingMode(I32) x D128 -> D128 */
      Iop_RoundD128toInt,

      /* COMPARE INSTRUCTIONS
       * D64 x D64 -> IRCmpD64Result(I32) */
      Iop_CmpD64,

      /* D128 x D128 -> IRCmpD64Result(I32) */
      Iop_CmpD128,

      /* QUANTIZE AND ROUND INSTRUCTIONS
       * The source operand is converted and rounded to the form with
       * the immediate exponent specified by the rounding and exponent
       * parameter.
       *
       * The second operand is converted and rounded to the form of the
       * first operand's exponent, and then rounded based on the
       * specified rounding mode parameter.
       *
       * IRRoundingModeDFP(I32) x D64 x D64 -> D64 */
      Iop_QuantizeD64,

      /* IRRoundingModeDFP(I32) x D128 x D128 -> D128 */
      Iop_QuantizeD128,

      /* IRRoundingModeDFP(I32) x I8 x D64 -> D64
       * The Decimal Floating point operand is rounded to the requested
       * significance given by the I8 operand as specified by the rounding
       * mode.
       */
      Iop_SignificanceRoundD64,

      /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
      Iop_SignificanceRoundD128,

      /* EXTRACT AND INSERT INSTRUCTIONS
       * D64 -> I64
       * The exponent of the D32 or D64 operand is extracted.  The
       * extracted exponent is converted to a 64-bit signed binary
       * integer.
       */
      Iop_ExtractExpD64,

      /* D128 -> I64 */
      Iop_ExtractExpD128,

      /* I64 x I64 -> D64
       * The exponent is specified by the first I64 operand; the signed
       * significand is given by the second I64 value.  The result is a
       * D64 value consisting of the specified significand and exponent
       * whose sign is that of the specified significand.
       */
      Iop_InsertExpD64,

      /* I64 x I128 -> D128 */
      Iop_InsertExpD128,

      /* Support for 128-bit DFP type */
      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,

      /* I64 -> I64
       * Convert 50-bit densely packed BCD string to 60-bit BCD string
       */
      Iop_DPBtoBCD,

      /* I64 -> I64
       * Convert 60-bit BCD string to 50-bit densely packed BCD string
       */
      Iop_BCDtoDPB,

      /* Conversion I64 -> D64 */
      Iop_ReinterpI64asD64,

      /* Conversion D64 -> I64 */
      Iop_ReinterpD64asI64,

      /* ------------------ 128-bit SIMD FP. ------------------ */

      /* --- 32x4 vector FP --- */

      /* binary */
      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
      Iop_Max32Fx4, Iop_Min32Fx4,
      Iop_Add32Fx2, Iop_Sub32Fx2,
      /* Note: For the following compares, the ppc and arm front-ends
         assume a nan in a lane of either argument returns zero for
         that lane. */
      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,

      /* Vector Absolute */
      Iop_Abs32Fx4,

      /* Pairwise Max and Min.  See integer pairwise operations for
         details. */
      Iop_PwMax32Fx4, Iop_PwMin32Fx4,

      /* unary */
      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
      Iop_Neg32Fx4,

      /* Vector Reciprocal Estimate finds an approximate reciprocal of
         each element in the operand vector, and places the results in
         the destination vector. */
      Iop_Recip32Fx4,
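
      /* A note on usage (illustrative): on ARM, the estimate is
         typically refined with the Reciprocal Step op that follows:
         starting from x0 = Recip32Fx4(d), each iteration computes
         x(n+1) = x(n) * (2.0 - d * x(n)), converging towards 1/d. */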

      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
         Note that if one of the arguments is zero and the other one is
         an infinity of arbitrary sign, the result of the operation is
         2.0. */
      Iop_Recps32Fx4,

      /* Vector Reciprocal Square Root Estimate finds an approximate
         reciprocal square root of each element in the operand vector. */
      Iop_Rsqrte32Fx4,

      /* Vector Reciprocal Square Root Step computes
         (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is
         an infinity of arbitrary sign, the result of the operation is
         1.5. */
      Iop_Rsqrts32Fx4,


      /* --- Int to/from FP conversion --- */
      /* Unlike the standard fp conversions, these irops take no
         rounding mode argument.  Instead the irop trailers _R{M,P,N,Z}
         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
      Iop_I32UtoFx4,     Iop_I32StoFx4,     /* I32x4 -> F32x4 */
      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,  /* F32x4 -> I32x4 */
      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with
                                               saturation) */
      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */
      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */
      /* Fixed32 format is floating-point number with fixed number of
         fraction bits.  The number of fraction bits is passed as a
         second argument of type I8. */
      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */

      /* --- Single to/from half conversion --- */
      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
      Iop_F32toF16x4, Iop_F16toF32x4,       /* F32x4 <-> F16x4 */

      /* --- 32x4 lowest-lane-only scalar FP --- */

      /* In binary cases, upper 3/4 is copied from first operand.  In
         unary cases, upper 3/4 is copied from the operand. */

      /* binary */
      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
      Iop_Max32F0x4, Iop_Min32F0x4,
      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,

      /* unary */
      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,

      /* --- 64x2 vector FP --- */

      /* binary */
      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
      Iop_Max64Fx2, Iop_Min64Fx2,
      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,

      /* unary */
      Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,

      /* --- 64x2 lowest-lane-only scalar FP --- */

      /* In binary cases, upper half is copied from first operand.  In
         unary cases, upper half is copied from the operand. */

      /* binary */
      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
      Iop_Max64F0x2, Iop_Min64F0x2,
      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,

      /* unary */
      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,

      /* --- pack / unpack --- */

      /* 64 <-> 128 bit vector */
      Iop_V128to64,     // :: V128 -> I64, low half
      Iop_V128HIto64,   // :: V128 -> I64, high half
      Iop_64HLtoV128,   // :: (I64,I64) -> V128

      Iop_64UtoV128,
      Iop_SetV128lo64,

      /* 32 <-> 128 bit vector */
      Iop_32UtoV128,
      Iop_V128to32,     // :: V128 -> I32, lowest lane
      Iop_SetV128lo32,  // :: (V128,I32) -> V128
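
      /* For example (illustrative): Iop_64HLtoV128(hi,lo) builds a
         V128 whose upper 64 bits are hi and lower 64 bits are lo;
         Iop_V128HIto64 and Iop_V128to64 recover the halves again. */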

      /* ------------------ 128-bit SIMD Integer. ------------------ */

      /* BITWISE OPS */
      Iop_NotV128,
      Iop_AndV128, Iop_OrV128, Iop_XorV128,

      /* VECTOR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlV128, Iop_ShrV128,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,

      /* ADDITION (normal / unsigned sat / signed sat) */
      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,

      /* SUBTRACTION (normal / unsigned sat / signed sat) */
      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,

      /* MULTIPLICATION (normal / high half of signed/unsigned) */
      Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4,
      Iop_MulHi16Ux8, Iop_MulHi32Ux4,
      Iop_MulHi16Sx8, Iop_MulHi32Sx4,
      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
      Iop_MullEven8Ux16, Iop_MullEven16Ux8,
      Iop_MullEven8Sx16, Iop_MullEven16Sx8,
      /* FIXME: document these */
      Iop_Mull8Ux8,  Iop_Mull8Sx8,
      Iop_Mull16Ux4, Iop_Mull16Sx4,
      Iop_Mull32Ux2, Iop_Mull32Sx2,
      /* Vector Saturating Doubling Multiply Returning High Half and
         Vector Saturating Rounding Doubling Multiply Returning High
         Half */
      /* These IROps multiply corresponding elements in two vectors,
         double the results, and place the most significant half of the
         final results in the destination vector.  The results are
         truncated or rounded.  If any of the results overflow, they
         are saturated. */
      Iop_QDMulHi16Sx8,  Iop_QDMulHi32Sx4,
      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
      /* Polynomial multiplication treats its arguments as coefficients
         of polynomials over {0, 1}. */
      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */

      /* PAIRWISE operations */
      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
      Iop_PwAdd32Fx2,
      /* The lengthening variant is unary.  The resulting vector contains
         half as many elements as the operand, but each is twice as wide.
         Example:
            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
         where a+b and c+d are unsigned 32-bit values. */
      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,

      /* ABSOLUTE VALUE */
      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,

      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,

      /* MIN/MAX */
      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,

      /* COMPARISON */
      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,

      /* COUNT ones / leading zeroes / leading sign bits (not including
         topmost bit) */
      Iop_Cnt8x16,
      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,

      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,

      /* VECTOR x VECTOR SHIFT / ROTATE */
      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,

      /* VECTOR x VECTOR SATURATING SHIFT */
      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
      /* VECTOR x INTEGER SATURATING SHIFT */
      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
      Iop_QShlN8x16,  Iop_QShlN16x8,  Iop_QShlN32x4,  Iop_QShlN64x2,
      Iop_QSalN8x16,  Iop_QSalN16x8,  Iop_QSalN32x4,  Iop_QSalN64x2,

      /* NARROWING (binary)
         -- narrow 2xV128 into 1xV128, hi half from left arg */
      /* See comments above w.r.t. U vs S issues in saturated
         narrowing. */
      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
      Iop_NarrowBin16to8x16,    Iop_NarrowBin32to16x8,

      /* NARROWING (unary) -- narrow V128 into I64 */
      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
      /* Saturating narrowing from signed source to signed/unsigned
         destination */
      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
      /* Saturating narrowing from unsigned source to unsigned
         destination */
      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,

      /* WIDENING -- sign or zero extend each element of the argument
         vector to twice its original size.  The resulting vector
         consists of the same number of elements but each element and
         the vector itself are twice as wide.
         All operations are I64->V128.
         Example
            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
            where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,

      /* INTERLEAVING */
      /* Interleave lanes from low or high halves of
         operands.  Most-significant result lane is from the left
         arg. */
      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
      /* Interleave odd/even lanes of operands.  Most-significant result
         lane is from the left arg. */
      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,

      /* CONCATENATION -- build a new value by concatenating either
         the even or odd lanes of both operands. */
      Iop_CatOddLanes8x16,  Iop_CatOddLanes16x8,  Iop_CatOddLanes32x4,
      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,

      /* GET elements of VECTOR
         GET is binop (V128, I8) -> I<elem_size> */
      /* Note: the arm back-end handles only a constant second argument. */
      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,

      /* DUPLICATING -- copy value to all lanes */
      Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,

      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3
         lowest bytes of result and arg3 lowest bytes of arg2 to arg3
         highest bytes of result.
         It is a triop: (V128, V128, I8) -> V128 */
      /* Note: the ARM back end handles only a constant arg3 in this
         operation. */
      Iop_ExtractV128,

      /* REVERSE the order of elements within each half-word, word, or
         double-word */
      /* Examples:
            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
      Iop_Reverse16_8x16,
      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,

      /* PERMUTING -- copy src bytes to dst,
         as indexed by control vector bytes:
            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
         argR[i] values may only be in the range 0 .. 15, else behaviour
         is undefined. */
      Iop_Perm8x16,
      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */

      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root
         Estimate.  See floating-point equivalents for details. */
      Iop_Recip32x4, Iop_Rsqrte32x4,

      /* ------------------ 256-bit SIMD Integer. ------------------ */

      /* Pack/unpack */
      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
      Iop_V256to64_1,
      Iop_V256to64_2,
      Iop_V256to64_3,  // V256 -> I64, extract most significant lane

      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
                       // first arg is most significant lane

      Iop_V256toV128_0, // V256 -> V128, less significant lane
      Iop_V256toV128_1, // V256 -> V128, more significant lane
      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif

      Iop_AndV256,
      Iop_OrV256,
      Iop_XorV256,
      Iop_NotV256,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,

      /* ------------------ 256-bit SIMD FP. ------------------ */
      Iop_Add64Fx4,
      Iop_Sub64Fx4,
      Iop_Mul64Fx4,
      Iop_Div64Fx4,
      Iop_Add32Fx8,
      Iop_Sub32Fx8,
      Iop_Mul32Fx8,
      Iop_Div32Fx8,

      Iop_Sqrt32Fx8,
      Iop_Sqrt64Fx4,
      Iop_RSqrt32Fx8,
      Iop_Recip32Fx8,

      Iop_Max32Fx8, Iop_Min32Fx8,
      Iop_Max64Fx4, Iop_Min64Fx4
   }
   IROp;

/* Pretty-print an op. */
extern void ppIROp ( IROp );


/* Encoding of IEEE754-specified rounding modes.
   This is the same as the encoding used by Intel IA32 to indicate
   x87 rounding mode.
   Note, various front and back ends rely on the actual numerical
   values of these, so do not change them. */
typedef
   enum {
      Irrm_NEAREST = 0,
      Irrm_NegINF  = 1,
      Irrm_PosINF  = 2,
      Irrm_ZERO    = 3
   }
   IRRoundingMode;

/* DFP encoding of IEEE754 2008 specified rounding modes extends the
 * two-bit binary floating point rounding mode (IRRoundingMode) to
 * three bits.  The DFP rounding modes are a superset of the binary
 * rounding modes.  The encoding was chosen such that the mapping of
 * the least significant two bits of the IR to POWER encodings is the
 * same.  The upper IR encoding bit is just a logical OR of the upper
 * rounding mode bit from the POWER encoding.
 */
typedef
   enum {
      Irrm_DFP_NEAREST              = 0, // Round to nearest, ties to even
      Irrm_DFP_NegINF               = 1, // Round to negative infinity
      Irrm_DFP_PosINF               = 2, // Round to positive infinity
      Irrm_DFP_ZERO                 = 3, // Round toward zero
      Irrm_DFP_NEAREST_TIE_AWAY_0   = 4, // Round to nearest, ties away from 0
      Irrm_DFP_PREPARE_SHORTER      = 5, // Round to prepare for shorter
                                         // precision
      Irrm_DFP_AWAY_FROM_ZERO       = 6, // Round away from 0
      Irrm_DFP_NEAREST_TIE_TOWARD_0 = 7  // Round to nearest, ties toward 0
   }
   IRRoundingModeDFP;

/* Floating point comparison result values, as created by Iop_CmpF64.
   This is also derived from what IA32 does. */
typedef
   enum {
      Ircr_UN = 0x45,
      Ircr_LT = 0x01,
      Ircr_GT = 0x00,
      Ircr_EQ = 0x40
   }
   IRCmpF64Result;

typedef IRCmpF64Result IRCmpF32Result;
typedef IRCmpF64Result IRCmpF128Result;

/* ------------------ Expressions ------------------ */

typedef struct _IRQop   IRQop;   /* forward declaration */
typedef struct _IRTriop IRTriop; /* forward declaration */


/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum {
      Iex_Binder=0x15000,
      Iex_Get,
      Iex_GetI,
      Iex_RdTmp,
      Iex_Qop,
      Iex_Triop,
      Iex_Binop,
      Iex_Unop,
      Iex_Load,
      Iex_Const,
      Iex_Mux0X,
      Iex_CCall
   }
   IRExprTag;

/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.  If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.
One 1586 example is the x87 FP register stack. 1587 1588 The part of the guest state to be treated as a circular array 1589 is described in the IRRegArray 'descr' field. It holds the 1590 offset of the first element in the array, the type of each 1591 element, and the number of elements. 1592 1593 The array index is indicated rather indirectly, in a way 1594 which makes optimisation easy: as the sum of variable part 1595 (the 'ix' field) and a constant offset (the 'bias' field). 1596 1597 Since the indexing is circular, the actual array index to use 1598 is computed as (ix + bias) % num-of-elems-in-the-array. 1599 1600 Here's an example. The description 1601 1602 (96:8xF64)[t39,-7] 1603 1604 describes an array of 8 F64-typed values, the 1605 guest-state-offset of the first being 96. This array is 1606 being indexed at (t39 - 7) % 8. 1607 1608 It is important to get the array size/type exactly correct 1609 since IR optimisation looks closely at such info in order to 1610 establish aliasing/non-aliasing between seperate GetI and 1611 PutI events, which is used to establish when they can be 1612 reordered, etc. Putting incorrect info in will lead to 1613 obscure IR optimisation bugs. 1614 1615 ppIRExpr output: GETI<descr>[<ix>,<bias] 1616 eg. GETI(128:8xI8)[t1,0] 1617 */ 1618 struct { 1619 IRRegArray* descr; /* Part of guest state treated as circular */ 1620 IRExpr* ix; /* Variable part of index into array */ 1621 Int bias; /* Constant offset part of index into array */ 1622 } GetI; 1623 1624 /* The value held by a temporary. 1625 ppIRExpr output: t<tmp>, eg. t1 1626 */ 1627 struct { 1628 IRTemp tmp; /* The temporary number */ 1629 } RdTmp; 1630 1631 /* A quaternary operation. 1632 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>), 1633 eg. MAddF64r32(t1, t2, t3, t4) 1634 */ 1635 struct { 1636 IRQop* details; 1637 } Qop; 1638 1639 /* A ternary operation. 1640 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>), 1641 eg. MulF64(1, 2.0, 3.0) 1642 */ 1643 struct { 1644 IRTriop* details; 1645 } Triop; 1646 1647 /* A binary operation. 1648 ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2) 1649 */ 1650 struct { 1651 IROp op; /* op-code */ 1652 IRExpr* arg1; /* operand 1 */ 1653 IRExpr* arg2; /* operand 2 */ 1654 } Binop; 1655 1656 /* A unary operation. 1657 ppIRExpr output: <op>(<arg>), eg. Neg8(t1) 1658 */ 1659 struct { 1660 IROp op; /* op-code */ 1661 IRExpr* arg; /* operand */ 1662 } Unop; 1663 1664 /* A load from memory -- a normal load, not a load-linked. 1665 Load-Linkeds (and Store-Conditionals) are instead represented 1666 by IRStmt.LLSC since Load-Linkeds have side effects and so 1667 are not semantically valid IRExpr's. 1668 ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1) 1669 */ 1670 struct { 1671 IREndness end; /* Endian-ness of the load */ 1672 IRType ty; /* Type of the loaded value */ 1673 IRExpr* addr; /* Address being loaded from */ 1674 } Load; 1675 1676 /* A constant-valued expression. 1677 ppIRExpr output: <con>, eg. 0x4:I32 1678 */ 1679 struct { 1680 IRConst* con; /* The constant itself */ 1681 } Const; 1682 1683 /* A call to a pure (no side-effects) helper C function. 1684 1685 With the 'cee' field, 'name' is the function's name. It is 1686 only used for pretty-printing purposes. The address to call 1687 (host address, of course) is stored in the 'addr' field 1688 inside 'cee'. 1689 1690 The 'args' field is a NULL-terminated array of arguments. 
      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match those of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         * it must not assume that arguments are being evaluated in a
           particular order.  The order of evaluation is unspecified.

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you had better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also,
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      } CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                      eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Value when cond is zero */
         IRExpr* exprX;    /* Value when cond is nonzero */
      } Mux0X;
   } Iex;
};

/* ------------------ A ternary expression ---------------------- */
struct _IRTriop {
   IROp    op;          /* op-code   */
   IRExpr* arg1;        /* operand 1 */
   IRExpr* arg2;        /* operand 2 */
   IRExpr* arg3;        /* operand 3 */
};

/* ------------------ A quaternary expression ------------------ */
struct _IRQop {
   IROp    op;          /* op-code   */
   IRExpr* arg1;        /* operand 1 */
   IRExpr* arg2;        /* operand 2 */
   IRExpr* arg3;        /* operand 3 */
   IRExpr* arg4;        /* operand 4 */
};

/* Expression constructors. */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
                               IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
                               IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const  ( IRConst* con );
extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
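
/* As a small worked example (a sketch only -- t2 and t3 stand for
   already-allocated I32-typed temporaries), the flattened expression
   Add32(t2,t3) is built with these constructors as:

      IRExpr* sum = IRExpr_Binop(Iop_Add32,
                                 IRExpr_RdTmp(t2),
                                 IRExpr_RdTmp(t3));
*/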
/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( IRExpr* );

/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr* );

/* IRExpr vector copiers:
   - shallowCopyIRExprVec: shallow-copy (ie. create a new vector that
     shares the elements with the original).
   - deepCopyIRExprVec: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr** );

/* Make a constant expression from the given host word, taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls. */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, HChar* name, void* addr,
                        IRExpr** args );


/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp
   or Iex_Const). */
static inline Bool isIRAtom ( IRExpr* e ) {
   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
}

/* Are these two IR atoms identical?  Causes an assertion
   failure if they are passed non-atoms. */
extern Bool eqIRAtom ( IRExpr*, IRExpr* );
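
/* For example (a sketch only -- 'add_helper' is a hypothetical pure
   helper, and t1/t2 are I32-typed temporaries), a clean call is
   typically built with mkIRExprCCall declared above:

      static UInt add_helper ( UInt a, UInt b ) { return a + b; }

      IRExpr* call
         = mkIRExprCCall(
              Ity_I32, 0,                   // return type, regparms
              "add_helper", (void*)add_helper,
              mkIRExprVec_2(IRExpr_RdTmp(t1), IRExpr_RdTmp(t2))
           );

   Note the helper obeys the rules for clean calls stated earlier: it
   is a pure function of its arguments and touches neither guest state
   nor guest memory. */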
/* ------------------ Jump kinds ------------------ */

/* This describes hints which can be passed to the dispatcher at guest
   control-flow transfer points.

   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
   guest_TISTART and guest_TILEN, which specify the start and length
   of the region to be invalidated.  These are both the size of a
   guest word.  It is the responsibility of the relevant toIR.c to
   ensure that these are filled in with suitable values before issuing
   a jump of kind Ijk_TInval.

   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   pseudo-register guest_EMWARN, which is 32 bits regardless of the
   host or guest word size.  That register should be made to hold an
   EmWarn_* value to indicate the reason for the exit.

   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   cannot continue) and so the jump destination can be anything.

   Re Ijk_Sys_ (syscall jumps): the guest state must have a
   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   word.  Front ends should set this to be the IP at the most recently
   executed kernel-entering (system call) instruction.  This makes it
   very much easier (viz, actually possible at all) to back up the
   guest to restart a syscall that has been interrupted by a signal.
*/
typedef
   enum {
      Ijk_INVALID=0x16000,
      Ijk_Boring,       /* not interesting; just goto next */
      Ijk_Call,         /* guest is doing a call */
      Ijk_Ret,          /* guest is doing a return */
      Ijk_ClientReq,    /* do guest client req before continuing */
      Ijk_Yield,        /* client is yielding to thread scheduler */
      Ijk_EmWarn,       /* report emulation warning before continuing */
      Ijk_EmFail,       /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,     /* next instruction cannot be decoded */
      Ijk_MapFail,      /* Vex-provided address translation failed */
      Ijk_TInval,       /* Invalidate translations before continuing. */
      Ijk_NoRedir,      /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,      /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,      /* current instruction synths SIGSEGV */
      Ijk_SigBUS,       /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,  /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,    /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,   /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,   /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,   /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter  /* x86 'sysenter'.  guest_EIP becomes
                           invalid at the point this happens. */
   }
   IRJumpKind;

extern void ppIRJumpKind ( IRJumpKind );


/* ------------------ Dirty helper calls ------------------ */

/* A dirty call is a flexible mechanism for calling (possibly
   conditionally) a helper function or procedure.  The helper function
   may read, write or modify client memory, and may read, write or
   modify client state.  It can take arguments and optionally return a
   value.  It may return different results and/or do different things
   when called repeatedly with the same arguments, by means of storing
   private state.

   If a value is returned, it is assigned to the nominated return
   temporary.

   Dirty calls are statements rather than expressions for obvious
   reasons.  If a dirty call is marked as writing guest state, any
   values derived from the written parts of the guest state are
   invalid.  Similarly, if the dirty call is stated as writing
   memory, any loaded values are invalidated by it.

   In order that instrumentation is possible, the call must state, and
   state correctly:

   * whether it reads, writes or modifies memory, and if so where
     (only one chunk can be stated)

   * whether it reads, writes or modifies guest state, and if so which
     pieces (several pieces may be stated, and currently their extents
     must be known at translation-time).

   Normally, code is generated to pass just the args to the helper.
   However, if .needsBBP is set, then an extra first argument is
   passed, which is the baseblock pointer, so that the callee can
   access the guest state.  It is invalid for .nFxState to be zero
   but .needsBBP to be True, since .nFxState==0 is a claim that the
   call does not access guest state.

   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
   arguments are evaluated REGARDLESS of the guard value.  The order of
   argument evaluation is unspecified.  The guard expression is
   evaluated AFTER the arguments have been evaluated.
*/

#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
/* Effects on resources (eg. registers, memory locations) */
typedef
   enum {
      Ifx_None=0x1700,   /* no effect */
      Ifx_Read,          /* reads the resource */
      Ifx_Write,         /* writes the resource */
      Ifx_Modify         /* modifies the resource */
   }
   IREffect;

/* Pretty-print an IREffect */
extern void ppIREffect ( IREffect );


typedef
   struct _IRDirty {
      /* What to call, and details of args/results.  .guard must be
         non-NULL.  If .tmp is not IRTemp_INVALID (that is, the call
         returns a result) then .guard must be demonstrably (at
         JIT-time) always true, that is, the call must be
         unconditional.  Conditional calls that assign .tmp are not
         allowed. */
      IRCallee* cee;    /* where to call */
      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
      IRExpr**  args;   /* arg list, ends in NULL */
      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect  mFx;    /* indicates memory effects, if any */
      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
      Int       mSize;  /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to N allowed */
      Bool needsBBP;    /* True => also pass guest state ptr to callee */
      Int  nFxState;    /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
         UShort   offset;
         UShort   size;
         UChar    nRepeats;
         UChar    repeatLen;
      } fxState[VEX_N_FXSTATE];
      /* The access can be repeated, as specified by nRepeats and
         repeatLen.  To describe only a single access, nRepeats and
         repeatLen should be zero.  Otherwise, repeatLen must be a
         multiple of size and greater than size. */
      /* Overall, the parts of the guest state denoted by (offset,
         size, nRepeats, repeatLen) are
            [offset, +size)
         and, if nRepeats > 0,
            for (i = 1; i <= nRepeats; i++)
               [offset + i * repeatLen, +size)
         A convenient way to enumerate all segments is therefore
            for (i = 0; i < 1 + nRepeats; i++)
               [offset + i * repeatLen, +size)
      */
   }
   IRDirty;

/* Pretty-print a dirty call */
extern void     ppIRDirty ( IRDirty* );

/* Allocate an uninitialised dirty call */
extern IRDirty* emptyIRDirty ( void );

/* Deep-copy a dirty call */
extern IRDirty* deepCopyIRDirty ( IRDirty* );

/* A handy function which takes some of the tedium out of constructing
   dirty helper calls.  The called function is implied not to return
   any value and to have a constant-True guard.  The call is marked as
   accessing neither guest state nor memory (hence the "unsafe"
   designation) -- you can change this marking later if need be.  A
   suitable IRCallee is constructed from the supplied bits. */
extern
IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
                             IRExpr** args );

/* Similarly, make a zero-annotation dirty call which returns a value,
   and assign that to the given temp. */
extern
IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
                             Int regparms, HChar* name, void* addr,
                             IRExpr** args );
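
/* A sketch of typical usage (illustrative only -- 'trace_load' is a
   hypothetical helper, 'sb' an IRSB under construction, and t_addr an
   address-typed temporary).  The call starts out with no annotations,
   which are then filled in to declare a 4-byte memory read:

      IRDirty* d
         = unsafeIRDirty_0_N(
              0, "trace_load", (void*)trace_load,
              mkIRExprVec_1(IRExpr_RdTmp(t_addr))
           );
      d->mFx   = Ifx_Read;              // helper inspects memory ...
      d->mAddr = IRExpr_RdTmp(t_addr);  // ... at this address ...
      d->mSize = 4;                     // ... for this many bytes
      addStmtToIRSB(sb, IRStmt_Dirty(d));

   (IRStmt_Dirty and addStmtToIRSB are declared further below.) */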
/* --------------- Memory Bus Events --------------- */

typedef
   enum {
      Imbe_Fence=0x18000,
      /* Needed only on ARM.  It cancels a reservation made by a
         preceding Load-Linked, and needs to be handed through to the
         back end, just as LL and SC themselves are. */
      Imbe_CancelReservation
   }
   IRMBusEvent;

extern void ppIRMBusEvent ( IRMBusEvent );


/* --------------- Compare and Swap --------------- */

/* This denotes an atomic compare and swap operation, either
   a single-element one or a double-element one.

   In the single-element case:

     .addr is the memory address.
     .end  is the endianness with which memory is accessed

     If .addr contains the same value as .expdLo, then .dataLo is
     written there, else there is no write.  In both cases, the
     original value at .addr is copied into .oldLo.

     Types: .expdLo, .dataLo and .oldLo must all have the same type.
     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
     guests, I64.

     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
     be NULL.

   In the double-element case:

     .addr is the memory address.
     .end  is the endianness with which memory is accessed

     The operation is the same:

     If .addr contains the same value as .expdHi:.expdLo, then
     .dataHi:.dataLo is written there, else there is no write.  In
     both cases the original value at .addr is copied into
     .oldHi:.oldLo.

     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
     all have the same type, which may be any integral type, viz: I8,
     I16, I32 or, for 64-bit guests, I64.

     The double-element case is complicated by the issue of
     endianness.  In all cases, the two elements are understood to be
     located adjacently in memory, starting at the address .addr.

     If .end is Iend_LE, then the .xxxLo component is at the lower
     address and the .xxxHi component is at the higher address, and
     each component is itself stored little-endianly.

     If .end is Iend_BE, then the .xxxHi component is at the lower
     address and the .xxxLo component is at the higher address, and
     each component is itself stored big-endianly.

   This allows representing more cases than most architectures can
   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.

   How to know if the CAS succeeded?

   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
     stored at .addr, and the original value there was .oldLo (resp.
     .oldHi:.oldLo).

   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
     then the CAS failed, and the original value at .addr was .oldLo
     (resp. .oldHi:.oldLo).

   Hence it is easy to know whether or not the CAS succeeded.
*/
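
/* For example (a sketch -- the temporaries and their I32 types are
   assumed), a 32-bit single-element CAS on a little-endian guest,
   corresponding to the pretty-printed form t1 = CASle(t2 :: t3->t4),
   could be built with mkIRCAS, declared below:

      IRCAS* cas = mkIRCAS( IRTemp_INVALID, t1,      // only .oldLo used
                            Iend_LE, IRExpr_RdTmp(t2),
                            NULL, IRExpr_RdTmp(t3),  // no .expdHi
                            NULL, IRExpr_RdTmp(t4) ); // no .dataHi
      addStmtToIRSB(sb, IRStmt_CAS(cas));
*/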
typedef
   struct {
      IRTemp    oldHi;  /* old value of *addr is written here */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr */
      IRExpr*   dataLo;
   }
   IRCAS;

extern void ppIRCAS ( IRCAS* cas );

extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
                        IREndness end, IRExpr* addr,
                        IRExpr* expdHi, IRExpr* expdLo,
                        IRExpr* dataHi, IRExpr* dataLo );

extern IRCAS* deepCopyIRCAS ( IRCAS* );


/* ------------------ Circular Array Put ------------------ */

typedef
   struct {
      IRRegArray* descr;  /* Part of guest state treated as circular */
      IRExpr*     ix;     /* Variable part of index into array */
      Int         bias;   /* Constant offset part of index into array */
      IRExpr*     data;   /* The value to write */
   } IRPutI;

extern void ppIRPutI ( IRPutI* puti );

extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
                          Int bias, IRExpr* data );

extern IRPutI* deepCopyIRPutI ( IRPutI* );


/* ------------------ Statements ------------------ */

/* The different kinds of statements.  Their meaning is explained
   below in the comments for IRStmt.

   Those marked META do not represent code, but rather extra
   information about the code.  These statements can be removed
   without affecting the functional behaviour of the code; however,
   they are required by some IR consumers such as tools that
   instrument the code.
*/

typedef
   enum {
      Ist_NoOp=0x19000,
      Ist_IMark,   /* META */
      Ist_AbiHint, /* META */
      Ist_Put,
      Ist_PutI,
      Ist_WrTmp,
      Ist_Store,
      Ist_CAS,
      Ist_LLSC,
      Ist_Dirty,
      Ist_MBE,     /* META (maybe) */
      Ist_Exit
   }
   IRStmtTag;

/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of statement this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   statement, and the fields can be accessed with
   'st.Ist.Store.<fieldname>'.

   For each kind of statement, we show what it looks like when
   pretty-printed with ppIRStmt().
*/
typedef
   struct _IRStmt {
      IRStmtTag tag;
      union {
         /* A no-op (usually resulting from IR optimisation).  Can be
            omitted without any effect.

            ppIRStmt output: IR-NoOp
         */
         struct {
         } NoOp;

         /* META: instruction mark.  Marks the start of the statements
            that represent a single machine instruction (the end of
            those statements is marked by the next IMark or the end of
            the IRSB).  Contains the address and length of the
            instruction.

            It also contains a delta value.  The delta must be
            subtracted from a guest program counter value before
            attempting to establish, by comparison with the address
            and length values, whether or not that program counter
            value refers to this instruction.  For x86, amd64, ppc32,
            ppc64 and arm, the delta value is zero.  For Thumb
            instructions, the delta value is one.  This is because, on
            Thumb, guest PC values (guest_R15T) are encoded using the
            top 31 bits of the instruction address and a 1 in the lsb;
            hence they appear to be (numerically) 1 past the start of
            the instruction they refer to.
            IOW, guest_R15T on ARM holds a standard ARM interworking
            address.

            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
                         eg. ------ IMark(0x4000792, 5, 0) ------,
         */
         struct {
            Addr64 addr;   /* instruction address */
            Int    len;    /* instruction length */
            UChar  delta;  /* addr = program counter as encoded in
                              guest state - delta */
         } IMark;

         /* META: An ABI hint, which says something about this
            platform's ABI.

            At the moment, the only AbiHint is one which indicates
            that a given chunk of address space, [base .. base+len-1],
            has become undefined.  This is used on amd64-linux and
            some ppc variants to pass stack-redzoning hints to whoever
            wants to see them.  It also indicates the address of the
            next (dynamic) instruction that will be executed.  This is
            to help Memcheck do origin tracking.

            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
                         eg. ====== AbiHint(t1, 16, t2) ======
         */
         struct {
            IRExpr* base;  /* Start of undefined chunk */
            Int     len;   /* Length of undefined chunk */
            IRExpr* nia;   /* Address of next (guest) insn */
         } AbiHint;

         /* Write a guest register, at a fixed offset in the guest state.
            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
         */
         struct {
            Int     offset;   /* Offset into the guest state */
            IRExpr* data;     /* The value to write */
         } Put;

         /* Write a guest register, at a non-fixed offset in the guest
            state.  See the comment for GetI expressions for more
            information.

            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
                         eg. PUTI(64:8xF64)[t5,0] = t1
         */
         struct {
            IRPutI* details;
         } PutI;

         /* Assign a value to a temporary.  Note that SSA rules require
            that each tmp be assigned to only once.  IR sanity checking
            will reject any block containing a temporary which is not
            assigned to exactly once.

            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
         */
         struct {
            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
            IRExpr* data;  /* Expression (RHS of assignment) */
         } WrTmp;

         /* Write a value to memory.  This is a normal store, not a
            Store-Conditional.  To represent a Store-Conditional,
            instead use IRStmt.LLSC.
            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
         */
         struct {
            IREndness end;    /* Endianness of the store */
            IRExpr*   addr;   /* store address */
            IRExpr*   data;   /* value to write */
         } Store;

         /* Do an atomic compare-and-swap operation.  Semantics are
            described above in the comment at the definition of IRCAS.

            ppIRStmt output:
               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
            eg
               t1 = CASle(t2 :: t3->Add32(t3,1))
               which denotes a 32-bit atomic increment
               of a value at address t2

            A double-element CAS may also be denoted, in which case <tmp>,
            <expected> and <new> are all pairs of items, separated by
            commas.
         */
         struct {
            IRCAS* details;
         } CAS;

         /* Either Load-Linked or Store-Conditional, depending on
            STOREDATA.

            If STOREDATA is NULL then this is a Load-Linked, meaning
            that data is loaded from memory as normal, but a
            'reservation' for the address is also lodged in the
            hardware.

               result = Load-Linked(addr, end)

            The data transfer type is the type of RESULT (I32, I64,
            etc).
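            (As a sketch, with t_res and t_addr assumed to be
            suitably-typed temporaries, such a Load-Linked is built
            via the IRStmt_LLSC constructor declared below as

               IRStmt_LLSC(Iend_LE, t_res, IRExpr_RdTmp(t_addr), NULL)

            where the NULL storedata marks it as an LL rather than
            an SC.)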
            ppIRStmt output:

               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)

            If STOREDATA is not NULL then this is a Store-Conditional,
            hence:

               result = Store-Conditional(addr, storedata, end)

            The data transfer type is the type of STOREDATA and RESULT
            has type Ity_I1.  The store may fail or succeed depending
            on the state of a previously lodged reservation on this
            address.  RESULT is written 1 if the store succeeds and 0
            if it fails.  eg ppIRStmt output:

               result = ( ST<end>-Cond(<addr>) = <storedata> )
               eg t3 = ( STbe-Cond(t1) = t2 )

            In all cases, the address must be naturally aligned for
            the transfer type -- any misaligned addresses should be
            caught by a dominating IR check and side exit.  This
            alignment restriction exists because on at least some
            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
            misaligned addresses, and we have to actually generate
            stwcx. on the host, and we don't want it trapping on the
            host.

            Summary of rules for transfer type:
              STOREDATA == NULL (LL):
                transfer type = type of RESULT
              STOREDATA != NULL (SC):
                transfer type = type of STOREDATA, and RESULT :: Ity_I1
         */
         struct {
            IREndness end;
            IRTemp    result;
            IRExpr*   addr;
            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
         } LLSC;

         /* Call (possibly conditionally) a C function that has side
            effects (ie. is "dirty").  See the comments above the
            IRDirty type declaration for more information.

            ppIRStmt output:
               t<tmp> = DIRTY <guard> <effects>
                  ::: <callee>(<args>)
            eg.
               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
                  ::: foo{0x380035f4}(t2)
         */
         struct {
            IRDirty* details;
         } Dirty;

         /* A memory bus event - a fence, or acquisition/release of the
            hardware bus lock.  IR optimisation treats all these as
            fences across which no memory references may be moved.
            ppIRStmt output: MBusEvent-Fence,
                             MBusEvent-BusLock, MBusEvent-BusUnlock.
         */
         struct {
            IRMBusEvent event;
         } MBE;

         /* Conditional exit from the middle of an IRSB.
            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
                         eg. if (t69) goto {Boring} 0x4000AAA:I32
            If <guard> is true, the guest state is also updated by
            PUT-ing <dst> at <offsIP>.  This is done because a
            taken exit must update the guest program counter.
         */
         struct {
            IRExpr*    guard;    /* Conditional expression */
            IRConst*   dst;      /* Jump target (constant only) */
            IRJumpKind jk;       /* Jump kind */
            Int        offsIP;   /* Guest state offset for IP */
         } Exit;
      } Ist;
   }
   IRStmt;
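
/* For example (a sketch -- t69 is assumed to be an I1-typed temporary
   and OFFS_IP a hypothetical guest-state offset for the IP), the side
   exit shown above can be built with the constructors declared below
   and with IRConst_U32 as declared earlier:

      IRStmt* ex = IRStmt_Exit( IRExpr_RdTmp(t69), Ijk_Boring,
                                IRConst_U32(0x4000AAA), OFFS_IP );
*/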
/* Statement constructors. */
extern IRStmt* IRStmt_NoOp    ( void );
extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
extern IRStmt* IRStmt_PutI    ( IRPutI* details );
extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_CAS     ( IRCAS* details );
extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
                                IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
                                Int offsIP );

/* Deep-copy an IRStmt. */
extern IRStmt* deepCopyIRStmt ( IRStmt* );

/* Pretty-print an IRStmt. */
extern void ppIRStmt ( IRStmt* );


/* ------------------ Basic Blocks ------------------ */

/* Type environments: a bunch of statements, expressions, etc, are
   incomplete without an environment indicating the type of each
   IRTemp.  So this provides one.  IR temporaries are really just
   unsigned ints, and so this provides an array, 0 .. n_types_used-1,
   of them.
*/
typedef
   struct {
      IRType* types;
      Int     types_size;
      Int     types_used;
   }
   IRTypeEnv;

/* Obtain a new IRTemp */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment */
extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );

/* Pretty-print a type environment */
extern void ppIRTypeEnv ( IRTypeEnv* );


/* Code blocks, which in proper compiler terminology are superblocks
   (single entry, multiple exit code sequences), contain:

   - A table giving a type for each temp (the "type environment")
   - An expandable array of statements
   - An expression of type 32 or 64 bits, depending on the
     guest's word size, indicating the next destination if the block
     executes all the way to the end, without a side exit
   - An indication of any special actions (JumpKind) needed
     for this final jump.
   - The offset of the IP field in the guest state.  This will be
     updated before the final jump is done.

   "IRSB" stands for "IR Super Block".
*/
typedef
   struct {
      IRTypeEnv* tyenv;
      IRStmt**   stmts;
      Int        stmts_size;
      Int        stmts_used;
      IRExpr*    next;
      IRJumpKind jumpkind;
      Int        offsIP;
   }
   IRSB;

/* Allocate a new, uninitialised IRSB */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB */
extern IRSB* deepCopyIRSB ( IRSB* );

/* Deep-copy an IRSB, except for the statements list, which is set to
   be a new, empty list of statements. */
extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );

/* Pretty-print an IRSB */
extern void ppIRSB ( IRSB* );

/* Append an IRStmt to an IRSB */
extern void addStmtToIRSB ( IRSB*, IRStmt* );
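
/* Putting the pieces together: a tool-side sketch (illustrative only;
   'sbIn' is an IRSB handed to an instrumentation pass, and the
   guest-state offsets 0 and 12 are hypothetical register slots).  It
   copies the block's shell and then appends a flattened
   GET/Add32/PUT sequence:

      IRSB*  sbOut = deepCopyIRSBExceptStmts(sbIn);
      IRTemp tA    = newIRTemp(sbOut->tyenv, Ity_I32);
      IRTemp tB    = newIRTemp(sbOut->tyenv, Ity_I32);
      IRTemp tSum  = newIRTemp(sbOut->tyenv, Ity_I32);

      addStmtToIRSB(sbOut, IRStmt_WrTmp(tA, IRExpr_Get(0,  Ity_I32)));
      addStmtToIRSB(sbOut, IRStmt_WrTmp(tB, IRExpr_Get(12, Ity_I32)));
      addStmtToIRSB(sbOut, IRStmt_WrTmp(tSum,
                              IRExpr_Binop(Iop_Add32,
                                           IRExpr_RdTmp(tA),
                                           IRExpr_RdTmp(tB))));
      addStmtToIRSB(sbOut, IRStmt_Put(0, IRExpr_RdTmp(tSum)));
*/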
/*---------------------------------------------------------------*/
/*--- Helper functions for the IR                             ---*/
/*---------------------------------------------------------------*/

/* For messing with IR type environments */
extern IRTypeEnv* emptyIRTypeEnv ( void );

/* What is the type of this expression? */
extern IRType typeOfIRConst ( IRConst* );
extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );

/* Sanity check a BB of IR */
extern void sanityCheckIRSB ( IRSB*  bb,
                              HChar* caller,
                              Bool   require_flatness,
                              IRType guest_word_size );
extern Bool isFlatIRStmt ( IRStmt* );

/* Is this value actually in the enumeration 'IRType'? */
extern Bool isPlausibleIRType ( IRType ty );

#endif /* ndef __LIBVEX_IR_H */


/*---------------------------------------------------------------*/
/*--- end                                         libvex_ir.h ---*/
/*---------------------------------------------------------------*/