1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_arm_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2010 OpenWorks LLP 11 info (at) open-works.net 12 13 NEON support is 14 Copyright (C) 2010-2010 Samsung Electronics 15 contributed by Dmitry Zhurikhin <zhur (at) ispras.ru> 16 and Kirill Batuzov <batuzovk (at) ispras.ru> 17 18 This program is free software; you can redistribute it and/or 19 modify it under the terms of the GNU General Public License as 20 published by the Free Software Foundation; either version 2 of the 21 License, or (at your option) any later version. 22 23 This program is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of 25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 26 General Public License for more details. 27 28 You should have received a copy of the GNU General Public License 29 along with this program; if not, write to the Free Software 30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 31 02110-1301, USA. 32 33 The GNU General Public License is contained in the file COPYING. 34 */ 35 36 /* XXXX thumb to check: 37 that all cases where putIRegT writes r15, we generate a jump. 38 39 All uses of newTemp assign to an IRTemp and not a UInt 40 41 For all thumb loads and stores, including VFP ones, new-ITSTATE is 42 backed out before the memory op, and restored afterwards. This 43 needs to happen even after we go uncond. (and for sure it doesn't 44 happen for VFP loads/stores right now). 45 46 VFP on thumb: check that we exclude all r13/r15 cases that we 47 should. 48 49 XXXX thumb to do: improve the ITSTATE-zeroing optimisation by 50 taking into account the number of insns guarded by an IT. 
51 52 remove the nasty hack, in the spechelper, of looking for Or32(..., 53 0xE0) in as the first arg to armg_calculate_condition, and instead 54 use Slice44 as specified in comments in the spechelper. 55 56 add specialisations for armg_calculate_flag_c and _v, as they 57 are moderately often needed in Thumb code. 58 59 Correctness: ITSTATE handling in Thumb SVCs is wrong. 60 61 Correctness (obscure): in m_transtab, when invalidating code 62 address ranges, invalidate up to 18 bytes after the end of the 63 range. This is because the ITSTATE optimisation at the top of 64 _THUMB_WRK below analyses up to 18 bytes before the start of any 65 given instruction, and so might depend on the invalidated area. 66 */ 67 68 /* Limitations, etc 69 70 - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt 71 72 - SWP: the restart jump back is Ijk_Boring; it should be 73 Ijk_NoRedir but that's expensive. See comments on casLE() in 74 guest_x86_toIR.c. 75 */ 76 77 /* "Special" instructions. 78 79 This instruction decoder can decode four special instructions 80 which mean nothing natively (are no-ops as far as regs/mem are 81 concerned) but have meaning for supporting Valgrind. A special 82 instruction is flagged by a 16-byte preamble: 83 84 E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC 85 (mov r12, r12, ROR #3; mov r12, r12, ROR #13; 86 mov r12, r12, ROR #29; mov r12, r12, ROR #19) 87 88 Following that, one of the following 3 are allowed 89 (standard interpretation in parentheses): 90 91 E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 ) 92 E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR 93 E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4 94 95 Any other bytes following the 16-byte preamble are illegal and 96 constitute a failure in instruction decoding. This all assumes 97 that the preamble will never occur except in specific code 98 fragments designed for Valgrind to catch. 99 */ 100 101 /* Translates ARM(v5) code to IR. 
*/ 102 103 #include "libvex_basictypes.h" 104 #include "libvex_ir.h" 105 #include "libvex.h" 106 #include "libvex_guest_arm.h" 107 108 #include "main_util.h" 109 #include "main_globals.h" 110 #include "guest_generic_bb_to_IR.h" 111 #include "guest_arm_defs.h" 112 113 114 /*------------------------------------------------------------*/ 115 /*--- Globals ---*/ 116 /*------------------------------------------------------------*/ 117 118 /* These are set at the start of the translation of a instruction, so 119 that we don't have to pass them around endlessly. CONST means does 120 not change during translation of the instruction. 121 */ 122 123 /* CONST: is the host bigendian? This has to do with float vs double 124 register accesses on VFP, but it's complex and not properly thought 125 out. */ 126 static Bool host_is_bigendian; 127 128 /* CONST: The guest address for the instruction currently being 129 translated. This is the real, "decoded" address (not subject 130 to the CPSR.T kludge). */ 131 static Addr32 guest_R15_curr_instr_notENC; 132 133 /* CONST, FOR ASSERTIONS ONLY. Indicates whether currently processed 134 insn is Thumb (True) or ARM (False). */ 135 static Bool __curr_is_Thumb; 136 137 /* MOD: The IRSB* into which we're generating code. */ 138 static IRSB* irsb; 139 140 /* These are to do with handling writes to r15. They are initially 141 set at the start of disInstr_ARM_WRK to indicate no update, 142 possibly updated during the routine, and examined again at the end. 143 If they have been set to indicate a r15 update then a jump is 144 generated. Note, "explicit" jumps (b, bx, etc) are generated 145 directly, not using this mechanism -- this is intended to handle 146 the implicit-style jumps resulting from (eg) assigning to r15 as 147 the result of insns we wouldn't normally consider branchy. */ 148 149 /* MOD. Initially False; set to True iff abovementioned handling is 150 required. */ 151 static Bool r15written; 152 153 /* MOD. Initially IRTemp_INVALID. 
If the r15 branch to be generated 154 is conditional, this holds the gating IRTemp :: Ity_I32. If the 155 branch to be generated is unconditional, this remains 156 IRTemp_INVALID. */ 157 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */ 158 159 /* MOD. Initially Ijk_Boring. If an r15 branch is to be generated, 160 this holds the jump kind. */ 161 static IRTemp r15kind; 162 163 164 /*------------------------------------------------------------*/ 165 /*--- Debugging output ---*/ 166 /*------------------------------------------------------------*/ 167 168 #define DIP(format, args...) \ 169 if (vex_traceflags & VEX_TRACE_FE) \ 170 vex_printf(format, ## args) 171 172 #define DIS(buf, format, args...) \ 173 if (vex_traceflags & VEX_TRACE_FE) \ 174 vex_sprintf(buf, format, ## args) 175 176 #define ASSERT_IS_THUMB \ 177 do { vassert(__curr_is_Thumb); } while (0) 178 179 #define ASSERT_IS_ARM \ 180 do { vassert(! __curr_is_Thumb); } while (0) 181 182 183 /*------------------------------------------------------------*/ 184 /*--- Helper bits and pieces for deconstructing the ---*/ 185 /*--- arm insn stream. ---*/ 186 /*------------------------------------------------------------*/ 187 188 /* Do a little-endian load of a 32-bit word, regardless of the 189 endianness of the underlying host. */ 190 static inline UInt getUIntLittleEndianly ( UChar* p ) 191 { 192 UInt w = 0; 193 w = (w << 8) | p[3]; 194 w = (w << 8) | p[2]; 195 w = (w << 8) | p[1]; 196 w = (w << 8) | p[0]; 197 return w; 198 } 199 200 /* Do a little-endian load of a 16-bit word, regardless of the 201 endianness of the underlying host. 
*/ 202 static inline UShort getUShortLittleEndianly ( UChar* p ) 203 { 204 UShort w = 0; 205 w = (w << 8) | p[1]; 206 w = (w << 8) | p[0]; 207 return w; 208 } 209 210 static UInt ROR32 ( UInt x, UInt sh ) { 211 vassert(sh >= 0 && sh < 32); 212 if (sh == 0) 213 return x; 214 else 215 return (x << (32-sh)) | (x >> sh); 216 } 217 218 static Int popcount32 ( UInt x ) 219 { 220 Int res = 0, i; 221 for (i = 0; i < 32; i++) { 222 res += (x & 1); 223 x >>= 1; 224 } 225 return res; 226 } 227 228 static UInt setbit32 ( UInt x, Int ix, UInt b ) 229 { 230 UInt mask = 1 << ix; 231 x &= ~mask; 232 x |= ((b << ix) & mask); 233 return x; 234 } 235 236 #define BITS2(_b1,_b0) \ 237 (((_b1) << 1) | (_b0)) 238 239 #define BITS3(_b2,_b1,_b0) \ 240 (((_b2) << 2) | ((_b1) << 1) | (_b0)) 241 242 #define BITS4(_b3,_b2,_b1,_b0) \ 243 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) 244 245 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 246 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ 247 | BITS4((_b3),(_b2),(_b1),(_b0))) 248 249 #define BITS5(_b4,_b3,_b2,_b1,_b0) \ 250 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) 251 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ 252 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 253 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 254 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 255 256 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 257 (((_b8) << 8) \ 258 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 259 260 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 261 (((_b9) << 9) | ((_b8) << 8) \ 262 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 263 264 /* produces _uint[_bMax:_bMin] */ 265 #define SLICE_UInt(_uint,_bMax,_bMin) \ 266 (( ((UInt)(_uint)) >> (_bMin)) \ 267 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) 268 269 270 /*------------------------------------------------------------*/ 271 /*--- Helper bits and pieces for creating IR fragments. 
---*/ 272 /*------------------------------------------------------------*/ 273 274 static IRExpr* mkU64 ( ULong i ) 275 { 276 return IRExpr_Const(IRConst_U64(i)); 277 } 278 279 static IRExpr* mkU32 ( UInt i ) 280 { 281 return IRExpr_Const(IRConst_U32(i)); 282 } 283 284 static IRExpr* mkU8 ( UInt i ) 285 { 286 vassert(i < 256); 287 return IRExpr_Const(IRConst_U8( (UChar)i )); 288 } 289 290 static IRExpr* mkexpr ( IRTemp tmp ) 291 { 292 return IRExpr_RdTmp(tmp); 293 } 294 295 static IRExpr* unop ( IROp op, IRExpr* a ) 296 { 297 return IRExpr_Unop(op, a); 298 } 299 300 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 301 { 302 return IRExpr_Binop(op, a1, a2); 303 } 304 305 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 306 { 307 return IRExpr_Triop(op, a1, a2, a3); 308 } 309 310 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 311 { 312 return IRExpr_Load(Iend_LE, ty, addr); 313 } 314 315 /* Add a statement to the list held by "irbb". */ 316 static void stmt ( IRStmt* st ) 317 { 318 addStmtToIRSB( irsb, st ); 319 } 320 321 static void assign ( IRTemp dst, IRExpr* e ) 322 { 323 stmt( IRStmt_WrTmp(dst, e) ); 324 } 325 326 static void storeLE ( IRExpr* addr, IRExpr* data ) 327 { 328 stmt( IRStmt_Store(Iend_LE, addr, data) ); 329 } 330 331 /* Generate a new temporary of the given type. */ 332 static IRTemp newTemp ( IRType ty ) 333 { 334 vassert(isPlausibleIRType(ty)); 335 return newIRTemp( irsb->tyenv, ty ); 336 } 337 338 /* Produces a value in 0 .. 3, which is encoded as per the type 339 IRRoundingMode. */ 340 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 341 { 342 return mkU32(Irrm_NEAREST); 343 } 344 345 /* Generate an expression for SRC rotated right by ROT. 
*/
static IRExpr* genROR32( IRTemp src, Int rot )
{
   vassert(rot >= 0 && rot < 32);
   if (rot == 0)
      return mkexpr(src);
   return
      binop(Iop_Or32,
            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
}

/* Make a 128-bit constant whose two 64-bit halves are both 'i'. */
static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}

/* Generate a 4-aligned version of the given expression if
   the given condition is true.  Else return it unchanged. */
static IRExpr* align4if ( IRExpr* e, Bool b )
{
   if (b)
      return binop(Iop_And32, e, mkU32(~3));
   else
      return e;
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

/* Byte offsets of the guest state fields within VexGuestARMState. */

#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)


/* ---------------- Integer registers ---------------- */

/* Map an integer register number (0 .. 15) to its guest state
   offset. */
static Int integerGuestRegOffset ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed, but I don't think that ever happens on
      ARM. */
   switch (iregNo) {
      case 0:  return OFFB_R0;
      case 1:  return OFFB_R1;
      case 2:  return OFFB_R2;
      case 3:  return OFFB_R3;
      case 4:  return OFFB_R4;
      case 5:  return OFFB_R5;
      case 6:  return OFFB_R6;
      case 7:  return OFFB_R7;
      case 8:  return OFFB_R8;
      case 9:  return OFFB_R9;
      case 10: return OFFB_R10;
      case 11: return OFFB_R11;
      case 12: return OFFB_R12;
      case 13: return OFFB_R13;
      case 14: return OFFB_R14;
      case 15: return OFFB_R15T;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
static IRExpr* llGetIReg ( UInt iregNo )
{
   vassert(iregNo < 16);
   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
}

/* Architected read from a reg in ARM mode.  This automagically adds 8
   to all reads of r15.
*/ 480 static IRExpr* getIRegA ( UInt iregNo ) 481 { 482 IRExpr* e; 483 ASSERT_IS_ARM; 484 vassert(iregNo < 16); 485 if (iregNo == 15) { 486 /* If asked for r15, don't read the guest state value, as that 487 may not be up to date in the case where loop unrolling has 488 happened, because the first insn's write to the block is 489 omitted; hence in the 2nd and subsequent unrollings we don't 490 have a correct value in guest r15. Instead produce the 491 constant that we know would be produced at this point. */ 492 vassert(0 == (guest_R15_curr_instr_notENC & 3)); 493 e = mkU32(guest_R15_curr_instr_notENC + 8); 494 } else { 495 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 ); 496 } 497 return e; 498 } 499 500 /* Architected read from a reg in Thumb mode. This automagically adds 501 4 to all reads of r15. */ 502 static IRExpr* getIRegT ( UInt iregNo ) 503 { 504 IRExpr* e; 505 ASSERT_IS_THUMB; 506 vassert(iregNo < 16); 507 if (iregNo == 15) { 508 /* Ditto comment in getIReg. */ 509 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 510 e = mkU32(guest_R15_curr_instr_notENC + 4); 511 } else { 512 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 ); 513 } 514 return e; 515 } 516 517 /* Plain ("low level") write to a reg; no jump or alignment magic for 518 r15. */ 519 static void llPutIReg ( UInt iregNo, IRExpr* e ) 520 { 521 vassert(iregNo < 16); 522 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 523 stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) ); 524 } 525 526 /* Architected write to an integer register in ARM mode. If it is to 527 r15, record info so at the end of this insn's translation, a branch 528 to it can be made. Also handles conditional writes to the 529 register: if guardT == IRTemp_INVALID then the write is 530 unconditional. If writing r15, also 4-align it. 
*/ 531 static void putIRegA ( UInt iregNo, 532 IRExpr* e, 533 IRTemp guardT /* :: Ity_I32, 0 or 1 */, 534 IRJumpKind jk /* if a jump is generated */ ) 535 { 536 /* if writing r15, force e to be 4-aligned. */ 537 // INTERWORKING FIXME. this needs to be relaxed so that 538 // puts caused by LDMxx which load r15 interwork right. 539 // but is no aligned too relaxed? 540 //if (iregNo == 15) 541 // e = binop(Iop_And32, e, mkU32(~3)); 542 ASSERT_IS_ARM; 543 /* So, generate either an unconditional or a conditional write to 544 the reg. */ 545 if (guardT == IRTemp_INVALID) { 546 /* unconditional write */ 547 llPutIReg( iregNo, e ); 548 } else { 549 llPutIReg( iregNo, 550 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)), 551 llGetIReg(iregNo), 552 e )); 553 } 554 if (iregNo == 15) { 555 // assert against competing r15 updates. Shouldn't 556 // happen; should be ruled out by the instr matching 557 // logic. 558 vassert(r15written == False); 559 vassert(r15guard == IRTemp_INVALID); 560 vassert(r15kind == Ijk_Boring); 561 r15written = True; 562 r15guard = guardT; 563 r15kind = jk; 564 } 565 } 566 567 568 /* Architected write to an integer register in Thumb mode. Writes to 569 r15 are not allowed. Handles conditional writes to the register: 570 if guardT == IRTemp_INVALID then the write is unconditional. */ 571 static void putIRegT ( UInt iregNo, 572 IRExpr* e, 573 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 574 { 575 /* So, generate either an unconditional or a conditional write to 576 the reg. */ 577 ASSERT_IS_THUMB; 578 vassert(iregNo >= 0 && iregNo <= 14); 579 if (guardT == IRTemp_INVALID) { 580 /* unconditional write */ 581 llPutIReg( iregNo, e ); 582 } else { 583 llPutIReg( iregNo, 584 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)), 585 llGetIReg(iregNo), 586 e )); 587 } 588 } 589 590 591 /* Thumb16 and Thumb32 only. 592 Returns true if reg is 13 or 15. Implements the BadReg 593 predicate in the ARM ARM. 
*/
static Bool isBadRegT ( UInt r )
{
   vassert(r <= 15);
   ASSERT_IS_THUMB;
   return r == 13 || r == 15;
}


/* ---------------- Double registers ---------------- */

/* Map a VFP/Neon D register number (0 .. 31) to its guest state
   offset. */
static Int doubleGuestRegOffset ( UInt dregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the single-precision VFP
      registers. */
   switch (dregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D1;
      case 2:  return OFFB_D2;
      case 3:  return OFFB_D3;
      case 4:  return OFFB_D4;
      case 5:  return OFFB_D5;
      case 6:  return OFFB_D6;
      case 7:  return OFFB_D7;
      case 8:  return OFFB_D8;
      case 9:  return OFFB_D9;
      case 10: return OFFB_D10;
      case 11: return OFFB_D11;
      case 12: return OFFB_D12;
      case 13: return OFFB_D13;
      case 14: return OFFB_D14;
      case 15: return OFFB_D15;
      case 16: return OFFB_D16;
      case 17: return OFFB_D17;
      case 18: return OFFB_D18;
      case 19: return OFFB_D19;
      case 20: return OFFB_D20;
      case 21: return OFFB_D21;
      case 22: return OFFB_D22;
      case 23: return OFFB_D23;
      case 24: return OFFB_D24;
      case 25: return OFFB_D25;
      case 26: return OFFB_D26;
      case 27: return OFFB_D27;
      case 28: return OFFB_D28;
      case 29: return OFFB_D29;
      case 30: return OFFB_D30;
      case 31: return OFFB_D31;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a VFP Dreg. */
static IRExpr* llGetDReg ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
}

/* Architected read from a VFP Dreg. */
static IRExpr* getDReg ( UInt dregNo ) {
   return llGetDReg( dregNo );
}

/* Plain ("low level") write to a VFP Dreg. */
static void llPutDReg ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a VFP Dreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putDReg ( UInt    dregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDReg( dregNo, e );
   } else {
      llPutDReg( dregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetDReg(dregNo),
                               e ));
   }
}

/* And now exactly the same stuff all over again, but this time
   taking/returning I64 rather than F64, to support 64-bit Neon
   ops. */

/* Plain ("low level") read from a Neon Integer Dreg. */
static IRExpr* llGetDRegI64 ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
}

/* Architected read from a Neon Integer Dreg. */
static IRExpr* getDRegI64 ( UInt dregNo ) {
   return llGetDRegI64( dregNo );
}

/* Plain ("low level") write to a Neon Integer Dreg. */
static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a Neon Integer Dreg.  Handles conditional
   writes to the register: if guardT == IRTemp_INVALID then the write
   is unconditional. */
static void putDRegI64 ( UInt    dregNo,
                         IRExpr* e,
                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDRegI64( dregNo, e );
   } else {
      llPutDRegI64( dregNo,
                    IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                                  llGetDRegI64(dregNo),
                                  e ));
   }
}

/* ---------------- Quad registers ---------------- */

/* Map a Neon Q register number (0 .. 15) to its guest state offset.
   Qn overlays the D register pair D(2n+1):D(2n), hence the offset of
   D(2n). */
static Int quadGuestRegOffset ( UInt qregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the 64 bit Neon registers. */
   switch (qregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D2;
      case 2:  return OFFB_D4;
      case 3:  return OFFB_D6;
      case 4:  return OFFB_D8;
      case 5:  return OFFB_D10;
      case 6:  return OFFB_D12;
      case 7:  return OFFB_D14;
      case 8:  return OFFB_D16;
      case 9:  return OFFB_D18;
      case 10: return OFFB_D20;
      case 11: return OFFB_D22;
      case 12: return OFFB_D24;
      case 13: return OFFB_D26;
      case 14: return OFFB_D28;
      case 15: return OFFB_D30;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a Neon Qreg. */
static IRExpr* llGetQReg ( UInt qregNo )
{
   vassert(qregNo < 16);
   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
}

/* Architected read from a Neon Qreg. */
static IRExpr* getQReg ( UInt qregNo ) {
   return llGetQReg( qregNo );
}

/* Plain ("low level") write to a Neon Qreg. */
static void llPutQReg ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
}

/* Architected write to a Neon Qreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putQReg ( UInt    qregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutQReg( qregNo, e );
   } else {
      llPutQReg( qregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetQReg(qregNo),
                               e ));
   }
}


/* ---------------- Float registers ---------------- */

/* Map a single-precision VFP F register number (0 .. 31) to its guest
   state offset: the containing D register's offset, plus 4 for the
   odd-numbered half on a little-endian host. */
static Int floatGuestRegOffset ( UInt fregNo )
{
   /* Start with the offset of the containing double, and then correct
      for endianness.  Actually this is completely bogus and needs
      careful thought. */
   Int off;
   vassert(fregNo < 32);
   off = doubleGuestRegOffset(fregNo >> 1);
   if (host_is_bigendian) {
      vassert(0);
   } else {
      if (fregNo & 1)
         off += 4;
   }
   return off;
}

/* Plain ("low level") read from a VFP Freg. */
static IRExpr* llGetFReg ( UInt fregNo )
{
   vassert(fregNo < 32);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}

/* Architected read from a VFP Freg. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}

/* Plain ("low level") write to a VFP Freg. */
static void llPutFReg ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}

/* Architected write to a VFP Freg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putFReg ( UInt    fregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutFReg( fregNo, e );
   } else {
      llPutFReg( fregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetFReg(fregNo),
                               e ));
   }
}


/* ---------------- Misc registers ---------------- */

/* Write 'e' to one of a small whitelisted set of miscellaneous guest
   state offsets, optionally guarded: if guardT == IRTemp_INVALID the
   write is unconditional. */
static void putMiscReg32 ( UInt    gsoffset,
                           IRExpr* e, /* :: Ity_I32 */
                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   switch (gsoffset) {
      case OFFB_FPSCR:   break;
      case OFFB_QFLAG32: break;
      case OFFB_GEFLAG0: break;
      case OFFB_GEFLAG1: break;
      case OFFB_GEFLAG2: break;
      case OFFB_GEFLAG3: break;
      default: vassert(0); /* awaiting more cases */
   }
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      stmt(IRStmt_Put(gsoffset, e));
   } else {
      stmt(IRStmt_Put(
         gsoffset,
         IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                       IRExpr_Get(gsoffset, Ity_I32),
                       e
         )
      ));
   }
}

/* Copy the guest ITSTATE word into a fresh temp and return it.
   Thumb only. */
static IRTemp get_ITSTATE ( void )
{
   ASSERT_IS_THUMB;
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   return t;
}

/* Write 't' back to the guest ITSTATE word.  Thumb only. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}

/* Copy the guest QFLAG32 word into a fresh temp and return it. */
static IRTemp get_QFLAG32 ( void )
{
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   return t;
}

/* Write 't' to the guest QFLAG32 word, optionally guarded by condT. */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}

/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   Status Register) to indicate that overflow or saturation occurred.
   Nb: t must be zero to denote no saturation, and any nonzero
   value to indicate saturation.
*/ 919 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) 920 { 921 IRTemp old = get_QFLAG32(); 922 IRTemp nyu = newTemp(Ity_I32); 923 assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); 924 put_QFLAG32(nyu, condT); 925 } 926 927 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit. 928 flagNo: which flag bit to set [3...0] 929 lowbits_to_ignore: 0 = look at all 32 bits 930 8 = look at top 24 bits only 931 16 = look at top 16 bits only 932 31 = look at the top bit only 933 e: input value to be evaluated. 934 The new value is taken from 'e' with the lowest 'lowbits_to_ignore' 935 masked out. If the resulting value is zero then the GE flag is 936 set to 0; any other value sets the flag to 1. */ 937 static void put_GEFLAG32 ( Int flagNo, /* 0, 1, 2 or 3 */ 938 Int lowbits_to_ignore, /* 0, 8, 16 or 31 */ 939 IRExpr* e, /* Ity_I32 */ 940 IRTemp condT ) 941 { 942 vassert( flagNo >= 0 && flagNo <= 3 ); 943 vassert( lowbits_to_ignore == 0 || 944 lowbits_to_ignore == 8 || 945 lowbits_to_ignore == 16 || 946 lowbits_to_ignore == 31 ); 947 IRTemp masked = newTemp(Ity_I32); 948 assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore))); 949 950 switch (flagNo) { 951 case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break; 952 case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break; 953 case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break; 954 case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break; 955 default: vassert(0); 956 } 957 } 958 959 /* Return the (32-bit, zero-or-nonzero representation scheme) of 960 the specified GE flag. 
*/ 961 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ ) 962 { 963 switch (flagNo) { 964 case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 ); 965 case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 ); 966 case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 ); 967 case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 ); 968 default: vassert(0); 969 } 970 } 971 972 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and 973 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit 974 15 of the value. All other bits are ignored. */ 975 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT ) 976 { 977 IRTemp ge10 = newTemp(Ity_I32); 978 IRTemp ge32 = newTemp(Ity_I32); 979 assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000))); 980 assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000))); 981 put_GEFLAG32( 0, 0, mkexpr(ge10), condT ); 982 put_GEFLAG32( 1, 0, mkexpr(ge10), condT ); 983 put_GEFLAG32( 2, 0, mkexpr(ge32), condT ); 984 put_GEFLAG32( 3, 0, mkexpr(ge32), condT ); 985 } 986 987 988 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 989 from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from 990 bit 7. All other bits are ignored. 
*/
static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
{
   IRTemp ge0 = newTemp(Ity_I32);
   IRTemp ge1 = newTemp(Ity_I32);
   IRTemp ge2 = newTemp(Ity_I32);
   IRTemp ge3 = newTemp(Ity_I32);
   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
}


/* ---------------- FPSCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPSCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   /* Now swap them: bit 0 moves up to position 1, bit 1 moves down
      to position 0, and everything else is discarded. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

/* Return a printable "{xx}" suffix for the given condition code,
   or "" for AL (the always-true default). */
static HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}"; // or 'cs'
      case ARMCondLO:  return "{lo}"; // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
/* and a handy shorthand for it */
static HChar* nCC ( ARMCondcode cond ) {
   return name_ARMCondcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I32 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4
      bits are nonzero.  However, obviously we can't assert for
      that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".  However, as per comments
      above, 'cond' must be supplied pre-shifted to this function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.
*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
  /* First arg is "(cond << 4) | condition".  This requires that the
     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
     (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32.
*/
static IRExpr* mk_armg_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_c", &armg_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate just the overflow flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_armg_calculate_flag_v ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_v", &armg_calculate_flag_v,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_armg_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR computing a nonzero value iff any 32-bit lane of resL
   differs from the corresponding lane of resR (see the #else variant
   below for the plain-IR statement of that).  resL/resR are
   presumably the unsaturated and saturated results of a Neon op --
   TODO confirm against callers.  Q selects 128-bit (I32x4) vs 64-bit
   (I32x2) operands. */
static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
{
   IRExpr** args1;
   IRExpr** args2;
   IRExpr *call1, *call2, *res;

   /* Split each vector into 32-bit lanes; the helper takes 4 lanes
      per call, so a 128-bit comparison needs two calls.  args2 is
      only assigned (and only used) when Q is True. */
   if (Q) {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
                              binop(Iop_GetElem32x4, resL, mkU8(1)),
                              binop(Iop_GetElem32x4, resR, mkU8(0)),
                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
                              binop(Iop_GetElem32x4, resL, mkU8(3)),
                              binop(Iop_GetElem32x4, resR, mkU8(2)),
                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
   } else {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
                              binop(Iop_GetElem32x2, resL, mkU8(1)),
                              binop(Iop_GetElem32x2, resR, mkU8(0)),
                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
   }

#if 1
   call1 = mkIRExprCCall(
             Ity_I32,
             0/*regparm*/,
             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
             args1
          );
   if (Q) {
      call2 = mkIRExprCCall(
                Ity_I32,
                0/*regparm*/,
                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
                args2
             );
   }
   if (Q) {
      res = binop(Iop_Or32, call1, call2);
   } else {
      res = call1;
   }
#else
   /* Equivalent plain-IR formulation, kept for reference: nonzero
      iff any lane of resL XOR the matching lane of resR is nonzero. */
   if (Q) {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args1[0],
                                         args1[2]),
                                   binop(Iop_Xor32,
                                         args1[1],
                                         args1[3])),
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args2[0],
                                         args2[2]),
                                   binop(Iop_Xor32,
                                         args2[1],
                                         args2[3]))),
                       mkU32(0)));
   } else {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32,
                                   args1[0],
                                   args1[2]),
                             binop(Iop_Xor32,
                                   args1[1],
                                   args1[3])),
                       mkU32(0)));
   }
#endif
   return res;
}

// FIXME: this is named wrongly .. 
looks like a sticky set of 1289 // QC, not a write to it. 1290 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q, 1291 IRTemp condT ) 1292 { 1293 putMiscReg32 (OFFB_FPSCR, 1294 binop(Iop_Or32, 1295 IRExpr_Get(OFFB_FPSCR, Ity_I32), 1296 binop(Iop_Shl32, 1297 mk_armg_calculate_flag_qc(resL, resR, Q), 1298 mkU8(27))), 1299 condT); 1300 } 1301 1302 /* Build IR to conditionally set the flags thunk. As with putIReg, if 1303 guard is IRTemp_INVALID then it's unconditional, else it holds a 1304 condition :: Ity_I32. */ 1305 static 1306 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1, 1307 IRTemp t_dep2, IRTemp t_ndep, 1308 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1309 { 1310 IRTemp c8; 1311 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32)); 1312 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32)); 1313 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32)); 1314 vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER); 1315 if (guardT == IRTemp_INVALID) { 1316 /* unconditional */ 1317 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) )); 1318 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); 1319 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); 1320 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); 1321 } else { 1322 /* conditional */ 1323 c8 = newTemp(Ity_I8); 1324 assign( c8, unop(Iop_32to8, mkexpr(guardT)) ); 1325 stmt( IRStmt_Put( 1326 OFFB_CC_OP, 1327 IRExpr_Mux0X( mkexpr(c8), 1328 IRExpr_Get(OFFB_CC_OP, Ity_I32), 1329 mkU32(cc_op) ))); 1330 stmt( IRStmt_Put( 1331 OFFB_CC_DEP1, 1332 IRExpr_Mux0X( mkexpr(c8), 1333 IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1334 mkexpr(t_dep1) ))); 1335 stmt( IRStmt_Put( 1336 OFFB_CC_DEP2, 1337 IRExpr_Mux0X( mkexpr(c8), 1338 IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1339 mkexpr(t_dep2) ))); 1340 stmt( IRStmt_Put( 1341 OFFB_CC_NDEP, 1342 IRExpr_Mux0X( mkexpr(c8), 1343 IRExpr_Get(OFFB_CC_NDEP, Ity_I32), 1344 mkexpr(t_ndep) ))); 1345 } 1346 } 1347 1348 1349 /* Minor variant of the above that sets NDEP to zero (if it 1350 sets it at 
all) */ 1351 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, 1352 IRTemp t_dep2, 1353 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1354 { 1355 IRTemp z32 = newTemp(Ity_I32); 1356 assign( z32, mkU32(0) ); 1357 setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); 1358 } 1359 1360 1361 /* Minor variant of the above that sets DEP2 to zero (if it 1362 sets it at all) */ 1363 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, 1364 IRTemp t_ndep, 1365 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1366 { 1367 IRTemp z32 = newTemp(Ity_I32); 1368 assign( z32, mkU32(0) ); 1369 setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); 1370 } 1371 1372 1373 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it 1374 sets them at all) */ 1375 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, 1376 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1377 { 1378 IRTemp z32 = newTemp(Ity_I32); 1379 assign( z32, mkU32(0) ); 1380 setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT ); 1381 } 1382 1383 1384 /* ARM only */ 1385 /* Generate a side-exit to the next instruction, if the given guard 1386 expression :: Ity_I32 is 0 (note! the side exit is taken if the 1387 condition is false!) This is used to skip over conditional 1388 instructions which we can't generate straight-line code for, either 1389 because they are too complex or (more likely) they potentially 1390 generate exceptions. 
1391 */ 1392 static void mk_skip_over_A32_if_cond_is_false ( 1393 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1394 ) 1395 { 1396 ASSERT_IS_ARM; 1397 vassert(guardT != IRTemp_INVALID); 1398 vassert(0 == (guest_R15_curr_instr_notENC & 3)); 1399 stmt( IRStmt_Exit( 1400 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1401 Ijk_Boring, 1402 IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)) 1403 )); 1404 } 1405 1406 /* Thumb16 only */ 1407 /* ditto, but jump over a 16-bit thumb insn */ 1408 static void mk_skip_over_T16_if_cond_is_false ( 1409 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1410 ) 1411 { 1412 ASSERT_IS_THUMB; 1413 vassert(guardT != IRTemp_INVALID); 1414 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 1415 stmt( IRStmt_Exit( 1416 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1417 Ijk_Boring, 1418 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)) 1419 )); 1420 } 1421 1422 1423 /* Thumb32 only */ 1424 /* ditto, but jump over a 32-bit thumb insn */ 1425 static void mk_skip_over_T32_if_cond_is_false ( 1426 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1427 ) 1428 { 1429 ASSERT_IS_THUMB; 1430 vassert(guardT != IRTemp_INVALID); 1431 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 1432 stmt( IRStmt_Exit( 1433 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1434 Ijk_Boring, 1435 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)) 1436 )); 1437 } 1438 1439 1440 /* Thumb16 and Thumb32 only 1441 Generate a SIGILL followed by a restart of the current instruction 1442 if the given temp is nonzero. 
*/
static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
{
   ASSERT_IS_THUMB;
   vassert(t != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit with Ijk_NoDecode at the current insn's address (bit 0 set
      to encode Thumb mode), which delivers the SIGILL and, if the
      signal is ignored/handled, re-decodes this same instruction. */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
         Ijk_NoDecode,
         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
      )
   );
}


/* Inspect the old_itstate, and generate a SIGILL if it indicates that
   we are currently in an IT block and are not the last in the block.
   This also rolls back guest_ITSTATE to its old value before the exit
   and restores it to its new value afterwards.  This is so that if
   the exit is taken, we have an up to date version of ITSTATE
   available.  Without doing that, we have no hope of making precise
   exceptions work. */
static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   ASSERT_IS_THUMB;
   put_ITSTATE(old_itstate); // backout
   /* Dropping the low 8 bits leaves the guards for the *following*
      insns; nonzero => we are inside an IT block but not at its last
      instruction. */
   IRTemp guards_for_next3 = newTemp(Ity_I32);
   assign(guards_for_next3,
          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   gen_SIGILL_T_if_nonzero(guards_for_next3);
   put_ITSTATE(new_itstate); // restore
}


/* Simpler version of the above, which generates a SIGILL if
   we're anywhere within an IT block. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); // restore
}


/* Generate an APSR value, from the NZCV thunk, and
   from QFLAG32 and GEFLAG0 .. GEFLAG3.
*/
static IRTemp synthesise_APSR ( void )
{
   IRTemp res1 = newTemp(Ity_I32);
   // Get NZCV (already positioned at bits 31:28)
   assign( res1, mk_armg_calculate_flags_nzcv() );
   // OR in the Q value, normalised to a single bit at ARMG_CC_SHIFT_Q
   IRTemp res2 = newTemp(Ity_I32);
   assign(
      res2,
      binop(Iop_Or32,
            mkexpr(res1),
            binop(Iop_Shl32,
                  unop(Iop_1Uto32,
                       binop(Iop_CmpNE32,
                             mkexpr(get_QFLAG32()),
                             mkU32(0))),
                  mkU8(ARMG_CC_SHIFT_Q)))
   );
   // OR in GE0 .. GE3, each normalised to one bit at positions 16..19
   IRExpr* ge0
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   IRExpr* ge1
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   IRExpr* ge2
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   IRExpr* ge3
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   IRTemp res3 = newTemp(Ity_I32);
   assign(res3,
          binop(Iop_Or32,
                mkexpr(res2),
                binop(Iop_Or32,
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge0, mkU8(16)),
                            binop(Iop_Shl32, ge1, mkU8(17))),
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge2, mkU8(18)),
                            binop(Iop_Shl32, ge3, mkU8(19))) )));
   return res3;
}


/* and the inverse transformation: given an APSR value,
   set the NZCV thunk, the Q flag, and the GE flags.
*/ 1539 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge, 1540 IRTemp apsrT, IRTemp condT ) 1541 { 1542 vassert(write_nzcvq || write_ge); 1543 if (write_nzcvq) { 1544 // Do NZCV 1545 IRTemp immT = newTemp(Ity_I32); 1546 assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) ); 1547 setFlags_D1(ARMG_CC_OP_COPY, immT, condT); 1548 // Do Q 1549 IRTemp qnewT = newTemp(Ity_I32); 1550 assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q))); 1551 put_QFLAG32(qnewT, condT); 1552 } 1553 if (write_ge) { 1554 // Do GE3..0 1555 put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)), 1556 condT); 1557 put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)), 1558 condT); 1559 put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)), 1560 condT); 1561 put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)), 1562 condT); 1563 } 1564 } 1565 1566 1567 /*------------------------------------------------------------*/ 1568 /*--- Helpers for saturation ---*/ 1569 /*------------------------------------------------------------*/ 1570 1571 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and 1572 (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in 1573 (b) the floor is computed from the value of imm5. these two fnsn 1574 should be commoned up. */ 1575 1576 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1 1577 Optionally return flag resQ saying whether saturation occurred. 
   See definition in manual, section A2.2.1, page 41
   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   {
     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
     else               { result = i; saturated = FALSE; }
     return ( result<N-1:0>, saturated );
   }
*/
static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
                             IRTemp* resQ, /* OUT - Ity_I32 */
                             IRTemp regT,  /* value to clamp - Ity_I32 */
                             UInt imm5 )   /* saturation ceiling */
{
   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   UInt floor = 0;

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   /* node2 = (value > ceiling)?  node3 = min(value, ceiling) */
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0),
                                mkexpr(node1) ) );
   /* node5 = (clamped < floor)?  node6 = max(node3, floor) */
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3),
                                mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}


/* SignedSatQ(): 'clamp' each value so it lies
   between -2^(N-1) <= x <= 2^(N-1) - 1
   Optionally return flag resQ saying whether saturation occurred.
   - see definition in manual, section A2.2.1, page 41
   (bits(N), boolean ) SignedSatQ( integer i, integer N )
   {
     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
     else                      { result = i;           saturated = FALSE; }
     return ( result[N-1:0], saturated );
   }
   (NB: the floor branch sets saturated = TRUE; an earlier version of
   this comment wrongly said FALSE.)
*/
static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
                           UInt imm5,      /* saturation ceiling */
                           IRTemp* res,    /* OUT - Ity_I32 */
                           IRTemp* resQ )  /* OUT - Ity_I32 */
{
   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   /* node2 = (value > ceiling)?  node3 = min(value, ceiling) */
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0), mkexpr(node1) ) );
   /* node5 = (clamped < floor)?  node6 = max(node3, floor) */
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3), mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}


/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   overflow occurred for 32-bit addition.  Needs both args and the
   result.  HD p27.
*/
/* Signed overflow for argL + argR (== resE) happens exactly when the
   two operands have the same sign but the result's sign differs;
   i.e. when bit 31 of both (res ^ argL) and (res ^ argR) is set. */
static
IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
                                      IRTemp argL, IRTemp argR )
{
   IRTemp res = newTemp(Ity_I32);
   assign(res, resE);
   IRExpr* diffL = binop( Iop_Xor32, mkexpr(res), mkexpr(argL) );
   IRExpr* diffR = binop( Iop_Xor32, mkexpr(res), mkexpr(argR) );
   return binop( Iop_Shr32,
                 binop( Iop_And32, diffL, diffR ),
                 mkU8(31) );
}


/*------------------------------------------------------------*/
/*--- Larger helpers                                       ---*/
/*------------------------------------------------------------*/

/* Compute both the result and new C flag value for a LSL by an imm5
   or by a register operand.  May generate reads of the old C value
   (hence only safe to use before any writes to guest state happen).
   Are factored out so can be used by both ARM and Thumb.

   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
      "res"  (the result)  is a.k.a. "shop", shifter operand
      "newC" (the new C)   is a.k.a. "shco", shifter carry out

   The calling convention for res and newC is a bit funny.  They could
   be passed by value, but instead are passed by ref.
1703 */ 1704 1705 static void compute_result_and_C_after_LSL_by_imm5 ( 1706 /*OUT*/HChar* buf, 1707 IRTemp* res, 1708 IRTemp* newC, 1709 IRTemp rMt, UInt shift_amt, /* operands */ 1710 UInt rM /* only for debug printing */ 1711 ) 1712 { 1713 if (shift_amt == 0) { 1714 if (newC) { 1715 assign( *newC, mk_armg_calculate_flag_c() ); 1716 } 1717 assign( *res, mkexpr(rMt) ); 1718 DIS(buf, "r%u", rM); 1719 } else { 1720 vassert(shift_amt >= 1 && shift_amt <= 31); 1721 if (newC) { 1722 assign( *newC, 1723 binop(Iop_And32, 1724 binop(Iop_Shr32, mkexpr(rMt), 1725 mkU8(32 - shift_amt)), 1726 mkU32(1))); 1727 } 1728 assign( *res, 1729 binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) ); 1730 DIS(buf, "r%u, LSL #%u", rM, shift_amt); 1731 } 1732 } 1733 1734 1735 static void compute_result_and_C_after_LSL_by_reg ( 1736 /*OUT*/HChar* buf, 1737 IRTemp* res, 1738 IRTemp* newC, 1739 IRTemp rMt, IRTemp rSt, /* operands */ 1740 UInt rM, UInt rS /* only for debug printing */ 1741 ) 1742 { 1743 // shift left in range 0 .. 255 1744 // amt = rS & 255 1745 // res = amt < 32 ? Rm << amt : 0 1746 // newC = amt == 0 ? oldC : 1747 // amt in 1..32 ? Rm[32-amt] : 0 1748 IRTemp amtT = newTemp(Ity_I32); 1749 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1750 if (newC) { 1751 /* mux0X(amt == 0, 1752 mux0X(amt < 32, 1753 0, 1754 Rm[(32-amt) & 31]) 1755 oldC) 1756 */ 1757 /* About the best you can do is pray that iropt is able 1758 to nuke most or all of the following junk. 
*/ 1759 IRTemp oldC = newTemp(Ity_I32); 1760 assign(oldC, mk_armg_calculate_flag_c() ); 1761 assign( 1762 *newC, 1763 IRExpr_Mux0X( 1764 unop(Iop_1Uto8, 1765 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1766 IRExpr_Mux0X( 1767 unop(Iop_1Uto8, 1768 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1769 mkU32(0), 1770 binop(Iop_Shr32, 1771 mkexpr(rMt), 1772 unop(Iop_32to8, 1773 binop(Iop_And32, 1774 binop(Iop_Sub32, 1775 mkU32(32), 1776 mkexpr(amtT)), 1777 mkU32(31) 1778 ) 1779 ) 1780 ) 1781 ), 1782 mkexpr(oldC) 1783 ) 1784 ); 1785 } 1786 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1787 // Lhs of the & limits the shift to 31 bits, so as to 1788 // give known IR semantics. Rhs of the & is all 1s for 1789 // Rs <= 31 and all 0s for Rs >= 32. 1790 assign( 1791 *res, 1792 binop( 1793 Iop_And32, 1794 binop(Iop_Shl32, 1795 mkexpr(rMt), 1796 unop(Iop_32to8, 1797 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1798 binop(Iop_Sar32, 1799 binop(Iop_Sub32, 1800 mkexpr(amtT), 1801 mkU32(32)), 1802 mkU8(31)))); 1803 DIS(buf, "r%u, LSL r%u", rM, rS); 1804 } 1805 1806 1807 static void compute_result_and_C_after_LSR_by_imm5 ( 1808 /*OUT*/HChar* buf, 1809 IRTemp* res, 1810 IRTemp* newC, 1811 IRTemp rMt, UInt shift_amt, /* operands */ 1812 UInt rM /* only for debug printing */ 1813 ) 1814 { 1815 if (shift_amt == 0) { 1816 // conceptually a 32-bit shift, however: 1817 // res = 0 1818 // newC = Rm[31] 1819 if (newC) { 1820 assign( *newC, 1821 binop(Iop_And32, 1822 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1823 mkU32(1))); 1824 } 1825 assign( *res, mkU32(0) ); 1826 DIS(buf, "r%u, LSR #0(a.k.a. 
32)", rM); 1827 } else { 1828 // shift in range 1..31 1829 // res = Rm >>u shift_amt 1830 // newC = Rm[shift_amt - 1] 1831 vassert(shift_amt >= 1 && shift_amt <= 31); 1832 if (newC) { 1833 assign( *newC, 1834 binop(Iop_And32, 1835 binop(Iop_Shr32, mkexpr(rMt), 1836 mkU8(shift_amt - 1)), 1837 mkU32(1))); 1838 } 1839 assign( *res, 1840 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) ); 1841 DIS(buf, "r%u, LSR #%u", rM, shift_amt); 1842 } 1843 } 1844 1845 1846 static void compute_result_and_C_after_LSR_by_reg ( 1847 /*OUT*/HChar* buf, 1848 IRTemp* res, 1849 IRTemp* newC, 1850 IRTemp rMt, IRTemp rSt, /* operands */ 1851 UInt rM, UInt rS /* only for debug printing */ 1852 ) 1853 { 1854 // shift right in range 0 .. 255 1855 // amt = rS & 255 1856 // res = amt < 32 ? Rm >>u amt : 0 1857 // newC = amt == 0 ? oldC : 1858 // amt in 1..32 ? Rm[amt-1] : 0 1859 IRTemp amtT = newTemp(Ity_I32); 1860 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1861 if (newC) { 1862 /* mux0X(amt == 0, 1863 mux0X(amt < 32, 1864 0, 1865 Rm[(amt-1) & 31]) 1866 oldC) 1867 */ 1868 IRTemp oldC = newTemp(Ity_I32); 1869 assign(oldC, mk_armg_calculate_flag_c() ); 1870 assign( 1871 *newC, 1872 IRExpr_Mux0X( 1873 unop(Iop_1Uto8, 1874 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1875 IRExpr_Mux0X( 1876 unop(Iop_1Uto8, 1877 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1878 mkU32(0), 1879 binop(Iop_Shr32, 1880 mkexpr(rMt), 1881 unop(Iop_32to8, 1882 binop(Iop_And32, 1883 binop(Iop_Sub32, 1884 mkexpr(amtT), 1885 mkU32(1)), 1886 mkU32(31) 1887 ) 1888 ) 1889 ) 1890 ), 1891 mkexpr(oldC) 1892 ) 1893 ); 1894 } 1895 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1896 // Lhs of the & limits the shift to 31 bits, so as to 1897 // give known IR semantics. Rhs of the & is all 1s for 1898 // Rs <= 31 and all 0s for Rs >= 32. 
1899 assign( 1900 *res, 1901 binop( 1902 Iop_And32, 1903 binop(Iop_Shr32, 1904 mkexpr(rMt), 1905 unop(Iop_32to8, 1906 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1907 binop(Iop_Sar32, 1908 binop(Iop_Sub32, 1909 mkexpr(amtT), 1910 mkU32(32)), 1911 mkU8(31)))); 1912 DIS(buf, "r%u, LSR r%u", rM, rS); 1913 } 1914 1915 1916 static void compute_result_and_C_after_ASR_by_imm5 ( 1917 /*OUT*/HChar* buf, 1918 IRTemp* res, 1919 IRTemp* newC, 1920 IRTemp rMt, UInt shift_amt, /* operands */ 1921 UInt rM /* only for debug printing */ 1922 ) 1923 { 1924 if (shift_amt == 0) { 1925 // conceptually a 32-bit shift, however: 1926 // res = Rm >>s 31 1927 // newC = Rm[31] 1928 if (newC) { 1929 assign( *newC, 1930 binop(Iop_And32, 1931 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1932 mkU32(1))); 1933 } 1934 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) ); 1935 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM); 1936 } else { 1937 // shift in range 1..31 1938 // res = Rm >>s shift_amt 1939 // newC = Rm[shift_amt - 1] 1940 vassert(shift_amt >= 1 && shift_amt <= 31); 1941 if (newC) { 1942 assign( *newC, 1943 binop(Iop_And32, 1944 binop(Iop_Shr32, mkexpr(rMt), 1945 mkU8(shift_amt - 1)), 1946 mkU32(1))); 1947 } 1948 assign( *res, 1949 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) ); 1950 DIS(buf, "r%u, ASR #%u", rM, shift_amt); 1951 } 1952 } 1953 1954 1955 static void compute_result_and_C_after_ASR_by_reg ( 1956 /*OUT*/HChar* buf, 1957 IRTemp* res, 1958 IRTemp* newC, 1959 IRTemp rMt, IRTemp rSt, /* operands */ 1960 UInt rM, UInt rS /* only for debug printing */ 1961 ) 1962 { 1963 // arithmetic shift right in range 0 .. 255 1964 // amt = rS & 255 1965 // res = amt < 32 ? Rm >>s amt : Rm >>s 31 1966 // newC = amt == 0 ? oldC : 1967 // amt in 1..32 ? 
Rm[amt-1] : Rm[31] 1968 IRTemp amtT = newTemp(Ity_I32); 1969 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1970 if (newC) { 1971 /* mux0X(amt == 0, 1972 mux0X(amt < 32, 1973 Rm[31], 1974 Rm[(amt-1) & 31]) 1975 oldC) 1976 */ 1977 IRTemp oldC = newTemp(Ity_I32); 1978 assign(oldC, mk_armg_calculate_flag_c() ); 1979 assign( 1980 *newC, 1981 IRExpr_Mux0X( 1982 unop(Iop_1Uto8, 1983 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1984 IRExpr_Mux0X( 1985 unop(Iop_1Uto8, 1986 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1987 binop(Iop_Shr32, 1988 mkexpr(rMt), 1989 mkU8(31) 1990 ), 1991 binop(Iop_Shr32, 1992 mkexpr(rMt), 1993 unop(Iop_32to8, 1994 binop(Iop_And32, 1995 binop(Iop_Sub32, 1996 mkexpr(amtT), 1997 mkU32(1)), 1998 mkU32(31) 1999 ) 2000 ) 2001 ) 2002 ), 2003 mkexpr(oldC) 2004 ) 2005 ); 2006 } 2007 // (Rm >>s (amt <u 32 ? amt : 31)) 2008 assign( 2009 *res, 2010 binop( 2011 Iop_Sar32, 2012 mkexpr(rMt), 2013 unop( 2014 Iop_32to8, 2015 IRExpr_Mux0X( 2016 unop( 2017 Iop_1Uto8, 2018 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))), 2019 mkU32(31), 2020 mkexpr(amtT))))); 2021 DIS(buf, "r%u, ASR r%u", rM, rS); 2022 } 2023 2024 2025 static void compute_result_and_C_after_ROR_by_reg ( 2026 /*OUT*/HChar* buf, 2027 IRTemp* res, 2028 IRTemp* newC, 2029 IRTemp rMt, IRTemp rSt, /* operands */ 2030 UInt rM, UInt rS /* only for debug printing */ 2031 ) 2032 { 2033 // rotate right in range 0 .. 255 2034 // amt = rS & 255 2035 // shop = Rm `ror` (amt & 31) 2036 // shco = amt == 0 ? 
oldC : Rm[(amt-1) & 31] 2037 IRTemp amtT = newTemp(Ity_I32); 2038 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 2039 IRTemp amt5T = newTemp(Ity_I32); 2040 assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) ); 2041 IRTemp oldC = newTemp(Ity_I32); 2042 assign(oldC, mk_armg_calculate_flag_c() ); 2043 if (newC) { 2044 assign( 2045 *newC, 2046 IRExpr_Mux0X( 2047 unop(Iop_32to8, mkexpr(amtT)), 2048 mkexpr(oldC), 2049 binop(Iop_And32, 2050 binop(Iop_Shr32, 2051 mkexpr(rMt), 2052 unop(Iop_32to8, 2053 binop(Iop_And32, 2054 binop(Iop_Sub32, 2055 mkexpr(amtT), 2056 mkU32(1) 2057 ), 2058 mkU32(31) 2059 ) 2060 ) 2061 ), 2062 mkU32(1) 2063 ) 2064 ) 2065 ); 2066 } 2067 assign( 2068 *res, 2069 IRExpr_Mux0X( 2070 unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt), 2071 binop(Iop_Or32, 2072 binop(Iop_Shr32, 2073 mkexpr(rMt), 2074 unop(Iop_32to8, mkexpr(amt5T)) 2075 ), 2076 binop(Iop_Shl32, 2077 mkexpr(rMt), 2078 unop(Iop_32to8, 2079 binop(Iop_Sub32, mkU32(32), mkexpr(amt5T)) 2080 ) 2081 ) 2082 ) 2083 ) 2084 ); 2085 DIS(buf, "r%u, ROR r#%u", rM, rS); 2086 } 2087 2088 2089 /* Generate an expression corresponding to the immediate-shift case of 2090 a shifter operand. This is used both for ARM and Thumb2. 2091 2092 Bind it to a temporary, and return that via *res. If newC is 2093 non-NULL, also compute a value for the shifter's carry out (in the 2094 LSB of a word), bind it to a temporary, and return that via *shco. 2095 2096 Generates GETs from the guest state and is therefore not safe to 2097 use once we start doing PUTs to it, for any given instruction. 2098 2099 'how' is encoded thusly: 2100 00b LSL, 01b LSR, 10b ASR, 11b ROR 2101 Most but not all ARM and Thumb integer insns use this encoding. 2102 Be careful to ensure the right value is passed here. 
*/
static void compute_result_and_C_after_shift_by_imm5 (
               /*OUT*/HChar* buf,
               /*OUT*/IRTemp* res,
               /*OUT*/IRTemp* newC,
               IRTemp rMt,        /* reg to shift */
               UInt how,          /* what kind of shift */
               UInt shift_amt,    /* shift amount (0..31) */
               UInt rM            /* only for debug printing */
            )
{
   vassert(shift_amt < 32);
   vassert(how < 4);

   switch (how) {

      case 0:
         /* LSL by immediate */
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 1:
         /* LSR by immediate */
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 2:
         /* ASR by immediate */
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 3:
         /* ROR by immediate; the amt == 0 encoding means RRX. */
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            }
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                   mkU8(shift_amt - 1)),
                             mkU32(1)));
            }
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         }
         break;

      default:
         /*NOTREACHED*/
         vassert(0);
   }
}


/* Generate an expression corresponding to the register-shift case of
2183 a shifter operand. This is used both for ARM and Thumb2. 2184 2185 Bind it to a temporary, and return that via *res. If newC is 2186 non-NULL, also compute a value for the shifter's carry out (in the 2187 LSB of a word), bind it to a temporary, and return that via *shco. 2188 2189 Generates GETs from the guest state and is therefore not safe to 2190 use once we start doing PUTs to it, for any given instruction. 2191 2192 'how' is encoded thusly: 2193 00b LSL, 01b LSR, 10b ASR, 11b ROR 2194 Most but not all ARM and Thumb integer insns use this encoding. 2195 Be careful to ensure the right value is passed here. 2196 */ 2197 static void compute_result_and_C_after_shift_by_reg ( 2198 /*OUT*/HChar* buf, 2199 /*OUT*/IRTemp* res, 2200 /*OUT*/IRTemp* newC, 2201 IRTemp rMt, /* reg to shift */ 2202 UInt how, /* what kind of shift */ 2203 IRTemp rSt, /* shift amount */ 2204 UInt rM, /* only for debug printing */ 2205 UInt rS /* only for debug printing */ 2206 ) 2207 { 2208 vassert(how < 4); 2209 switch (how) { 2210 case 0: { /* LSL */ 2211 compute_result_and_C_after_LSL_by_reg( 2212 buf, res, newC, rMt, rSt, rM, rS 2213 ); 2214 break; 2215 } 2216 case 1: { /* LSR */ 2217 compute_result_and_C_after_LSR_by_reg( 2218 buf, res, newC, rMt, rSt, rM, rS 2219 ); 2220 break; 2221 } 2222 case 2: { /* ASR */ 2223 compute_result_and_C_after_ASR_by_reg( 2224 buf, res, newC, rMt, rSt, rM, rS 2225 ); 2226 break; 2227 } 2228 case 3: { /* ROR */ 2229 compute_result_and_C_after_ROR_by_reg( 2230 buf, res, newC, rMt, rSt, rM, rS 2231 ); 2232 break; 2233 } 2234 default: 2235 /*NOTREACHED*/ 2236 vassert(0); 2237 } 2238 } 2239 2240 2241 /* Generate an expression corresponding to a shifter_operand, bind it 2242 to a temporary, and return that via *shop. If shco is non-NULL, 2243 also compute a value for the shifter's carry out (in the LSB of a 2244 word), bind it to a temporary, and return that via *shco. 2245 2246 If for some reason we can't come up with a shifter operand (missing 2247 case? 
not really a shifter operand?) return False.

   Generates GETs from the guest state and is therefore not safe to
   use once we start doing PUTs to it, for any given instruction.

   For ARM insns only; not for Thumb.
*/
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      /* Carry out: unchanged when there is no rotation, otherwise
         bit 31 of the rotated immediate. */
      if (shco) {
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      /* bit 7 must be zero in the register-shift form */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}


/* ARM only.  Effective address: [rN +/- imm12], where bU selects
   add (1) or subtract (0). */
static
IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
                                    /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(imm12 < 0x1000);
   UChar opChar = bU == 1 ? '+' : '-';
   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   return
      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
             getIRegA(rN),
             mkU32(imm12) );
}


/* ARM only.  Effective address: [rN +/- (rM shifted by sh2/imm5)].
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
*/
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   UChar opChar = bU == 1 ? '+' : '-';
   IRExpr* index = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         /* imm5 == 0 encodes a shift of 32; untested path (ATC). */
         if (imm5 == 0) {
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* RRX: rotate right one bit through the carry flag. */
            IRTemp rmT = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}


/* ARM only.  Effective address: [rN +/- imm8]. */
static
IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
                                   /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(imm8 < 0x100);
   UChar opChar = bU == 1 ? '+' : '-';
   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   return
      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
             getIRegA(rN),
             mkU32(imm8) );
}


/* ARM only.  Effective address: [rN +/- rM]. */
static
IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
                                  /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   UChar opChar = bU == 1 ? '+' : '-';
   IRExpr* index = getIRegA(rM);
   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}


/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix    = newTemp(Ity_I32);
   IRTemp termL = newTemp(Ity_I32);
   IRTemp termR = newTemp(Ity_I32);
   IRTemp nzcv  = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   /* ix = (irRes >> 5) & 3 | (irRes & 1) -- packs bits 6 and 0. */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}


/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   updatesC is non-NULL, a boolean is written to it indicating whether
   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
*/
static UInt thumbExpandImm ( Bool* updatesC,
                             UInt imm1, UInt imm3, UInt imm8 )
{
   vassert(imm1 < (1<<1));
   vassert(imm3 < (1<<3));
   vassert(imm8 < (1<<8));
   /* i_imm3_a is the 5-bit selector i:imm3:a from the ARM ARM
      pseudocode; values >= 8 select the rotated-immediate form. */
   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   UInt abcdefgh = imm8;
   UInt lbcdefgh = imm8 | 0x80;  /* imm8 with the top bit forced on */
   if (updatesC) {
      *updatesC = i_imm3_a >= 8;
   }
   switch (i_imm3_a) {
      case 0: case 1:
         return abcdefgh;
      case 2: case 3:
         return (abcdefgh << 16) | abcdefgh;
      case 4: case 5:
         return (abcdefgh << 24) | (abcdefgh << 8);
      case 6: case 7:
         return (abcdefgh << 24) | (abcdefgh << 16)
                | (abcdefgh << 8) | abcdefgh;
      case 8 ... 31:
         /* NB: GCC case-range extension. */
         return lbcdefgh << (32 - i_imm3_a);
      default:
         break;
   }
   /*NOTREACHED*/vassert(0);
}


/* Version of thumbExpandImm where we simply feed it the
   instruction halfwords (the lowest addressed one is I0).
*/
static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
                                        UShort i0s, UShort i1s )
{
   UInt i0 = (UInt)i0s;
   UInt i1 = (UInt)i1s;
   /* Slice the i:imm3:imm8 fields out of the two halfwords and hand
      them to thumbExpandImm. */
   UInt imm1 = SLICE_UInt(i0,10,10);
   UInt imm3 = SLICE_UInt(i1,14,12);
   UInt imm8 = SLICE_UInt(i1,7,0);
   return thumbExpandImm(updatesC, imm1, imm3, imm8);
}


/* Thumb16 only.  Given the firstcond and mask fields from an IT
   instruction, compute the 32-bit ITSTATE value implied, as described
   in libvex_guest_arm.h.  This is not the ARM ARM representation.
   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   disassembly printing.  Returns False if firstcond or mask
   denote something invalid.

   The number and conditions for the instructions to be
   conditionalised depend on firstcond and mask:

   mask      cond 1    cond 2      cond 3      cond 4

   1000      fc[3:0]
   x100      fc[3:0]   fc[3:1]:x
   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z

   The condition fields are assembled in *itstate backwards (cond 4 at
   the top, cond 1 at the bottom).  Conditions are << 4'd and then
   ^0xE'd, and those fields that correspond to instructions in the IT
   block are tagged with a 1 bit.
*/
static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
                              /*OUT*/UChar* ch1,
                              /*OUT*/UChar* ch2,
                              /*OUT*/UChar* ch3,
                              UInt firstcond, UInt mask )
{
   vassert(firstcond <= 0xF);
   vassert(mask <= 0xF);
   *itstate = 0;
   *ch1 = *ch2 = *ch3 = '.';
   if (mask == 0)
      return False; /* the logic below actually ensures this anyway,
                       but clearer to make it explicit. */
   if (firstcond == 0xF)
      return False; /* NV is not allowed */
   if (firstcond == 0xE && popcount32(mask) != 1)
      return False; /* if firstcond is AL then all the rest must be too */

   UInt m3 = (mask >> 3) & 1;
   UInt m2 = (mask >> 2) & 1;
   UInt m1 = (mask >> 1) & 1;
   UInt m0 = (mask >> 0) & 1;

   /* fc: firstcond shifted up with the "in IT block" tag bit set;
      ni: the AL condition with the tag bit clear, used to pad slots
      that hold no instruction. */
   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;

   /* One case per trailing-1 position in mask == one case per IT
      block length (1..4 insns).  Each slot's condition for insns
      2..4 is fc[3:1] with the corresponding mask bit as its LSB. */
   if (m3 == 1 && (m2|m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      return True;
   }

   if (m2 == 1 && (m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   if (m1 == 1 && m0 == 0) {
      *itstate = (ni << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   if (m0 == 1) {
      *itstate = (setbit32(fc, 4, m1) << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   return False;
}


/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   Chapter 7 Section 1.
*/
static IRTemp gen_BITREV ( IRTemp x0 )
{
   /* Five successive swap stages: exchange adjacent bits, then 2-bit
      pairs, nibbles, bytes, and finally halfwords.  Each stage keeps
      the low half of every group under the mask, shifts the two
      halves in opposite directions, and ORs them back together.
      This emits exactly the same IR as the hand-unrolled version. */
   static const UInt masks[5]
      = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
   static const UInt shifts[5] = { 1, 2, 4, 8, 16 };
   IRTemp cur = x0;
   Int    i;
   for (i = 0; i < 5; i++) {
      IRTemp nxt = newTemp(Ity_I32);
      assign(nxt,
             binop(Iop_Or32,
                   binop(Iop_Shl32,
                         binop(Iop_And32, mkexpr(cur), mkU32(masks[i])),
                         mkU8(shifts[i])),
                   binop(Iop_Shr32,
                         binop(Iop_And32, mkexpr(cur), mkU32(~masks[i])),
                         mkU8(shifts[i]))
             ));
      cur = nxt;
   }
   return cur;
}


/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   0:1:2:3 (aka byte-swap).
*/
static IRTemp gen_REV ( IRTemp arg )
{
   /* Full 32-bit byte swap: move byte 0 to 3, byte 1 to 2, byte 2 to
      1 and byte 3 to 0. */
   IRTemp res = newTemp(Ity_I32);
   assign(res,
          binop(Iop_Or32,
                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                                 mkU32(0x00FF0000)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                                       mkU32(0x0000FF00)),
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
                                       mkU32(0x000000FF) )
          ))));
   return res;
}


/* Generate IR to do rearrange bytes 3:2:1:0 in a word into the order
   2:3:0:1 (swap within lo and hi halves). */
static IRTemp gen_REV16 ( IRTemp arg )
{
   /* Swap the two bytes within each 16-bit half independently. */
   IRTemp res = newTemp(Ity_I32);
   assign(res,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                      mkU32(0xFF00FF00)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                      mkU32(0x00FF00FF))));
   return res;
}


/*------------------------------------------------------------*/
/*--- Advanced SIMD (NEON) instructions                    ---*/
/*------------------------------------------------------------*/

/*------------------------------------------------------------*/
/*--- NEON data processing                                 ---*/
/*------------------------------------------------------------*/

/* For all NEON DP ops, we use the normal scheme to handle conditional
   writes to registers -- pass in condT and hand that on to the
   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   since NEON is unconditional for ARM.  In Thumb mode condT is
   derived from the ITSTATE shift register in the normal way.
*/

/* Extract the D-register number (D:Vd fields) from theInstr.  If
   bit 6 (Q) is set the instruction operates on Q registers: an even
   number is halved to give the Q register number, while an odd
   number (invalid for a Q reg) has 0x100 added so that callers can
   detect and reject it -- see e.g. the >= 0x100 checks in
   dis_neon_vtb below. */
static
UInt get_neon_d_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;   /* odd D reg in Q context: mark invalid */
      } else {
         x = x >> 1;      /* convert to Q register numbering */
      }
   }
   return x;
}

/* As get_neon_d_regno, but for the N:Vn register fields. */
static
UInt get_neon_n_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;
      } else {
         x = x >> 1;
      }
   }
   return x;
}

/* As get_neon_d_regno, but for the M:Vm register fields. */
static
UInt get_neon_m_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;
      } else {
         x = x >> 1;
      }
   }
   return x;
}

/* VEXT: extract a contiguous run of bytes from the pair (Vn,Vm),
   starting at byte imm4.  Q selects the 128- vs 64-bit form. */
static
Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
{
   UInt dreg = get_neon_d_regno(theInstr);
   UInt mreg = get_neon_m_regno(theInstr);
   UInt nreg = get_neon_n_regno(theInstr);
   UInt imm4 = (theInstr >> 8) & 0xf;
   UInt Q = (theInstr >> 6) & 1;
   HChar reg_t = Q ? 'q' : 'd';

   if (Q) {
      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
              getQReg(mreg), mkU8(imm4)), condT);
   } else {
      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
                 getDRegI64(mreg), mkU8(imm4)), condT);
   }
   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
                                         reg_t, mreg, imm4);
   return True;
}

/* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   /* op == 1 is VTBX (out-of-range indexes leave dest bytes
      unchanged); op == 0 is VTBL (out-of-range indexes give zero). */
   UInt op = (theInstr >> 6) & 1;
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   /* Reject invalid (odd-in-Q-context) registers and table runs that
      would fall off the end of the D register file. */
   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   if (nreg + len > 31)
      return False;

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* imm = 0x0808080808080808: the per-iteration index bias (each
      table register covers 8 bytes of index space). */
   imm = 8;
   imm = (imm << 8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* One iteration per table register: select the bytes whose
      (biased) index lands in this register, accumulate them into the
      result, and note in the mask which lanes have been satisfied. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   if (op) {
      /* VTBX: lanes never matched keep their old destination bytes. */
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}

/* VDUP (scalar) */
static
Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
{
   UInt Q = (theInstr >> 6) & 1;
   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   UInt imm4 = (theInstr >> 16) & 0xF;
   UInt index;
   UInt size;
   IRTemp arg_m;
   IRTemp res;
   IROp op, op2;

   if ((imm4 == 0) || (imm4 == 8))
      return False;
   if ((Q == 1) && ((dreg & 1) == 1))
      return False;
   if (Q)
      dreg >>= 1;
   arg_m = newTemp(Ity_I64);
   assign(arg_m, getDRegI64(mreg));
   if (Q)
      res = newTemp(Ity_V128);
   else
      res = newTemp(Ity_I64);
   /* The low bits of imm4 encode the element size; the remaining
      high bits are the lane index. */
   if ((imm4 & 1) == 1) {
      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
      op2 = Iop_GetElem8x8;
      index = imm4 >> 1;
      size = 8;
   } else if ((imm4 & 3) == 2) {
      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
      op2 = Iop_GetElem16x4;
      index = imm4 >> 2;
      size = 16;
   } else if ((imm4 & 7) == 4) {
      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
      op2 = Iop_GetElem32x2;
      index = imm4 >> 3;
      size = 32;
   } else {
      return False; // can this ever happen?
2972 } 2973 assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index)))); 2974 if (Q) { 2975 putQReg(dreg, mkexpr(res), condT); 2976 } else { 2977 putDRegI64(dreg, mkexpr(res), condT); 2978 } 2979 DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index); 2980 return True; 2981 } 2982 2983 /* A7.4.1 Three registers of the same length */ 2984 static 2985 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT ) 2986 { 2987 UInt Q = (theInstr >> 6) & 1; 2988 UInt dreg = get_neon_d_regno(theInstr); 2989 UInt nreg = get_neon_n_regno(theInstr); 2990 UInt mreg = get_neon_m_regno(theInstr); 2991 UInt A = (theInstr >> 8) & 0xF; 2992 UInt B = (theInstr >> 4) & 1; 2993 UInt C = (theInstr >> 20) & 0x3; 2994 UInt U = (theInstr >> 24) & 1; 2995 UInt size = C; 2996 2997 IRTemp arg_n; 2998 IRTemp arg_m; 2999 IRTemp res; 3000 3001 if (Q) { 3002 arg_n = newTemp(Ity_V128); 3003 arg_m = newTemp(Ity_V128); 3004 res = newTemp(Ity_V128); 3005 assign(arg_n, getQReg(nreg)); 3006 assign(arg_m, getQReg(mreg)); 3007 } else { 3008 arg_n = newTemp(Ity_I64); 3009 arg_m = newTemp(Ity_I64); 3010 res = newTemp(Ity_I64); 3011 assign(arg_n, getDRegI64(nreg)); 3012 assign(arg_m, getDRegI64(mreg)); 3013 } 3014 3015 switch(A) { 3016 case 0: 3017 if (B == 0) { 3018 /* VHADD */ 3019 ULong imm = 0; 3020 IRExpr *imm_val; 3021 IROp addOp; 3022 IROp andOp; 3023 IROp shOp; 3024 char regType = Q ? 'q' : 'd'; 3025 3026 if (size == 3) 3027 return False; 3028 switch(size) { 3029 case 0: imm = 0x101010101010101LL; break; 3030 case 1: imm = 0x1000100010001LL; break; 3031 case 2: imm = 0x100000001LL; break; 3032 default: vassert(0); 3033 } 3034 if (Q) { 3035 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3036 andOp = Iop_AndV128; 3037 } else { 3038 imm_val = mkU64(imm); 3039 andOp = Iop_And64; 3040 } 3041 if (U) { 3042 switch(size) { 3043 case 0: 3044 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3045 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3046 break; 3047 case 1: 3048 addOp = Q ? 
Iop_Add16x8 : Iop_Add16x4; 3049 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3050 break; 3051 case 2: 3052 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3053 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3054 break; 3055 default: 3056 vassert(0); 3057 } 3058 } else { 3059 switch(size) { 3060 case 0: 3061 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3062 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8; 3063 break; 3064 case 1: 3065 addOp = Q ? Iop_Add16x8 : Iop_Add16x4; 3066 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3067 break; 3068 case 2: 3069 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3070 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3071 break; 3072 default: 3073 vassert(0); 3074 } 3075 } 3076 assign(res, 3077 binop(addOp, 3078 binop(addOp, 3079 binop(shOp, mkexpr(arg_m), mkU8(1)), 3080 binop(shOp, mkexpr(arg_n), mkU8(1))), 3081 binop(shOp, 3082 binop(addOp, 3083 binop(andOp, mkexpr(arg_m), imm_val), 3084 binop(andOp, mkexpr(arg_n), imm_val)), 3085 mkU8(1)))); 3086 DIP("vhadd.%c%d %c%d, %c%d, %c%d\n", 3087 U ? 'u' : 's', 8 << size, regType, 3088 dreg, regType, nreg, regType, mreg); 3089 } else { 3090 /* VQADD */ 3091 IROp op, op2; 3092 IRTemp tmp; 3093 char reg_t = Q ? 'q' : 'd'; 3094 if (Q) { 3095 switch (size) { 3096 case 0: 3097 op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16; 3098 op2 = Iop_Add8x16; 3099 break; 3100 case 1: 3101 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8; 3102 op2 = Iop_Add16x8; 3103 break; 3104 case 2: 3105 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4; 3106 op2 = Iop_Add32x4; 3107 break; 3108 case 3: 3109 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2; 3110 op2 = Iop_Add64x2; 3111 break; 3112 default: 3113 vassert(0); 3114 } 3115 } else { 3116 switch (size) { 3117 case 0: 3118 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8; 3119 op2 = Iop_Add8x8; 3120 break; 3121 case 1: 3122 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4; 3123 op2 = Iop_Add16x4; 3124 break; 3125 case 2: 3126 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2; 3127 op2 = Iop_Add32x2; 3128 break; 3129 case 3: 3130 op = U ? 
Iop_QAdd64Ux1 : Iop_QAdd64Sx1; 3131 op2 = Iop_Add64; 3132 break; 3133 default: 3134 vassert(0); 3135 } 3136 } 3137 if (Q) { 3138 tmp = newTemp(Ity_V128); 3139 } else { 3140 tmp = newTemp(Ity_I64); 3141 } 3142 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3143 #ifndef DISABLE_QC_FLAG 3144 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3145 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3146 #endif 3147 DIP("vqadd.%c%d %c%d, %c%d, %c%d\n", 3148 U ? 'u' : 's', 3149 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3150 } 3151 break; 3152 case 1: 3153 if (B == 0) { 3154 /* VRHADD */ 3155 /* VRHADD C, A, B ::= 3156 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */ 3157 IROp shift_op, add_op; 3158 IRTemp cc; 3159 ULong one = 1; 3160 HChar reg_t = Q ? 'q' : 'd'; 3161 switch (size) { 3162 case 0: one = (one << 8) | one; /* fall through */ 3163 case 1: one = (one << 16) | one; /* fall through */ 3164 case 2: one = (one << 32) | one; break; 3165 case 3: return False; 3166 default: vassert(0); 3167 } 3168 if (Q) { 3169 switch (size) { 3170 case 0: 3171 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16; 3172 add_op = Iop_Add8x16; 3173 break; 3174 case 1: 3175 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8; 3176 add_op = Iop_Add16x8; 3177 break; 3178 case 2: 3179 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4; 3180 add_op = Iop_Add32x4; 3181 break; 3182 case 3: 3183 return False; 3184 default: 3185 vassert(0); 3186 } 3187 } else { 3188 switch (size) { 3189 case 0: 3190 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8; 3191 add_op = Iop_Add8x8; 3192 break; 3193 case 1: 3194 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4; 3195 add_op = Iop_Add16x4; 3196 break; 3197 case 2: 3198 shift_op = U ? 
Iop_ShrN32x2 : Iop_SarN32x2; 3199 add_op = Iop_Add32x2; 3200 break; 3201 case 3: 3202 return False; 3203 default: 3204 vassert(0); 3205 } 3206 } 3207 if (Q) { 3208 cc = newTemp(Ity_V128); 3209 assign(cc, binop(shift_op, 3210 binop(add_op, 3211 binop(add_op, 3212 binop(Iop_AndV128, 3213 mkexpr(arg_n), 3214 binop(Iop_64HLtoV128, 3215 mkU64(one), 3216 mkU64(one))), 3217 binop(Iop_AndV128, 3218 mkexpr(arg_m), 3219 binop(Iop_64HLtoV128, 3220 mkU64(one), 3221 mkU64(one)))), 3222 binop(Iop_64HLtoV128, 3223 mkU64(one), 3224 mkU64(one))), 3225 mkU8(1))); 3226 assign(res, binop(add_op, 3227 binop(add_op, 3228 binop(shift_op, 3229 mkexpr(arg_n), 3230 mkU8(1)), 3231 binop(shift_op, 3232 mkexpr(arg_m), 3233 mkU8(1))), 3234 mkexpr(cc))); 3235 } else { 3236 cc = newTemp(Ity_I64); 3237 assign(cc, binop(shift_op, 3238 binop(add_op, 3239 binop(add_op, 3240 binop(Iop_And64, 3241 mkexpr(arg_n), 3242 mkU64(one)), 3243 binop(Iop_And64, 3244 mkexpr(arg_m), 3245 mkU64(one))), 3246 mkU64(one)), 3247 mkU8(1))); 3248 assign(res, binop(add_op, 3249 binop(add_op, 3250 binop(shift_op, 3251 mkexpr(arg_n), 3252 mkU8(1)), 3253 binop(shift_op, 3254 mkexpr(arg_m), 3255 mkU8(1))), 3256 mkexpr(cc))); 3257 } 3258 DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n", 3259 U ? 'u' : 's', 3260 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3261 } else { 3262 if (U == 0) { 3263 switch(C) { 3264 case 0: { 3265 /* VAND */ 3266 HChar reg_t = Q ? 'q' : 'd'; 3267 if (Q) { 3268 assign(res, binop(Iop_AndV128, mkexpr(arg_n), 3269 mkexpr(arg_m))); 3270 } else { 3271 assign(res, binop(Iop_And64, mkexpr(arg_n), 3272 mkexpr(arg_m))); 3273 } 3274 DIP("vand %c%d, %c%d, %c%d\n", 3275 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3276 break; 3277 } 3278 case 1: { 3279 /* VBIC */ 3280 HChar reg_t = Q ? 
'q' : 'd'; 3281 if (Q) { 3282 assign(res, binop(Iop_AndV128,mkexpr(arg_n), 3283 unop(Iop_NotV128, mkexpr(arg_m)))); 3284 } else { 3285 assign(res, binop(Iop_And64, mkexpr(arg_n), 3286 unop(Iop_Not64, mkexpr(arg_m)))); 3287 } 3288 DIP("vbic %c%d, %c%d, %c%d\n", 3289 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3290 break; 3291 } 3292 case 2: 3293 if ( nreg != mreg) { 3294 /* VORR */ 3295 HChar reg_t = Q ? 'q' : 'd'; 3296 if (Q) { 3297 assign(res, binop(Iop_OrV128, mkexpr(arg_n), 3298 mkexpr(arg_m))); 3299 } else { 3300 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3301 mkexpr(arg_m))); 3302 } 3303 DIP("vorr %c%d, %c%d, %c%d\n", 3304 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3305 } else { 3306 /* VMOV */ 3307 HChar reg_t = Q ? 'q' : 'd'; 3308 assign(res, mkexpr(arg_m)); 3309 DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg); 3310 } 3311 break; 3312 case 3:{ 3313 /* VORN */ 3314 HChar reg_t = Q ? 'q' : 'd'; 3315 if (Q) { 3316 assign(res, binop(Iop_OrV128,mkexpr(arg_n), 3317 unop(Iop_NotV128, mkexpr(arg_m)))); 3318 } else { 3319 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3320 unop(Iop_Not64, mkexpr(arg_m)))); 3321 } 3322 DIP("vorn %c%d, %c%d, %c%d\n", 3323 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3324 break; 3325 } 3326 } 3327 } else { 3328 switch(C) { 3329 case 0: 3330 /* VEOR (XOR) */ 3331 if (Q) { 3332 assign(res, binop(Iop_XorV128, mkexpr(arg_n), 3333 mkexpr(arg_m))); 3334 } else { 3335 assign(res, binop(Iop_Xor64, mkexpr(arg_n), 3336 mkexpr(arg_m))); 3337 } 3338 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg, 3339 Q ? 'q' : 'd', nreg, Q ? 
'q' : 'd', mreg); 3340 break; 3341 case 1: 3342 /* VBSL */ 3343 if (Q) { 3344 IRTemp reg_d = newTemp(Ity_V128); 3345 assign(reg_d, getQReg(dreg)); 3346 assign(res, 3347 binop(Iop_OrV128, 3348 binop(Iop_AndV128, mkexpr(arg_n), 3349 mkexpr(reg_d)), 3350 binop(Iop_AndV128, 3351 mkexpr(arg_m), 3352 unop(Iop_NotV128, 3353 mkexpr(reg_d)) ) ) ); 3354 } else { 3355 IRTemp reg_d = newTemp(Ity_I64); 3356 assign(reg_d, getDRegI64(dreg)); 3357 assign(res, 3358 binop(Iop_Or64, 3359 binop(Iop_And64, mkexpr(arg_n), 3360 mkexpr(reg_d)), 3361 binop(Iop_And64, 3362 mkexpr(arg_m), 3363 unop(Iop_Not64, mkexpr(reg_d))))); 3364 } 3365 DIP("vbsl %c%u, %c%u, %c%u\n", 3366 Q ? 'q' : 'd', dreg, 3367 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3368 break; 3369 case 2: 3370 /* VBIT */ 3371 if (Q) { 3372 IRTemp reg_d = newTemp(Ity_V128); 3373 assign(reg_d, getQReg(dreg)); 3374 assign(res, 3375 binop(Iop_OrV128, 3376 binop(Iop_AndV128, mkexpr(arg_n), 3377 mkexpr(arg_m)), 3378 binop(Iop_AndV128, 3379 mkexpr(reg_d), 3380 unop(Iop_NotV128, mkexpr(arg_m))))); 3381 } else { 3382 IRTemp reg_d = newTemp(Ity_I64); 3383 assign(reg_d, getDRegI64(dreg)); 3384 assign(res, 3385 binop(Iop_Or64, 3386 binop(Iop_And64, mkexpr(arg_n), 3387 mkexpr(arg_m)), 3388 binop(Iop_And64, 3389 mkexpr(reg_d), 3390 unop(Iop_Not64, mkexpr(arg_m))))); 3391 } 3392 DIP("vbit %c%u, %c%u, %c%u\n", 3393 Q ? 'q' : 'd', dreg, 3394 Q ? 'q' : 'd', nreg, Q ? 
'q' : 'd', mreg); 3395 break; 3396 case 3: 3397 /* VBIF */ 3398 if (Q) { 3399 IRTemp reg_d = newTemp(Ity_V128); 3400 assign(reg_d, getQReg(dreg)); 3401 assign(res, 3402 binop(Iop_OrV128, 3403 binop(Iop_AndV128, mkexpr(reg_d), 3404 mkexpr(arg_m)), 3405 binop(Iop_AndV128, 3406 mkexpr(arg_n), 3407 unop(Iop_NotV128, mkexpr(arg_m))))); 3408 } else { 3409 IRTemp reg_d = newTemp(Ity_I64); 3410 assign(reg_d, getDRegI64(dreg)); 3411 assign(res, 3412 binop(Iop_Or64, 3413 binop(Iop_And64, mkexpr(reg_d), 3414 mkexpr(arg_m)), 3415 binop(Iop_And64, 3416 mkexpr(arg_n), 3417 unop(Iop_Not64, mkexpr(arg_m))))); 3418 } 3419 DIP("vbif %c%u, %c%u, %c%u\n", 3420 Q ? 'q' : 'd', dreg, 3421 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3422 break; 3423 } 3424 } 3425 } 3426 break; 3427 case 2: 3428 if (B == 0) { 3429 /* VHSUB */ 3430 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */ 3431 ULong imm = 0; 3432 IRExpr *imm_val; 3433 IROp subOp; 3434 IROp notOp; 3435 IROp andOp; 3436 IROp shOp; 3437 if (size == 3) 3438 return False; 3439 switch(size) { 3440 case 0: imm = 0x101010101010101LL; break; 3441 case 1: imm = 0x1000100010001LL; break; 3442 case 2: imm = 0x100000001LL; break; 3443 default: vassert(0); 3444 } 3445 if (Q) { 3446 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3447 andOp = Iop_AndV128; 3448 notOp = Iop_NotV128; 3449 } else { 3450 imm_val = mkU64(imm); 3451 andOp = Iop_And64; 3452 notOp = Iop_Not64; 3453 } 3454 if (U) { 3455 switch(size) { 3456 case 0: 3457 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3458 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3459 break; 3460 case 1: 3461 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3462 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3463 break; 3464 case 2: 3465 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3466 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3467 break; 3468 default: 3469 vassert(0); 3470 } 3471 } else { 3472 switch(size) { 3473 case 0: 3474 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3475 shOp = Q ? 
Iop_SarN8x16 : Iop_SarN8x8; 3476 break; 3477 case 1: 3478 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3479 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3480 break; 3481 case 2: 3482 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3483 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3484 break; 3485 default: 3486 vassert(0); 3487 } 3488 } 3489 assign(res, 3490 binop(subOp, 3491 binop(subOp, 3492 binop(shOp, mkexpr(arg_n), mkU8(1)), 3493 binop(shOp, mkexpr(arg_m), mkU8(1))), 3494 binop(andOp, 3495 binop(andOp, 3496 unop(notOp, mkexpr(arg_n)), 3497 mkexpr(arg_m)), 3498 imm_val))); 3499 DIP("vhsub.%c%u %c%u, %c%u, %c%u\n", 3500 U ? 'u' : 's', 8 << size, 3501 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3502 mreg); 3503 } else { 3504 /* VQSUB */ 3505 IROp op, op2; 3506 IRTemp tmp; 3507 if (Q) { 3508 switch (size) { 3509 case 0: 3510 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16; 3511 op2 = Iop_Sub8x16; 3512 break; 3513 case 1: 3514 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8; 3515 op2 = Iop_Sub16x8; 3516 break; 3517 case 2: 3518 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4; 3519 op2 = Iop_Sub32x4; 3520 break; 3521 case 3: 3522 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2; 3523 op2 = Iop_Sub64x2; 3524 break; 3525 default: 3526 vassert(0); 3527 } 3528 } else { 3529 switch (size) { 3530 case 0: 3531 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8; 3532 op2 = Iop_Sub8x8; 3533 break; 3534 case 1: 3535 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4; 3536 op2 = Iop_Sub16x4; 3537 break; 3538 case 2: 3539 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2; 3540 op2 = Iop_Sub32x2; 3541 break; 3542 case 3: 3543 op = U ? 
Iop_QSub64Ux1 : Iop_QSub64Sx1; 3544 op2 = Iop_Sub64; 3545 break; 3546 default: 3547 vassert(0); 3548 } 3549 } 3550 if (Q) 3551 tmp = newTemp(Ity_V128); 3552 else 3553 tmp = newTemp(Ity_I64); 3554 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3555 #ifndef DISABLE_QC_FLAG 3556 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3557 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3558 #endif 3559 DIP("vqsub.%c%u %c%u, %c%u, %c%u\n", 3560 U ? 'u' : 's', 8 << size, 3561 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3562 mreg); 3563 } 3564 break; 3565 case 3: { 3566 IROp op; 3567 if (Q) { 3568 switch (size) { 3569 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break; 3570 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break; 3571 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break; 3572 case 3: return False; 3573 default: vassert(0); 3574 } 3575 } else { 3576 switch (size) { 3577 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break; 3578 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break; 3579 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break; 3580 case 3: return False; 3581 default: vassert(0); 3582 } 3583 } 3584 if (B == 0) { 3585 /* VCGT */ 3586 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3587 DIP("vcgt.%c%u %c%u, %c%u, %c%u\n", 3588 U ? 'u' : 's', 8 << size, 3589 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3590 mreg); 3591 } else { 3592 /* VCGE */ 3593 /* VCGE res, argn, argm 3594 is equal to 3595 VCGT tmp, argm, argn 3596 VNOT res, tmp */ 3597 assign(res, 3598 unop(Q ? Iop_NotV128 : Iop_Not64, 3599 binop(op, mkexpr(arg_m), mkexpr(arg_n)))); 3600 DIP("vcge.%c%u %c%u, %c%u, %c%u\n", 3601 U ? 'u' : 's', 8 << size, 3602 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3603 mreg); 3604 } 3605 } 3606 break; 3607 case 4: 3608 if (B == 0) { 3609 /* VSHL */ 3610 IROp op, sub_op; 3611 IRTemp tmp; 3612 if (U) { 3613 switch (size) { 3614 case 0: op = Q ? 
Iop_Shl8x16 : Iop_Shl8x8; break; 3615 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break; 3616 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break; 3617 case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break; 3618 default: vassert(0); 3619 } 3620 } else { 3621 tmp = newTemp(Q ? Ity_V128 : Ity_I64); 3622 switch (size) { 3623 case 0: 3624 op = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3625 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3626 break; 3627 case 1: 3628 op = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3629 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3630 break; 3631 case 2: 3632 op = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3633 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3634 break; 3635 case 3: 3636 op = Q ? Iop_Sar64x2 : Iop_Sar64; 3637 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64; 3638 break; 3639 default: 3640 vassert(0); 3641 } 3642 } 3643 if (U) { 3644 if (!Q && (size == 3)) 3645 assign(res, binop(op, mkexpr(arg_m), 3646 unop(Iop_64to8, mkexpr(arg_n)))); 3647 else 3648 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n))); 3649 } else { 3650 if (Q) 3651 assign(tmp, binop(sub_op, 3652 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)), 3653 mkexpr(arg_n))); 3654 else 3655 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n))); 3656 if (!Q && (size == 3)) 3657 assign(res, binop(op, mkexpr(arg_m), 3658 unop(Iop_64to8, mkexpr(tmp)))); 3659 else 3660 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp))); 3661 } 3662 DIP("vshl.%c%u %c%u, %c%u, %c%u\n", 3663 U ? 'u' : 's', 8 << size, 3664 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3665 nreg); 3666 } else { 3667 /* VQSHL */ 3668 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt; 3669 IRTemp tmp, shval, mask, old_shval; 3670 UInt i; 3671 ULong esize; 3672 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; 3673 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3674 if (U) { 3675 switch (size) { 3676 case 0: 3677 op = Q ? Iop_QShl8x16 : Iop_QShl8x8; 3678 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8; 3679 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3680 op_shln = Q ? 
Iop_ShlN8x16 : Iop_ShlN8x8; 3681 break; 3682 case 1: 3683 op = Q ? Iop_QShl16x8 : Iop_QShl16x4; 3684 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4; 3685 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3686 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3687 break; 3688 case 2: 3689 op = Q ? Iop_QShl32x4 : Iop_QShl32x2; 3690 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2; 3691 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3692 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3693 break; 3694 case 3: 3695 op = Q ? Iop_QShl64x2 : Iop_QShl64x1; 3696 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64; 3697 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3698 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3699 break; 3700 default: 3701 vassert(0); 3702 } 3703 } else { 3704 switch (size) { 3705 case 0: 3706 op = Q ? Iop_QSal8x16 : Iop_QSal8x8; 3707 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3708 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3709 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3710 break; 3711 case 1: 3712 op = Q ? Iop_QSal16x8 : Iop_QSal16x4; 3713 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3714 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3715 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3716 break; 3717 case 2: 3718 op = Q ? Iop_QSal32x4 : Iop_QSal32x2; 3719 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3720 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3721 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3722 break; 3723 case 3: 3724 op = Q ? Iop_QSal64x2 : Iop_QSal64x1; 3725 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64; 3726 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3727 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3728 break; 3729 default: 3730 vassert(0); 3731 } 3732 } 3733 if (Q) { 3734 tmp = newTemp(Ity_V128); 3735 shval = newTemp(Ity_V128); 3736 mask = newTemp(Ity_V128); 3737 } else { 3738 tmp = newTemp(Ity_I64); 3739 shval = newTemp(Ity_I64); 3740 mask = newTemp(Ity_I64); 3741 } 3742 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n))); 3743 #ifndef DISABLE_QC_FLAG 3744 /* Only least significant byte from second argument is used. 
3745 Copy this byte to the whole vector element. */ 3746 assign(shval, binop(op_shrn, 3747 binop(op_shln, 3748 mkexpr(arg_n), 3749 mkU8((8 << size) - 8)), 3750 mkU8((8 << size) - 8))); 3751 for(i = 0; i < size; i++) { 3752 old_shval = shval; 3753 shval = newTemp(Q ? Ity_V128 : Ity_I64); 3754 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 3755 mkexpr(old_shval), 3756 binop(op_shln, 3757 mkexpr(old_shval), 3758 mkU8(8 << i)))); 3759 } 3760 /* If shift is greater or equal to the element size and 3761 element is non-zero, then QC flag should be set. */ 3762 esize = (8 << size) - 1; 3763 esize = (esize << 8) | esize; 3764 esize = (esize << 16) | esize; 3765 esize = (esize << 32) | esize; 3766 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 3767 binop(cmp_gt, mkexpr(shval), 3768 Q ? mkU128(esize) : mkU64(esize)), 3769 unop(cmp_neq, mkexpr(arg_m))), 3770 Q ? mkU128(0) : mkU64(0), 3771 Q, condT); 3772 /* Othervise QC flag should be set if shift value is positive and 3773 result beign rightshifted the same value is not equal to left 3774 argument. */ 3775 assign(mask, binop(cmp_gt, mkexpr(shval), 3776 Q ? mkU128(0) : mkU64(0))); 3777 if (!Q && size == 3) 3778 assign(tmp, binop(op_rev, mkexpr(res), 3779 unop(Iop_64to8, mkexpr(arg_n)))); 3780 else 3781 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n))); 3782 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 3783 mkexpr(tmp), mkexpr(mask)), 3784 binop(Q ? Iop_AndV128 : Iop_And64, 3785 mkexpr(arg_m), mkexpr(mask)), 3786 Q, condT); 3787 #endif 3788 DIP("vqshl.%c%u %c%u, %c%u, %c%u\n", 3789 U ? 'u' : 's', 8 << size, 3790 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3791 nreg); 3792 } 3793 break; 3794 case 5: 3795 if (B == 0) { 3796 /* VRSHL */ 3797 IROp op, op_shrn, op_shln, cmp_gt, op_sub, op_add; 3798 IRTemp shval, old_shval, imm_val, round; 3799 UInt i; 3800 ULong imm; 3801 cmp_gt = Q ? 
Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3802 imm = 1L; 3803 switch (size) { 3804 case 0: imm = (imm << 8) | imm; /* fall through */ 3805 case 1: imm = (imm << 16) | imm; /* fall through */ 3806 case 2: imm = (imm << 32) | imm; /* fall through */ 3807 case 3: break; 3808 default: vassert(0); 3809 } 3810 imm_val = newTemp(Q ? Ity_V128 : Ity_I64); 3811 round = newTemp(Q ? Ity_V128 : Ity_I64); 3812 assign(imm_val, Q ? mkU128(imm) : mkU64(imm)); 3813 if (U) { 3814 switch (size) { 3815 case 0: 3816 op = Q ? Iop_Shl8x16 : Iop_Shl8x8; 3817 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3818 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3819 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3820 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3821 break; 3822 case 1: 3823 op = Q ? Iop_Shl16x8 : Iop_Shl16x4; 3824 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3825 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3826 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3827 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3828 break; 3829 case 2: 3830 op = Q ? Iop_Shl32x4 : Iop_Shl32x2; 3831 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3832 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3833 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3834 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3835 break; 3836 case 3: 3837 op = Q ? Iop_Shl64x2 : Iop_Shl64; 3838 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3839 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3840 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3841 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3842 break; 3843 default: 3844 vassert(0); 3845 } 3846 } else { 3847 switch (size) { 3848 case 0: 3849 op = Q ? Iop_Sal8x16 : Iop_Sal8x8; 3850 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3851 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3852 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3853 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3854 break; 3855 case 1: 3856 op = Q ? Iop_Sal16x8 : Iop_Sal16x4; 3857 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3858 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3859 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3860 op_shln = Q ? 
Iop_ShlN16x8 : Iop_ShlN16x4; 3861 break; 3862 case 2: 3863 op = Q ? Iop_Sal32x4 : Iop_Sal32x2; 3864 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3865 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3866 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3867 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3868 break; 3869 case 3: 3870 op = Q ? Iop_Sal64x2 : Iop_Sal64x1; 3871 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3872 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3873 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3874 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3875 break; 3876 default: 3877 vassert(0); 3878 } 3879 } 3880 if (Q) { 3881 shval = newTemp(Ity_V128); 3882 } else { 3883 shval = newTemp(Ity_I64); 3884 } 3885 /* Only least significant byte from second argument is used. 3886 Copy this byte to the whole vector element. */ 3887 assign(shval, binop(op_shrn, 3888 binop(op_shln, 3889 mkexpr(arg_n), 3890 mkU8((8 << size) - 8)), 3891 mkU8((8 << size) - 8))); 3892 for (i = 0; i < size; i++) { 3893 old_shval = shval; 3894 shval = newTemp(Q ? Ity_V128 : Ity_I64); 3895 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 3896 mkexpr(old_shval), 3897 binop(op_shln, 3898 mkexpr(old_shval), 3899 mkU8(8 << i)))); 3900 } 3901 /* Compute the result */ 3902 if (!Q && size == 3 && U) { 3903 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 3904 binop(op, 3905 mkexpr(arg_m), 3906 unop(Iop_64to8, 3907 binop(op_add, 3908 mkexpr(arg_n), 3909 mkexpr(imm_val)))), 3910 binop(Q ? Iop_AndV128 : Iop_And64, 3911 mkexpr(imm_val), 3912 binop(cmp_gt, 3913 Q ? mkU128(0) : mkU64(0), 3914 mkexpr(arg_n))))); 3915 assign(res, binop(op_add, 3916 binop(op, 3917 mkexpr(arg_m), 3918 unop(Iop_64to8, mkexpr(arg_n))), 3919 mkexpr(round))); 3920 } else { 3921 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 3922 binop(op, 3923 mkexpr(arg_m), 3924 binop(op_add, 3925 mkexpr(arg_n), 3926 mkexpr(imm_val))), 3927 binop(Q ? Iop_AndV128 : Iop_And64, 3928 mkexpr(imm_val), 3929 binop(cmp_gt, 3930 Q ? 
mkU128(0) : mkU64(0), 3931 mkexpr(arg_n))))); 3932 assign(res, binop(op_add, 3933 binop(op, mkexpr(arg_m), mkexpr(arg_n)), 3934 mkexpr(round))); 3935 } 3936 DIP("vrshl.%c%u %c%u, %c%u, %c%u\n", 3937 U ? 'u' : 's', 8 << size, 3938 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3939 nreg); 3940 } else { 3941 /* VQRSHL */ 3942 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_sub, op_add; 3943 IRTemp tmp, shval, mask, old_shval, imm_val, round; 3944 UInt i; 3945 ULong esize, imm; 3946 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; 3947 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3948 imm = 1L; 3949 switch (size) { 3950 case 0: imm = (imm << 8) | imm; /* fall through */ 3951 case 1: imm = (imm << 16) | imm; /* fall through */ 3952 case 2: imm = (imm << 32) | imm; /* fall through */ 3953 case 3: break; 3954 default: vassert(0); 3955 } 3956 imm_val = newTemp(Q ? Ity_V128 : Ity_I64); 3957 round = newTemp(Q ? Ity_V128 : Ity_I64); 3958 assign(imm_val, Q ? mkU128(imm) : mkU64(imm)); 3959 if (U) { 3960 switch (size) { 3961 case 0: 3962 op = Q ? Iop_QShl8x16 : Iop_QShl8x8; 3963 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3964 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3965 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8; 3966 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3967 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3968 break; 3969 case 1: 3970 op = Q ? Iop_QShl16x8 : Iop_QShl16x4; 3971 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3972 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3973 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4; 3974 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3975 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3976 break; 3977 case 2: 3978 op = Q ? Iop_QShl32x4 : Iop_QShl32x2; 3979 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3980 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3981 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2; 3982 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3983 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3984 break; 3985 case 3: 3986 op = Q ? 
Iop_QShl64x2 : Iop_QShl64x1; 3987 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3988 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3989 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64; 3990 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3991 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3992 break; 3993 default: 3994 vassert(0); 3995 } 3996 } else { 3997 switch (size) { 3998 case 0: 3999 op = Q ? Iop_QSal8x16 : Iop_QSal8x8; 4000 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 4001 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 4002 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8; 4003 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 4004 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 4005 break; 4006 case 1: 4007 op = Q ? Iop_QSal16x8 : Iop_QSal16x4; 4008 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 4009 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 4010 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4; 4011 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 4012 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 4013 break; 4014 case 2: 4015 op = Q ? Iop_QSal32x4 : Iop_QSal32x2; 4016 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 4017 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 4018 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2; 4019 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 4020 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 4021 break; 4022 case 3: 4023 op = Q ? Iop_QSal64x2 : Iop_QSal64x1; 4024 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 4025 op_add = Q ? Iop_Add64x2 : Iop_Add64; 4026 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64; 4027 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 4028 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 4029 break; 4030 default: 4031 vassert(0); 4032 } 4033 } 4034 if (Q) { 4035 tmp = newTemp(Ity_V128); 4036 shval = newTemp(Ity_V128); 4037 mask = newTemp(Ity_V128); 4038 } else { 4039 tmp = newTemp(Ity_I64); 4040 shval = newTemp(Ity_I64); 4041 mask = newTemp(Ity_I64); 4042 } 4043 /* Only least significant byte from second argument is used. 4044 Copy this byte to the whole vector element. 
*/ 4045 assign(shval, binop(op_shrn, 4046 binop(op_shln, 4047 mkexpr(arg_n), 4048 mkU8((8 << size) - 8)), 4049 mkU8((8 << size) - 8))); 4050 for (i = 0; i < size; i++) { 4051 old_shval = shval; 4052 shval = newTemp(Q ? Ity_V128 : Ity_I64); 4053 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 4054 mkexpr(old_shval), 4055 binop(op_shln, 4056 mkexpr(old_shval), 4057 mkU8(8 << i)))); 4058 } 4059 /* Compute the result */ 4060 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 4061 binop(op, 4062 mkexpr(arg_m), 4063 binop(op_add, 4064 mkexpr(arg_n), 4065 mkexpr(imm_val))), 4066 binop(Q ? Iop_AndV128 : Iop_And64, 4067 mkexpr(imm_val), 4068 binop(cmp_gt, 4069 Q ? mkU128(0) : mkU64(0), 4070 mkexpr(arg_n))))); 4071 assign(res, binop(op_add, 4072 binop(op, mkexpr(arg_m), mkexpr(arg_n)), 4073 mkexpr(round))); 4074 #ifndef DISABLE_QC_FLAG 4075 /* If shift is greater or equal to the element size and element is 4076 non-zero, then QC flag should be set. */ 4077 esize = (8 << size) - 1; 4078 esize = (esize << 8) | esize; 4079 esize = (esize << 16) | esize; 4080 esize = (esize << 32) | esize; 4081 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 4082 binop(cmp_gt, mkexpr(shval), 4083 Q ? mkU128(esize) : mkU64(esize)), 4084 unop(cmp_neq, mkexpr(arg_m))), 4085 Q ? mkU128(0) : mkU64(0), 4086 Q, condT); 4087 /* Othervise QC flag should be set if shift value is positive and 4088 result beign rightshifted the same value is not equal to left 4089 argument. */ 4090 assign(mask, binop(cmp_gt, mkexpr(shval), 4091 Q ? mkU128(0) : mkU64(0))); 4092 if (!Q && size == 3) 4093 assign(tmp, binop(op_rev, mkexpr(res), 4094 unop(Iop_64to8, mkexpr(arg_n)))); 4095 else 4096 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n))); 4097 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 4098 mkexpr(tmp), mkexpr(mask)), 4099 binop(Q ? Iop_AndV128 : Iop_And64, 4100 mkexpr(arg_m), mkexpr(mask)), 4101 Q, condT); 4102 #endif 4103 DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n", 4104 U ? 'u' : 's', 8 << size, 4105 Q ? 
'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 4106 nreg); 4107 } 4108 break; 4109 case 6: 4110 /* VMAX, VMIN */ 4111 if (B == 0) { 4112 /* VMAX */ 4113 IROp op; 4114 if (U == 0) { 4115 switch (size) { 4116 case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break; 4117 case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break; 4118 case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break; 4119 case 3: return False; 4120 default: vassert(0); 4121 } 4122 } else { 4123 switch (size) { 4124 case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break; 4125 case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break; 4126 case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break; 4127 case 3: return False; 4128 default: vassert(0); 4129 } 4130 } 4131 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 4132 DIP("vmax.%c%u %c%u, %c%u, %c%u\n", 4133 U ? 'u' : 's', 8 << size, 4134 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4135 mreg); 4136 } else { 4137 /* VMIN */ 4138 IROp op; 4139 if (U == 0) { 4140 switch (size) { 4141 case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break; 4142 case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break; 4143 case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break; 4144 case 3: return False; 4145 default: vassert(0); 4146 } 4147 } else { 4148 switch (size) { 4149 case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break; 4150 case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break; 4151 case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break; 4152 case 3: return False; 4153 default: vassert(0); 4154 } 4155 } 4156 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 4157 DIP("vmin.%c%u %c%u, %c%u, %c%u\n", 4158 U ? 'u' : 's', 8 << size, 4159 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4160 mreg); 4161 } 4162 break; 4163 case 7: 4164 if (B == 0) { 4165 /* VABD */ 4166 IROp op_cmp, op_sub; 4167 IRTemp cond; 4168 if ((theInstr >> 23) & 1) { 4169 vpanic("VABDL should not be in dis_neon_data_3same\n"); 4170 } 4171 if (Q) { 4172 switch (size) { 4173 case 0: 4174 op_cmp = U ? 
Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; 4175 op_sub = Iop_Sub8x16; 4176 break; 4177 case 1: 4178 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; 4179 op_sub = Iop_Sub16x8; 4180 break; 4181 case 2: 4182 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; 4183 op_sub = Iop_Sub32x4; 4184 break; 4185 case 3: 4186 return False; 4187 default: 4188 vassert(0); 4189 } 4190 } else { 4191 switch (size) { 4192 case 0: 4193 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; 4194 op_sub = Iop_Sub8x8; 4195 break; 4196 case 1: 4197 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; 4198 op_sub = Iop_Sub16x4; 4199 break; 4200 case 2: 4201 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2; 4202 op_sub = Iop_Sub32x2; 4203 break; 4204 case 3: 4205 return False; 4206 default: 4207 vassert(0); 4208 } 4209 } 4210 if (Q) { 4211 cond = newTemp(Ity_V128); 4212 } else { 4213 cond = newTemp(Ity_I64); 4214 } 4215 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m))); 4216 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64, 4217 binop(Q ? Iop_AndV128 : Iop_And64, 4218 binop(op_sub, mkexpr(arg_n), 4219 mkexpr(arg_m)), 4220 mkexpr(cond)), 4221 binop(Q ? Iop_AndV128 : Iop_And64, 4222 binop(op_sub, mkexpr(arg_m), 4223 mkexpr(arg_n)), 4224 unop(Q ? Iop_NotV128 : Iop_Not64, 4225 mkexpr(cond))))); 4226 DIP("vabd.%c%u %c%u, %c%u, %c%u\n", 4227 U ? 'u' : 's', 8 << size, 4228 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4229 mreg); 4230 } else { 4231 /* VABA */ 4232 IROp op_cmp, op_sub, op_add; 4233 IRTemp cond, acc, tmp; 4234 if ((theInstr >> 23) & 1) { 4235 vpanic("VABAL should not be in dis_neon_data_3same"); 4236 } 4237 if (Q) { 4238 switch (size) { 4239 case 0: 4240 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; 4241 op_sub = Iop_Sub8x16; 4242 op_add = Iop_Add8x16; 4243 break; 4244 case 1: 4245 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; 4246 op_sub = Iop_Sub16x8; 4247 op_add = Iop_Add16x8; 4248 break; 4249 case 2: 4250 op_cmp = U ? 
Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; 4251 op_sub = Iop_Sub32x4; 4252 op_add = Iop_Add32x4; 4253 break; 4254 case 3: 4255 return False; 4256 default: 4257 vassert(0); 4258 } 4259 } else { 4260 switch (size) { 4261 case 0: 4262 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; 4263 op_sub = Iop_Sub8x8; 4264 op_add = Iop_Add8x8; 4265 break; 4266 case 1: 4267 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; 4268 op_sub = Iop_Sub16x4; 4269 op_add = Iop_Add16x4; 4270 break; 4271 case 2: 4272 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2; 4273 op_sub = Iop_Sub32x2; 4274 op_add = Iop_Add32x2; 4275 break; 4276 case 3: 4277 return False; 4278 default: 4279 vassert(0); 4280 } 4281 } 4282 if (Q) { 4283 cond = newTemp(Ity_V128); 4284 acc = newTemp(Ity_V128); 4285 tmp = newTemp(Ity_V128); 4286 assign(acc, getQReg(dreg)); 4287 } else { 4288 cond = newTemp(Ity_I64); 4289 acc = newTemp(Ity_I64); 4290 tmp = newTemp(Ity_I64); 4291 assign(acc, getDRegI64(dreg)); 4292 } 4293 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));