/*--------------------------------------------------------------------*/
/*--- begin                                       guest_arm_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info (at) open-works.net

   NEON support is
   Copyright (C) 2010-2012 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
              and Kirill Batuzov <batuzovk (at) ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* XXXX thumb to check:
   that in all cases where putIRegT writes r15, we generate a jump.

   All uses of newTemp assign to an IRTemp and not a UInt.

   For all thumb loads and stores, including VFP ones, new-ITSTATE is
   backed out before the memory op, and restored afterwards.  This
   needs to happen even after we go uncond.  (and for sure it doesn't
   happen for VFP loads/stores right now).

   VFP on thumb: check that we exclude all r13/r15 cases that we
   should.

   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
   taking into account the number of insns guarded by an IT.

   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.

   add specialisations for armg_calculate_flag_c and _v, as they
   are moderately often needed in Thumb code.

   Correctness: ITSTATE handling in Thumb SVCs is wrong.

   Correctness (obscure): in m_transtab, when invalidating code
   address ranges, invalidate up to 18 bytes after the end of the
   range.  This is because the ITSTATE optimisation at the top of
   _THUMB_WRK below analyses up to 18 bytes before the start of any
   given instruction, and so might depend on the invalidated area.
*/

/* Limitations, etc

   - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt

   - SWP: the restart jump back is Ijk_Boring; it should be
     Ijk_NoRedir but that's expensive.  See comments on casLE() in
     guest_x86_toIR.c.
*/
/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)

   Following that, one of the following 3 is allowed
   (standard interpretation in parentheses):

      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
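/* For reference, a minimal sketch (not part of the decoder) of how a
   client would emit such a request in ARM assembly, built only from
   the opcode words listed above:

      mov r12, r12, ror #3    @ E1A0C1EC  --.
      mov r12, r12, ror #13   @ E1A0C6EC    | 16-byte preamble
      mov r12, r12, ror #29   @ E1A0CEEC    |
      mov r12, r12, ror #19   @ E1A0C9EC  --'
      orr r10, r10, r10       @ E18AA00A  R3 = client_request ( R4 )
*/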
/* Translates ARM(v5) code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: is the host bigendian?  This has to do with float vs double
   register accesses on VFP, but it's complex and not properly thought
   out. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge). */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False). */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;

/* These are to do with handling writes to r15.  They are initially
   set at the start of disInstr_ARM_WRK to indicate no update,
   possibly updated during the routine, and examined again at the end.
   If they have been set to indicate a r15 update then a jump is
   generated.  Note, "explicit" jumps (b, bx, etc) are generated
   directly, not using this mechanism -- this is intended to handle
   the implicit-style jumps resulting from (eg) assigning to r15 as
   the result of insns we wouldn't normally consider branchy. */

/* MOD.  Initially False; set to True iff abovementioned handling is
   required. */
static Bool r15written;

/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
   branch to be generated is unconditional, this remains
   IRTemp_INVALID. */
static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */

/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
   this holds the jump kind. */
static IRJumpKind r15kind;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Do a little-endian load of a 16-bit word, regardless of the
   endianness of the underlying host. */
static inline UShort getUShortLittleEndianly ( UChar* p )
{
   UShort w = 0;
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}

static Int popcount32 ( UInt x )
{
   Int res = 0, i;
   for (i = 0; i < 32; i++) {
      res += (x & 1);
      x >>= 1;
   }
   return res;
}

static UInt setbit32 ( UInt x, Int ix, UInt b )
{
   UInt mask = 1 << ix;
   x &= ~mask;
   x |= ((b << ix) & mask);
   return x;
}

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin] */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
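/* A few worked examples for the helpers above (illustrative only):

      ROR32(0x80000001, 4)           == 0x18000000
      popcount32(0xF0)               == 4
      setbit32(0, 31, 1)             == 0x80000000
      BITS4(1,1,1,0)                 == 0xE
      SLICE_UInt(0xE1A0C1EC, 27, 20) == 0x1A   (bits 27:20 of the word)
*/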
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode. */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}

/* Generate an expression for SRC rotated right by ROT. */
static IRExpr* genROR32( IRTemp src, Int rot )
{
   vassert(rot >= 0 && rot < 32);
   if (rot == 0)
      return mkexpr(src);
   return
      binop(Iop_Or32,
            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
}

static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}

/* Generate a 4-aligned version of the given expression if
   the given condition is true.  Else return it unchanged. */
static IRExpr* align4if ( IRExpr* e, Bool b )
{
   if (b)
      return binop(Iop_And32, e, mkU32(~3));
   else
      return e;
}
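/* Example of the mini-DSL above (a sketch): IR for "t = r | (r >> 2)",
   where 'r' and 't' are Ity_I32 temps, would be built as

      assign(t, binop(Iop_Or32,
                      mkexpr(r),
                      binop(Iop_Shr32, mkexpr(r), mkU8(2))));

   All of these helpers are thin wrappers around the IRExpr_* and
   IRStmt_* constructors, plus the implicit statement list in 'irsb'. */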
/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
/* ---------------- Integer registers ---------------- */

static Int integerGuestRegOffset ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed, but I don't think that ever happens on
      ARM. */
   switch (iregNo) {
      case 0:  return OFFB_R0;
      case 1:  return OFFB_R1;
      case 2:  return OFFB_R2;
      case 3:  return OFFB_R3;
      case 4:  return OFFB_R4;
      case 5:  return OFFB_R5;
      case 6:  return OFFB_R6;
      case 7:  return OFFB_R7;
      case 8:  return OFFB_R8;
      case 9:  return OFFB_R9;
      case 10: return OFFB_R10;
      case 11: return OFFB_R11;
      case 12: return OFFB_R12;
      case 13: return OFFB_R13;
      case 14: return OFFB_R14;
      case 15: return OFFB_R15T;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
static IRExpr* llGetIReg ( UInt iregNo )
{
   vassert(iregNo < 16);
   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
}

/* Architected read from a reg in ARM mode.  This automagically adds 8
   to all reads of r15. */
static IRExpr* getIRegA ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_ARM;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* If asked for r15, don't read the guest state value, as that
         may not be up to date in the case where loop unrolling has
         happened, because the first insn's write to the block is
         omitted; hence in the 2nd and subsequent unrollings we don't
         have a correct value in guest r15.  Instead produce the
         constant that we know would be produced at this point. */
      vassert(0 == (guest_R15_curr_instr_notENC & 3));
      e = mkU32(guest_R15_curr_instr_notENC + 8);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}

/* Architected read from a reg in Thumb mode.  This automagically adds
   4 to all reads of r15. */
static IRExpr* getIRegT ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_THUMB;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* Ditto comment in getIRegA. */
      vassert(0 == (guest_R15_curr_instr_notENC & 1));
      e = mkU32(guest_R15_curr_instr_notENC + 4);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}

/* Plain ("low level") write to a reg; no jump or alignment magic for
   r15. */
static void llPutIReg ( UInt iregNo, IRExpr* e )
{
   vassert(iregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
}

/* Architected write to an integer register in ARM mode.  If it is to
   r15, record info so at the end of this insn's translation, a branch
   to it can be made.  Also handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional.  If writing r15, also 4-align it. */
static void putIRegA ( UInt       iregNo,
                       IRExpr*    e,
                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
                       IRJumpKind jk /* if a jump is generated */ )
{
   /* if writing r15, force e to be 4-aligned. */
   // INTERWORKING FIXME.  this needs to be relaxed so that
   // puts caused by LDMxx which load r15 interwork right.
   // but is not aligning at all too relaxed?
   //if (iregNo == 15)
   //   e = binop(Iop_And32, e, mkU32(~3));
   ASSERT_IS_ARM;
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      llPutIReg( iregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetIReg(iregNo),
                               e ));
   }
   if (iregNo == 15) {
      // assert against competing r15 updates.  Shouldn't
      // happen; should be ruled out by the instr matching
      // logic.
      vassert(r15written == False);
      vassert(r15guard   == IRTemp_INVALID);
      vassert(r15kind    == Ijk_Boring);
      r15written = True;
      r15guard   = guardT;
      r15kind    = jk;
   }
}
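/* The guarded-write idiom above (and in all the put* helpers below):
   IRExpr_Mux0X(c8, e0, eX) yields e0 when the I8 value c8 is zero and
   eX otherwise, so a false guard simply rewrites the register with its
   current value -- a no-op from the guest's point of view. */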
/* Architected write to an integer register in Thumb mode.  Writes to
   r15 are not allowed.  Handles conditional writes to the register:
   if guardT == IRTemp_INVALID then the write is unconditional. */
static void putIRegT ( UInt   iregNo,
                       IRExpr* e,
                       IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   ASSERT_IS_THUMB;
   vassert(iregNo >= 0 && iregNo <= 14);
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      llPutIReg( iregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetIReg(iregNo),
                               e ));
   }
}


/* Thumb16 and Thumb32 only.
   Returns true if reg is 13 or 15.  Implements the BadReg
   predicate in the ARM ARM. */
static Bool isBadRegT ( UInt r )
{
   vassert(r <= 15);
   ASSERT_IS_THUMB;
   return r == 13 || r == 15;
}


/* ---------------- Double registers ---------------- */

static Int doubleGuestRegOffset ( UInt dregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the single-precision VFP
      registers. */
   switch (dregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D1;
      case 2:  return OFFB_D2;
      case 3:  return OFFB_D3;
      case 4:  return OFFB_D4;
      case 5:  return OFFB_D5;
      case 6:  return OFFB_D6;
      case 7:  return OFFB_D7;
      case 8:  return OFFB_D8;
      case 9:  return OFFB_D9;
      case 10: return OFFB_D10;
      case 11: return OFFB_D11;
      case 12: return OFFB_D12;
      case 13: return OFFB_D13;
      case 14: return OFFB_D14;
      case 15: return OFFB_D15;
      case 16: return OFFB_D16;
      case 17: return OFFB_D17;
      case 18: return OFFB_D18;
      case 19: return OFFB_D19;
      case 20: return OFFB_D20;
      case 21: return OFFB_D21;
      case 22: return OFFB_D22;
      case 23: return OFFB_D23;
      case 24: return OFFB_D24;
      case 25: return OFFB_D25;
      case 26: return OFFB_D26;
      case 27: return OFFB_D27;
      case 28: return OFFB_D28;
      case 29: return OFFB_D29;
      case 30: return OFFB_D30;
      case 31: return OFFB_D31;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a VFP Dreg. */
static IRExpr* llGetDReg ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
}

/* Architected read from a VFP Dreg. */
static IRExpr* getDReg ( UInt dregNo ) {
   return llGetDReg( dregNo );
}

/* Plain ("low level") write to a VFP Dreg. */
static void llPutDReg ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a VFP Dreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putDReg ( UInt    dregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDReg( dregNo, e );
   } else {
      llPutDReg( dregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetDReg(dregNo),
                               e ));
   }
}

/* And now exactly the same stuff all over again, but this time
   taking/returning I64 rather than F64, to support 64-bit Neon
   ops. */

/* Plain ("low level") read from a Neon Integer Dreg. */
static IRExpr* llGetDRegI64 ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
}

/* Architected read from a Neon Integer Dreg. */
static IRExpr* getDRegI64 ( UInt dregNo ) {
   return llGetDRegI64( dregNo );
}

/* Plain ("low level") write to a Neon Integer Dreg. */
static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a Neon Integer Dreg.  Handles conditional
   writes to the register: if guardT == IRTemp_INVALID then the write
   is unconditional. */
static void putDRegI64 ( UInt    dregNo,
                         IRExpr* e,
                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDRegI64( dregNo, e );
   } else {
      llPutDRegI64( dregNo,
                    IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                                  llGetDRegI64(dregNo),
                                  e ));
   }
}
/* ---------------- Quad registers ---------------- */

static Int quadGuestRegOffset ( UInt qregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the 128 bit Neon registers. */
   switch (qregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D2;
      case 2:  return OFFB_D4;
      case 3:  return OFFB_D6;
      case 4:  return OFFB_D8;
      case 5:  return OFFB_D10;
      case 6:  return OFFB_D12;
      case 7:  return OFFB_D14;
      case 8:  return OFFB_D16;
      case 9:  return OFFB_D18;
      case 10: return OFFB_D20;
      case 11: return OFFB_D22;
      case 12: return OFFB_D24;
      case 13: return OFFB_D26;
      case 14: return OFFB_D28;
      case 15: return OFFB_D30;
      default: vassert(0);
   }
}
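/* Note the aliasing: Q<n> occupies the same guest state bytes as the
   register pair D<2n+1>:D<2n>, so the offsets above are simply the
   offsets of the even-numbered D halves. */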
/* Plain ("low level") read from a Neon Qreg. */
static IRExpr* llGetQReg ( UInt qregNo )
{
   vassert(qregNo < 16);
   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
}

/* Architected read from a Neon Qreg. */
static IRExpr* getQReg ( UInt qregNo ) {
   return llGetQReg( qregNo );
}

/* Plain ("low level") write to a Neon Qreg. */
static void llPutQReg ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
}

/* Architected write to a Neon Qreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putQReg ( UInt    qregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutQReg( qregNo, e );
   } else {
      llPutQReg( qregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetQReg(qregNo),
                               e ));
   }
}


/* ---------------- Float registers ---------------- */

static Int floatGuestRegOffset ( UInt fregNo )
{
   /* Start with the offset of the containing double, and then correct
      for endianness.  Actually this is completely bogus and needs
      careful thought. */
   Int off;
   vassert(fregNo < 32);
   off = doubleGuestRegOffset(fregNo >> 1);
   if (host_is_bigendian) {
      vassert(0);
   } else {
      if (fregNo & 1)
         off += 4;
   }
   return off;
}
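/* So, on a little-endian host, S<2n> is the low 4 bytes of D<n> and
   S<2n+1> the high 4 bytes; e.g. S1 lives at
   doubleGuestRegOffset(0) + 4. */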
/* Plain ("low level") read from a VFP Freg. */
static IRExpr* llGetFReg ( UInt fregNo )
{
   vassert(fregNo < 32);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}

/* Architected read from a VFP Freg. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}

/* Plain ("low level") write to a VFP Freg. */
static void llPutFReg ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}

/* Architected write to a VFP Freg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putFReg ( UInt    fregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutFReg( fregNo, e );
   } else {
      llPutFReg( fregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetFReg(fregNo),
                               e ));
   }
}


/* ---------------- Misc registers ---------------- */

static void putMiscReg32 ( UInt    gsoffset,
                           IRExpr* e, /* :: Ity_I32 */
                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   switch (gsoffset) {
      case OFFB_FPSCR:   break;
      case OFFB_QFLAG32: break;
      case OFFB_GEFLAG0: break;
      case OFFB_GEFLAG1: break;
      case OFFB_GEFLAG2: break;
      case OFFB_GEFLAG3: break;
      default: vassert(0); /* awaiting more cases */
   }
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      stmt(IRStmt_Put(gsoffset, e));
   } else {
      stmt(IRStmt_Put(
         gsoffset,
         IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                       IRExpr_Get(gsoffset, Ity_I32),
                       e
         )
      ));
   }
}

static IRTemp get_ITSTATE ( void )
{
   ASSERT_IS_THUMB;
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   return t;
}

static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}

static IRTemp get_QFLAG32 ( void )
{
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   return t;
}

static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}

/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   Status Register) to indicate that overflow or saturation occurred.
   Nb: 'e' must be zero to denote no saturation, and any nonzero
   value to indicate saturation. */
static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
{
   IRTemp old = get_QFLAG32();
   IRTemp nyu = newTemp(Ity_I32);
   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   put_QFLAG32(nyu, condT);
}

/* Generate code to set APSR.GE[flagNo].  Each fn call sets 1 bit.
   flagNo: which flag bit to set [3...0]
   lowbits_to_ignore:  0 = look at all 32 bits
                       8 = look at top 24 bits only
                      16 = look at top 16 bits only
                      31 = look at the top bit only
   e: input value to be evaluated.
   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
   masked out.  If the resulting value is zero then the GE flag is
   set to 0; any other value sets the flag to 1. */
static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
                           Int lowbits_to_ignore, /* 0, 8, 16 or 31 */
                           IRExpr* e,             /* Ity_I32 */
                           IRTemp condT )
{
   vassert( flagNo >= 0 && flagNo <= 3 );
   vassert( lowbits_to_ignore == 0  ||
            lowbits_to_ignore == 8  ||
            lowbits_to_ignore == 16 ||
            lowbits_to_ignore == 31 );
   IRTemp masked = newTemp(Ity_I32);
   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));

   switch (flagNo) {
      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
      default: vassert(0);
   }
}
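/* Example: put_GEFLAG32(3, 31, e, condT) stores (e >>u 31), so GE3
   subsequently reads as nonzero exactly when bit 31 of e was set;
   with lowbits_to_ignore == 0, e is stored unshifted and any nonzero
   e sets the flag. */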
/* Return the (32-bit, zero-or-nonzero representation scheme) of
   the specified GE flag. */
static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
{
   switch (flagNo) {
      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
      default: vassert(0);
   }
}

/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
   15 of the value.  All other bits are ignored. */
static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
{
   IRTemp ge10 = newTemp(Ity_I32);
   IRTemp ge32 = newTemp(Ity_I32);
   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
}


/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
   bit 7.  All other bits are ignored. */
static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
{
   IRTemp ge0 = newTemp(Ity_I32);
   IRTemp ge1 = newTemp(Ity_I32);
   IRTemp ge2 = newTemp(Ity_I32);
   IRTemp ge3 = newTemp(Ity_I32);
   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
}
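/* E.g. for t32 == 0x80008080 (bit 31 = 1, bit 23 = 0, bit 15 = 1,
   bit 7 = 1), the second function sets GE3=1, GE2=0, GE1=1, GE0=1. */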
/* ---------------- FPSCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPSCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
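/* Sanity check of the swap: ARM encoding 01 (round to +infinity)
   becomes ((1 << 1) & 2) | ((1 >> 1) & 1) == 2, which is indeed the
   IR encoding for +infinity. */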
/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ: return "{eq}";
      case ARMCondNE: return "{ne}";
      case ARMCondHS: return "{hs}"; // or 'cs'
      case ARMCondLO: return "{lo}"; // or 'cc'
      case ARMCondMI: return "{mi}";
      case ARMCondPL: return "{pl}";
      case ARMCondVS: return "{vs}";
      case ARMCondVC: return "{vc}";
      case ARMCondHI: return "{hi}";
      case ARMCondLS: return "{ls}";
      case ARMCondGE: return "{ge}";
      case ARMCondLT: return "{lt}";
      case ARMCondGT: return "{gt}";
      case ARMCondLE: return "{le}";
      case ARMCondAL: return "";     // {al}: is the default
      case ARMCondNV: return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}

/* and a handy shorthand for it */
static HChar* nCC ( ARMCondcode cond ) {
   return name_ARMCondcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I32 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.
*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
   /* First arg is "(cond << 4) | stored-operation".  This requires
      that the ARM_CC_OP_ values all fit in 4 bits.  Hence we are
      passing a (COND, OP) pair in the lowest 8 bits of the first
      argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}
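/* Example: to evaluate a {ne}-conditional insn (ARMCondNE == 1), the
   first argument handed to armg_calculate_condition is
   (1 << 4) | CC_OP -- the condition in bits 7:4, the stored operation
   in bits 3:0. */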
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_armg_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_c", &armg_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate just the overflow flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_armg_calculate_flag_v ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_v", &armg_calculate_flag_v,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_armg_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
{
   IRExpr** args1;
   IRExpr** args2;
   IRExpr *call1, *call2, *res;

   if (Q) {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
                              binop(Iop_GetElem32x4, resL, mkU8(1)),
                              binop(Iop_GetElem32x4, resR, mkU8(0)),
                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
                              binop(Iop_GetElem32x4, resL, mkU8(3)),
                              binop(Iop_GetElem32x4, resR, mkU8(2)),
                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
   } else {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
                              binop(Iop_GetElem32x2, resL, mkU8(1)),
                              binop(Iop_GetElem32x2, resR, mkU8(0)),
                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
   }

#if 1
   call1 = mkIRExprCCall(
              Ity_I32,
              0/*regparm*/,
              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
              args1
           );
   if (Q) {
      call2 = mkIRExprCCall(
                 Ity_I32,
                 0/*regparm*/,
                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
                 args2
              );
   }
   if (Q) {
      res = binop(Iop_Or32, call1, call2);
   } else {
      res = call1;
   }
#else
   if (Q) {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args1[0],
                                         args1[2]),
                                   binop(Iop_Xor32,
                                         args1[1],
                                         args1[3])),
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args2[0],
                                         args2[2]),
                                   binop(Iop_Xor32,
                                         args2[1],
                                         args2[3]))),
                       mkU32(0)));
   } else {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32,
                                   args1[0],
                                   args1[2]),
                             binop(Iop_Xor32,
                                   args1[1],
                                   args1[3])),
                       mkU32(0)));
   }
#endif
   return res;
}

// FIXME: this is named wrongly .. looks like a sticky set of
// QC, not a write to it.
static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
                         IRTemp condT )
{
   putMiscReg32 (OFFB_FPSCR,
                 binop(Iop_Or32,
                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
                       binop(Iop_Shl32,
                             mk_armg_calculate_flag_qc(resL, resR, Q),
                             mkU8(27))),
                 condT);
}
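/* (FPSCR.QC is bit 27 of FPSCR -- hence the shift by 27 above.  Like
   the APSR Q bit it is sticky: this only ever ORs 0 or 1 into it.) */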
/* Build IR to conditionally set the flags thunk.  As with putIReg, if
   guard is IRTemp_INVALID then it's unconditional, else it holds a
   condition :: Ity_I32. */
static
void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
                         IRTemp t_dep2, IRTemp t_ndep,
                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp c8;
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I32);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I32);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I32);
   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   if (guardT == IRTemp_INVALID) {
      /* unconditional */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   } else {
      /* conditional */
      c8 = newTemp(Ity_I8);
      assign( c8, unop(Iop_32to8, mkexpr(guardT)) );
      stmt( IRStmt_Put(
               OFFB_CC_OP,
               IRExpr_Mux0X( mkexpr(c8),
                             IRExpr_Get(OFFB_CC_OP, Ity_I32),
                             mkU32(cc_op) )));
      stmt( IRStmt_Put(
               OFFB_CC_DEP1,
               IRExpr_Mux0X( mkexpr(c8),
                             IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                             mkexpr(t_dep1) )));
      stmt( IRStmt_Put(
               OFFB_CC_DEP2,
               IRExpr_Mux0X( mkexpr(c8),
                             IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                             mkexpr(t_dep2) )));
      stmt( IRStmt_Put(
               OFFB_CC_NDEP,
               IRExpr_Mux0X( mkexpr(c8),
                             IRExpr_Get(OFFB_CC_NDEP, Ity_I32),
                             mkexpr(t_ndep) )));
   }
}


/* Minor variant of the above that sets NDEP to zero (if it
   sets it at all) */
static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
                             IRTemp t_dep2,
                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
}


/* Minor variant of the above that sets DEP2 to zero (if it
   sets it at all) */
static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
                             IRTemp t_ndep,
                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
}


/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   sets them at all) */
static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
}
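/* Typical use, as a sketch: for an ARM ADDS rD, rN, rM, after
   computing argL = rN and argR = rM as I32 temps, the thunk would be
   primed with

      setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );

   N/Z/C/V are then only materialised later, on demand, by the
   armg_calculate_* helpers above. */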
/* ARM only */
/* Generate a side-exit to the next instruction, if the given guard
   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   condition is false!)  This is used to skip over conditional
   instructions which we can't generate straight-line code for, either
   because they are too complex or (more likely) they potentially
   generate exceptions.
*/
static void mk_skip_over_A32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_ARM;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 3));
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
            OFFB_R15T
   ));
}

/* Thumb16 only */
/* ditto, but jump over a 16-bit thumb insn */
static void mk_skip_over_T16_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
            OFFB_R15T
   ));
}


/* Thumb32 only */
/* ditto, but jump over a 32-bit thumb insn */
static void mk_skip_over_T32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
            OFFB_R15T
   ));
}
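/* (All three mk_skip_over_* helpers generate the same shape of
   side-exit; they differ only in the fall-through target: +4 for an
   ARM insn, and +2 or +4 with the Thumb bit forced to 1 for a Thumb16
   or Thumb32 insn respectively.) */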
/* Thumb16 and Thumb32 only
   Generate a SIGILL followed by a restart of the current instruction
   if the given temp is nonzero. */
static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
{
   ASSERT_IS_THUMB;
   vassert(t != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
         Ijk_NoDecode,
         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
         OFFB_R15T
      )
   );
}


/* Inspect the old_itstate, and generate a SIGILL if it indicates that
   we are currently in an IT block and are not the last in the block.
   This also rolls back guest_ITSTATE to its old value before the exit
   and restores it to its new value afterwards.  This is so that if
   the exit is taken, we have an up to date version of ITSTATE
   available.  Without doing that, we have no hope of making precise
   exceptions work. */
static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   ASSERT_IS_THUMB;
   put_ITSTATE(old_itstate); // backout
   IRTemp guards_for_next3 = newTemp(Ity_I32);
   assign(guards_for_next3,
          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   gen_SIGILL_T_if_nonzero(guards_for_next3);
   put_ITSTATE(new_itstate); // restore
}


/* Simpler version of the above, which generates a SIGILL if
   we're anywhere within an IT block. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); // restore
}


/* Generate an APSR value, from the NZCV thunk, and
   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
static IRTemp synthesise_APSR ( void )
{
   IRTemp res1 = newTemp(Ity_I32);
   // Get NZCV
   assign( res1, mk_armg_calculate_flags_nzcv() );
   // OR in the Q value
   IRTemp res2 = newTemp(Ity_I32);
   assign(
      res2,
      binop(Iop_Or32,
            mkexpr(res1),
            binop(Iop_Shl32,
                  unop(Iop_1Uto32,
                       binop(Iop_CmpNE32,
                             mkexpr(get_QFLAG32()),
                             mkU32(0))),
                  mkU8(ARMG_CC_SHIFT_Q)))
   );
   // OR in GE0 .. GE3
   IRExpr* ge0
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   IRExpr* ge1
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   IRExpr* ge2
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   IRExpr* ge3
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   IRTemp res3 = newTemp(Ity_I32);
   assign(res3,
          binop(Iop_Or32,
                mkexpr(res2),
                binop(Iop_Or32,
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge0, mkU8(16)),
                            binop(Iop_Shl32, ge1, mkU8(17))),
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge2, mkU8(18)),
                            binop(Iop_Shl32, ge3, mkU8(19))) )));
   return res3;
}


/* and the inverse transformation: given an APSR value,
   set the NZCV thunk, the Q flag, and the GE flags. */
static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
                                IRTemp apsrT, IRTemp condT )
{
   vassert(write_nzcvq || write_ge);
   if (write_nzcvq) {
      // Do NZCV
      IRTemp immT = newTemp(Ity_I32);
      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
      // Do Q
      IRTemp qnewT = newTemp(Ity_I32);
      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
      put_QFLAG32(qnewT, condT);
   }
   if (write_ge) {
      // Do GE3..0
      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
                   condT);
      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
                   condT);
      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
                   condT);
      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
                   condT);
   }
}
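/* For reference, the APSR bit positions assembled/disassembled by the
   two functions above: N=31, Z=30, C=29, V=28, Q=27, GE[3:0]=19:16. */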
/*------------------------------------------------------------*/
/*--- Helpers for saturation                               ---*/
/*------------------------------------------------------------*/

/* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */

/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   Optionally return flag resQ saying whether saturation occurred.
   See definition in manual, section A2.2.1, page 41
   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   {
     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
     elsif ( i < 0 )    { result = 0;       saturated = TRUE; }
     else               { result = i;       saturated = FALSE; }
     return ( result<N-1:0>, saturated );
   }
*/
static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
                             IRTemp* resQ, /* OUT - Ity_I32 */
                             IRTemp regT,  /* value to clamp - Ity_I32 */
                             UInt imm5 )   /* saturation ceiling */
{
   UInt ceil  = (1 << imm5) - 1; // (2^imm5)-1
   UInt floor = 0;

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0),
                                mkexpr(node1) ) );
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3),
                                mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}


/* SignedSatQ(): 'clamp' each value so it lies between
   -2^(N-1) <= x <= 2^(N-1) - 1.
   Optionally return flag resQ saying whether saturation occurred.
   - see definition in manual, section A2.2.1, page 41
   (bits(N), boolean) SignedSatQ( integer i, integer N )
   {
     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
     else                      { result = i;           saturated = FALSE; }
     return ( result<N-1:0>, saturated );
   }
*/
static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
                           UInt imm5,      /* saturation ceiling */
                           IRTemp* res,    /* OUT - Ity_I32 */
                           IRTemp* resQ )  /* OUT - Ity_I32 */
{
   Int ceil  =  (1 << (imm5-1)) - 1; //  (2^(imm5-1))-1
   Int floor = -(1 << (imm5-1));     // -(2^(imm5-1))

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0), mkexpr(node1) ) );
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3), mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}
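/* Example: for imm5 == 8, armSignedSatQ clamps to [-128 .. 127] and
   armUnsignedSatQ clamps to [0 .. 255].  In both cases *resQ is
   res XOR original, which is nonzero exactly when clamping changed
   the value. */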
/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   overflow occurred for 32-bit addition.  Needs both args and the
   result.  HD p27. */
static
IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
                                      IRTemp argL, IRTemp argR )
{
   IRTemp res = newTemp(Ity_I32);
   assign(res, resE);
   return
      binop( Iop_Shr32,
             binop( Iop_And32,
                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
             mkU8(31) );
}

/* Similarly .. also from HD p27 .. */
static
IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
                                      IRTemp argL, IRTemp argR )
{
   IRTemp res = newTemp(Ity_I32);
   assign(res, resE);
   return
      binop( Iop_Shr32,
             binop( Iop_And32,
                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
             mkU8(31) );
}
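/* Worked example for the Add32 case: argL = 0x7FFFFFFF, argR = 1 gives
   res = 0x80000000; then (res ^ argL) & (res ^ argR)
   = 0xFFFFFFFF & 0x80000001 = 0x80000001, whose bit 31 is 1 -- signed
   overflow, as expected. */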
Rm[32-amt] : 0 1772 IRTemp amtT = newTemp(Ity_I32); 1773 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1774 if (newC) { 1775 /* mux0X(amt == 0, 1776 mux0X(amt < 32, 1777 0, 1778 Rm[(32-amt) & 31]), 1779 oldC) 1780 */ 1781 /* About the best you can do is pray that iropt is able 1782 to nuke most or all of the following junk. */ 1783 IRTemp oldC = newTemp(Ity_I32); 1784 assign(oldC, mk_armg_calculate_flag_c() ); 1785 assign( 1786 *newC, 1787 IRExpr_Mux0X( 1788 unop(Iop_1Uto8, 1789 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1790 IRExpr_Mux0X( 1791 unop(Iop_1Uto8, 1792 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1793 mkU32(0), 1794 binop(Iop_And32, 1795 binop(Iop_Shr32, 1796 mkexpr(rMt), 1797 unop(Iop_32to8, 1798 binop(Iop_And32, 1799 binop(Iop_Sub32, 1800 mkU32(32), 1801 mkexpr(amtT)), 1802 mkU32(31) 1803 ) 1804 ) 1805 ), 1806 mkU32(1) 1807 ) 1808 ), 1809 mkexpr(oldC) 1810 ) 1811 ); 1812 } 1813 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1814 // Lhs of the & limits the shift to 31 bits, so as to 1815 // give known IR semantics. Rhs of the & is all 1s for 1816 // Rs <= 31 and all 0s for Rs >= 32. 1817 assign( 1818 *res, 1819 binop( 1820 Iop_And32, 1821 binop(Iop_Shl32, 1822 mkexpr(rMt), 1823 unop(Iop_32to8, 1824 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1825 binop(Iop_Sar32, 1826 binop(Iop_Sub32, 1827 mkexpr(amtT), 1828 mkU32(32)), 1829 mkU8(31)))); 1830 DIS(buf, "r%u, LSL r%u", rM, rS); 1831 } 1832 1833 1834 static void compute_result_and_C_after_LSR_by_imm5 ( 1835 /*OUT*/HChar* buf, 1836 IRTemp* res, 1837 IRTemp* newC, 1838 IRTemp rMt, UInt shift_amt, /* operands */ 1839 UInt rM /* only for debug printing */ 1840 ) 1841 { 1842 if (shift_amt == 0) { 1843 // conceptually a 32-bit shift, however: 1844 // res = 0 1845 // newC = Rm[31] 1846 if (newC) { 1847 assign( *newC, 1848 binop(Iop_And32, 1849 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1850 mkU32(1))); 1851 } 1852 assign( *res, mkU32(0) ); 1853 DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM); 1854 } else { 1855 // shift in range 1..31 1856 // res = Rm >>u shift_amt 1857 // newC = Rm[shift_amt - 1] 1858 vassert(shift_amt >= 1 && shift_amt <= 31); 1859 if (newC) { 1860 assign( *newC, 1861 binop(Iop_And32, 1862 binop(Iop_Shr32, mkexpr(rMt), 1863 mkU8(shift_amt - 1)), 1864 mkU32(1))); 1865 } 1866 assign( *res, 1867 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) ); 1868 DIS(buf, "r%u, LSR #%u", rM, shift_amt); 1869 } 1870 } 1871 1872 1873 static void compute_result_and_C_after_LSR_by_reg ( 1874 /*OUT*/HChar* buf, 1875 IRTemp* res, 1876 IRTemp* newC, 1877 IRTemp rMt, IRTemp rSt, /* operands */ 1878 UInt rM, UInt rS /* only for debug printing */ 1879 ) 1880 { 1881 // shift right in range 0 .. 255 1882 // amt = rS & 255 1883 // res = amt < 32 ? Rm >>u amt : 0 1884 // newC = amt == 0 ? oldC : 1885 // amt in 1..32 ? 
Rm[amt-1] : 0 1886 IRTemp amtT = newTemp(Ity_I32); 1887 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1888 if (newC) { 1889 /* mux0X(amt == 0, 1890 mux0X(amt < 32, 1891 0, 1892 Rm[(amt-1) & 31]), 1893 oldC) 1894 */ 1895 IRTemp oldC = newTemp(Ity_I32); 1896 assign(oldC, mk_armg_calculate_flag_c() ); 1897 assign( 1898 *newC, 1899 IRExpr_Mux0X( 1900 unop(Iop_1Uto8, 1901 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1902 IRExpr_Mux0X( 1903 unop(Iop_1Uto8, 1904 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1905 mkU32(0), 1906 binop(Iop_And32, 1907 binop(Iop_Shr32, 1908 mkexpr(rMt), 1909 unop(Iop_32to8, 1910 binop(Iop_And32, 1911 binop(Iop_Sub32, 1912 mkexpr(amtT), 1913 mkU32(1)), 1914 mkU32(31) 1915 ) 1916 ) 1917 ), 1918 mkU32(1) 1919 ) 1920 ), 1921 mkexpr(oldC) 1922 ) 1923 ); 1924 } 1925 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1926 // Lhs of the & limits the shift to 31 bits, so as to 1927 // give known IR semantics. Rhs of the & is all 1s for 1928 // Rs <= 31 and all 0s for Rs >= 32. 1929 assign( 1930 *res, 1931 binop( 1932 Iop_And32, 1933 binop(Iop_Shr32, 1934 mkexpr(rMt), 1935 unop(Iop_32to8, 1936 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1937 binop(Iop_Sar32, 1938 binop(Iop_Sub32, 1939 mkexpr(amtT), 1940 mkU32(32)), 1941 mkU8(31)))); 1942 DIS(buf, "r%u, LSR r%u", rM, rS); 1943 } 1944 1945 1946 static void compute_result_and_C_after_ASR_by_imm5 ( 1947 /*OUT*/HChar* buf, 1948 IRTemp* res, 1949 IRTemp* newC, 1950 IRTemp rMt, UInt shift_amt, /* operands */ 1951 UInt rM /* only for debug printing */ 1952 ) 1953 { 1954 if (shift_amt == 0) { 1955 // conceptually a 32-bit shift, however: 1956 // res = Rm >>s 31 1957 // newC = Rm[31] 1958 if (newC) { 1959 assign( *newC, 1960 binop(Iop_And32, 1961 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1962 mkU32(1))); 1963 } 1964 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) ); 1965 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM); 1966 } else { 1967 // shift in range 1..31 1968 // res = Rm >>s shift_amt 1969 // newC = Rm[shift_amt - 1] 1970 vassert(shift_amt >= 1 && shift_amt <= 31); 1971 if (newC) { 1972 assign( *newC, 1973 binop(Iop_And32, 1974 binop(Iop_Shr32, mkexpr(rMt), 1975 mkU8(shift_amt - 1)), 1976 mkU32(1))); 1977 } 1978 assign( *res, 1979 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) ); 1980 DIS(buf, "r%u, ASR #%u", rM, shift_amt); 1981 } 1982 } 1983 1984 1985 static void compute_result_and_C_after_ASR_by_reg ( 1986 /*OUT*/HChar* buf, 1987 IRTemp* res, 1988 IRTemp* newC, 1989 IRTemp rMt, IRTemp rSt, /* operands */ 1990 UInt rM, UInt rS /* only for debug printing */ 1991 ) 1992 { 1993 // arithmetic shift right in range 0 .. 255 1994 // amt = rS & 255 1995 // res = amt < 32 ? Rm >>s amt : Rm >>s 31 1996 // newC = amt == 0 ? oldC : 1997 // amt in 1..32 ? 
Rm[amt-1] : Rm[31]
1998 IRTemp amtT = newTemp(Ity_I32);
1999 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2000 if (newC) {
2001 /* mux0X(amt == 0,
2002 mux0X(amt < 32,
2003 Rm[31],
2004 Rm[(amt-1) & 31]),
2005 oldC)
2006 */
2007 IRTemp oldC = newTemp(Ity_I32);
2008 assign(oldC, mk_armg_calculate_flag_c() );
2009 assign(
2010 *newC,
2011 IRExpr_Mux0X(
2012 unop(Iop_1Uto8,
2013 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
2014 IRExpr_Mux0X(
2015 unop(Iop_1Uto8,
2016 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
2017 binop(Iop_And32,
2018 binop(Iop_Shr32,
2019 mkexpr(rMt),
2020 mkU8(31)
2021 ),
2022 mkU32(1)
2023 ),
2024 binop(Iop_And32,
2025 binop(Iop_Shr32,
2026 mkexpr(rMt),
2027 unop(Iop_32to8,
2028 binop(Iop_And32,
2029 binop(Iop_Sub32,
2030 mkexpr(amtT),
2031 mkU32(1)),
2032 mkU32(31)
2033 )
2034 )
2035 ),
2036 mkU32(1)
2037 )
2038 ),
2039 mkexpr(oldC)
2040 )
2041 );
2042 }
2043 // (Rm >>s (amt <u 32 ? amt : 31))
2044 assign(
2045 *res,
2046 binop(
2047 Iop_Sar32,
2048 mkexpr(rMt),
2049 unop(
2050 Iop_32to8,
2051 IRExpr_Mux0X(
2052 unop(
2053 Iop_1Uto8,
2054 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))),
2055 mkU32(31),
2056 mkexpr(amtT))))));
2057 DIS(buf, "r%u, ASR r%u", rM, rS);
2058 }
2059
2060
2061 static void compute_result_and_C_after_ROR_by_reg (
2062 /*OUT*/HChar* buf,
2063 IRTemp* res,
2064 IRTemp* newC,
2065 IRTemp rMt, IRTemp rSt, /* operands */
2066 UInt rM, UInt rS /* only for debug printing */
2067 )
2068 {
2069 // rotate right in range 0 .. 255
2070 // amt = rS & 255
2071 // shop = Rm `ror` (amt & 31)
2072 // shco = amt == 0 ? oldC : Rm[(amt-1) & 31]
2073 IRTemp amtT = newTemp(Ity_I32);
2074 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2075 IRTemp amt5T = newTemp(Ity_I32);
2076 assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2077 IRTemp oldC = newTemp(Ity_I32);
2078 assign(oldC, mk_armg_calculate_flag_c() );
2079 if (newC) {
2080 assign(
2081 *newC,
2082 IRExpr_Mux0X(
2083 unop(Iop_32to8, mkexpr(amtT)),
2084 mkexpr(oldC),
2085 binop(Iop_And32,
2086 binop(Iop_Shr32,
2087 mkexpr(rMt),
2088 unop(Iop_32to8,
2089 binop(Iop_And32,
2090 binop(Iop_Sub32,
2091 mkexpr(amtT),
2092 mkU32(1)
2093 ),
2094 mkU32(31)
2095 )
2096 )
2097 ),
2098 mkU32(1)
2099 )
2100 )
2101 );
2102 }
2103 assign(
2104 *res,
2105 IRExpr_Mux0X(
2106 unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt),
2107 binop(Iop_Or32,
2108 binop(Iop_Shr32,
2109 mkexpr(rMt),
2110 unop(Iop_32to8, mkexpr(amt5T))
2111 ),
2112 binop(Iop_Shl32,
2113 mkexpr(rMt),
2114 unop(Iop_32to8,
2115 binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2116 )
2117 )
2118 )
2119 )
2120 );
2121 DIS(buf, "r%u, ROR r%u", rM, rS);
2122 }
2123
2124
2125 /* Generate an expression corresponding to the immediate-shift case of
2126 a shifter operand. This is used both for ARM and Thumb2.
2127
2128 Bind it to a temporary, and return that via *res. If newC is
2129 non-NULL, also compute a value for the shifter's carry out (in the
2130 LSB of a word), bind it to a temporary, and return that via *newC.
2131
2132 Generates GETs from the guest state and is therefore not safe to
2133 use once we start doing PUTs to it, for any given instruction.
2134
2135 'how' is encoded thusly:
2136 00b LSL, 01b LSR, 10b ASR, 11b ROR
2137 Most but not all ARM and Thumb integer insns use this encoding.
2138 Be careful to ensure the right value is passed here.
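Note the shift_amt == 0 special cases handled below: LSL #0 passes
Rm through with C unchanged, LSR #0 and ASR #0 denote 32-bit
shifts, and ROR #0 denotes RRX (rotate right one bit through
carry).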
2139 */
2140 static void compute_result_and_C_after_shift_by_imm5 (
2141 /*OUT*/HChar* buf,
2142 /*OUT*/IRTemp* res,
2143 /*OUT*/IRTemp* newC,
2144 IRTemp rMt, /* reg to shift */
2145 UInt how, /* what kind of shift */
2146 UInt shift_amt, /* shift amount (0..31) */
2147 UInt rM /* only for debug printing */
2148 )
2149 {
2150 vassert(shift_amt < 32);
2151 vassert(how < 4);
2152
2153 switch (how) {
2154
2155 case 0:
2156 compute_result_and_C_after_LSL_by_imm5(
2157 buf, res, newC, rMt, shift_amt, rM
2158 );
2159 break;
2160
2161 case 1:
2162 compute_result_and_C_after_LSR_by_imm5(
2163 buf, res, newC, rMt, shift_amt, rM
2164 );
2165 break;
2166
2167 case 2:
2168 compute_result_and_C_after_ASR_by_imm5(
2169 buf, res, newC, rMt, shift_amt, rM
2170 );
2171 break;
2172
2173 case 3:
2174 if (shift_amt == 0) {
2175 IRTemp oldcT = newTemp(Ity_I32);
2176 // rotate right 1 bit through carry
2177 // RRX -- described at ARM ARM A5-17
2178 // res = (oldC << 31) | (Rm >>u 1)
2179 // newC = Rm[0]
2180 if (newC) {
2181 assign( *newC,
2182 binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2183 }
2184 assign( oldcT, mk_armg_calculate_flag_c() );
2185 assign( *res,
2186 binop(Iop_Or32,
2187 binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2188 binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2189 DIS(buf, "r%u, RRX", rM);
2190 } else {
2191 // rotate right in range 1..31
2192 // res = Rm `ror` shift_amt
2193 // newC = Rm[shift_amt - 1]
2194 vassert(shift_amt >= 1 && shift_amt <= 31);
2195 if (newC) {
2196 assign( *newC,
2197 binop(Iop_And32,
2198 binop(Iop_Shr32, mkexpr(rMt),
2199 mkU8(shift_amt - 1)),
2200 mkU32(1)));
2201 }
2202 assign( *res,
2203 binop(Iop_Or32,
2204 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2205 binop(Iop_Shl32, mkexpr(rMt),
2206 mkU8(32-shift_amt))));
2207 DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2208 }
2209 break;
2210
2211 default:
2212 /*NOTREACHED*/
2213 vassert(0);
2214 }
2215 }
2216
2217
2218 /* Generate an expression corresponding to the register-shift case of
2219 a shifter operand. This is used both for ARM and Thumb2.
2220
2221 Bind it to a temporary, and return that via *res. If newC is
2222 non-NULL, also compute a value for the shifter's carry out (in the
2223 LSB of a word), bind it to a temporary, and return that via *newC.
2224
2225 Generates GETs from the guest state and is therefore not safe to
2226 use once we start doing PUTs to it, for any given instruction.
2227
2228 'how' is encoded thusly:
2229 00b LSL, 01b LSR, 10b ASR, 11b ROR
2230 Most but not all ARM and Thumb integer insns use this encoding.
2231 Be careful to ensure the right value is passed here.
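Unlike the immediate case, the shift amount here is taken from the
bottom byte of Rs, so amounts of 32 and above are possible; the
individual _by_reg helpers above handle those cases explicitly.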
2232 */ 2233 static void compute_result_and_C_after_shift_by_reg ( 2234 /*OUT*/HChar* buf, 2235 /*OUT*/IRTemp* res, 2236 /*OUT*/IRTemp* newC, 2237 IRTemp rMt, /* reg to shift */ 2238 UInt how, /* what kind of shift */ 2239 IRTemp rSt, /* shift amount */ 2240 UInt rM, /* only for debug printing */ 2241 UInt rS /* only for debug printing */ 2242 ) 2243 { 2244 vassert(how < 4); 2245 switch (how) { 2246 case 0: { /* LSL */ 2247 compute_result_and_C_after_LSL_by_reg( 2248 buf, res, newC, rMt, rSt, rM, rS 2249 ); 2250 break; 2251 } 2252 case 1: { /* LSR */ 2253 compute_result_and_C_after_LSR_by_reg( 2254 buf, res, newC, rMt, rSt, rM, rS 2255 ); 2256 break; 2257 } 2258 case 2: { /* ASR */ 2259 compute_result_and_C_after_ASR_by_reg( 2260 buf, res, newC, rMt, rSt, rM, rS 2261 ); 2262 break; 2263 } 2264 case 3: { /* ROR */ 2265 compute_result_and_C_after_ROR_by_reg( 2266 buf, res, newC, rMt, rSt, rM, rS 2267 ); 2268 break; 2269 } 2270 default: 2271 /*NOTREACHED*/ 2272 vassert(0); 2273 } 2274 } 2275 2276 2277 /* Generate an expression corresponding to a shifter_operand, bind it 2278 to a temporary, and return that via *shop. If shco is non-NULL, 2279 also compute a value for the shifter's carry out (in the LSB of a 2280 word), bind it to a temporary, and return that via *shco. 2281 2282 If for some reason we can't come up with a shifter operand (missing 2283 case? not really a shifter operand?) return False. 2284 2285 Generates GETs from the guest state and is therefore not safe to 2286 use once we start doing PUTs to it, for any given instruction. 2287 2288 For ARM insns only; not for Thumb. 2289 */ 2290 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0, 2291 /*OUT*/IRTemp* shop, 2292 /*OUT*/IRTemp* shco, 2293 /*OUT*/HChar* buf ) 2294 { 2295 UInt insn_4 = (insn_11_0 >> 4) & 1; 2296 UInt insn_7 = (insn_11_0 >> 7) & 1; 2297 vassert(insn_25 <= 0x1); 2298 vassert(insn_11_0 <= 0xFFF); 2299 2300 vassert(shop && *shop == IRTemp_INVALID); 2301 *shop = newTemp(Ity_I32); 2302 2303 if (shco) { 2304 vassert(*shco == IRTemp_INVALID); 2305 *shco = newTemp(Ity_I32); 2306 } 2307 2308 /* 32-bit immediate */ 2309 2310 if (insn_25 == 1) { 2311 /* immediate: (7:0) rotated right by 2 * (11:8) */ 2312 UInt imm = (insn_11_0 >> 0) & 0xFF; 2313 UInt rot = 2 * ((insn_11_0 >> 8) & 0xF); 2314 vassert(rot <= 30); 2315 imm = ROR32(imm, rot); 2316 if (shco) { 2317 if (rot == 0) { 2318 assign( *shco, mk_armg_calculate_flag_c() ); 2319 } else { 2320 assign( *shco, mkU32( (imm >> 31) & 1 ) ); 2321 } 2322 } 2323 DIS(buf, "#0x%x", imm); 2324 assign( *shop, mkU32(imm) ); 2325 return True; 2326 } 2327 2328 /* Shift/rotate by immediate */ 2329 2330 if (insn_25 == 0 && insn_4 == 0) { 2331 /* Rm (3:0) shifted (6:5) by immediate (11:7) */ 2332 UInt shift_amt = (insn_11_0 >> 7) & 0x1F; 2333 UInt rM = (insn_11_0 >> 0) & 0xF; 2334 UInt how = (insn_11_0 >> 5) & 3; 2335 /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */ 2336 IRTemp rMt = newTemp(Ity_I32); 2337 assign(rMt, getIRegA(rM)); 2338 2339 vassert(shift_amt <= 31); 2340 2341 compute_result_and_C_after_shift_by_imm5( 2342 buf, shop, shco, rMt, how, shift_amt, rM 2343 ); 2344 return True; 2345 } 2346 2347 /* Shift/rotate by register */ 2348 if (insn_25 == 0 && insn_4 == 1) { 2349 /* Rm (3:0) shifted (6:5) by Rs (11:8) */ 2350 UInt rM = (insn_11_0 >> 0) & 0xF; 2351 UInt rS = (insn_11_0 >> 8) & 0xF; 2352 UInt how = (insn_11_0 >> 5) & 3; 2353 /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */ 2354 IRTemp rMt = newTemp(Ity_I32); 2355 IRTemp rSt = newTemp(Ity_I32); 2356 2357 if (insn_7 
== 1)
2358 return False; /* not really a shifter operand */
2359
2360 assign(rMt, getIRegA(rM));
2361 assign(rSt, getIRegA(rS));
2362
2363 compute_result_and_C_after_shift_by_reg(
2364 buf, shop, shco, rMt, how, rSt, rM, rS
2365 );
2366 return True;
2367 }
2368
2369 vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2370 return False;
2371 }
2372
2373
2374 /* ARM only */
2375 static
2376 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2377 /*OUT*/HChar* buf )
2378 {
2379 vassert(rN < 16);
2380 vassert(bU < 2);
2381 vassert(imm12 < 0x1000);
2382 UChar opChar = bU == 1 ? '+' : '-';
2383 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2384 return
2385 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2386 getIRegA(rN),
2387 mkU32(imm12) );
2388 }
2389
2390
2391 /* ARM only.
2392 NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
2393 */
2394 static
2395 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2396 UInt sh2, UInt imm5,
2397 /*OUT*/HChar* buf )
2398 {
2399 vassert(rN < 16);
2400 vassert(bU < 2);
2401 vassert(rM < 16);
2402 vassert(sh2 < 4);
2403 vassert(imm5 < 32);
2404 UChar opChar = bU == 1 ? '+' : '-';
2405 IRExpr* index = NULL;
2406 switch (sh2) {
2407 case 0: /* LSL */
2408 /* imm5 can be in the range 0 .. 31 inclusive. */
2409 index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2410 DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2411 break;
2412 case 1: /* LSR */
2413 if (imm5 == 0) {
2414 index = mkU32(0);
2415 vassert(0); // ATC
2416 } else {
2417 index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2418 }
2419 DIS(buf, "[r%u, %cr%u, LSR #%u]",
2420 rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2421 break;
2422 case 2: /* ASR */
2423 /* Doesn't this just mean that the behaviour with imm5 == 0
2424 is the same as if it had been 31 ? */
2425 if (imm5 == 0) {
2426 index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2427 vassert(0); // ATC
2428 } else {
2429 index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2430 }
2431 DIS(buf, "[r%u, %cr%u, ASR #%u]",
2432 rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2433 break;
2434 case 3: /* ROR or RRX */
2435 if (imm5 == 0) {
2436 IRTemp rmT = newTemp(Ity_I32);
2437 IRTemp cflagT = newTemp(Ity_I32);
2438 assign(rmT, getIRegA(rM));
2439 assign(cflagT, mk_armg_calculate_flag_c());
2440 index = binop(Iop_Or32,
2441 binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2442 binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2443 DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2444 } else {
2445 IRTemp rmT = newTemp(Ity_I32);
2446 assign(rmT, getIRegA(rM));
2447 vassert(imm5 >= 1 && imm5 <= 31);
2448 index = binop(Iop_Or32,
2449 binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2450 binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2451 DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2452 }
2453 break;
2454 default:
2455 vassert(0);
2456 }
2457 vassert(index);
2458 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2459 getIRegA(rN), index);
2460 }
2461
2462
2463 /* ARM only */
2464 static
2465 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2466 /*OUT*/HChar* buf )
2467 {
2468 vassert(rN < 16);
2469 vassert(bU < 2);
2470 vassert(imm8 < 0x100);
2471 UChar opChar = bU == 1 ? '+' : '-';
2472 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2473 return
2474 binop( (bU == 1 ?
Iop_Add32 : Iop_Sub32),
2475 getIRegA(rN),
2476 mkU32(imm8) );
2477 }
2478
2479
2480 /* ARM only */
2481 static
2482 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2483 /*OUT*/HChar* buf )
2484 {
2485 vassert(rN < 16);
2486 vassert(bU < 2);
2487 vassert(rM < 16);
2488 UChar opChar = bU == 1 ? '+' : '-';
2489 IRExpr* index = getIRegA(rM);
2490 DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2491 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2492 getIRegA(rN), index);
2493 }
2494
2495
2496 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2497 as an IRCmpF64Result. Generate code to convert it to an
2498 ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2499 Assign a new temp to hold that value, and return the temp. */
2500 static
2501 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2502 {
2503 IRTemp ix = newTemp(Ity_I32);
2504 IRTemp termL = newTemp(Ity_I32);
2505 IRTemp termR = newTemp(Ity_I32);
2506 IRTemp nzcv = newTemp(Ity_I32);
2507
2508 /* This is where the fun starts. We have to convert 'irRes' from
2509 an IR-convention return result (IRCmpF64Result) to an
2510 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2511 4 bits of 'nzcv'. */
2512 /* Map compare result from IR to ARM(nzcv) */
2513 /*
2514 FP cmp result | IR | ARM(nzcv)
2515 --------------------------------
2516 UN 0x45 0011
2517 LT 0x01 1000
2518 GT 0x00 0010
2519 EQ 0x40 0110
2520 */
2521 /* Now since you're probably wondering WTF ..
2522
2523 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2524 places them side by side, giving a number which is 0, 1, 2 or 3.
2525
2526 termL is a sequence cooked up by GNU superopt. It converts ix
2527 into an almost correct NZCV value (incredibly), except
2528 for the case of UN, where it produces 0100 instead of the
2529 required 0011.
2530
2531 termR is therefore a correction term, also computed from ix. It
2532 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2533 the final correct value, we subtract termR from termL.
2534
2535 Don't take my word for it. There's a test program at the bottom
2536 of this file to try this out with.
2537 */
2538 assign(
2539 ix,
2540 binop(Iop_Or32,
2541 binop(Iop_And32,
2542 binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2543 mkU32(3)),
2544 binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2545
2546 assign(
2547 termL,
2548 binop(Iop_Add32,
2549 binop(Iop_Shr32,
2550 binop(Iop_Sub32,
2551 binop(Iop_Shl32,
2552 binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2553 mkU8(30)),
2554 mkU32(1)),
2555 mkU8(29)),
2556 mkU32(1)));
2557
2558 assign(
2559 termR,
2560 binop(Iop_And32,
2561 binop(Iop_And32,
2562 mkexpr(ix),
2563 binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2564 mkU32(1)));
2565
2566 assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2567 return nzcv;
2568 }
2569
2570
2571 /* Thumb32 only. This is "ThumbExpandImm" in the ARM ARM. If
2572 updatesC is non-NULL, a boolean is written to it indicating whether
2573 or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
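For example, imm1:imm3:a == 00011b with imm8 == 0xAB yields
0x00AB00AB (the byte duplicated into both halfwords) and does not
update C, whereas any i:imm3:a value of 8 or above produces
1bcdefgh rotated into position and does update C.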
2574 */ 2575 static UInt thumbExpandImm ( Bool* updatesC, 2576 UInt imm1, UInt imm3, UInt imm8 ) 2577 { 2578 vassert(imm1 < (1<<1)); 2579 vassert(imm3 < (1<<3)); 2580 vassert(imm8 < (1<<8)); 2581 UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1); 2582 UInt abcdefgh = imm8; 2583 UInt lbcdefgh = imm8 | 0x80; 2584 if (updatesC) { 2585 *updatesC = i_imm3_a >= 8; 2586 } 2587 switch (i_imm3_a) { 2588 case 0: case 1: 2589 return abcdefgh; 2590 case 2: case 3: 2591 return (abcdefgh << 16) | abcdefgh; 2592 case 4: case 5: 2593 return (abcdefgh << 24) | (abcdefgh << 8); 2594 case 6: case 7: 2595 return (abcdefgh << 24) | (abcdefgh << 16) 2596 | (abcdefgh << 8) | abcdefgh; 2597 case 8 ... 31: 2598 return lbcdefgh << (32 - i_imm3_a); 2599 default: 2600 break; 2601 } 2602 /*NOTREACHED*/vassert(0); 2603 } 2604 2605 2606 /* Version of thumbExpandImm where we simply feed it the 2607 instruction halfwords (the lowest addressed one is I0). */ 2608 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC, 2609 UShort i0s, UShort i1s ) 2610 { 2611 UInt i0 = (UInt)i0s; 2612 UInt i1 = (UInt)i1s; 2613 UInt imm1 = SLICE_UInt(i0,10,10); 2614 UInt imm3 = SLICE_UInt(i1,14,12); 2615 UInt imm8 = SLICE_UInt(i1,7,0); 2616 return thumbExpandImm(updatesC, imm1, imm3, imm8); 2617 } 2618 2619 2620 /* Thumb16 only. Given the firstcond and mask fields from an IT 2621 instruction, compute the 32-bit ITSTATE value implied, as described 2622 in libvex_guest_arm.h. This is not the ARM ARM representation. 2623 Also produce the t/e chars for the 2nd, 3rd, 4th insns, for 2624 disassembly printing. Returns False if firstcond or mask 2625 denote something invalid. 2626 2627 The number and conditions for the instructions to be 2628 conditionalised depend on firstcond and mask: 2629 2630 mask cond 1 cond 2 cond 3 cond 4 2631 2632 1000 fc[3:0] 2633 x100 fc[3:0] fc[3:1]:x 2634 xy10 fc[3:0] fc[3:1]:x fc[3:1]:y 2635 xyz1 fc[3:0] fc[3:1]:x fc[3:1]:y fc[3:1]:z 2636 2637 The condition fields are assembled in *itstate backwards (cond 4 at 2638 the top, cond 1 at the bottom). Conditions are << 4'd and then 2639 ^0xE'd, and those fields that correspond to instructions in the IT 2640 block are tagged with a 1 bit. 2641 */ 2642 static Bool compute_ITSTATE ( /*OUT*/UInt* itstate, 2643 /*OUT*/UChar* ch1, 2644 /*OUT*/UChar* ch2, 2645 /*OUT*/UChar* ch3, 2646 UInt firstcond, UInt mask ) 2647 { 2648 vassert(firstcond <= 0xF); 2649 vassert(mask <= 0xF); 2650 *itstate = 0; 2651 *ch1 = *ch2 = *ch3 = '.'; 2652 if (mask == 0) 2653 return False; /* the logic below actually ensures this anyway, 2654 but clearer to make it explicit. */ 2655 if (firstcond == 0xF) 2656 return False; /* NV is not allowed */ 2657 if (firstcond == 0xE && popcount32(mask) != 1) 2658 return False; /* if firstcond is AL then all the rest must be too */ 2659 2660 UInt m3 = (mask >> 3) & 1; 2661 UInt m2 = (mask >> 2) & 1; 2662 UInt m1 = (mask >> 1) & 1; 2663 UInt m0 = (mask >> 0) & 1; 2664 2665 UInt fc = (firstcond << 4) | 1/*in-IT-block*/; 2666 UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/; 2667 2668 if (m3 == 1 && (m2|m1|m0) == 0) { 2669 *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc; 2670 *itstate ^= 0xE0E0E0E0; 2671 return True; 2672 } 2673 2674 if (m2 == 1 && (m1|m0) == 0) { 2675 *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc; 2676 *itstate ^= 0xE0E0E0E0; 2677 *ch1 = m3 == (firstcond & 1) ? 
't' : 'e';
2678 return True;
2679 }
2680
2681 if (m1 == 1 && m0 == 0) {
2682 *itstate = (ni << 24)
2683 | (setbit32(fc, 4, m2) << 16)
2684 | (setbit32(fc, 4, m3) << 8) | fc;
2685 *itstate ^= 0xE0E0E0E0;
2686 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2687 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2688 return True;
2689 }
2690
2691 if (m0 == 1) {
2692 *itstate = (setbit32(fc, 4, m1) << 24)
2693 | (setbit32(fc, 4, m2) << 16)
2694 | (setbit32(fc, 4, m3) << 8) | fc;
2695 *itstate ^= 0xE0E0E0E0;
2696 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2697 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2698 *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2699 return True;
2700 }
2701
2702 return False;
2703 }
2704
2705
2706 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2707 Chapter 7 Section 1. */
2708 static IRTemp gen_BITREV ( IRTemp x0 )
2709 {
2710 IRTemp x1 = newTemp(Ity_I32);
2711 IRTemp x2 = newTemp(Ity_I32);
2712 IRTemp x3 = newTemp(Ity_I32);
2713 IRTemp x4 = newTemp(Ity_I32);
2714 IRTemp x5 = newTemp(Ity_I32);
2715 UInt c1 = 0x55555555;
2716 UInt c2 = 0x33333333;
2717 UInt c3 = 0x0F0F0F0F;
2718 UInt c4 = 0x00FF00FF;
2719 UInt c5 = 0x0000FFFF;
2720 assign(x1,
2721 binop(Iop_Or32,
2722 binop(Iop_Shl32,
2723 binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2724 mkU8(1)),
2725 binop(Iop_Shr32,
2726 binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2727 mkU8(1))
2728 ));
2729 assign(x2,
2730 binop(Iop_Or32,
2731 binop(Iop_Shl32,
2732 binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2733 mkU8(2)),
2734 binop(Iop_Shr32,
2735 binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2736 mkU8(2))
2737 ));
2738 assign(x3,
2739 binop(Iop_Or32,
2740 binop(Iop_Shl32,
2741 binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2742 mkU8(4)),
2743 binop(Iop_Shr32,
2744 binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2745 mkU8(4))
2746 ));
2747 assign(x4,
2748 binop(Iop_Or32,
2749 binop(Iop_Shl32,
2750 binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2751 mkU8(8)),
2752 binop(Iop_Shr32,
2753 binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2754 mkU8(8))
2755 ));
2756 assign(x5,
2757 binop(Iop_Or32,
2758 binop(Iop_Shl32,
2759 binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2760 mkU8(16)),
2761 binop(Iop_Shr32,
2762 binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2763 mkU8(16))
2764 ));
2765 return x5;
2766 }
2767
2768
2769 /* Generate IR to rearrange bytes 3:2:1:0 in a word into the order
2770 0:1:2:3 (aka byte-swap). */
2771 static IRTemp gen_REV ( IRTemp arg )
2772 {
2773 IRTemp res = newTemp(Ity_I32);
2774 assign(res,
2775 binop(Iop_Or32,
2776 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2777 binop(Iop_Or32,
2778 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2779 mkU32(0x00FF0000)),
2780 binop(Iop_Or32,
2781 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2782 mkU32(0x0000FF00)),
2783 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2784 mkU32(0x000000FF) )
2785 ))));
2786 return res;
2787 }
2788
2789
2790 /* Generate IR to rearrange bytes 3:2:1:0 in a word into the order
2791 2:3:0:1 (swap within lo and hi halves).
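For example, 0xAABBCCDD becomes 0xBBAADDCC (gen_REV above would
instead give 0xDDCCBBAA).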
*/ 2792 static IRTemp gen_REV16 ( IRTemp arg ) 2793 { 2794 IRTemp res = newTemp(Ity_I32); 2795 assign(res, 2796 binop(Iop_Or32, 2797 binop(Iop_And32, 2798 binop(Iop_Shl32, mkexpr(arg), mkU8(8)), 2799 mkU32(0xFF00FF00)), 2800 binop(Iop_And32, 2801 binop(Iop_Shr32, mkexpr(arg), mkU8(8)), 2802 mkU32(0x00FF00FF)))); 2803 return res; 2804 } 2805 2806 2807 /*------------------------------------------------------------*/ 2808 /*--- Advanced SIMD (NEON) instructions ---*/ 2809 /*------------------------------------------------------------*/ 2810 2811 /*------------------------------------------------------------*/ 2812 /*--- NEON data processing ---*/ 2813 /*------------------------------------------------------------*/ 2814 2815 /* For all NEON DP ops, we use the normal scheme to handle conditional 2816 writes to registers -- pass in condT and hand that on to the 2817 put*Reg functions. In ARM mode condT is always IRTemp_INVALID 2818 since NEON is unconditional for ARM. In Thumb mode condT is 2819 derived from the ITSTATE shift register in the normal way. */ 2820 2821 static 2822 UInt get_neon_d_regno(UInt theInstr) 2823 { 2824 UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF); 2825 if (theInstr & 0x40) { 2826 if (x & 1) { 2827 x = x + 0x100; 2828 } else { 2829 x = x >> 1; 2830 } 2831 } 2832 return x; 2833 } 2834 2835 static 2836 UInt get_neon_n_regno(UInt theInstr) 2837 { 2838 UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF); 2839 if (theInstr & 0x40) { 2840 if (x & 1) { 2841 x = x + 0x100; 2842 } else { 2843 x = x >> 1; 2844 } 2845 } 2846 return x; 2847 } 2848 2849 static 2850 UInt get_neon_m_regno(UInt theInstr) 2851 { 2852 UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF); 2853 if (theInstr & 0x40) { 2854 if (x & 1) { 2855 x = x + 0x100; 2856 } else { 2857 x = x >> 1; 2858 } 2859 } 2860 return x; 2861 } 2862 2863 static 2864 Bool dis_neon_vext ( UInt theInstr, IRTemp condT ) 2865 { 2866 UInt dreg = get_neon_d_regno(theInstr); 2867 UInt mreg = get_neon_m_regno(theInstr); 2868 UInt nreg = get_neon_n_regno(theInstr); 2869 UInt imm4 = (theInstr >> 8) & 0xf; 2870 UInt Q = (theInstr >> 6) & 1; 2871 HChar reg_t = Q ? 
'q' : 'd'; 2872 2873 if (Q) { 2874 putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg), 2875 getQReg(mreg), mkU8(imm4)), condT); 2876 } else { 2877 putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg), 2878 getDRegI64(mreg), mkU8(imm4)), condT); 2879 } 2880 DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg, 2881 reg_t, mreg, imm4); 2882 return True; 2883 } 2884 2885 /* VTBL, VTBX */ 2886 static 2887 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT ) 2888 { 2889 UInt op = (theInstr >> 6) & 1; 2890 UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6)); 2891 UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6)); 2892 UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6)); 2893 UInt len = (theInstr >> 8) & 3; 2894 Int i; 2895 IROp cmp; 2896 ULong imm; 2897 IRTemp arg_l; 2898 IRTemp old_mask, new_mask, cur_mask; 2899 IRTemp old_res, new_res; 2900 IRTemp old_arg, new_arg; 2901 2902 if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100) 2903 return False; 2904 if (nreg + len > 31) 2905 return False; 2906 2907 cmp = Iop_CmpGT8Ux8; 2908 2909 old_mask = newTemp(Ity_I64); 2910 old_res = newTemp(Ity_I64); 2911 old_arg = newTemp(Ity_I64); 2912 assign(old_mask, mkU64(0)); 2913 assign(old_res, mkU64(0)); 2914 assign(old_arg, getDRegI64(mreg)); 2915 imm = 8; 2916 imm = (imm << 8) | imm; 2917 imm = (imm << 16) | imm; 2918 imm = (imm << 32) | imm; 2919 2920 for (i = 0; i <= len; i++) { 2921 arg_l = newTemp(Ity_I64); 2922 new_mask = newTemp(Ity_I64); 2923 cur_mask = newTemp(Ity_I64); 2924 new_res = newTemp(Ity_I64); 2925 new_arg = newTemp(Ity_I64); 2926 assign(arg_l, getDRegI64(nreg+i)); 2927 assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm))); 2928 assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg))); 2929 assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask))); 2930 assign(new_res, binop(Iop_Or64, 2931 mkexpr(old_res), 2932 binop(Iop_And64, 2933 binop(Iop_Perm8x8, 2934 mkexpr(arg_l), 2935 binop(Iop_And64, 2936 mkexpr(old_arg), 2937 mkexpr(cur_mask))), 2938 mkexpr(cur_mask)))); 2939 2940 old_arg = new_arg; 2941 old_mask = new_mask; 2942 old_res = new_res; 2943 } 2944 if (op) { 2945 new_res = newTemp(Ity_I64); 2946 assign(new_res, binop(Iop_Or64, 2947 binop(Iop_And64, 2948 getDRegI64(dreg), 2949 unop(Iop_Not64, mkexpr(old_mask))), 2950 mkexpr(old_res))); 2951 old_res = new_res; 2952 } 2953 2954 putDRegI64(dreg, mkexpr(old_res), condT); 2955 DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg); 2956 if (len > 0) { 2957 DIP("d%u-d%u", nreg, nreg + len); 2958 } else { 2959 DIP("d%u", nreg); 2960 } 2961 DIP("}, d%u\n", mreg); 2962 return True; 2963 } 2964 2965 /* VDUP (scalar) */ 2966 static 2967 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT ) 2968 { 2969 UInt Q = (theInstr >> 6) & 1; 2970 UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF); 2971 UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF); 2972 UInt imm4 = (theInstr >> 16) & 0xF; 2973 UInt index; 2974 UInt size; 2975 IRTemp arg_m; 2976 IRTemp res; 2977 IROp op, op2; 2978 2979 if ((imm4 == 0) || (imm4 == 8)) 2980 return False; 2981 if ((Q == 1) && ((dreg & 1) == 1)) 2982 return False; 2983 if (Q) 2984 dreg >>= 1; 2985 arg_m = newTemp(Ity_I64); 2986 assign(arg_m, getDRegI64(mreg)); 2987 if (Q) 2988 res = newTemp(Ity_V128); 2989 else 2990 res = newTemp(Ity_I64); 2991 if ((imm4 & 1) == 1) { 2992 op = Q ? Iop_Dup8x16 : Iop_Dup8x8; 2993 op2 = Iop_GetElem8x8; 2994 index = imm4 >> 1; 2995 size = 8; 2996 } else if ((imm4 & 3) == 2) { 2997 op = Q ? 
Iop_Dup16x8 : Iop_Dup16x4; 2998 op2 = Iop_GetElem16x4; 2999 index = imm4 >> 2; 3000 size = 16; 3001 } else if ((imm4 & 7) == 4) { 3002 op = Q ? Iop_Dup32x4 : Iop_Dup32x2; 3003 op2 = Iop_GetElem32x2; 3004 index = imm4 >> 3; 3005 size = 32; 3006 } else { 3007 return False; // can this ever happen? 3008 } 3009 assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index)))); 3010 if (Q) { 3011 putQReg(dreg, mkexpr(res), condT); 3012 } else { 3013 putDRegI64(dreg, mkexpr(res), condT); 3014 } 3015 DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index); 3016 return True; 3017 } 3018 3019 /* A7.4.1 Three registers of the same length */ 3020 static 3021 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT ) 3022 { 3023 UInt Q = (theInstr >> 6) & 1; 3024 UInt dreg = get_neon_d_regno(theInstr); 3025 UInt nreg = get_neon_n_regno(theInstr); 3026 UInt mreg = get_neon_m_regno(theInstr); 3027 UInt A = (theInstr >> 8) & 0xF; 3028 UInt B = (theInstr >> 4) & 1; 3029 UInt C = (theInstr >> 20) & 0x3; 3030 UInt U = (theInstr >> 24) & 1; 3031 UInt size = C; 3032 3033 IRTemp arg_n; 3034 IRTemp arg_m; 3035 IRTemp res; 3036 3037 if (Q) { 3038 arg_n = newTemp(Ity_V128); 3039 arg_m = newTemp(Ity_V128); 3040 res = newTemp(Ity_V128); 3041 assign(arg_n, getQReg(nreg)); 3042 assign(arg_m, getQReg(mreg)); 3043 } else { 3044 arg_n = newTemp(Ity_I64); 3045 arg_m = newTemp(Ity_I64); 3046 res = newTemp(Ity_I64); 3047 assign(arg_n, getDRegI64(nreg)); 3048 assign(arg_m, getDRegI64(mreg)); 3049 } 3050 3051 switch(A) { 3052 case 0: 3053 if (B == 0) { 3054 /* VHADD */ 3055 ULong imm = 0; 3056 IRExpr *imm_val; 3057 IROp addOp; 3058 IROp andOp; 3059 IROp shOp; 3060 char regType = Q ? 'q' : 'd'; 3061 3062 if (size == 3) 3063 return False; 3064 switch(size) { 3065 case 0: imm = 0x101010101010101LL; break; 3066 case 1: imm = 0x1000100010001LL; break; 3067 case 2: imm = 0x100000001LL; break; 3068 default: vassert(0); 3069 } 3070 if (Q) { 3071 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3072 andOp = Iop_AndV128; 3073 } else { 3074 imm_val = mkU64(imm); 3075 andOp = Iop_And64; 3076 } 3077 if (U) { 3078 switch(size) { 3079 case 0: 3080 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3081 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3082 break; 3083 case 1: 3084 addOp = Q ? Iop_Add16x8 : Iop_Add16x4; 3085 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3086 break; 3087 case 2: 3088 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3089 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3090 break; 3091 default: 3092 vassert(0); 3093 } 3094 } else { 3095 switch(size) { 3096 case 0: 3097 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3098 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8; 3099 break; 3100 case 1: 3101 addOp = Q ? Iop_Add16x8 : Iop_Add16x4; 3102 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3103 break; 3104 case 2: 3105 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3106 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3107 break; 3108 default: 3109 vassert(0); 3110 } 3111 } 3112 assign(res, 3113 binop(addOp, 3114 binop(addOp, 3115 binop(shOp, mkexpr(arg_m), mkU8(1)), 3116 binop(shOp, mkexpr(arg_n), mkU8(1))), 3117 binop(shOp, 3118 binop(addOp, 3119 binop(andOp, mkexpr(arg_m), imm_val), 3120 binop(andOp, mkexpr(arg_n), imm_val)), 3121 mkU8(1)))); 3122 DIP("vhadd.%c%d %c%d, %c%d, %c%d\n", 3123 U ? 'u' : 's', 8 << size, regType, 3124 dreg, regType, nreg, regType, mreg); 3125 } else { 3126 /* VQADD */ 3127 IROp op, op2; 3128 IRTemp tmp; 3129 char reg_t = Q ? 'q' : 'd'; 3130 if (Q) { 3131 switch (size) { 3132 case 0: 3133 op = U ? 
Iop_QAdd8Ux16 : Iop_QAdd8Sx16; 3134 op2 = Iop_Add8x16; 3135 break; 3136 case 1: 3137 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8; 3138 op2 = Iop_Add16x8; 3139 break; 3140 case 2: 3141 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4; 3142 op2 = Iop_Add32x4; 3143 break; 3144 case 3: 3145 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2; 3146 op2 = Iop_Add64x2; 3147 break; 3148 default: 3149 vassert(0); 3150 } 3151 } else { 3152 switch (size) { 3153 case 0: 3154 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8; 3155 op2 = Iop_Add8x8; 3156 break; 3157 case 1: 3158 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4; 3159 op2 = Iop_Add16x4; 3160 break; 3161 case 2: 3162 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2; 3163 op2 = Iop_Add32x2; 3164 break; 3165 case 3: 3166 op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1; 3167 op2 = Iop_Add64; 3168 break; 3169 default: 3170 vassert(0); 3171 } 3172 } 3173 if (Q) { 3174 tmp = newTemp(Ity_V128); 3175 } else { 3176 tmp = newTemp(Ity_I64); 3177 } 3178 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3179 #ifndef DISABLE_QC_FLAG 3180 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3181 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3182 #endif 3183 DIP("vqadd.%c%d %c%d, %c%d, %c%d\n", 3184 U ? 'u' : 's', 3185 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3186 } 3187 break; 3188 case 1: 3189 if (B == 0) { 3190 /* VRHADD */ 3191 /* VRHADD C, A, B ::= 3192 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */ 3193 IROp shift_op, add_op; 3194 IRTemp cc; 3195 ULong one = 1; 3196 HChar reg_t = Q ? 'q' : 'd'; 3197 switch (size) { 3198 case 0: one = (one << 8) | one; /* fall through */ 3199 case 1: one = (one << 16) | one; /* fall through */ 3200 case 2: one = (one << 32) | one; break; 3201 case 3: return False; 3202 default: vassert(0); 3203 } 3204 if (Q) { 3205 switch (size) { 3206 case 0: 3207 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16; 3208 add_op = Iop_Add8x16; 3209 break; 3210 case 1: 3211 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8; 3212 add_op = Iop_Add16x8; 3213 break; 3214 case 2: 3215 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4; 3216 add_op = Iop_Add32x4; 3217 break; 3218 case 3: 3219 return False; 3220 default: 3221 vassert(0); 3222 } 3223 } else { 3224 switch (size) { 3225 case 0: 3226 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8; 3227 add_op = Iop_Add8x8; 3228 break; 3229 case 1: 3230 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4; 3231 add_op = Iop_Add16x4; 3232 break; 3233 case 2: 3234 shift_op = U ? 
Iop_ShrN32x2 : Iop_SarN32x2; 3235 add_op = Iop_Add32x2; 3236 break; 3237 case 3: 3238 return False; 3239 default: 3240 vassert(0); 3241 } 3242 } 3243 if (Q) { 3244 cc = newTemp(Ity_V128); 3245 assign(cc, binop(shift_op, 3246 binop(add_op, 3247 binop(add_op, 3248 binop(Iop_AndV128, 3249 mkexpr(arg_n), 3250 binop(Iop_64HLtoV128, 3251 mkU64(one), 3252 mkU64(one))), 3253 binop(Iop_AndV128, 3254 mkexpr(arg_m), 3255 binop(Iop_64HLtoV128, 3256 mkU64(one), 3257 mkU64(one)))), 3258 binop(Iop_64HLtoV128, 3259 mkU64(one), 3260 mkU64(one))), 3261 mkU8(1))); 3262 assign(res, binop(add_op, 3263 binop(add_op, 3264 binop(shift_op, 3265 mkexpr(arg_n), 3266 mkU8(1)), 3267 binop(shift_op, 3268 mkexpr(arg_m), 3269 mkU8(1))), 3270 mkexpr(cc))); 3271 } else { 3272 cc = newTemp(Ity_I64); 3273 assign(cc, binop(shift_op, 3274 binop(add_op, 3275 binop(add_op, 3276 binop(Iop_And64, 3277 mkexpr(arg_n), 3278 mkU64(one)), 3279 binop(Iop_And64, 3280 mkexpr(arg_m), 3281 mkU64(one))), 3282 mkU64(one)), 3283 mkU8(1))); 3284 assign(res, binop(add_op, 3285 binop(add_op, 3286 binop(shift_op, 3287 mkexpr(arg_n), 3288 mkU8(1)), 3289 binop(shift_op, 3290 mkexpr(arg_m), 3291 mkU8(1))), 3292 mkexpr(cc))); 3293 } 3294 DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n", 3295 U ? 'u' : 's', 3296 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3297 } else { 3298 if (U == 0) { 3299 switch(C) { 3300 case 0: { 3301 /* VAND */ 3302 HChar reg_t = Q ? 'q' : 'd'; 3303 if (Q) { 3304 assign(res, binop(Iop_AndV128, mkexpr(arg_n), 3305 mkexpr(arg_m))); 3306 } else { 3307 assign(res, binop(Iop_And64, mkexpr(arg_n), 3308 mkexpr(arg_m))); 3309 } 3310 DIP("vand %c%d, %c%d, %c%d\n", 3311 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3312 break; 3313 } 3314 case 1: { 3315 /* VBIC */ 3316 HChar reg_t = Q ? 'q' : 'd'; 3317 if (Q) { 3318 assign(res, binop(Iop_AndV128,mkexpr(arg_n), 3319 unop(Iop_NotV128, mkexpr(arg_m)))); 3320 } else { 3321 assign(res, binop(Iop_And64, mkexpr(arg_n), 3322 unop(Iop_Not64, mkexpr(arg_m)))); 3323 } 3324 DIP("vbic %c%d, %c%d, %c%d\n", 3325 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3326 break; 3327 } 3328 case 2: 3329 if ( nreg != mreg) { 3330 /* VORR */ 3331 HChar reg_t = Q ? 'q' : 'd'; 3332 if (Q) { 3333 assign(res, binop(Iop_OrV128, mkexpr(arg_n), 3334 mkexpr(arg_m))); 3335 } else { 3336 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3337 mkexpr(arg_m))); 3338 } 3339 DIP("vorr %c%d, %c%d, %c%d\n", 3340 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3341 } else { 3342 /* VMOV */ 3343 HChar reg_t = Q ? 'q' : 'd'; 3344 assign(res, mkexpr(arg_m)); 3345 DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg); 3346 } 3347 break; 3348 case 3:{ 3349 /* VORN */ 3350 HChar reg_t = Q ? 'q' : 'd'; 3351 if (Q) { 3352 assign(res, binop(Iop_OrV128,mkexpr(arg_n), 3353 unop(Iop_NotV128, mkexpr(arg_m)))); 3354 } else { 3355 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3356 unop(Iop_Not64, mkexpr(arg_m)))); 3357 } 3358 DIP("vorn %c%d, %c%d, %c%d\n", 3359 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3360 break; 3361 } 3362 } 3363 } else { 3364 switch(C) { 3365 case 0: 3366 /* VEOR (XOR) */ 3367 if (Q) { 3368 assign(res, binop(Iop_XorV128, mkexpr(arg_n), 3369 mkexpr(arg_m))); 3370 } else { 3371 assign(res, binop(Iop_Xor64, mkexpr(arg_n), 3372 mkexpr(arg_m))); 3373 } 3374 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg, 3375 Q ? 'q' : 'd', nreg, Q ? 
'q' : 'd', mreg); 3376 break; 3377 case 1: 3378 /* VBSL */ 3379 if (Q) { 3380 IRTemp reg_d = newTemp(Ity_V128); 3381 assign(reg_d, getQReg(dreg)); 3382 assign(res, 3383 binop(Iop_OrV128, 3384 binop(Iop_AndV128, mkexpr(arg_n), 3385 mkexpr(reg_d)), 3386 binop(Iop_AndV128, 3387 mkexpr(arg_m), 3388 unop(Iop_NotV128, 3389 mkexpr(reg_d)) ) ) ); 3390 } else { 3391 IRTemp reg_d = newTemp(Ity_I64); 3392 assign(reg_d, getDRegI64(dreg)); 3393 assign(res, 3394 binop(Iop_Or64, 3395 binop(Iop_And64, mkexpr(arg_n), 3396 mkexpr(reg_d)), 3397 binop(Iop_And64, 3398 mkexpr(arg_m), 3399 unop(Iop_Not64, mkexpr(reg_d))))); 3400 } 3401 DIP("vbsl %c%u, %c%u, %c%u\n", 3402 Q ? 'q' : 'd', dreg, 3403 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3404 break; 3405 case 2: 3406 /* VBIT */ 3407 if (Q) { 3408 IRTemp reg_d = newTemp(Ity_V128); 3409 assign(reg_d, getQReg(dreg)); 3410 assign(res, 3411 binop(Iop_OrV128, 3412 binop(Iop_AndV128, mkexpr(arg_n), 3413 mkexpr(arg_m)), 3414 binop(Iop_AndV128, 3415 mkexpr(reg_d), 3416 unop(Iop_NotV128, mkexpr(arg_m))))); 3417 } else { 3418 IRTemp reg_d = newTemp(Ity_I64); 3419 assign(reg_d, getDRegI64(dreg)); 3420 assign(res, 3421 binop(Iop_Or64, 3422 binop(Iop_And64, mkexpr(arg_n), 3423 mkexpr(arg_m)), 3424 binop(Iop_And64, 3425 mkexpr(reg_d), 3426 unop(Iop_Not64, mkexpr(arg_m))))); 3427 } 3428 DIP("vbit %c%u, %c%u, %c%u\n", 3429 Q ? 'q' : 'd', dreg, 3430 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3431 break; 3432 case 3: 3433 /* VBIF */ 3434 if (Q) { 3435 IRTemp reg_d = newTemp(Ity_V128); 3436 assign(reg_d, getQReg(dreg)); 3437 assign(res, 3438 binop(Iop_OrV128, 3439 binop(Iop_AndV128, mkexpr(reg_d), 3440 mkexpr(arg_m)), 3441 binop(Iop_AndV128, 3442 mkexpr(arg_n), 3443 unop(Iop_NotV128, mkexpr(arg_m))))); 3444 } else { 3445 IRTemp reg_d = newTemp(Ity_I64); 3446 assign(reg_d, getDRegI64(dreg)); 3447 assign(res, 3448 binop(Iop_Or64, 3449 binop(Iop_And64, mkexpr(reg_d), 3450 mkexpr(arg_m)), 3451 binop(Iop_And64, 3452 mkexpr(arg_n), 3453 unop(Iop_Not64, mkexpr(arg_m))))); 3454 } 3455 DIP("vbif %c%u, %c%u, %c%u\n", 3456 Q ? 'q' : 'd', dreg, 3457 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3458 break; 3459 } 3460 } 3461 } 3462 break; 3463 case 2: 3464 if (B == 0) { 3465 /* VHSUB */ 3466 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */ 3467 ULong imm = 0; 3468 IRExpr *imm_val; 3469 IROp subOp; 3470 IROp notOp; 3471 IROp andOp; 3472 IROp shOp; 3473 if (size == 3) 3474 return False; 3475 switch(size) { 3476 case 0: imm = 0x101010101010101LL; break; 3477 case 1: imm = 0x1000100010001LL; break; 3478 case 2: imm = 0x100000001LL; break; 3479 default: vassert(0); 3480 } 3481 if (Q) { 3482 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3483 andOp = Iop_AndV128; 3484 notOp = Iop_NotV128; 3485 } else { 3486 imm_val = mkU64(imm); 3487 andOp = Iop_And64; 3488 notOp = Iop_Not64; 3489 } 3490 if (U) { 3491 switch(size) { 3492 case 0: 3493 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3494 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3495 break; 3496 case 1: 3497 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3498 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3499 break; 3500 case 2: 3501 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3502 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3503 break; 3504 default: 3505 vassert(0); 3506 } 3507 } else { 3508 switch(size) { 3509 case 0: 3510 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3511 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8; 3512 break; 3513 case 1: 3514 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3515 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3516 break; 3517 case 2: 3518 subOp = Q ? 
Iop_Sub32x4 : Iop_Sub32x2; 3519 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3520 break; 3521 default: 3522 vassert(0); 3523 } 3524 } 3525 assign(res, 3526 binop(subOp, 3527 binop(subOp, 3528 binop(shOp, mkexpr(arg_n), mkU8(1)), 3529 binop(shOp, mkexpr(arg_m), mkU8(1))), 3530 binop(andOp, 3531 binop(andOp, 3532 unop(notOp, mkexpr(arg_n)), 3533 mkexpr(arg_m)), 3534 imm_val))); 3535 DIP("vhsub.%c%u %c%u, %c%u, %c%u\n", 3536 U ? 'u' : 's', 8 << size, 3537 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3538 mreg); 3539 } else { 3540 /* VQSUB */ 3541 IROp op, op2; 3542 IRTemp tmp; 3543 if (Q) { 3544 switch (size) { 3545 case 0: 3546 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16; 3547 op2 = Iop_Sub8x16; 3548 break; 3549 case 1: 3550 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8; 3551 op2 = Iop_Sub16x8; 3552 break; 3553 case 2: 3554 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4; 3555 op2 = Iop_Sub32x4; 3556 break; 3557 case 3: 3558 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2; 3559 op2 = Iop_Sub64x2; 3560 break; 3561 default: 3562 vassert(0); 3563 } 3564 } else { 3565 switch (size) { 3566 case 0: 3567 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8; 3568 op2 = Iop_Sub8x8; 3569 break; 3570 case 1: 3571 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4; 3572 op2 = Iop_Sub16x4; 3573 break; 3574 case 2: 3575 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2; 3576 op2 = Iop_Sub32x2; 3577 break; 3578 case 3: 3579 op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1; 3580 op2 = Iop_Sub64; 3581 break; 3582 default: 3583 vassert(0); 3584 } 3585 } 3586 if (Q) 3587 tmp = newTemp(Ity_V128); 3588 else 3589 tmp = newTemp(Ity_I64); 3590 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3591 #ifndef DISABLE_QC_FLAG 3592 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3593 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3594 #endif 3595 DIP("vqsub.%c%u %c%u, %c%u, %c%u\n", 3596 U ? 'u' : 's', 8 << size, 3597 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3598 mreg); 3599 } 3600 break; 3601 case 3: { 3602 IROp op; 3603 if (Q) { 3604 switch (size) { 3605 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break; 3606 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break; 3607 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break; 3608 case 3: return False; 3609 default: vassert(0); 3610 } 3611 } else { 3612 switch (size) { 3613 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break; 3614 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break; 3615 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break; 3616 case 3: return False; 3617 default: vassert(0); 3618 } 3619 } 3620 if (B == 0) { 3621 /* VCGT */ 3622 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3623 DIP("vcgt.%c%u %c%u, %c%u, %c%u\n", 3624 U ? 'u' : 's', 8 << size, 3625 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3626 mreg); 3627 } else { 3628 /* VCGE */ 3629 /* VCGE res, argn, argm 3630 is equal to 3631 VCGT tmp, argm, argn 3632 VNOT res, tmp */ 3633 assign(res, 3634 unop(Q ? Iop_NotV128 : Iop_Not64, 3635 binop(op, mkexpr(arg_m), mkexpr(arg_n)))); 3636 DIP("vcge.%c%u %c%u, %c%u, %c%u\n", 3637 U ? 'u' : 's', 8 << size, 3638 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3639 mreg); 3640 } 3641 } 3642 break; 3643 case 4: 3644 if (B == 0) { 3645 /* VSHL */ 3646 IROp op, sub_op; 3647 IRTemp tmp; 3648 if (U) { 3649 switch (size) { 3650 case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break; 3651 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break; 3652 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break; 3653 case 3: op = Q ? 
Iop_Shl64x2 : Iop_Shl64; break; 3654 default: vassert(0); 3655 } 3656 } else { 3657 tmp = newTemp(Q ? Ity_V128 : Ity_I64); 3658 switch (size) { 3659 case 0: 3660 op = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3661 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3662 break; 3663 case 1: 3664 op = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3665 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3666 break; 3667 case 2: 3668 op = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3669 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3670 break; 3671 case 3: 3672 op = Q ? Iop_Sar64x2 : Iop_Sar64; 3673 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64; 3674 break; 3675 default: 3676 vassert(0); 3677 } 3678 } 3679 if (U) { 3680 if (!Q && (size == 3)) 3681 assign(res, binop(op, mkexpr(arg_m), 3682 unop(Iop_64to8, mkexpr(arg_n)))); 3683 else 3684 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n))); 3685 } else { 3686 if (Q) 3687 assign(tmp, binop(sub_op, 3688 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)), 3689 mkexpr(arg_n))); 3690 else 3691 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n))); 3692 if (!Q && (size == 3)) 3693 assign(res, binop(op, mkexpr(arg_m), 3694 unop(Iop_64to8, mkexpr(tmp)))); 3695 else 3696 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp))); 3697 } 3698 DIP("vshl.%c%u %c%u, %c%u, %c%u\n", 3699 U ? 'u' : 's', 8 << size, 3700 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3701 nreg); 3702 } else { 3703 /* VQSHL */ 3704 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt; 3705 IRTemp tmp, shval, mask, old_shval; 3706 UInt i; 3707 ULong esize; 3708 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; 3709 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3710 if (U) { 3711 switch (size) { 3712 case 0: 3713 op = Q ? Iop_QShl8x16 : Iop_QShl8x8; 3714 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8; 3715 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3716 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3717 break; 3718 case 1: 3719 op = Q ? Iop_QShl16x8 : Iop_QShl16x4; 3720 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4; 3721 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3722 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3723 break; 3724 case 2: 3725 op = Q ? Iop_QShl32x4 : Iop_QShl32x2; 3726 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2; 3727 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3728 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3729 break; 3730 case 3: 3731 op = Q ? Iop_QShl64x2 : Iop_QShl64x1; 3732 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64; 3733 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3734 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3735 break; 3736 default: 3737 vassert(0); 3738 } 3739 } else { 3740 switch (size) { 3741 case 0: 3742 op = Q ? Iop_QSal8x16 : Iop_QSal8x8; 3743 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3744 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3745 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3746 break; 3747 case 1: 3748 op = Q ? Iop_QSal16x8 : Iop_QSal16x4; 3749 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3750 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3751 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3752 break; 3753 case 2: 3754 op = Q ? Iop_QSal32x4 : Iop_QSal32x2; 3755 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3756 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3757 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3758 break; 3759 case 3: 3760 op = Q ? Iop_QSal64x2 : Iop_QSal64x1; 3761 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64; 3762 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3763 op_shln = Q ? 
Iop_ShlN64x2 : Iop_Shl64;
3764 break;
3765 default:
3766 vassert(0);
3767 }
3768 }
3769 if (Q) {
3770 tmp = newTemp(Ity_V128);
3771 shval = newTemp(Ity_V128);
3772 mask = newTemp(Ity_V128);
3773 } else {
3774 tmp = newTemp(Ity_I64);
3775 shval = newTemp(Ity_I64);
3776 mask = newTemp(Ity_I64);
3777 }
3778 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3779 #ifndef DISABLE_QC_FLAG
3780 /* Only the least significant byte of the second argument is used;
3781 replicate this byte across the whole vector element. */
3782 assign(shval, binop(op_shrn,
3783 binop(op_shln,
3784 mkexpr(arg_n),
3785 mkU8((8 << size) - 8)),
3786 mkU8((8 << size) - 8)));
3787 for(i = 0; i < size; i++) {
3788 old_shval = shval;
3789 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3790 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3791 mkexpr(old_shval),
3792 binop(op_shln,
3793 mkexpr(old_shval),
3794 mkU8(8 << i))));
3795 }
3796 /* If the shift amount is greater than or equal to the element size
3797 and the element is non-zero, then the QC flag should be set. */
3798 esize = (8 << size) - 1;
3799 esize = (esize << 8) | esize;
3800 esize = (esize << 16) | esize;
3801 esize = (esize << 32) | esize;
3802 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3803 binop(cmp_gt, mkexpr(shval),
3804 Q ? mkU128(esize) : mkU64(esize)),
3805 unop(cmp_neq, mkexpr(arg_m))),
3806 Q ? mkU128(0) : mkU64(0),
3807 Q, condT);
3808 /* Otherwise the QC flag should be set if the shift value is positive
3809 and the result, right-shifted by the same value, is not equal to
3810 the left argument. */
3811 assign(mask, binop(cmp_gt, mkexpr(shval),
3812 Q ? mkU128(0) : mkU64(0)));
3813 if (!Q && size == 3)
3814 assign(tmp, binop(op_rev, mkexpr(res),
3815 unop(Iop_64to8, mkexpr(arg_n))));
3816 else
3817 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3818 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3819 mkexpr(tmp), mkexpr(mask)),
3820 binop(Q ? Iop_AndV128 : Iop_And64,
3821 mkexpr(arg_m), mkexpr(mask)),
3822 Q, condT);
3823 #endif
3824 DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3825 U ? 'u' : 's', 8 << size,
3826 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3827 nreg);
3828 }
3829 break;
3830 case 5:
3831 if (B == 0) {
3832 /* VRSHL */
3833 IROp op, op_shrn, op_shln, cmp_gt, op_add;
3834 IRTemp shval, old_shval, imm_val, round;
3835 UInt i;
3836 ULong imm;
3837 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3838 imm = 1L;
3839 switch (size) {
3840 case 0: imm = (imm << 8) | imm; /* fall through */
3841 case 1: imm = (imm << 16) | imm; /* fall through */
3842 case 2: imm = (imm << 32) | imm; /* fall through */
3843 case 3: break;
3844 default: vassert(0);
3845 }
3846 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3847 round = newTemp(Q ? Ity_V128 : Ity_I64);
3848 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3849 if (U) {
3850 switch (size) {
3851 case 0:
3852 op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3853 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3854 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3855 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3856 break;
3857 case 1:
3858 op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3859 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3860 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3861 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3862 break;
3863 case 2:
3864 op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3865 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3866 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3867 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3868 break;
3869 case 3:
3870 op = Q ? Iop_Shl64x2 : Iop_Shl64;
3871 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3872 op_shrn = Q ?
Iop_ShrN64x2 : Iop_Shr64;
3873 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3874 break;
3875 default:
3876 vassert(0);
3877 }
3878 } else {
3879 switch (size) {
3880 case 0:
3881 op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3882 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3883 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3884 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3885 break;
3886 case 1:
3887 op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3888 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3889 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3890 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3891 break;
3892 case 2:
3893 op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3894 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3895 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3896 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3897 break;
3898 case 3:
3899 op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3900 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3901 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3902 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3903 break;
3904 default:
3905 vassert(0);
3906 }
3907 }
3908 if (Q) {
3909 shval = newTemp(Ity_V128);
3910 } else {
3911 shval = newTemp(Ity_I64);
3912 }
3913 /* Only the least significant byte of the second argument is used;
3914 replicate this byte across the whole vector element. */
3915 assign(shval, binop(op_shrn,
3916 binop(op_shln,
3917 mkexpr(arg_n),
3918 mkU8((8 << size) - 8)),
3919 mkU8((8 << size) - 8)));
3920 for (i = 0; i < size; i++) {
3921 old_shval = shval;
3922 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3923 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3924 mkexpr(old_shval),
3925 binop(op_shln,
3926 mkexpr(old_shval),
3927 mkU8(8 << i))));
3928 }
3929 /* Compute the result */
3930 if (!Q && size == 3 && U) {
3931 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3932 binop(op,
3933 mkexpr(arg_m),
3934 unop(Iop_64to8,
3935 binop(op_add,
3936 mkexpr(arg_n),
3937 mkexpr(imm_val)))),
3938 binop(Q ? Iop_AndV128 : Iop_And64,
3939 mkexpr(imm_val),
3940 binop(cmp_gt,
3941 Q ? mkU128(0) : mkU64(0),
3942 mkexpr(arg_n)))));
3943 assign(res, binop(op_add,
3944 binop(op,
3945 mkexpr(arg_m),
3946 unop(Iop_64to8, mkexpr(arg_n))),
3947 mkexpr(round)));
3948 } else {
3949 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3950 binop(op,
3951 mkexpr(arg_m),
3952 binop(op_add,
3953 mkexpr(arg_n),
3954 mkexpr(imm_val))),
3955 binop(Q ? Iop_AndV128 : Iop_And64,
3956 mkexpr(imm_val),
3957 binop(cmp_gt,
3958 Q ? mkU128(0) : mkU64(0),
3959 mkexpr(arg_n)))));
3960 assign(res, binop(op_add,
3961 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3962 mkexpr(round)));
3963 }
3964 DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3965 U ? 'u' : 's', 8 << size,
3966 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3967 nreg);
3968 } else {
3969 /* VQRSHL */
3970 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3971 IRTemp tmp, shval, mask, old_shval, imm_val, round;
3972 UInt i;
3973 ULong esize, imm;
3974 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3975 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3976 imm = 1L;
3977 switch (size) {
3978 case 0: imm = (imm << 8) | imm; /* fall through */
3979 case 1: imm = (imm << 16) | imm; /* fall through */
3980 case 2: imm = (imm << 32) | imm; /* fall through */
3981 case 3: break;
3982 default: vassert(0);
3983 }
3984 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3985 round = newTemp(Q ? Ity_V128 : Ity_I64);
3986 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3987 if (U) {
3988 switch (size) {
3989 case 0:
3990 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3991 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3992 op_rev = Q ?
            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
                nreg);
         } else {
            /* VQRSHL */
            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
            IRTemp tmp, shval, mask, old_shval, imm_val, round;
            UInt i;
            ULong esize, imm;
            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
            imm = 1L;
            switch (size) {
               case 0: imm = (imm << 8) | imm; /* fall through */
               case 1: imm = (imm << 16) | imm; /* fall through */
               case 2: imm = (imm << 32) | imm; /* fall through */
               case 3: break;
               default: vassert(0);
            }
            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
            round = newTemp(Q ? Ity_V128 : Ity_I64);
            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
            if (U) {
               switch (size) {
                  case 0:
                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
                     break;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               tmp = newTemp(Ity_V128);
               shval = newTemp(Ity_V128);
               mask = newTemp(Ity_V128);
            } else {
               tmp = newTemp(Ity_I64);
               shval = newTemp(Ity_I64);
               mask = newTemp(Ity_I64);
            }
            /* Only the least significant byte of the second argument is
               used.  Copy this byte to every byte of the vector element. */
            assign(shval, binop(op_shrn,
                                binop(op_shln,
                                      mkexpr(arg_n),
                                      mkU8((8 << size) - 8)),
                                mkU8((8 << size) - 8)));
            for (i = 0; i < size; i++) {
               old_shval = shval;
               shval = newTemp(Q ? Ity_V128 : Ity_I64);
               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
                                   mkexpr(old_shval),
                                   binop(op_shln,
                                         mkexpr(old_shval),
                                         mkU8(8 << i))));
            }
            /* Compute the result */
            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
                                binop(op,
                                      mkexpr(arg_m),
                                      binop(op_add,
                                            mkexpr(arg_n),
                                            mkexpr(imm_val))),
                                binop(Q ? Iop_AndV128 : Iop_And64,
                                      mkexpr(imm_val),
                                      binop(cmp_gt,
                                            Q ? mkU128(0) : mkU64(0),
                                            mkexpr(arg_n)))));
            assign(res, binop(op_add,
                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
                              mkexpr(round)));
#ifndef DISABLE_QC_FLAG
            /* If the shift is greater than or equal to the element size
               and the element is non-zero, the QC flag should be set. */
            esize = (8 << size) - 1;
            esize = (esize << 8) | esize;
            esize = (esize << 16) | esize;
            esize = (esize << 32) | esize;
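            /* esize now holds (element size - 1) in every byte, e.g.
               0x0F0F0F0F0F0F0F0F for size == 1 (16-bit lanes), so the
               byte-wise cmp_gt below fires for shift counts greater than
               or equal to the element size. */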
            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
                             binop(cmp_gt, mkexpr(shval),
                                   Q ? mkU128(esize) : mkU64(esize)),
                             unop(cmp_neq, mkexpr(arg_m))),
                       Q ? mkU128(0) : mkU64(0),
                       Q, condT);
            /* Otherwise the QC flag should be set if the shift value is
               positive and the result, right-shifted by the same value,
               is not equal to the left argument. */
            assign(mask, binop(cmp_gt, mkexpr(shval),
                               Q ? mkU128(0) : mkU64(0)));
            if (!Q && size == 3)
               assign(tmp, binop(op_rev, mkexpr(res),
                                 unop(Iop_64to8, mkexpr(arg_n))));
            else
               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
                             mkexpr(tmp), mkexpr(mask)),
                       binop(Q ? Iop_AndV128 : Iop_And64,
                             mkexpr(arg_m), mkexpr(mask)),
                       Q, condT);
#endif
            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
                nreg);
         }
         break;
      case 6:
         /* VMAX, VMIN */
         if (B == 0) {
            /* VMAX */
            IROp op;
            if (U == 0) {
               switch (size) {
                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            } else {
               switch (size) {
                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         } else {
            /* VMIN */
            IROp op;
            if (U == 0) {
               switch (size) {
                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            } else {
               switch (size) {
                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         }
         break;
      case 7:
         if (B == 0) {
            /* VABD */
            IROp op_cmp, op_sub;
            IRTemp cond;
            if ((theInstr >> 23) & 1) {
               vpanic("VABDL should not be in dis_neon_data_3same\n");
            }
            if (Q) {
               switch (size) {
                  case 0:
                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
                     op_sub = Iop_Sub8x16;
                     break;
                  case 1:
                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
                     op_sub = Iop_Sub16x8;
                     break;
                  case 2:
                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
                     op_sub = Iop_Sub32x4;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
                     op_sub = Iop_Sub8x8;
                     break;
                  case 1:
                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
                     op_sub = Iop_Sub16x4;
                     break;
                  case 2:
                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
                     op_sub = Iop_Sub32x2;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               cond = newTemp(Ity_V128);
            } else {
               cond = newTemp(Ity_I64);
            }
            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
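            /* Branchless select: |arg_n - arg_m| is formed as
               ((n - m) & cond) | ((m - n) & ~cond), cond being the
               all-ones/all-zeroes lane mask computed above. */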
            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
                              binop(Q ? Iop_AndV128 : Iop_And64,
                                    binop(op_sub, mkexpr(arg_n),
                                          mkexpr(arg_m)),
                                    mkexpr(cond)),
                              binop(Q ? Iop_AndV128 : Iop_And64,
                                    binop(op_sub, mkexpr(arg_m),
                                          mkexpr(arg_n)),
                                    unop(Q ? Iop_NotV128 : Iop_Not64,
                                         mkexpr(cond)))));
            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         } else {
            /* VABA */
            IROp op_cmp, op_sub, op_add;
            IRTemp cond, acc, tmp;
            if ((theInstr >> 23) & 1) {
               vpanic("VABAL should not be in dis_neon_data_3same");
            }
            if (Q) {
               switch (size) {
                  case 0:
                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
                     op_sub = Iop_Sub8x16;
                     op_add = Iop_Add8x16;
                     break;
                  case 1:
                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
                     op_sub = Iop_Sub16x8;
                     op_add = Iop_Add16x8;
                     break;
                  case 2:
                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
                     op_sub = Iop_Sub32x4;
                     op_add = Iop_Add32x4;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
                     op_sub = Iop_Sub8x8;
                     op_add = Iop_Add8x8;
                     break;
                  case 1:
                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
                     op_sub = Iop_Sub16x4;
                     op_add = Iop_Add16x4;
                     break;
                  case 2:
                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
                     op_sub = Iop_Sub32x2;
                     op_add = Iop_Add32x2;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               cond = newTemp(Ity_V128);
               acc = newTemp(Ity_V128);
               tmp = newTemp(Ity_V128);
               assign(acc, getQReg(dreg));
            } else {
               cond = newTemp(Ity_I64);
               acc = newTemp(Ity_I64);
               tmp = newTemp(Ity_I64);
               assign(acc, getDRegI64(dreg));
            }
            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
                              binop(Q ? Iop_AndV128 : Iop_And64,
                                    binop(op_sub, mkexpr(arg_n),
                                          mkexpr(arg_m)),
                                    mkexpr(cond)),
                              binop(Q ? Iop_AndV128 : Iop_And64,
                                    binop(op_sub, mkexpr(arg_m),
                                          mkexpr(arg_n)),
                                    unop(Q ? Iop_NotV128 : Iop_Not64,
                                         mkexpr(cond)))));
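            /* tmp is |arg_n - arg_m|, built with the same mask-and-select
               idiom as VABD above; VABA then accumulates it into the
               destination value fetched into acc. */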
            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         }
         break;
      case 8:
         if (B == 0) {
            IROp op;
            if (U == 0) {
               /* VADD */
               switch (size) {
                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
                  default: vassert(0);
               }
               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VSUB */
               switch (size) {
                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
                  default: vassert(0);
               }
               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
         } else {
            IROp op;
            switch (size) {
               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
               default: vassert(0);
            }
            if (U == 0) {
               /* VTST */
               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
                                          mkexpr(arg_n),
                                          mkexpr(arg_m))));
               DIP("vtst.%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VCEQ */
               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
                                unop(op,
                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
                                           mkexpr(arg_n),
                                           mkexpr(arg_m)))));
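               /* Lane-wise equality is synthesised as
                  NOT(CmpNEZ(n XOR m)): the XOR is zero exactly in those
                  lanes where the operands agree. */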
               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
         }
         break;
      case 9:
         if (B == 0) {
            /* VMLA, VMLS (integer) */
            IROp op, op2;
            UInt P = (theInstr >> 24) & 1;
            if (P) {
               switch (size) {
                  case 0:
                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                     break;
                  case 1:
                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                     break;
                  case 2:
                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
                     break;
                  case 1:
                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
                     break;
                  case 2:
                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            }
            assign(res, binop(op2,
                              Q ? getQReg(dreg) : getDRegI64(dreg),
                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
                P ? 's' : 'a', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         } else {
            /* VMUL */
            IROp op;
            UInt P = (theInstr >> 24) & 1;
            if (P) {
               switch (size) {
                  case 0:
                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
                     break;
                  case 1: case 2: case 3: return False;
                  default: vassert(0);
               }
            } else {
               switch (size) {
                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
                P ? 'p' : 'i', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
         }
         break;
      case 10: {
         /* VPMAX, VPMIN */
         UInt P = (theInstr >> 4) & 1;
         IROp op;
         if (Q)
            return False;
         if (P) {
            switch (size) {
               case 0: op = U ? Iop_PwMin8Ux8 : Iop_PwMin8Sx8; break;
               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
               case 3: return False;
               default: vassert(0);
            }
         } else {
            switch (size) {
               case 0: op = U ? Iop_PwMax8Ux8 : Iop_PwMax8Sx8; break;
               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
               case 3: return False;
               default: vassert(0);
            }
         }
         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
             P ? "min" : "max", U ? 'u' : 's',
             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
             Q ? 'q' : 'd', mreg);
         break;
      }
      case 11:
         if (B == 0) {
            if (U == 0) {
               /* VQDMULH */
               IROp op, op2;
               ULong imm;
               switch (size) {
                  case 0: case 3:
                     return False;
                  case 1:
                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
                     imm = 1LL << 15;
                     imm = (imm << 16) | imm;
                     imm = (imm << 32) | imm;
                     break;
                  case 2:
                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
                     imm = 1LL << 31;
                     imm = (imm << 32) | imm;
                     break;
                  default:
                     vassert(0);
               }
               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
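               /* The QC check below (and the analogous one for VQRDMULH)
                  fires only when both multiplicands equal imm, i.e. the
                  most negative value 0x8000...0 in every lane: the sole
                  case in which doubling the product overflows the
                  saturated range. */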
#ifndef DISABLE_QC_FLAG
               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
                                binop(op2, mkexpr(arg_n),
                                      Q ? mkU128(imm) : mkU64(imm)),
                                binop(op2, mkexpr(arg_m),
                                      Q ? mkU128(imm) : mkU64(imm))),
                          Q ? mkU128(0) : mkU64(0),
                          Q, condT);
#endif
               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VQRDMULH */
               IROp op, op2;
               ULong imm;
               switch (size) {
                  case 0: case 3:
                     return False;
                  case 1:
                     imm = 1LL << 15;
                     imm = (imm << 16) | imm;
                     imm = (imm << 32) | imm;
                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
                     break;
                  case 2:
                     imm = 1LL << 31;
                     imm = (imm << 32) | imm;
                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
                     break;
                  default:
                     vassert(0);
               }
               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
#ifndef DISABLE_QC_FLAG
               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
                                binop(op2, mkexpr(arg_n),
                                      Q ? mkU128(imm) : mkU64(imm)),
                                binop(op2, mkexpr(arg_m),
                                      Q ? mkU128(imm) : mkU64(imm))),
                          Q ? mkU128(0) : mkU64(0),
                          Q, condT);
#endif
               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
         } else {
            if (U == 0) {
               /* VPADD */
               IROp op;
               if (Q)
                  return False;
               switch (size) {
                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8; break;
                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
                  case 3: return False;
                  default: vassert(0);
               }
               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
               DIP("vpadd.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
         }
         break;
      /* Starting from here these are FP SIMD cases */
      case 13:
         if (B == 0) {
            IROp op;
            if (U == 0) {
               if ((C >> 1) == 0) {
                  /* VADD */
                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
               } else {
                  /* VSUB */
                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
               }
            } else {
               if ((C >> 1) == 0) {
                  /* VPADD */
                  if (Q)
                     return False;
                  op = Iop_PwAdd32Fx2;
                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
               } else {
                  /* VABD */
                  if (Q) {
                     assign(res, unop(Iop_Abs32Fx4,
                                      binop(Iop_Sub32Fx4,
                                            mkexpr(arg_n),
                                            mkexpr(arg_m))));
                  } else {
                     assign(res, unop(Iop_Abs32Fx2,
                                      binop(Iop_Sub32Fx2,
                                            mkexpr(arg_n),
                                            mkexpr(arg_m))));
                  }
                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                  break;
               }
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
         } else {
            if (U == 0) {
               /* VMLA, VMLS */
               IROp op, op2;
               UInt P = (theInstr >> 21) & 1;
               if (P) {
                  switch (size & 1) {
                     case 0:
                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
                        break;
                     case 1: return False;
                     default: vassert(0);
                  }
               } else {
                  switch (size & 1) {
                     case 0:
                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
                        break;
                     case 1: return False;
                     default: vassert(0);
                  }
               }
               assign(res, binop(op2,
                                 Q ? getQReg(dreg) : getDRegI64(dreg),
                                 binop(op, mkexpr(arg_n), mkexpr(arg_m))));

               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
                   P ? 's' : 'a', Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VMUL */
               IROp op;
               if ((C >> 1) != 0)
                  return False;
               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
                   Q ? 'q' : 'd'