/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//ZZ /* XXXX thumb to check:
//ZZ    that all cases where putIRegT writes r15, we generate a jump.
//ZZ
//ZZ    All uses of newTemp assign to an IRTemp and not a UInt
//ZZ
//ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
//ZZ    backed out before the memory op, and restored afterwards.  This
//ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
//ZZ    happen for VFP loads/stores right now).
//ZZ
//ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
//ZZ    should.
//ZZ
//ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
//ZZ    taking into account the number of insns guarded by an IT.
//ZZ
//ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
//ZZ    0xE0) in as the first arg to armg_calculate_condition, and instead
//ZZ    use Slice44 as specified in comments in the spechelper.
//ZZ
//ZZ    add specialisations for armg_calculate_flag_c and _v, as they
//ZZ    are moderately often needed in Thumb code.
//ZZ
//ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
//ZZ
//ZZ    Correctness (obscure): in m_transtab, when invalidating code
//ZZ    address ranges, invalidate up to 18 bytes after the end of the
//ZZ    range.  This is because the ITSTATE optimisation at the top of
//ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
//ZZ    given instruction, and so might depend on the invalidated area.
//ZZ */
//ZZ
//ZZ /* Limitations, etc
//ZZ
//ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
//ZZ      These instructions are non-restartable in the case where the
//ZZ      transfer(s) fault.
//ZZ
//ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
//ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
//ZZ      guest_x86_toIR.c.
//ZZ */

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 are allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                               ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: is the host bigendian?  We need to know this in order to do
   sub-register accesses to the SIMD/FP registers correctly. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the         ---*/
/*--- arm insn stream.                                      ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
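
/* A minimal sketch (not part of the decoder) of how the 16-byte
   special-instruction preamble described at the top of this file
   could be recognised.  The helper is hypothetical and never called;
   the 'unused' attribute just keeps the compiler quiet.  It assumes
   'code' is valid for a 16-byte read. */
static __attribute__((unused))
Bool example_is_special_preamble ( UChar* code )
{
   return getUIntLittleEndianly(code +  0) == 0x93CC0D8C
          && getUIntLittleEndianly(code +  4) == 0x93CC358C
          && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
          && getUIntLittleEndianly(code + 12) == 0x93CCF58C;
}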

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.     ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
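
/* Illustrative sketch of how the slicing macros and sx_to_64 combine
   to pull a signed immediate out of an instruction word.  This mirrors
   the ADR immediate extraction done in
   dis_ARM64_data_processing_immediate below; the helper itself is
   hypothetical and not called anywhere. */
static __attribute__((unused))
ULong example_decode_adr_simm ( UInt insn )
{
   UInt  immLo = SLICE_UInt(insn, 30, 29);
   UInt  immHi = SLICE_UInt(insn, 23, 5);
   ULong uimm  = ((ULong)immHi << 2) | immLo;
   return sx_to_64(uimm, 21);   /* 21-bit signed displacement */
}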

/* Add a statement to the list held by "irbb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
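
/* Minimal sketch of the newTemp/assign idiom used throughout this
   file: flatten an expression into a fresh 64-bit temp, here holding
   e + 1.  Hypothetical helper, shown only to illustrate the pattern;
   it is not called by the decoder. */
static __attribute__((unused))
IRTemp example_incr64 ( IRExpr* e )
{
   IRTemp t = newTemp(Ity_I64);
   assign(t, binop(Iop_Add64, e, mkU64(1)));
   return t;
}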

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}
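
/* Sketch: the mk* op selectors above exist so that one IR-building
   routine can serve both the 32- and 64-bit forms of an instruction.
   A hypothetical helper that adds an immediate to an expression of
   either width: */
static __attribute__((unused))
IRExpr* example_add_imm ( IRType ty, IRExpr* e, ULong imm )
{
   return binop(mkADD(ty), e, mkU(ty, imm));
}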

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}
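
/* Worked example (illustrative): mathROR generates IR for a rotate
   right, so for a 32-bit value, 0x11223344 rotated right by 8 yields
   0x44112233.  A pure-C model of the computation the generated IR
   performs; unused, for reference only. */
static __attribute__((unused))
UInt example_ror32_model ( UInt x, UInt imm )
{
   vassert(imm < 32);
   return imm == 0 ? x : ((x << (32 - imm)) | (x >> imm));
}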


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.                ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
//ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
//ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
//ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
//ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
//ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
//ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}
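
/* Illustrative note on the orZR/orSP split above: register number 31
   means XZR/WZR in the "orZR" accessors (reads as zero, writes are
   discarded) but the stack pointer in the "orSP" ones.  A hypothetical
   demonstration, never called by the decoder: */
static __attribute__((unused))
void example_zr_vs_sp ( void )
{
   putIReg64orZR(31, mkU64(0x1234));   /* discarded: target is XZR */
   putIReg64orSP(31, mkU64(0x1234));   /* stores to the guest SP */
}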

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers.*/
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(!host_is_bigendian);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_I16:                laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
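
/* Sketch: the LO accessors above implement the architectural rule that
   Bn/Hn/Sn/Dn name the low lane of Qn, so reading d7 is just a 64-bit
   read of the bottom of q7.  Hypothetical helper, unused: */
static __attribute__((unused))
IRExpr* example_read_d_reg ( UInt n )
{
   return getQRegLO(n, Ity_F64);   /* dN, viewed as an F64 */
}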

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
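
/* Worked check (illustrative) of the bit swap above, as a pure-C
   model: ARM encoding 01 (+infinity) becomes IR encoding 10, and 10
   (-infinity) becomes 01, while 00 and 11 are unchanged.  Unused,
   for reference only. */
static __attribute__((unused))
UInt example_swap_rm_bits ( UInt armEncd )
{
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}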


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns       ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs";  // or 'hs'
      case ARM64CondCC: return "cc";  // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64G_CC_OP_ values all
      fit in 4 bits.  Hence we are passing a (COND, OP) pair in the
      lowest 8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | stored-operation".  This requires
      that the ARM64G_CC_OP_ values all fit in 4 bits.  Hence we are
      passing a (COND, OP) pair in the lowest 8 bits of the first
      argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
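
/* Sketch: the usual consumer of mk_arm64g_calculate_condition turns
   the 0-or-1 I64 result into an Ity_I1 guard, e.g. for a conditional
   select or exit.  Hypothetical helper, unused: */
static __attribute__((unused))
IRExpr* example_cond_as_I1 ( ARM64Condcode cc )
{
   return unop(Iop_64to1, mk_arm64g_calculate_condition(cc));
}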

//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants
   thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}
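
/* Sketch: a conditional-compare style use of the helper above.  cond
   is an Ity_I1 temp; if it is false at run time the flags simply
   become the immediate nzcv value.  Hypothetical wrapper, unused: */
static __attribute__((unused))
void example_condcmp32 ( IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv )
{
   setFlags_ADD_SUB_conditionally(False/*!is64*/, True/*isSUB*/,
                                  cond, argL, argR, nzcv);
}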

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                     ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
           )
   );
   return res;
}

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
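
/* Worked example (illustrative): a pure-C model of the single
   SWAPHELPER step used by math_USHORTSWAP64, i.e. a byte swap within
   16-bit lanes.  It maps 0x0123456789ABCDEF to 0x23016745AB89EFCD.
   Unused, for reference only. */
static __attribute__((unused))
ULong example_rev16_model ( ULong x )
{
   const ULong mask = 0xFF00FF00FF00FF00ULL;
   return ((x & mask) >> 8) | ((x << 8) & mask);
}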

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                        binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                        binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                        binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
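
/* Worked example (illustrative): a pure-C model of the Ity_I8 case of
   math_DUP_TO_64, e.g. 0x5A duplicates to 0x5A5A5A5A5A5A5A5A.
   Unused, for reference only. */
static __attribute__((unused))
ULong example_dup8_model ( UChar b )
{
   ULong x = b;
   x |= (x << 8);
   x |= (x << 16);
   x |= (x << 32);
   return x;
}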
1730 */ 1731 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32))); 1732 1733 assign( 1734 ix, 1735 binop(Iop_Or64, 1736 binop(Iop_And64, 1737 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)), 1738 mkU64(3)), 1739 binop(Iop_And64, mkexpr(irRes), mkU64(1)))); 1740 1741 assign( 1742 termL, 1743 binop(Iop_Add64, 1744 binop(Iop_Shr64, 1745 binop(Iop_Sub64, 1746 binop(Iop_Shl64, 1747 binop(Iop_Xor64, mkexpr(ix), mkU64(1)), 1748 mkU8(62)), 1749 mkU64(1)), 1750 mkU8(61)), 1751 mkU64(1))); 1752 1753 assign( 1754 termR, 1755 binop(Iop_And64, 1756 binop(Iop_And64, 1757 mkexpr(ix), 1758 binop(Iop_Shr64, mkexpr(ix), mkU8(1))), 1759 mkU64(1))); 1760 1761 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR))); 1762 return nzcv; 1763 } 1764 1765 1766 /*------------------------------------------------------------*/ 1767 /*--- Data processing (immediate) ---*/ 1768 /*------------------------------------------------------------*/ 1769 1770 /* Helper functions for supporting "DecodeBitMasks" */ 1771 1772 static ULong dbm_ROR ( Int width, ULong x, Int rot ) 1773 { 1774 vassert(width > 0 && width <= 64); 1775 vassert(rot >= 0 && rot < width); 1776 if (rot == 0) return x; 1777 ULong res = x >> rot; 1778 res |= (x << (width - rot)); 1779 if (width < 64) 1780 res &= ((1ULL << width) - 1); 1781 return res; 1782 } 1783 1784 static ULong dbm_RepTo64( Int esize, ULong x ) 1785 { 1786 switch (esize) { 1787 case 64: 1788 return x; 1789 case 32: 1790 x &= 0xFFFFFFFF; x |= (x << 32); 1791 return x; 1792 case 16: 1793 x &= 0xFFFF; x |= (x << 16); x |= (x << 32); 1794 return x; 1795 case 8: 1796 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32); 1797 return x; 1798 case 4: 1799 x &= 0xF; x |= (x << 4); x |= (x << 8); 1800 x |= (x << 16); x |= (x << 32); 1801 return x; 1802 case 2: 1803 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8); 1804 x |= (x << 16); x |= (x << 32); 1805 return x; 1806 default: 1807 break; 1808 } 1809 vpanic("dbm_RepTo64"); 1810 /*NOTREACHED*/ 1811 return 0; 1812 } 1813 1814 static Int dbm_highestSetBit ( ULong x ) 1815 { 1816 Int i; 1817 for (i = 63; i >= 0; i--) { 1818 if (x & (1ULL << i)) 1819 return i; 1820 } 1821 vassert(x == 0); 1822 return -1; 1823 } 1824 1825 static 1826 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask, 1827 ULong immN, ULong imms, ULong immr, Bool immediate, 1828 UInt M /*32 or 64*/) 1829 { 1830 vassert(immN < (1ULL << 1)); 1831 vassert(imms < (1ULL << 6)); 1832 vassert(immr < (1ULL << 6)); 1833 vassert(immediate == False || immediate == True); 1834 vassert(M == 32 || M == 64); 1835 1836 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) ); 1837 if (len < 1) { /* printf("fail1\n"); */ return False; } 1838 vassert(len <= 6); 1839 vassert(M >= (1 << len)); 1840 1841 vassert(len >= 1 && len <= 6); 1842 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len); 1843 (1 << len) - 1; 1844 vassert(levels >= 1 && levels <= 63); 1845 1846 if (immediate && ((imms & levels) == levels)) { 1847 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */ 1848 return False; 1849 } 1850 1851 ULong S = imms & levels; 1852 ULong R = immr & levels; 1853 Int diff = S - R; 1854 diff &= 63; 1855 Int esize = 1 << len; 1856 vassert(2 <= esize && esize <= 64); 1857 1858 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the 1859 same below with d. S can be 63 in which case we have an out of 1860 range and hence undefined shift. 
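      For example, if S == 63 then the obvious (1ULL << (S+1)) - 1
      would shift by 64, which C leaves undefined.  That is why elem_s
      and elem_d are instead computed below as
      ((1ULL << S) - 1) + (1ULL << S): it shifts by at most 63 yet
      still yields Ones(S+1).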
*/ 1861 vassert(S >= 0 && S <= 63); 1862 vassert(esize >= (S+1)); 1863 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1) 1864 //(1ULL << (S+1)) - 1; 1865 ((1ULL << S) - 1) + (1ULL << S); 1866 1867 Int d = // diff<len-1:0> 1868 diff & ((1 << len)-1); 1869 vassert(esize >= (d+1)); 1870 vassert(d >= 0 && d <= 63); 1871 1872 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1) 1873 //(1ULL << (d+1)) - 1; 1874 ((1ULL << d) - 1) + (1ULL << d); 1875 1876 if (esize != 64) vassert(elem_s < (1ULL << esize)); 1877 if (esize != 64) vassert(elem_d < (1ULL << esize)); 1878 1879 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R)); 1880 if (tmask) *tmask = dbm_RepTo64(esize, elem_d); 1881 1882 return True; 1883 } 1884 1885 1886 static 1887 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres, 1888 UInt insn) 1889 { 1890 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 1891 1892 /* insn[28:23] 1893 10000x PC-rel addressing 1894 10001x Add/subtract (immediate) 1895 100100 Logical (immediate) 1896 100101 Move Wide (immediate) 1897 100110 Bitfield 1898 100111 Extract 1899 */ 1900 1901 /* ------------------ ADD/SUB{,S} imm12 ------------------ */ 1902 if (INSN(28,24) == BITS5(1,0,0,0,1)) { 1903 Bool is64 = INSN(31,31) == 1; 1904 Bool isSub = INSN(30,30) == 1; 1905 Bool setCC = INSN(29,29) == 1; 1906 UInt sh = INSN(23,22); 1907 UInt uimm12 = INSN(21,10); 1908 UInt nn = INSN(9,5); 1909 UInt dd = INSN(4,0); 1910 const HChar* nm = isSub ? "sub" : "add"; 1911 if (sh >= 2) { 1912 /* Invalid; fall through */ 1913 } else { 1914 vassert(sh <= 1); 1915 uimm12 <<= (12 * sh); 1916 if (is64) { 1917 IRTemp argL = newTemp(Ity_I64); 1918 IRTemp argR = newTemp(Ity_I64); 1919 IRTemp res = newTemp(Ity_I64); 1920 assign(argL, getIReg64orSP(nn)); 1921 assign(argR, mkU64(uimm12)); 1922 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64, 1923 mkexpr(argL), mkexpr(argR))); 1924 if (setCC) { 1925 putIReg64orZR(dd, mkexpr(res)); 1926 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 1927 DIP("%ss %s, %s, 0x%x\n", 1928 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12); 1929 } else { 1930 putIReg64orSP(dd, mkexpr(res)); 1931 DIP("%s %s, %s, 0x%x\n", 1932 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12); 1933 } 1934 } else { 1935 IRTemp argL = newTemp(Ity_I32); 1936 IRTemp argR = newTemp(Ity_I32); 1937 IRTemp res = newTemp(Ity_I32); 1938 assign(argL, getIReg32orSP(nn)); 1939 assign(argR, mkU32(uimm12)); 1940 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32, 1941 mkexpr(argL), mkexpr(argR))); 1942 if (setCC) { 1943 putIReg32orZR(dd, mkexpr(res)); 1944 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR); 1945 DIP("%ss %s, %s, 0x%x\n", 1946 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12); 1947 } else { 1948 putIReg32orSP(dd, mkexpr(res)); 1949 DIP("%s %s, %s, 0x%x\n", 1950 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12); 1951 } 1952 } 1953 return True; 1954 } 1955 } 1956 1957 /* -------------------- ADR/ADRP -------------------- */ 1958 if (INSN(28,24) == BITS5(1,0,0,0,0)) { 1959 UInt bP = INSN(31,31); 1960 UInt immLo = INSN(30,29); 1961 UInt immHi = INSN(23,5); 1962 UInt rD = INSN(4,0); 1963 ULong uimm = (immHi << 2) | immLo; 1964 ULong simm = sx_to_64(uimm, 21); 1965 ULong val; 1966 if (bP) { 1967 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12); 1968 } else { 1969 val = guest_PC_curr_instr + simm; 1970 } 1971 putIReg64orZR(rD, mkU64(val)); 1972 DIP("adr%s %s, 0x%llx\n", bP ? 
"p" : "", nameIReg64orZR(rD), val); 1973 return True; 1974 } 1975 1976 /* -------------------- LOGIC(imm) -------------------- */ 1977 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) { 1978 /* 31 30 28 22 21 15 9 4 1979 sf op 100100 N immr imms Rn Rd 1980 op=00: AND Rd|SP, Rn, #imm 1981 op=01: ORR Rd|SP, Rn, #imm 1982 op=10: EOR Rd|SP, Rn, #imm 1983 op=11: ANDS Rd|ZR, Rn, #imm 1984 */ 1985 Bool is64 = INSN(31,31) == 1; 1986 UInt op = INSN(30,29); 1987 UInt N = INSN(22,22); 1988 UInt immR = INSN(21,16); 1989 UInt immS = INSN(15,10); 1990 UInt nn = INSN(9,5); 1991 UInt dd = INSN(4,0); 1992 ULong imm = 0; 1993 Bool ok; 1994 if (N == 1 && !is64) 1995 goto after_logic_imm; /* not allowed; fall through */ 1996 ok = dbm_DecodeBitMasks(&imm, NULL, 1997 N, immS, immR, True, is64 ? 64 : 32); 1998 if (!ok) 1999 goto after_logic_imm; 2000 2001 const HChar* names[4] = { "and", "orr", "eor", "ands" }; 2002 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 }; 2003 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 }; 2004 2005 vassert(op < 4); 2006 if (is64) { 2007 IRExpr* argL = getIReg64orZR(nn); 2008 IRExpr* argR = mkU64(imm); 2009 IRTemp res = newTemp(Ity_I64); 2010 assign(res, binop(ops64[op], argL, argR)); 2011 if (op < 3) { 2012 putIReg64orSP(dd, mkexpr(res)); 2013 DIP("%s %s, %s, 0x%llx\n", names[op], 2014 nameIReg64orSP(dd), nameIReg64orZR(nn), imm); 2015 } else { 2016 putIReg64orZR(dd, mkexpr(res)); 2017 setFlags_LOGIC(True/*is64*/, res); 2018 DIP("%s %s, %s, 0x%llx\n", names[op], 2019 nameIReg64orZR(dd), nameIReg64orZR(nn), imm); 2020 } 2021 } else { 2022 IRExpr* argL = getIReg32orZR(nn); 2023 IRExpr* argR = mkU32((UInt)imm); 2024 IRTemp res = newTemp(Ity_I32); 2025 assign(res, binop(ops32[op], argL, argR)); 2026 if (op < 3) { 2027 putIReg32orSP(dd, mkexpr(res)); 2028 DIP("%s %s, %s, 0x%x\n", names[op], 2029 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm); 2030 } else { 2031 putIReg32orZR(dd, mkexpr(res)); 2032 setFlags_LOGIC(False/*!is64*/, res); 2033 DIP("%s %s, %s, 0x%x\n", names[op], 2034 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); 2035 } 2036 } 2037 return True; 2038 } 2039 after_logic_imm: 2040 2041 /* -------------------- MOV{Z,N,K} -------------------- */ 2042 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { 2043 /* 31 30 28 22 20 4 2044 | | | | | | 2045 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) 2046 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) 2047 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) 2048 */ 2049 Bool is64 = INSN(31,31) == 1; 2050 UInt subopc = INSN(30,29); 2051 UInt hw = INSN(22,21); 2052 UInt imm16 = INSN(20,5); 2053 UInt dd = INSN(4,0); 2054 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { 2055 /* invalid; fall through */ 2056 } else { 2057 ULong imm64 = ((ULong)imm16) << (16 * hw); 2058 if (!is64) 2059 vassert(imm64 < 0x100000000ULL); 2060 switch (subopc) { 2061 case BITS2(1,0): // MOVZ 2062 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2063 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2064 break; 2065 case BITS2(0,0): // MOVN 2066 imm64 = ~imm64; 2067 if (!is64) 2068 imm64 &= 0xFFFFFFFFULL; 2069 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2070 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2071 break; 2072 case BITS2(1,1): // MOVK 2073 /* This is more complex. We are inserting a slice into 2074 the destination register, so we need to have the old 2075 value of it. 
*/ 2076 if (is64) { 2077 IRTemp old = newTemp(Ity_I64); 2078 assign(old, getIReg64orZR(dd)); 2079 ULong mask = 0xFFFFULL << (16 * hw); 2080 IRExpr* res 2081 = binop(Iop_Or64, 2082 binop(Iop_And64, mkexpr(old), mkU64(~mask)), 2083 mkU64(imm64)); 2084 putIReg64orZR(dd, res); 2085 DIP("movk %s, 0x%x, lsl %u\n", 2086 nameIReg64orZR(dd), imm16, 16*hw); 2087 } else { 2088 IRTemp old = newTemp(Ity_I32); 2089 assign(old, getIReg32orZR(dd)); 2090 vassert(hw <= 1); 2091 UInt mask = 0xFFFF << (16 * hw); 2092 IRExpr* res 2093 = binop(Iop_Or32, 2094 binop(Iop_And32, mkexpr(old), mkU32(~mask)), 2095 mkU32((UInt)imm64)); 2096 putIReg32orZR(dd, res); 2097 DIP("movk %s, 0x%x, lsl %u\n", 2098 nameIReg32orZR(dd), imm16, 16*hw); 2099 } 2100 break; 2101 default: 2102 vassert(0); 2103 } 2104 return True; 2105 } 2106 } 2107 2108 /* -------------------- {U,S,}BFM -------------------- */ 2109 /* 30 28 22 21 15 9 4 2110 2111 sf 10 100110 N immr imms nn dd 2112 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2113 UBFM Xd, Xn, #immr, #imms when sf=1, N=1 2114 2115 sf 00 100110 N immr imms nn dd 2116 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2117 SBFM Xd, Xn, #immr, #imms when sf=1, N=1 2118 2119 sf 01 100110 N immr imms nn dd 2120 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2121 BFM Xd, Xn, #immr, #imms when sf=1, N=1 2122 */ 2123 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { 2124 UInt sf = INSN(31,31); 2125 UInt opc = INSN(30,29); 2126 UInt N = INSN(22,22); 2127 UInt immR = INSN(21,16); 2128 UInt immS = INSN(15,10); 2129 UInt nn = INSN(9,5); 2130 UInt dd = INSN(4,0); 2131 Bool inZero = False; 2132 Bool extend = False; 2133 const HChar* nm = "???"; 2134 /* skip invalid combinations */ 2135 switch (opc) { 2136 case BITS2(0,0): 2137 inZero = True; extend = True; nm = "sbfm"; break; 2138 case BITS2(0,1): 2139 inZero = False; extend = False; nm = "bfm"; break; 2140 case BITS2(1,0): 2141 inZero = True; extend = False; nm = "ubfm"; break; 2142 case BITS2(1,1): 2143 goto after_bfm; /* invalid */ 2144 default: 2145 vassert(0); 2146 } 2147 if (sf == 1 && N != 1) goto after_bfm; 2148 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 2149 || ((immS >> 5) & 1) != 0)) goto after_bfm; 2150 ULong wmask = 0, tmask = 0; 2151 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 2152 N, immS, immR, False, sf == 1 ? 64 : 32); 2153 if (!ok) goto after_bfm; /* hmmm */ 2154 2155 Bool is64 = sf == 1; 2156 IRType ty = is64 ? Ity_I64 : Ity_I32; 2157 2158 IRTemp dst = newTemp(ty); 2159 IRTemp src = newTemp(ty); 2160 IRTemp bot = newTemp(ty); 2161 IRTemp top = newTemp(ty); 2162 IRTemp res = newTemp(ty); 2163 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); 2164 assign(src, getIRegOrZR(is64, nn)); 2165 /* perform bitfield move on low bits */ 2166 assign(bot, binop(mkOR(ty), 2167 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), 2168 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), 2169 mkU(ty, wmask)))); 2170 /* determine extension bits (sign, zero or dest register) */ 2171 assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); 2172 /* combine extension bits and result bits */ 2173 assign(res, binop(mkOR(ty), 2174 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), 2175 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); 2176 putIRegOrZR(is64, dd, mkexpr(res)); 2177 DIP("%s %s, %s, immR=%u, immS=%u\n", 2178 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); 2179 return True; 2180 } 2181 after_bfm: 2182 2183 /* ---------------------- EXTR ---------------------- */ 2184 /* 30 28 22 20 15 9 4 2185 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 2186 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 2187 */ 2188 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { 2189 Bool is64 = INSN(31,31) == 1; 2190 UInt mm = INSN(20,16); 2191 UInt imm6 = INSN(15,10); 2192 UInt nn = INSN(9,5); 2193 UInt dd = INSN(4,0); 2194 Bool valid = True; 2195 if (INSN(31,31) != INSN(22,22)) 2196 valid = False; 2197 if (!is64 && imm6 >= 32) 2198 valid = False; 2199 if (!valid) goto after_extr; 2200 IRType ty = is64 ? Ity_I64 : Ity_I32; 2201 IRTemp srcHi = newTemp(ty); 2202 IRTemp srcLo = newTemp(ty); 2203 IRTemp res = newTemp(ty); 2204 assign(srcHi, getIRegOrZR(is64, nn)); 2205 assign(srcLo, getIRegOrZR(is64, mm)); 2206 if (imm6 == 0) { 2207 assign(res, mkexpr(srcLo)); 2208 } else { 2209 UInt szBits = 8 * sizeofIRType(ty); 2210 vassert(imm6 > 0 && imm6 < szBits); 2211 assign(res, binop(mkOR(ty), 2212 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)), 2213 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6)))); 2214 } 2215 putIRegOrZR(is64, dd, mkexpr(res)); 2216 DIP("extr %s, %s, %s, #%u\n", 2217 nameIRegOrZR(is64,dd), 2218 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6); 2219 return True; 2220 } 2221 after_extr: 2222 2223 vex_printf("ARM64 front end: data_processing_immediate\n"); 2224 return False; 2225 # undef INSN 2226 } 2227 2228 2229 /*------------------------------------------------------------*/ 2230 /*--- Data processing (register) instructions ---*/ 2231 /*------------------------------------------------------------*/ 2232 2233 static const HChar* nameSH ( UInt sh ) { 2234 switch (sh) { 2235 case 0: return "lsl"; 2236 case 1: return "lsr"; 2237 case 2: return "asr"; 2238 case 3: return "ror"; 2239 default: vassert(0); 2240 } 2241 } 2242 2243 /* Generate IR to get a register value, possibly shifted by an 2244 immediate. Returns either a 32- or 64-bit temporary holding the 2245 result. After the shift, the value can optionally be NOT-ed 2246 too. 2247 2248 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be 2249 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR 2250 isn't allowed, but it's the job of the caller to check that. 2251 */ 2252 static IRTemp getShiftedIRegOrZR ( Bool is64, 2253 UInt sh_how, UInt sh_amt, UInt regNo, 2254 Bool invert ) 2255 { 2256 vassert(sh_how < 4); 2257 vassert(sh_amt < (is64 ? 64 : 32)); 2258 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2259 IRTemp t0 = newTemp(ty); 2260 assign(t0, getIRegOrZR(is64, regNo)); 2261 IRTemp t1 = newTemp(ty); 2262 switch (sh_how) { 2263 case BITS2(0,0): 2264 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt))); 2265 break; 2266 case BITS2(0,1): 2267 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt))); 2268 break; 2269 case BITS2(1,0): 2270 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt))); 2271 break; 2272 case BITS2(1,1): 2273 assign(t1, mkexpr(mathROR(ty, t0, sh_amt))); 2274 break; 2275 default: 2276 vassert(0); 2277 } 2278 if (invert) { 2279 IRTemp t2 = newTemp(ty); 2280 assign(t2, unop(mkNOT(ty), mkexpr(t1))); 2281 return t2; 2282 } else { 2283 return t1; 2284 } 2285 } 2286 2287 2288 static 2289 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres, 2290 UInt insn) 2291 { 2292 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 2293 2294 /* ------------------- ADD/SUB(reg) ------------------- */ 2295 /* x==0 => 32 bit op x==1 => 64 bit op 2296 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED) 2297 2298 31 30 29 28 23 21 20 15 9 4 2299 | | | | | | | | | | 2300 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6) 2301 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6) 2302 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6) 2303 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6) 2304 */ 2305 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) { 2306 UInt bX = INSN(31,31); 2307 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */ 2308 UInt bS = INSN(29, 29); /* set flags? */ 2309 UInt sh = INSN(23,22); 2310 UInt rM = INSN(20,16); 2311 UInt imm6 = INSN(15,10); 2312 UInt rN = INSN(9,5); 2313 UInt rD = INSN(4,0); 2314 Bool isSUB = bOP == 1; 2315 Bool is64 = bX == 1; 2316 IRType ty = is64 ? Ity_I64 : Ity_I32; 2317 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) { 2318 /* invalid; fall through */ 2319 } else { 2320 IRTemp argL = newTemp(ty); 2321 assign(argL, getIRegOrZR(is64, rN)); 2322 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False); 2323 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2324 IRTemp res = newTemp(ty); 2325 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2326 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2327 if (bS) { 2328 setFlags_ADD_SUB(is64, isSUB, argL, argR); 2329 } 2330 DIP("%s%s %s, %s, %s, %s #%u\n", 2331 bOP ? "sub" : "add", bS ? "s" : "", 2332 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2333 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2334 return True; 2335 } 2336 } 2337 2338 /* ------------------- ADC/SBC(reg) ------------------- */ 2339 /* x==0 => 32 bit op x==1 => 64 bit op 2340 2341 31 30 29 28 23 21 20 15 9 4 2342 | | | | | | | | | | 2343 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm 2344 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm 2345 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm 2346 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm 2347 */ 2348 2349 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) { 2350 UInt bX = INSN(31,31); 2351 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */ 2352 UInt bS = INSN(29,29); /* set flags */ 2353 UInt rM = INSN(20,16); 2354 UInt rN = INSN(9,5); 2355 UInt rD = INSN(4,0); 2356 2357 Bool isSUB = bOP == 1; 2358 Bool is64 = bX == 1; 2359 IRType ty = is64 ? Ity_I64 : Ity_I32; 2360 2361 IRTemp oldC = newTemp(ty); 2362 assign(oldC, 2363 is64 ? 
mk_arm64g_calculate_flag_c() 2364 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) ); 2365 2366 IRTemp argL = newTemp(ty); 2367 assign(argL, getIRegOrZR(is64, rN)); 2368 IRTemp argR = newTemp(ty); 2369 assign(argR, getIRegOrZR(is64, rM)); 2370 2371 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2372 IRTemp res = newTemp(ty); 2373 if (isSUB) { 2374 IRExpr* one = is64 ? mkU64(1) : mkU32(1); 2375 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32; 2376 assign(res, 2377 binop(op, 2378 binop(op, mkexpr(argL), mkexpr(argR)), 2379 binop(xorOp, mkexpr(oldC), one))); 2380 } else { 2381 assign(res, 2382 binop(op, 2383 binop(op, mkexpr(argL), mkexpr(argR)), 2384 mkexpr(oldC))); 2385 } 2386 2387 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2388 2389 if (bS) { 2390 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC); 2391 } 2392 2393 DIP("%s%s %s, %s, %s\n", 2394 bOP ? "sbc" : "adc", bS ? "s" : "", 2395 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2396 nameIRegOrZR(is64, rM)); 2397 return True; 2398 } 2399 2400 2401 2402 /* -------------------- LOGIC(reg) -------------------- */ 2403 /* x==0 => 32 bit op x==1 => 64 bit op 2404 N==0 => inv? is no-op (no inversion) 2405 N==1 => inv? is NOT 2406 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR 2407 2408 31 30 28 23 21 20 15 9 4 2409 | | | | | | | | | 2410 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6)) 2411 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6)) 2412 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6)) 2413 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6)) 2414 With N=1, the names are: BIC ORN EON BICS 2415 */ 2416 if (INSN(28,24) == BITS5(0,1,0,1,0)) { 2417 UInt bX = INSN(31,31); 2418 UInt sh = INSN(23,22); 2419 UInt bN = INSN(21,21); 2420 UInt rM = INSN(20,16); 2421 UInt imm6 = INSN(15,10); 2422 UInt rN = INSN(9,5); 2423 UInt rD = INSN(4,0); 2424 Bool is64 = bX == 1; 2425 IRType ty = is64 ? 
Ity_I64 : Ity_I32;
2426       if (!is64 && imm6 > 31) {
2427          /* invalid; fall through */
2428       } else {
2429          IRTemp argL = newTemp(ty);
2430          assign(argL, getIRegOrZR(is64, rN));
2431          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2432          IROp op = Iop_INVALID;
2433          switch (INSN(30,29)) {
2434             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2435             case BITS2(0,1): op = mkOR(ty); break;
2436             case BITS2(1,0): op = mkXOR(ty); break;
2437             default: vassert(0);
2438          }
2439          IRTemp res = newTemp(ty);
2440          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2441          if (INSN(30,29) == BITS2(1,1)) {
2442             setFlags_LOGIC(is64, res);
2443          }
2444          putIRegOrZR(is64, rD, mkexpr(res));
2445
2446          static const HChar* names_op[8]
2447             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2448          vassert(((bN << 2) | INSN(30,29)) < 8);
2449          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2450          /* Special-case the printing of "MOV" */
2451          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2452             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2453                                 nameIRegOrZR(is64, rM));
2454          } else {
2455             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2456                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2457                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2458          }
2459          return True;
2460       }
2461    }
2462
2463    /* -------------------- {U,S}MULH -------------------- */
2464    /* 31       23 22 20 15     9 4
2465       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
2466       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
2467    */
2468    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2469        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2470       Bool isU = INSN(23,23) == 1;
2471       UInt mm  = INSN(20,16);
2472       UInt nn  = INSN(9,5);
2473       UInt dd  = INSN(4,0);
2474       putIReg64orZR(dd, unop(Iop_128HIto64,
2475                              binop(isU ? Iop_MullU64 : Iop_MullS64,
2476                                    getIReg64orZR(nn), getIReg64orZR(mm))));
2477       DIP("%cmulh %s, %s, %s\n",
2478           isU ? 'u' : 's',
2479           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2480       return True;
2481    }
2482
2483    /* -------------------- M{ADD,SUB} -------------------- */
2484    /* 31 30           20 15 14 9 4
2485       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
2486       sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
2487    */
2488    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2489       Bool is64  = INSN(31,31) == 1;
2490       UInt mm    = INSN(20,16);
2491       Bool isAdd = INSN(15,15) == 0;
2492       UInt aa    = INSN(14,10);
2493       UInt nn    = INSN(9,5);
2494       UInt dd    = INSN(4,0);
2495       if (is64) {
2496          putIReg64orZR(
2497             dd,
2498             binop(isAdd ? Iop_Add64 : Iop_Sub64,
2499                   getIReg64orZR(aa),
2500                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2501       } else {
2502          putIReg32orZR(
2503             dd,
2504             binop(isAdd ? Iop_Add32 : Iop_Sub32,
2505                   getIReg32orZR(aa),
2506                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2507       }
2508       DIP("%s %s, %s, %s, %s\n",
2509           isAdd ?
"madd" : "msub", 2510 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 2511 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa)); 2512 return True; 2513 } 2514 2515 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */ 2516 /* 31 30 28 20 15 11 9 4 2517 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm 2518 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm 2519 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm 2520 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm 2521 In all cases, the operation is: Rd = if cond then Rn else OP(Rm) 2522 */ 2523 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) { 2524 Bool is64 = INSN(31,31) == 1; 2525 UInt b30 = INSN(30,30); 2526 UInt mm = INSN(20,16); 2527 UInt cond = INSN(15,12); 2528 UInt b10 = INSN(10,10); 2529 UInt nn = INSN(9,5); 2530 UInt dd = INSN(4,0); 2531 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */ 2532 IRType ty = is64 ? Ity_I64 : Ity_I32; 2533 IRExpr* argL = getIRegOrZR(is64, nn); 2534 IRExpr* argR = getIRegOrZR(is64, mm); 2535 switch (op) { 2536 case BITS2(0,0): 2537 break; 2538 case BITS2(0,1): 2539 argR = binop(mkADD(ty), argR, mkU(ty,1)); 2540 break; 2541 case BITS2(1,0): 2542 argR = unop(mkNOT(ty), argR); 2543 break; 2544 case BITS2(1,1): 2545 argR = binop(mkSUB(ty), mkU(ty,0), argR); 2546 break; 2547 default: 2548 vassert(0); 2549 } 2550 putIRegOrZR( 2551 is64, dd, 2552 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 2553 argL, argR) 2554 ); 2555 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" }; 2556 DIP("%s %s, %s, %s, %s\n", op_nm[op], 2557 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 2558 nameIRegOrZR(is64, mm), nameCC(cond)); 2559 return True; 2560 } 2561 2562 /* -------------- ADD/SUB(extended reg) -------------- */ 2563 /* 28 20 15 12 9 4 2564 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld 2565 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld 2566 2567 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld 2568 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld 2569 2570 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld 2571 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld 2572 2573 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld 2574 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld 2575 2576 The 'm' operand is extended per opt, thusly: 2577 2578 000 Xm & 0xFF UXTB 2579 001 Xm & 0xFFFF UXTH 2580 010 Xm & (2^32)-1 UXTW 2581 011 Xm UXTX 2582 2583 100 Xm sx from bit 7 SXTB 2584 101 Xm sx from bit 15 SXTH 2585 110 Xm sx from bit 31 SXTW 2586 111 Xm SXTX 2587 2588 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity 2589 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX 2590 are the identity operation on Wm. 2591 2592 After extension, the value is shifted left by imm3 bits, which 2593 may only be in the range 0 .. 4 inclusive. 2594 */ 2595 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) { 2596 Bool is64 = INSN(31,31) == 1; 2597 Bool isSub = INSN(30,30) == 1; 2598 Bool setCC = INSN(29,29) == 1; 2599 UInt mm = INSN(20,16); 2600 UInt opt = INSN(15,13); 2601 UInt imm3 = INSN(12,10); 2602 UInt nn = INSN(9,5); 2603 UInt dd = INSN(4,0); 2604 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx", 2605 "sxtb", "sxth", "sxtw", "sxtx" }; 2606 /* Do almost the same thing in the 32- and 64-bit cases. 
*/ 2607 IRTemp xN = newTemp(Ity_I64); 2608 IRTemp xM = newTemp(Ity_I64); 2609 assign(xN, getIReg64orSP(nn)); 2610 assign(xM, getIReg64orZR(mm)); 2611 IRExpr* xMw = mkexpr(xM); /* "xM widened" */ 2612 Int shSX = 0; 2613 /* widen Xm .. */ 2614 switch (opt) { 2615 case BITS3(0,0,0): // UXTB 2616 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break; 2617 case BITS3(0,0,1): // UXTH 2618 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break; 2619 case BITS3(0,1,0): // UXTW -- noop for the 32bit case 2620 if (is64) { 2621 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw)); 2622 } 2623 break; 2624 case BITS3(0,1,1): // UXTX -- always a noop 2625 break; 2626 case BITS3(1,0,0): // SXTB 2627 shSX = 56; goto sxTo64; 2628 case BITS3(1,0,1): // SXTH 2629 shSX = 48; goto sxTo64; 2630 case BITS3(1,1,0): // SXTW -- noop for the 32bit case 2631 if (is64) { 2632 shSX = 32; goto sxTo64; 2633 } 2634 break; 2635 case BITS3(1,1,1): // SXTX -- always a noop 2636 break; 2637 sxTo64: 2638 vassert(shSX >= 32); 2639 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)), 2640 mkU8(shSX)); 2641 break; 2642 default: 2643 vassert(0); 2644 } 2645 /* and now shift */ 2646 IRTemp argL = xN; 2647 IRTemp argR = newTemp(Ity_I64); 2648 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3))); 2649 IRTemp res = newTemp(Ity_I64); 2650 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64, 2651 mkexpr(argL), mkexpr(argR))); 2652 if (is64) { 2653 if (setCC) { 2654 putIReg64orZR(dd, mkexpr(res)); 2655 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 2656 } else { 2657 putIReg64orSP(dd, mkexpr(res)); 2658 } 2659 } else { 2660 if (setCC) { 2661 IRTemp argL32 = newTemp(Ity_I32); 2662 IRTemp argR32 = newTemp(Ity_I32); 2663 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res))); 2664 assign(argL32, unop(Iop_64to32, mkexpr(argL))); 2665 assign(argR32, unop(Iop_64to32, mkexpr(argR))); 2666 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32); 2667 } else { 2668 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res))); 2669 } 2670 } 2671 DIP("%s%s %s, %s, %s %s lsl %u\n", 2672 isSub ? "sub" : "add", setCC ? "s" : "", 2673 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd), 2674 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm), 2675 nameExt[opt], imm3); 2676 return True; 2677 } 2678 2679 /* ---------------- CCMP/CCMN(imm) ---------------- */ 2680 /* Bizarrely, these appear in the "data processing register" 2681 category, even though they are operations against an 2682 immediate. */ 2683 /* 31 29 20 15 11 9 3 2684 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond 2685 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond 2686 2687 Operation is: 2688 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv 2689 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv 2690 */ 2691 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 2692 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) { 2693 Bool is64 = INSN(31,31) == 1; 2694 Bool isSUB = INSN(30,30) == 1; 2695 UInt imm5 = INSN(20,16); 2696 UInt cond = INSN(15,12); 2697 UInt nn = INSN(9,5); 2698 UInt nzcv = INSN(3,0); 2699 2700 IRTemp condT = newTemp(Ity_I1); 2701 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 2702 2703 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2704 IRTemp argL = newTemp(ty); 2705 IRTemp argR = newTemp(ty); 2706 2707 if (is64) { 2708 assign(argL, getIReg64orZR(nn)); 2709 assign(argR, mkU64(imm5)); 2710 } else { 2711 assign(argL, getIReg32orZR(nn)); 2712 assign(argR, mkU32(imm5)); 2713 } 2714 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 2715 2716 DIP("ccm%c %s, #%u, #%u, %s\n", 2717 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 2718 imm5, nzcv, nameCC(cond)); 2719 return True; 2720 } 2721 2722 /* ---------------- CCMP/CCMN(reg) ---------------- */ 2723 /* 31 29 20 15 11 9 3 2724 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond 2725 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond 2726 Operation is: 2727 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv 2728 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv 2729 */ 2730 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 2731 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) { 2732 Bool is64 = INSN(31,31) == 1; 2733 Bool isSUB = INSN(30,30) == 1; 2734 UInt mm = INSN(20,16); 2735 UInt cond = INSN(15,12); 2736 UInt nn = INSN(9,5); 2737 UInt nzcv = INSN(3,0); 2738 2739 IRTemp condT = newTemp(Ity_I1); 2740 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 2741 2742 IRType ty = is64 ? Ity_I64 : Ity_I32; 2743 IRTemp argL = newTemp(ty); 2744 IRTemp argR = newTemp(ty); 2745 2746 if (is64) { 2747 assign(argL, getIReg64orZR(nn)); 2748 assign(argR, getIReg64orZR(mm)); 2749 } else { 2750 assign(argL, getIReg32orZR(nn)); 2751 assign(argR, getIReg32orZR(mm)); 2752 } 2753 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 2754 2755 DIP("ccm%c %s, %s, #%u, %s\n", 2756 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 2757 nameIRegOrZR(is64, mm), nzcv, nameCC(cond)); 2758 return True; 2759 } 2760 2761 2762 /* -------------- REV/REV16/REV32/RBIT -------------- */ 2763 /* 31 30 28 20 15 11 9 4 2764 2765 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn 2766 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn 2767 2768 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn 2769 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn 2770 2771 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn 2772 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn 2773 2774 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn 2775 */ 2776 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 2777 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) { 2778 UInt b31 = INSN(31,31); 2779 UInt opc = INSN(11,10); 2780 2781 UInt ix = 0; 2782 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1; 2783 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2; 2784 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3; 2785 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4; 2786 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5; 2787 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6; 2788 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7; 2789 if (ix >= 1 && ix <= 7) { 2790 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7; 2791 UInt nn = INSN(9,5); 2792 UInt dd = INSN(4,0); 2793 IRTemp src = newTemp(Ity_I64); 2794 IRTemp dst = IRTemp_INVALID; 2795 IRTemp (*math)(IRTemp) = NULL; 2796 switch (ix) { 2797 case 1: case 2: math = math_BYTESWAP64; break; 2798 case 3: case 4: math = math_BITSWAP64; break; 2799 case 5: case 6: math = math_USHORTSWAP64; break; 2800 case 7: math = math_UINTSWAP64; break; 2801 default: vassert(0); 2802 } 2803 const HChar* names[7] 2804 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" }; 2805 const HChar* nm = names[ix-1]; 2806 vassert(math); 2807 if (ix == 6) { 2808 
/* This has to be special cased, since the logic below doesn't 2809 handle it correctly. */ 2810 assign(src, getIReg64orZR(nn)); 2811 dst = math(src); 2812 putIReg64orZR(dd, 2813 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst)))); 2814 } else if (is64) { 2815 assign(src, getIReg64orZR(nn)); 2816 dst = math(src); 2817 putIReg64orZR(dd, mkexpr(dst)); 2818 } else { 2819 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32))); 2820 dst = math(src); 2821 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 2822 } 2823 DIP("%s %s, %s\n", nm, 2824 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn)); 2825 return True; 2826 } 2827 /* else fall through */ 2828 } 2829 2830 /* -------------------- CLZ/CLS -------------------- */ 2831 /* 30 28 24 20 15 9 4 2832 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn 2833 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn 2834 */ 2835 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 2836 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) { 2837 Bool is64 = INSN(31,31) == 1; 2838 Bool isCLS = INSN(10,10) == 1; 2839 UInt nn = INSN(9,5); 2840 UInt dd = INSN(4,0); 2841 IRTemp src = newTemp(Ity_I64); 2842 IRTemp dst = newTemp(Ity_I64); 2843 if (!isCLS) { // CLS not yet supported 2844 if (is64) { 2845 assign(src, getIReg64orZR(nn)); 2846 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)), 2847 mkU64(64), 2848 unop(Iop_Clz64, mkexpr(src)))); 2849 putIReg64orZR(dd, mkexpr(dst)); 2850 } else { 2851 assign(src, binop(Iop_Shl64, 2852 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32))); 2853 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)), 2854 mkU64(32), 2855 unop(Iop_Clz64, mkexpr(src)))); 2856 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 2857 } 2858 DIP("cl%c %s, %s\n", 2859 isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn)); 2860 return True; 2861 } 2862 } 2863 2864 /* -------------------- LSLV/LSRV/ASRV -------------------- */ 2865 /* 30 28 20 15 11 9 4 2866 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm 2867 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm 2868 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm 2869 */ 2870 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 2871 && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) { 2872 Bool is64 = INSN(31,31) == 1; 2873 UInt mm = INSN(20,16); 2874 UInt op = INSN(11,10); 2875 UInt nn = INSN(9,5); 2876 UInt dd = INSN(4,0); 2877 IRType ty = is64 ? Ity_I64 : Ity_I32; 2878 IRTemp srcL = newTemp(ty); 2879 IRTemp srcR = newTemp(Ity_I8); 2880 IRTemp res = newTemp(ty); 2881 IROp iop = Iop_INVALID; 2882 assign(srcL, getIRegOrZR(is64, nn)); 2883 assign(srcR, 2884 unop(Iop_64to8, 2885 binop(Iop_And64, 2886 getIReg64orZR(mm), mkU64(is64 ? 
63 : 31)))); 2887 switch (op) { 2888 case BITS2(0,0): iop = mkSHL(ty); break; 2889 case BITS2(0,1): iop = mkSHR(ty); break; 2890 case BITS2(1,0): iop = mkSAR(ty); break; 2891 default: vassert(0); 2892 } 2893 assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR))); 2894 putIRegOrZR(is64, dd, mkexpr(res)); 2895 vassert(op < 3); 2896 const HChar* names[3] = { "lslv", "lsrv", "asrv" }; 2897 DIP("%s %s, %s, %s\n", 2898 names[op], nameIRegOrZR(is64,dd), 2899 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm)); 2900 return True; 2901 } 2902 2903 /* -------------------- SDIV/UDIV -------------------- */ 2904 /* 30 28 20 15 10 9 4 2905 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm 2906 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm 2907 */ 2908 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 2909 && INSN(15,11) == BITS5(0,0,0,0,1)) { 2910 Bool is64 = INSN(31,31) == 1; 2911 UInt mm = INSN(20,16); 2912 Bool isS = INSN(10,10) == 1; 2913 UInt nn = INSN(9,5); 2914 UInt dd = INSN(4,0); 2915 if (isS) { 2916 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32, 2917 getIRegOrZR(is64, nn), 2918 getIRegOrZR(is64, mm))); 2919 } else { 2920 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32, 2921 getIRegOrZR(is64, nn), 2922 getIRegOrZR(is64, mm))); 2923 } 2924 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u', 2925 nameIRegOrZR(is64, dd), 2926 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm)); 2927 return True; 2928 } 2929 2930 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */ 2931 /* 31 23 20 15 14 9 4 2932 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa 2933 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa 2934 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa 2935 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa 2936 with operation 2937 Xd = Xa +/- (Wn *u/s Wm) 2938 */ 2939 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) { 2940 Bool isU = INSN(23,23) == 1; 2941 UInt mm = INSN(20,16); 2942 Bool isAdd = INSN(15,15) == 0; 2943 UInt aa = INSN(14,10); 2944 UInt nn = INSN(9,5); 2945 UInt dd = INSN(4,0); 2946 IRTemp wN = newTemp(Ity_I32); 2947 IRTemp wM = newTemp(Ity_I32); 2948 IRTemp xA = newTemp(Ity_I64); 2949 IRTemp muld = newTemp(Ity_I64); 2950 IRTemp res = newTemp(Ity_I64); 2951 assign(wN, getIReg32orZR(nn)); 2952 assign(wM, getIReg32orZR(mm)); 2953 assign(xA, getIReg64orZR(aa)); 2954 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32, 2955 mkexpr(wN), mkexpr(wM))); 2956 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64, 2957 mkexpr(xA), mkexpr(muld))); 2958 putIReg64orZR(dd, mkexpr(res)); 2959 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub", 2960 nameIReg64orZR(dd), nameIReg32orZR(nn), 2961 nameIReg32orZR(mm), nameIReg64orZR(aa)); 2962 return True; 2963 } 2964 vex_printf("ARM64 front end: data_processing_register\n"); 2965 return False; 2966 # undef INSN 2967 } 2968 2969 2970 /*------------------------------------------------------------*/ 2971 /*--- Load and Store instructions ---*/ 2972 /*------------------------------------------------------------*/ 2973 2974 /* Generate the EA for a "reg + reg" style amode. This is done from 2975 parts of the insn, but for sanity checking sake it takes the whole 2976 insn. 
This appears to depend on insn[15:12], with opt=insn[15:13]
2977    and S=insn[12]:
2978
2979    The possible forms, along with their opt:S values, are:
2980       011:0   Xn|SP + Xm
2981       111:0   Xn|SP + Xm
2982       011:1   Xn|SP + Xm * transfer_szB
2983       111:1   Xn|SP + Xm * transfer_szB
2984       010:0   Xn|SP + 32Uto64(Wm)
2985       010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
2986       110:0   Xn|SP + 32Sto64(Wm)
2987       110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
2988
2989    Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
2990    the transfer size is insn[23,31,30].  For integer loads/stores,
2991    insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2992
2993    If the decoding fails, it returns IRTemp_INVALID.
2994
2995    isInt is True iff this decoding is for transfers to/from integer
2996    registers.  If False it is for transfers to/from vector registers.
2997 */
2998 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
2999 {
3000    UInt optS  = SLICE_UInt(insn, 15, 12);
3001    UInt mm    = SLICE_UInt(insn, 20, 16);
3002    UInt nn    = SLICE_UInt(insn, 9, 5);
3003    UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3004                 | SLICE_UInt(insn, 31, 30); // Log2 of the size
3005
3006    buf[0] = 0;
3007
3008    /* Sanity checks, that this really is a load/store insn. */
3009    if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3010       goto fail;
3011
3012    if (isInt
3013        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3014        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3015        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3016        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3017       goto fail;
3018
3019    if (!isInt
3020        && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3021       goto fail;
3022
3023    /* Throw out non-verified but possibly valid cases.
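      For instance, integer transfers arrive here with szLg2 <= 3 (1,
      2, 4 or 8 bytes) and all those sizes pass the switch below;
      szLg2 == 4 would denote a 128-bit transfer, which is only ever
      encodable for the vector case and is rejected for now.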
*/ 3024 switch (szLg2) { 3025 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec 3026 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec 3027 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec 3028 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec 3029 case BITS3(1,0,0): // can only ever be valid for the vector case 3030 if (isInt) goto fail; else goto fail; 3031 case BITS3(1,0,1): // these sizes are never valid 3032 case BITS3(1,1,0): 3033 case BITS3(1,1,1): goto fail; 3034 3035 default: vassert(0); 3036 } 3037 3038 IRExpr* rhs = NULL; 3039 switch (optS) { 3040 case BITS4(1,1,1,0): goto fail; //ATC 3041 case BITS4(0,1,1,0): 3042 rhs = getIReg64orZR(mm); 3043 vex_sprintf(buf, "[%s, %s]", 3044 nameIReg64orZR(nn), nameIReg64orZR(mm)); 3045 break; 3046 case BITS4(1,1,1,1): goto fail; //ATC 3047 case BITS4(0,1,1,1): 3048 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2)); 3049 vex_sprintf(buf, "[%s, %s lsl %u]", 3050 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2); 3051 break; 3052 case BITS4(0,1,0,0): 3053 rhs = unop(Iop_32Uto64, getIReg32orZR(mm)); 3054 vex_sprintf(buf, "[%s, %s uxtx]", 3055 nameIReg64orZR(nn), nameIReg32orZR(mm)); 3056 break; 3057 case BITS4(0,1,0,1): 3058 rhs = binop(Iop_Shl64, 3059 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2)); 3060 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]", 3061 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 3062 break; 3063 case BITS4(1,1,0,0): 3064 rhs = unop(Iop_32Sto64, getIReg32orZR(mm)); 3065 vex_sprintf(buf, "[%s, %s sxtx]", 3066 nameIReg64orZR(nn), nameIReg32orZR(mm)); 3067 break; 3068 case BITS4(1,1,0,1): 3069 rhs = binop(Iop_Shl64, 3070 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2)); 3071 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", 3072 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 3073 break; 3074 default: 3075 /* The rest appear to be genuinely invalid */ 3076 goto fail; 3077 } 3078 3079 vassert(rhs); 3080 IRTemp res = newTemp(Ity_I64); 3081 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); 3082 return res; 3083 3084 fail: 3085 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); 3086 return IRTemp_INVALID; 3087 } 3088 3089 3090 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest 3091 bits of DATAE :: Ity_I64. */ 3092 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE ) 3093 { 3094 IRExpr* addrE = mkexpr(addr); 3095 switch (szB) { 3096 case 8: 3097 storeLE(addrE, dataE); 3098 break; 3099 case 4: 3100 storeLE(addrE, unop(Iop_64to32, dataE)); 3101 break; 3102 case 2: 3103 storeLE(addrE, unop(Iop_64to16, dataE)); 3104 break; 3105 case 1: 3106 storeLE(addrE, unop(Iop_64to8, dataE)); 3107 break; 3108 default: 3109 vassert(0); 3110 } 3111 } 3112 3113 3114 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR, 3115 placing the result in an Ity_I64 temporary. 
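   For example, szB == 2 at an address whose halfword is 0xBEEF yields
   a temporary holding 0x000000000000BEEF.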
*/
3116 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3117 {
3118    IRTemp  res   = newTemp(Ity_I64);
3119    IRExpr* addrE = mkexpr(addr);
3120    switch (szB) {
3121       case 8:
3122          assign(res, loadLE(Ity_I64,addrE));
3123          break;
3124       case 4:
3125          assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3126          break;
3127       case 2:
3128          assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3129          break;
3130       case 1:
3131          assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3132          break;
3133       default:
3134          vassert(0);
3135    }
3136    return res;
3137 }
3138
3139
3140 static
3141 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3142 {
3143 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
3144
3145    /* ------------ LDR,STR (immediate, uimm12) ----------- */
3146    /* uimm12 is scaled by the transfer size
3147
3148       31 29  26    21    9  4
3149       |  |   |     |     |  |
3150       11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
3151       11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
3152
3153       10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
3154       10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
3155
3156       01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
3157       01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
3158
3159       00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
3160       00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
3161    */
3162    if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3163       UInt   szLg2 = INSN(31,30);
3164       UInt   szB   = 1 << szLg2;
3165       Bool   isLD  = INSN(22,22) == 1;
3166       UInt   offs  = INSN(21,10) * szB;
3167       UInt   nn    = INSN(9,5);
3168       UInt   tt    = INSN(4,0);
3169       IRTemp ta    = newTemp(Ity_I64);
3170       assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3171       if (nn == 31) { /* FIXME generate stack alignment check */ }
3172       vassert(szLg2 < 4);
3173       if (isLD) {
3174          putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3175       } else {
3176          gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3177       }
3178       const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3179       const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3180       DIP("%s %s, [%s, #%u]\n",
3181           (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3182           nameIReg64orSP(nn), offs);
3183       return True;
3184    }
3185
3186    /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3187    /*
3188       31 29  26      20   11 9  4
3189       |  |   |       |    |  |  |
3190       (at-Rn-then-Rn=EA)  |  |  |
3191       sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
3192       sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
3193
3194       (at-EA-then-Rn=EA)
3195       sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
3196       sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
3197
3198       (at-EA)
3199       sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
3200       sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
3201
3202       simm9 is unscaled.
3203
3204       The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
3205       load case this is because it would create two competing values
3206       for Rt.  In the store case the reason is unclear, but the spec
3207       disallows it anyway.
3208
3209       Stores are narrowing, loads are unsigned widening.  sz encodes
3210       the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
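
      So, for example, with sz == 11 (8-byte transfers) and X1 ==
      0x1000 (hypothetical values):
         ldr  x0, [x1], #8     loads from 0x1000, then sets X1 = 0x1008
         ldr  x0, [x1, #8]!    sets X1 = 0x1008, then loads from 0x1008
         ldur x0, [x1, #8]     loads from 0x1008; X1 is unchanged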
3211    */
3212    if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3213                    == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3214       UInt szLg2  = INSN(31,30);
3215       UInt szB    = 1 << szLg2;
3216       Bool isLoad = INSN(22,22) == 1;
3217       UInt imm9   = INSN(20,12);
3218       UInt nn     = INSN(9,5);
3219       UInt tt     = INSN(4,0);
3220       Bool wBack  = INSN(10,10) == 1;
3221       UInt how    = INSN(11,10);
3222       if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3223          /* undecodable; fall through */
3224       } else {
3225          if (nn == 31) { /* FIXME generate stack alignment check */ }
3226
3227          // Compute the transfer address TA and the writeback address WA.
3228          IRTemp tRN = newTemp(Ity_I64);
3229          assign(tRN, getIReg64orSP(nn));
3230          IRTemp tEA = newTemp(Ity_I64);
3231          Long simm9 = (Long)sx_to_64(imm9, 9);
3232          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3233
3234          IRTemp tTA = newTemp(Ity_I64);
3235          IRTemp tWA = newTemp(Ity_I64);
3236          switch (how) {
3237             case BITS2(0,1):
3238                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3239             case BITS2(1,1):
3240                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3241             case BITS2(0,0):
3242                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3243             default:
3244                vassert(0); /* NOTREACHED */
3245          }
3246
3247          /* Normally rN would be updated after the transfer.  However, in
3248             the special case typified by
3249                str x30, [sp,#-16]!
3250             it is necessary to update SP before the transfer, (1)
3251             because Memcheck will otherwise complain about a write
3252             below the stack pointer, and (2) because the segfault
3253             stack extension mechanism will otherwise extend the stack
3254             only down to SP before the instruction, which might not be
3255             far enough, if the -16 offset takes the actual access
3256             address to the next page.
3257          */
3258          Bool earlyWBack
3259             = wBack && simm9 < 0 && szB == 8
3260               && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3261
3262          if (wBack && earlyWBack)
3263             putIReg64orSP(nn, mkexpr(tEA));
3264
3265          if (isLoad) {
3266             putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3267          } else {
3268             gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3269          }
3270
3271          if (wBack && !earlyWBack)
3272             putIReg64orSP(nn, mkexpr(tEA));
3273
3274          const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3275          const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3276          const HChar* fmt_str = NULL;
3277          switch (how) {
3278             case BITS2(0,1):
3279                fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3280                break;
3281             case BITS2(1,1):
3282                fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3283                break;
3284             case BITS2(0,0):
3285                fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3286                break;
3287             default:
3288                vassert(0);
3289          }
3290          DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3291              nameIRegOrZR(szB == 8, tt),
3292              nameIReg64orSP(nn), simm9);
3293          return True;
3294       }
3295    }
3296
3297    /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3298    /* L==1    => mm==LD
3299       L==0    => mm==ST
3300       x==0    => 32 bit transfers, and zero extended loads
3301       x==1    => 64 bit transfers
3302       simm7 is scaled by the (single-register) transfer size
3303
3304       (at-Rn-then-Rn=EA)
3305       x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
3306
3307       (at-EA-then-Rn=EA)
3308       x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
3309
3310       (at-EA)
3311       x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
3312    */
3313
3314    UInt insn_30_23 = INSN(30,23);
3315    if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3316        || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3317        || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3318       UInt bL     = INSN(22,22);
3319       UInt bX     = INSN(31,31);
3320       UInt bWBack = INSN(23,23);
3321       UInt rT1    = INSN(4,0);
3322       UInt rN     = INSN(9,5);
3323       UInt rT2    = INSN(14,10);
3324       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
3325       if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3326           || (bL && rT1 == rT2)) {
3327          /* undecodable; fall through */
3328       } else {
3329          if (rN == 31) { /* FIXME generate stack alignment check */ }
3330
3331          // Compute the transfer address TA and the writeback address WA.
3332          IRTemp tRN = newTemp(Ity_I64);
3333          assign(tRN, getIReg64orSP(rN));
3334          IRTemp tEA = newTemp(Ity_I64);
3335          simm7 = (bX ? 8 : 4) * simm7;
3336          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3337
3338          IRTemp tTA = newTemp(Ity_I64);
3339          IRTemp tWA = newTemp(Ity_I64);
3340          switch (INSN(24,23)) {
3341             case BITS2(0,1):
3342                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3343             case BITS2(1,1):
3344                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3345             case BITS2(1,0):
3346                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3347             default:
3348                vassert(0); /* NOTREACHED */
3349          }
3350
3351          /* Normally rN would be updated after the transfer.  However, in
3352             the special case typified by
3353                stp x29, x30, [sp,#-112]!
3354             it is necessary to update SP before the transfer, (1)
3355             because Memcheck will otherwise complain about a write
3356             below the stack pointer, and (2) because the segfault
3357             stack extension mechanism will otherwise extend the stack
3358             only down to SP before the instruction, which might not be
3359             far enough, if the -112 offset takes the actual access
3360             address to the next page.
3361          */
3362          Bool earlyWBack
3363             = bWBack && simm7 < 0
3364               && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3365
3366          if (bWBack && earlyWBack)
3367             putIReg64orSP(rN, mkexpr(tEA));
3368
3369          /**/ if (bL == 1 && bX == 1) {
3370             // 64 bit load
3371             putIReg64orZR(rT1, loadLE(Ity_I64,
3372                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3373             putIReg64orZR(rT2, loadLE(Ity_I64,
3374                                       binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3375          } else if (bL == 1 && bX == 0) {
3376             // 32 bit load
3377             putIReg32orZR(rT1, loadLE(Ity_I32,
3378                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3379             putIReg32orZR(rT2, loadLE(Ity_I32,
3380                                       binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3381          } else if (bL == 0 && bX == 1) {
3382             // 64 bit store
3383             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3384                     getIReg64orZR(rT1));
3385             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3386                     getIReg64orZR(rT2));
3387          } else {
3388             vassert(bL == 0 && bX == 0);
3389             // 32 bit store
3390             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3391                     getIReg32orZR(rT1));
3392             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3393                     getIReg32orZR(rT2));
3394          }
3395
3396          if (bWBack && !earlyWBack)
3397             putIReg64orSP(rN, mkexpr(tEA));
3398
3399          const HChar* fmt_str = NULL;
3400          switch (INSN(24,23)) {
3401             case BITS2(0,1):
3402                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3403                break;
3404             case BITS2(1,1):
3405                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3406                break;
3407             case BITS2(1,0):
3408                fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3409                break;
3410             default:
3411                vassert(0);
3412          }
3413          DIP(fmt_str, bL == 0 ?
"st" : "ld", 3414 nameIRegOrZR(bX == 1, rT1), 3415 nameIRegOrZR(bX == 1, rT2), 3416 nameIReg64orSP(rN), simm7); 3417 return True; 3418 } 3419 } 3420 3421 /* ---------------- LDR (literal, int reg) ---------------- */ 3422 /* 31 29 23 4 3423 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] 3424 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] 3425 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] 3426 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] 3427 Just handles the first two cases for now. 3428 */ 3429 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { 3430 UInt imm19 = INSN(23,5); 3431 UInt rT = INSN(4,0); 3432 UInt bX = INSN(30,30); 3433 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 3434 if (bX) { 3435 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); 3436 } else { 3437 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); 3438 } 3439 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); 3440 return True; 3441 } 3442 3443 /* -------------- {LD,ST}R (integer register) --------------- */ 3444 /* 31 29 20 15 12 11 9 4 3445 | | | | | | | | 3446 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}] 3447 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}] 3448 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}] 3449 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}] 3450 3451 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}] 3452 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}] 3453 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}] 3454 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}] 3455 */ 3456 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) 3457 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 3458 HChar dis_buf[64]; 3459 UInt szLg2 = INSN(31,30); 3460 Bool isLD = INSN(22,22) == 1; 3461 UInt tt = INSN(4,0); 3462 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 3463 if (ea != IRTemp_INVALID) { 3464 switch (szLg2) { 3465 case 3: /* 64 bit */ 3466 if (isLD) { 3467 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); 3468 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); 3469 } else { 3470 storeLE(mkexpr(ea), getIReg64orZR(tt)); 3471 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); 3472 } 3473 break; 3474 case 2: /* 32 bit */ 3475 if (isLD) { 3476 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); 3477 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); 3478 } else { 3479 storeLE(mkexpr(ea), getIReg32orZR(tt)); 3480 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); 3481 } 3482 break; 3483 case 1: /* 16 bit */ 3484 if (isLD) { 3485 putIReg64orZR(tt, unop(Iop_16Uto64, 3486 loadLE(Ity_I16, mkexpr(ea)))); 3487 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); 3488 } else { 3489 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); 3490 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); 3491 } 3492 break; 3493 case 0: /* 8 bit */ 3494 if (isLD) { 3495 putIReg64orZR(tt, unop(Iop_8Uto64, 3496 loadLE(Ity_I8, mkexpr(ea)))); 3497 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); 3498 } else { 3499 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); 3500 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); 3501 } 3502 break; 3503 default: 3504 vassert(0); 3505 } 3506 return True; 3507 } 3508 } 3509 3510 /* -------------- LDRS{B,H,W} (uimm12) -------------- */ 3511 /* 31 29 26 23 21 9 4 3512 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] 3513 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] 3514 00 111 001 1x imm12 n 
   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29  23 21 20   11 9 4
      00 111 000 1x 0 imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0 imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0 imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0 imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0 imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0 imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
              byte load,     SX to 64
              byte load,     SX to 32, ZX to 64
              halfword load, SX to 64
              halfword load, SX to 32, ZX to 64
              word load,     SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29  23 21 20   11 9 4
      00 111 000 1x 0 imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0 imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0 imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
              byte load,     SX to 64
              byte load,     SX to 32, ZX to 64
              halfword load, SX to 64
              halfword load, SX to 32, ZX to 64
              word load,     SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  22 21   14 9 4
      sz 101 1001 L imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
      (at-Rn-then-Rn=EA)

      sz 101 1011 L imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
      (at-EA-then-Rn=EA)

      sz 101 1010 L imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
      (at-EA)
   */
   UInt insn_29_23 = INSN(29,23);
   if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,1)
       || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
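         /* Editor's sketch of the problem: with SP == 0x1000,
               stp q0, q1, [sp, #-512]!
            stores 32 bytes at 0xE00..0xE1F.  If SP were still 0x1000
            when the stores happened, Memcheck would see two writes
            below the stack pointer; writing SP = 0xE00 first keeps
            both accesses above it. */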
         Bool earlyWBack
            = wBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S 10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S 10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S 10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S 10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S 10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S 10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S 10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S 10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S 10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S 10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 >= 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               vassert(0); //ATC
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:  return False; //ATC
         default: vassert(0);
      }
      return True;
   }
   after_LDR_STR_vector_register:

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
   after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29  23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
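   /* Editor's worked example: "ldr q0, [x1, #32]" has szLg2 == 4, so
      the encoded imm12 is 2 and the effective offset is 2 << 4 == 32;
      this form can only express non-negative multiples of the
      transfer size. */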
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }

   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29  23   20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ?
"ldr" : "str", 4073 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9); 4074 return True; 4075 } 4076 4077 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ 4078 /* 31 29 23 20 11 9 4 4079 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] 4080 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] 4081 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] 4082 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] 4083 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] 4084 4085 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] 4086 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] 4087 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] 4088 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] 4089 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] 4090 */ 4091 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 4092 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 4093 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 4094 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 4095 Bool isLD = INSN(22,22) == 1; 4096 UInt imm9 = INSN(20,12); 4097 UInt nn = INSN(9,5); 4098 UInt tt = INSN(4,0); 4099 ULong simm9 = sx_to_64(imm9, 9); 4100 IRTemp tEA = newTemp(Ity_I64); 4101 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 4102 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); 4103 if (isLD) { 4104 if (szLg2 < 4) { 4105 putQReg128(tt, mkV128(0x0000)); 4106 } 4107 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 4108 } else { 4109 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 4110 } 4111 DIP("%s %s, [%s, #%lld]\n", 4112 isLD ? "ldur" : "stur", 4113 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 4114 return True; 4115 } 4116 4117 /* ---------------- LDR (literal, SIMD&FP) ---------------- */ 4118 /* 31 29 23 4 4119 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] 4120 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] 4121 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] 4122 */ 4123 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { 4124 UInt szB = 4 << INSN(31,30); 4125 UInt imm19 = INSN(23,5); 4126 UInt tt = INSN(4,0); 4127 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 4128 IRType ty = preferredVectorSubTypeFromSize(szB); 4129 putQReg128(tt, mkV128(0x0000)); 4130 putQRegLO(tt, loadLE(ty, mkU64(ea))); 4131 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); 4132 return True; 4133 } 4134 4135 /* ---------- LD1/ST1 (single structure, no offset) ---------- */ 4136 /* 31 23 15 4137 0Q00 1101 0L00 0000 xx0S sz N T 4138 ---- 4139 opcode 4140 1011 1111 1011 1111 0010 00 0 0 <- mask 4141 0000 1101 0000 0000 0000 00 0 0 <- result 4142 4143 FIXME does this assume that the host is little endian? 
   if ((insn & 0xBFBF2000) == 0x0D000000) {
      Bool isLD   = INSN(22,22) == 1;
      UInt rN     = INSN(9,5);
      UInt vT     = INSN(4,0);
      UInt q      = INSN(30, 30);
      UInt opcode = INSN(15, 13);
      UInt s      = INSN(12, 12);
      UInt sz     = INSN(11, 10);

      UInt index = (q << 3) | (s << 2) | sz;
      const HChar* name = "";
      Bool   valid  = False;
      IRType laneTy = Ity_I8;

      if (opcode == 0x0) { // 8 bit variant
         name = "b";
         valid = True;
      } else if (opcode == 0x2 && (sz & 1) == 0) { // 16 bit variant
         name = "h";
         laneTy = Ity_I16;
         index >>= 1;
         valid = True;
      } else if (opcode == 0x4 && sz == 0x0) { // 32 bit variant
         name = "s";
         laneTy = Ity_I32;
         index >>= 2;
         valid = True;
      } else if (opcode == 0x4 && sz == 0x1 && s == 0) { // 64 bit variant
         name = "d";
         laneTy = Ity_I64;
         index >>= 3;
         valid = True;
      }

      if (valid) {
         IRTemp tEA = newTemp(Ity_I64);
         assign(tEA, getIReg64orSP(rN));
         if (rN == 31) { /* FIXME generate stack alignment check */ }
         if (isLD) {
            putQRegLane(vT, index, loadLE(laneTy, mkexpr(tEA)));
         } else {
            storeLE(mkexpr(tEA), getQRegLane(vT, index, laneTy));
         }

         DIP("%s {v%u.%s}[%u], [%s]\n", isLD ? "ld1" : "st1",
             vT, name, index, nameIReg64orSP(rN));
         return True;
      }
   }

   /* ---------- LD1/ST1 (multiple structure, no offset, one register variant) ---------- */
   /* 31        23
      0100 1100 0100 0000 0111 11 N T   LD1 {vT.2d},  [Xn|SP]
      0100 1100 0000 0000 0111 11 N T   ST1 {vT.2d},  [Xn|SP]
      0100 1100 0100 0000 0111 10 N T   LD1 {vT.4s},  [Xn|SP]
      0100 1100 0000 0000 0111 10 N T   ST1 {vT.4s},  [Xn|SP]
      0100 1100 0100 0000 0111 01 N T   LD1 {vT.8h},  [Xn|SP]
      0100 1100 0000 0000 0111 01 N T   ST1 {vT.8h},  [Xn|SP]
      0100 1100 0100 0000 0111 00 N T   LD1 {vT.16b}, [Xn|SP]
      0100 1100 0000 0000 0111 00 N T   ST1 {vT.16b}, [Xn|SP]
      FIXME does this assume that the host is little endian?
   */
   if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
        || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
      const HChar* name = names[INSN(11,10)];
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      if (isLD) {
         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQReg128(vT));
      }
      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
          vT, name, nameIReg64orSP(rN));
      return True;
   }

   /* 31        23
      0000 1100 0100 0000 0111 11 N T   LD1 {vT.1d}, [Xn|SP]
      0000 1100 0000 0000 0111 11 N T   ST1 {vT.1d}, [Xn|SP]
      0000 1100 0100 0000 0111 10 N T   LD1 {vT.2s}, [Xn|SP]
      0000 1100 0000 0000 0111 10 N T   ST1 {vT.2s}, [Xn|SP]
      0000 1100 0100 0000 0111 01 N T   LD1 {vT.4h}, [Xn|SP]
      0000 1100 0000 0000 0111 01 N T   ST1 {vT.4h}, [Xn|SP]
      0000 1100 0100 0000 0111 00 N T   LD1 {vT.8b}, [Xn|SP]
      0000 1100 0000 0000 0111 00 N T   ST1 {vT.8b}, [Xn|SP]
      FIXME does this assume that the host is little endian?
   */
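   /* Editor's note: for these 64-bit-total forms, a load such as
      "ld1 {v3.8b}, [x0]" fills only the low half of the register; the
      putQRegLane(vT, 1, mkU64(0)) below zeroes bits 127:64, as the
      architecture requires. */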
   if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
        || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
      const HChar* name = names[INSN(11,10)];
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      if (isLD) {
         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
         putQRegLane(vT, 1, mkU64(0));
      } else {
         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
      }
      DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
          vT, name, nameIReg64orSP(rN));
      return True;
   }

   /* ---------- LD1/ST1 (multiple structure, post-index, one register variant) ---------- */
   /* 31        23
      0100 1100 1001 1111 0111 11 N T  ST1 {vT.2d},  [xN|SP], #16
      0100 1100 1101 1111 0111 11 N T  LD1 {vT.2d},  [xN|SP], #16
      0100 1100 1001 1111 0111 10 N T  ST1 {vT.4s},  [xN|SP], #16
      0100 1100 1101 1111 0111 10 N T  LD1 {vT.4s},  [xN|SP], #16
      0100 1100 1001 1111 0111 01 N T  ST1 {vT.8h},  [xN|SP], #16
      0100 1100 1101 1111 0111 01 N T  LD1 {vT.8h},  [xN|SP], #16
      0100 1100 1001 1111 0111 00 N T  ST1 {vT.16b}, [xN|SP], #16
      0100 1100 1101 1111 0111 00 N T  LD1 {vT.16b}, [xN|SP], #16
      Note that #16 is implied and cannot be any other value.
      FIXME does this assume that the host is little endian?
   */
   if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
        || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      const HChar* names[4] = { "2d", "4s", "8h", "16b" };
      const HChar* name = names[INSN(11,10)];
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      if (isLD) {
         putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQReg128(vT));
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
      DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
          vT, name, nameIReg64orSP(rN));
      return True;
   }

   /* 31        23
      0000 1100 1001 1111 0111 11 N T  ST1 {vT.1d}, [xN|SP], #8
      0000 1100 1101 1111 0111 11 N T  LD1 {vT.1d}, [xN|SP], #8
      0000 1100 1001 1111 0111 10 N T  ST1 {vT.2s}, [xN|SP], #8
      0000 1100 1101 1111 0111 10 N T  LD1 {vT.2s}, [xN|SP], #8
      0000 1100 1001 1111 0111 01 N T  ST1 {vT.4h}, [xN|SP], #8
      0000 1100 1101 1111 0111 01 N T  LD1 {vT.4h}, [xN|SP], #8
      0000 1100 1001 1111 0111 00 N T  ST1 {vT.8b}, [xN|SP], #8
      0000 1100 1101 1111 0111 00 N T  LD1 {vT.8b}, [xN|SP], #8
      Note that #8 is implied and cannot be any other value.
      FIXME does this assume that the host is little endian?
   */
   if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
        || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      const HChar* names[4] = { "1d", "2s", "4h", "8b" };
      const HChar* name = names[INSN(11,10)];
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      if (isLD) {
         putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
         putQRegLane(vT, 1, mkU64(0));
      } else {
         storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
      DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
          vT, name, nameIReg64orSP(rN));
      return True;
   }

   /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
   /* Only a very few cases. */
   /* 31        23                11 9 4
      0100 1100 1101 1111 1000 11 n  t  LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
      0100 1100 1001 1111 1000 11 n  t  ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
      0100 1100 1101 1111 1000 10 n  t  LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
      0100 1100 1001 1111 1000 10 n  t  ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
   */
   if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
        || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
        || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
        || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      UInt   sz   = INSN(11,10);
      const HChar* name = "??";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
      if (sz == BITS2(1,1)) {
         name = "2d";
         if (isLD) {
            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
         } else {
            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
            storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
            storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
         }
      }
      else if (sz == BITS2(1,0)) {
         /* Uh, this is ugly.  TODO: better. */
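         /* Editor's sketch of the layout: the .4s pair is interleaved
            in memory, so bytes [EA+0..3] go to Vt.s[0], [EA+4..7] to
            V(t+1).s[0], [EA+8..11] to Vt.s[1], and so on, matching
            the lane assignments below. */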
         name = "4s";
         IRExpr* tEA_4  = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
         IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
         IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
         IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
         if (isLD) {
            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
            putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
            putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
            putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
            putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
         } else {
            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I32));
            storeLE(tEA_8,  getQRegLane((vT+0) % 32, 1, Ity_I32));
            storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
            storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
            storeLE(tEA_4,  getQRegLane((vT+1) % 32, 0, Ity_I32));
            storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
            storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
            storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
         }
      }
      else {
         vassert(0); // Can't happen.
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
      return True;
   }

   /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
   /* Only a very few cases. */
   /* 31        23
      0100 1100 0100 0000 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
      0100 1100 0000 0000 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
   */
   if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
        || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
      ) {
      Bool   isLD = INSN(22,22) == 1;
      UInt   rN   = INSN(9,5);
      UInt   vT   = INSN(4,0);
      IRTemp tEA  = newTemp(Ity_I64);
      const HChar* name = "16b";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      if (isLD) {
         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
      } else {
         storeLE(tEA_0,  getQReg128((vT+0) % 32));
         storeLE(tEA_16, getQReg128((vT+1) % 32));
      }
      DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ?
"ld1" : "st1", 4436 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN)); 4437 return True; 4438 } 4439 4440 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ 4441 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ 4442 /* 31 29 23 20 14 9 4 4443 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] 4444 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] 4445 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] 4446 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] 4447 */ 4448 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) 4449 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) 4450 && INSN(14,10) == BITS5(1,1,1,1,1)) { 4451 UInt szBlg2 = INSN(31,30); 4452 Bool isLD = INSN(22,22) == 1; 4453 Bool isAcqOrRel = INSN(15,15) == 1; 4454 UInt ss = INSN(20,16); 4455 UInt nn = INSN(9,5); 4456 UInt tt = INSN(4,0); 4457 4458 vassert(szBlg2 < 4); 4459 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 4460 IRType ty = integerIRTypeOfSize(szB); 4461 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 4462 4463 IRTemp ea = newTemp(Ity_I64); 4464 assign(ea, getIReg64orSP(nn)); 4465 /* FIXME generate check that ea is szB-aligned */ 4466 4467 if (isLD && ss == BITS5(1,1,1,1,1)) { 4468 IRTemp res = newTemp(ty); 4469 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); 4470 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 4471 if (isAcqOrRel) { 4472 stmt(IRStmt_MBE(Imbe_Fence)); 4473 } 4474 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 4475 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4476 return True; 4477 } 4478 if (!isLD) { 4479 if (isAcqOrRel) { 4480 stmt(IRStmt_MBE(Imbe_Fence)); 4481 } 4482 IRTemp res = newTemp(Ity_I1); 4483 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 4484 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); 4485 /* IR semantics: res is 1 if store succeeds, 0 if it fails. 4486 Need to set rS to 1 on failure, 0 on success. */ 4487 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), 4488 mkU64(1))); 4489 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? 
"a" : "", suffix[szBlg2], 4490 nameIRegOrZR(False, ss), 4491 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4492 return True; 4493 } 4494 /* else fall through */ 4495 } 4496 4497 /* ------------------ LDA{R,RH,RB} ------------------ */ 4498 /* ------------------ STL{R,RH,RB} ------------------ */ 4499 /* 31 29 23 20 14 9 4 4500 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP] 4501 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP] 4502 */ 4503 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) 4504 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { 4505 UInt szBlg2 = INSN(31,30); 4506 Bool isLD = INSN(22,22) == 1; 4507 UInt nn = INSN(9,5); 4508 UInt tt = INSN(4,0); 4509 4510 vassert(szBlg2 < 4); 4511 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 4512 IRType ty = integerIRTypeOfSize(szB); 4513 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 4514 4515 IRTemp ea = newTemp(Ity_I64); 4516 assign(ea, getIReg64orSP(nn)); 4517 /* FIXME generate check that ea is szB-aligned */ 4518 4519 if (isLD) { 4520 IRTemp res = newTemp(ty); 4521 assign(res, loadLE(ty, mkexpr(ea))); 4522 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 4523 stmt(IRStmt_MBE(Imbe_Fence)); 4524 DIP("lda%s %s, [%s]\n", suffix[szBlg2], 4525 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4526 } else { 4527 stmt(IRStmt_MBE(Imbe_Fence)); 4528 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 4529 storeLE(mkexpr(ea), data); 4530 DIP("stl%s %s, [%s]\n", suffix[szBlg2], 4531 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 4532 } 4533 return True; 4534 } 4535 4536 /* ------------------ PRFM (immediate) ------------------ */ 4537 /* 31 29 21 9 4 4538 11 11100110 imm12 n t PRFM <option>, [Xn|SP{, #pimm}] 4539 */ 4540 4541 if (INSN(31, 22) == BITS10(1,1,1,1,1,0,0,1,1,0)) { 4542 /* TODO: decode */ 4543 DIP("prfm ??? 
(imm)"); 4544 return True; 4545 } 4546 4547 vex_printf("ARM64 front end: load_store\n"); 4548 return False; 4549 # undef INSN 4550 } 4551 4552 4553 /*------------------------------------------------------------*/ 4554 /*--- Control flow and misc instructions ---*/ 4555 /*------------------------------------------------------------*/ 4556 4557 static 4558 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn, 4559 VexArchInfo* archinfo) 4560 { 4561 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 4562 4563 /* ---------------------- B cond ----------------------- */ 4564 /* 31 24 4 3 4565 0101010 0 imm19 0 cond */ 4566 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) { 4567 UInt cond = INSN(3,0); 4568 ULong uimm64 = INSN(23,5) << 2; 4569 Long simm64 = (Long)sx_to_64(uimm64, 21); 4570 vassert(dres->whatNext == Dis_Continue); 4571 vassert(dres->len == 4); 4572 vassert(dres->continueAt == 0); 4573 vassert(dres->jk_StopHere == Ijk_INVALID); 4574 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 4575 Ijk_Boring, 4576 IRConst_U64(guest_PC_curr_instr + simm64), 4577 OFFB_PC) ); 4578 putPC(mkU64(guest_PC_curr_instr + 4)); 4579 dres->whatNext = Dis_StopHere; 4580 dres->jk_StopHere = Ijk_Boring; 4581 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64); 4582 return True; 4583 } 4584 4585 /* -------------------- B{L} uncond -------------------- */ 4586 if (INSN(30,26) == BITS5(0,0,1,0,1)) { 4587 /* 000101 imm26 B (PC + sxTo64(imm26 << 2)) 4588 100101 imm26 B (PC + sxTo64(imm26 << 2)) 4589 */ 4590 UInt bLink = INSN(31,31); 4591 ULong uimm64 = INSN(25,0) << 2; 4592 Long simm64 = (Long)sx_to_64(uimm64, 28); 4593 if (bLink) { 4594 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 4595 } 4596 putPC(mkU64(guest_PC_curr_instr + simm64)); 4597 dres->whatNext = Dis_StopHere; 4598 dres->jk_StopHere = Ijk_Call; 4599 DIP("b%s 0x%llx\n", bLink == 1 ? 
"l" : "", 4600 guest_PC_curr_instr + simm64); 4601 return True; 4602 } 4603 4604 /* --------------------- B{L} reg --------------------- */ 4605 /* 31 24 22 20 15 9 4 4606 1101011 00 10 11111 000000 nn 00000 RET Rn 4607 1101011 00 01 11111 000000 nn 00000 CALL Rn 4608 1101011 00 00 11111 000000 nn 00000 JMP Rn 4609 */ 4610 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0) 4611 && INSN(20,16) == BITS5(1,1,1,1,1) 4612 && INSN(15,10) == BITS6(0,0,0,0,0,0) 4613 && INSN(4,0) == BITS5(0,0,0,0,0)) { 4614 UInt branch_type = INSN(22,21); 4615 UInt nn = INSN(9,5); 4616 if (branch_type == BITS2(1,0) /* RET */) { 4617 putPC(getIReg64orZR(nn)); 4618 dres->whatNext = Dis_StopHere; 4619 dres->jk_StopHere = Ijk_Ret; 4620 DIP("ret %s\n", nameIReg64orZR(nn)); 4621 return True; 4622 } 4623 if (branch_type == BITS2(0,1) /* CALL */) { 4624 IRTemp dst = newTemp(Ity_I64); 4625 assign(dst, getIReg64orZR(nn)); 4626 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 4627 putPC(mkexpr(dst)); 4628 dres->whatNext = Dis_StopHere; 4629 dres->jk_StopHere = Ijk_Call; 4630 DIP("blr %s\n", nameIReg64orZR(nn)); 4631 return True; 4632 } 4633 if (branch_type == BITS2(0,0) /* JMP */) { 4634 putPC(getIReg64orZR(nn)); 4635 dres->whatNext = Dis_StopHere; 4636 dres->jk_StopHere = Ijk_Boring; 4637 DIP("jmp %s\n", nameIReg64orZR(nn)); 4638 return True; 4639 } 4640 } 4641 4642 /* -------------------- CB{N}Z -------------------- */ 4643 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 4644 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 4645 */ 4646 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) { 4647 Bool is64 = INSN(31,31) == 1; 4648 Bool bIfZ = INSN(24,24) == 0; 4649 ULong uimm64 = INSN(23,5) << 2; 4650 UInt rT = INSN(4,0); 4651 Long simm64 = (Long)sx_to_64(uimm64, 21); 4652 IRExpr* cond = NULL; 4653 if (is64) { 4654 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 4655 getIReg64orZR(rT), mkU64(0)); 4656 } else { 4657 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32, 4658 getIReg32orZR(rT), mkU32(0)); 4659 } 4660 stmt( IRStmt_Exit(cond, 4661 Ijk_Boring, 4662 IRConst_U64(guest_PC_curr_instr + simm64), 4663 OFFB_PC) ); 4664 putPC(mkU64(guest_PC_curr_instr + 4)); 4665 dres->whatNext = Dis_StopHere; 4666 dres->jk_StopHere = Ijk_Boring; 4667 DIP("cb%sz %s, 0x%llx\n", 4668 bIfZ ? "" : "n", nameIRegOrZR(is64, rT), 4669 guest_PC_curr_instr + simm64); 4670 return True; 4671 } 4672 4673 /* -------------------- TB{N}Z -------------------- */ 4674 /* 31 30 24 23 18 5 4 4675 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 4676 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 4677 */ 4678 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) { 4679 UInt b5 = INSN(31,31); 4680 Bool bIfZ = INSN(24,24) == 0; 4681 UInt b40 = INSN(23,19); 4682 UInt imm14 = INSN(18,5); 4683 UInt tt = INSN(4,0); 4684 UInt bitNo = (b5 << 5) | b40; 4685 ULong uimm64 = imm14 << 2; 4686 Long simm64 = sx_to_64(uimm64, 16); 4687 IRExpr* cond 4688 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 4689 binop(Iop_And64, 4690 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)), 4691 mkU64(1)), 4692 mkU64(0)); 4693 stmt( IRStmt_Exit(cond, 4694 Ijk_Boring, 4695 IRConst_U64(guest_PC_curr_instr + simm64), 4696 OFFB_PC) ); 4697 putPC(mkU64(guest_PC_curr_instr + 4)); 4698 dres->whatNext = Dis_StopHere; 4699 dres->jk_StopHere = Ijk_Boring; 4700 DIP("tb%sz %s, #%u, 0x%llx\n", 4701 bIfZ ? 
"" : "n", nameIReg64orZR(tt), bitNo, 4702 guest_PC_curr_instr + simm64); 4703 return True; 4704 } 4705 4706 /* -------------------- SVC -------------------- */ 4707 /* 11010100 000 imm16 000 01 4708 Don't bother with anything except the imm16==0 case. 4709 */ 4710 if (INSN(31,0) == 0xD4000001) { 4711 putPC(mkU64(guest_PC_curr_instr + 4)); 4712 dres->whatNext = Dis_StopHere; 4713 dres->jk_StopHere = Ijk_Sys_syscall; 4714 DIP("svc #0\n"); 4715 return True; 4716 } 4717 4718 /* ------------------ M{SR,RS} ------------------ */ 4719 /* Only handles the case where the system register is TPIDR_EL0. 4720 0xD51BD0 010 Rt MSR tpidr_el0, rT 4721 0xD53BD0 010 Rt MRS rT, tpidr_el0 4722 */ 4723 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/ 4724 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) { 4725 Bool toSys = INSN(21,21) == 0; 4726 UInt tt = INSN(4,0); 4727 if (toSys) { 4728 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) ); 4729 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt)); 4730 } else { 4731 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 )); 4732 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt)); 4733 } 4734 return True; 4735 } 4736 /* Cases for FPCR 4737 0xD51B44 000 Rt MSR fpcr, rT 4738 0xD53B44 000 Rt MSR rT, fpcr 4739 */ 4740 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/ 4741 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) { 4742 Bool toSys = INSN(21,21) == 0; 4743 UInt tt = INSN(4,0); 4744 if (toSys) { 4745 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) ); 4746 DIP("msr fpcr, %s\n", nameIReg64orZR(tt)); 4747 } else { 4748 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32)); 4749 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt)); 4750 } 4751 return True; 4752 } 4753 /* Cases for FPSR 4754 0xD51B44 001 Rt MSR fpsr, rT 4755 0xD53B44 001 Rt MSR rT, fpsr 4756 */ 4757 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/ 4758 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) { 4759 Bool toSys = INSN(21,21) == 0; 4760 UInt tt = INSN(4,0); 4761 if (toSys) { 4762 stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) ); 4763 DIP("msr fpsr, %s\n", nameIReg64orZR(tt)); 4764 } else { 4765 putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32)); 4766 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt)); 4767 } 4768 return True; 4769 } 4770 /* Cases for NZCV 4771 D51B42 000 Rt MSR nzcv, rT 4772 D53B42 000 Rt MRS rT, nzcv 4773 */ 4774 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/ 4775 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) { 4776 Bool toSys = INSN(21,21) == 0; 4777 UInt tt = INSN(4,0); 4778 if (toSys) { 4779 IRTemp t = newTemp(Ity_I64); 4780 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL))); 4781 setFlags_COPY(t); 4782 DIP("msr %s, nzcv\n", nameIReg32orZR(tt)); 4783 } else { 4784 IRTemp res = newTemp(Ity_I64); 4785 assign(res, mk_arm64g_calculate_flags_nzcv()); 4786 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res))); 4787 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt)); 4788 } 4789 return True; 4790 } 4791 /* Cases for DCZID_EL0 4792 Don't support arbitrary reads and writes to this register. Just 4793 return the value 16, which indicates that the DC ZVA instruction 4794 is not permitted, so we don't have to emulate it. 
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* Cases for CTR_EL0
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
         CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
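      /* Editor's worked example: with arm64_dMinLine_lg2_szB == 6,
         lineszB == 64 and the mask is ~63ULL, so a request for
         address 0x1234567 flushes the line starting at 0x1234540. */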
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   if (INSN(31,0) == 0xD5033FDF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("isb\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033BBF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ish\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033ABF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ishst\n");
      return True;
   }
   if (INSN(31,0) == 0xD50339BF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ishld\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033B9F) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dsb ish\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033F9F) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dsb sy\n");
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

/* begin FIXME -- rm temp scaffolding */
static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes64x2  ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes32x4  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes16x8  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
static IRExpr* mk_CatOddLanes8x16  ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
/* end FIXME -- rm temp scaffolding */

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.
   In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
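            /* Editor's worked example: imm8 == 0xA5 (10100101b)
               expands byte-by-byte to imm64 == 0xFF00FF0000FF00FF. */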
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}


/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}

/* Helper for decoding laneage for simple vector operations,
   eg integer add. */
static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
                                 /*OUT*/const HChar** arrSpec,
                                 Bool bitQ, UInt szBlg2 )
{
   vassert(bitQ == True || bitQ == False);
   vassert(szBlg2 < 4);
   Bool zu = False;
   const HChar* as = NULL;
   switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
      case 0: zu = True;  as = "8b";  break;
      case 1: zu = False; as = "16b"; break;
      case 2: zu = True;  as = "4h";  break;
      case 3: zu = False; as = "8h";  break;
      case 4: zu = True;  as = "2s";  break;
      case 5: zu = False; as = "4s";  break;
      case 6: return False; // impliedly 1x64
      case 7: zu = False; as = "2d";  break;
      default: vassert(0);
   }
   vassert(as);
   if (arrSpec)   *arrSpec = as;
   if (zeroUpper) *zeroUpper = zu;
   return True;
}


/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
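/* Editor's worked example: immh:immb == 0001:001 gives immhb == 9,
   selecting 8-bit lanes (szBlg2 == 0) with shift == 16 - 9 == 7. */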
*/ 5163 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, 5164 UInt immh, UInt immb ) 5165 { 5166 vassert(immh < (1<<4)); 5167 vassert(immb < (1<<3)); 5168 UInt immhb = (immh << 3) | immb; 5169 if (immh & 8) { 5170 if (shift) *shift = 128 - immhb; 5171 if (szBlg2) *szBlg2 = 3; 5172 return True; 5173 } 5174 if (immh & 4) { 5175 if (shift) *shift = 64 - immhb; 5176 if (szBlg2) *szBlg2 = 2; 5177 return True; 5178 } 5179 if (immh & 2) { 5180 if (shift) *shift = 32 - immhb; 5181 if (szBlg2) *szBlg2 = 1; 5182 return True; 5183 } 5184 if (immh & 1) { 5185 if (shift) *shift = 16 - immhb; 5186 if (szBlg2) *szBlg2 = 0; 5187 return True; 5188 } 5189 return False; 5190 } 5191 5192 5193 /* Generate IR to fold all lanes of the V128 value in 'src' as 5194 characterised by the operator 'op', and return the result in the 5195 bottom bits of a V128, with all other bits set to zero. */ 5196 static IRTemp math_MINMAXV ( IRTemp src, IROp op ) 5197 { 5198 /* The basic idea is to use repeated applications of Iop_CatEven* 5199 and Iop_CatOdd* operators to 'src' so as to clone each lane into 5200 a complete vector. Then fold all those vectors with 'op' and 5201 zero out all but the least significant lane. */ 5202 switch (op) { 5203 case Iop_Min8Sx16: case Iop_Min8Ux16: 5204 case Iop_Max8Sx16: case Iop_Max8Ux16: { 5205 /* NB: temp naming here is misleading -- the naming is for 8 5206 lanes of 16 bit, whereas what is being operated on is 16 5207 lanes of 8 bits. */ 5208 IRTemp x76543210 = src; 5209 IRTemp x76547654 = newTemp(Ity_V128); 5210 IRTemp x32103210 = newTemp(Ity_V128); 5211 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 5212 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); 5213 IRTemp x76767676 = newTemp(Ity_V128); 5214 IRTemp x54545454 = newTemp(Ity_V128); 5215 IRTemp x32323232 = newTemp(Ity_V128); 5216 IRTemp x10101010 = newTemp(Ity_V128); 5217 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 5218 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 5219 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 5220 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 5221 IRTemp x77777777 = newTemp(Ity_V128); 5222 IRTemp x66666666 = newTemp(Ity_V128); 5223 IRTemp x55555555 = newTemp(Ity_V128); 5224 IRTemp x44444444 = newTemp(Ity_V128); 5225 IRTemp x33333333 = newTemp(Ity_V128); 5226 IRTemp x22222222 = newTemp(Ity_V128); 5227 IRTemp x11111111 = newTemp(Ity_V128); 5228 IRTemp x00000000 = newTemp(Ity_V128); 5229 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 5230 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 5231 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 5232 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 5233 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 5234 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 5235 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 5236 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 5237 /* Naming not misleading after here. 
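            From this point on, each temp xAllN holds source lane N
            (of 16) replicated into all sixteen byte positions, so
            folding any two of these vectors with 'op' folds the
            corresponding source lanes.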
*/ 5238 IRTemp xAllF = newTemp(Ity_V128); 5239 IRTemp xAllE = newTemp(Ity_V128); 5240 IRTemp xAllD = newTemp(Ity_V128); 5241 IRTemp xAllC = newTemp(Ity_V128); 5242 IRTemp xAllB = newTemp(Ity_V128); 5243 IRTemp xAllA = newTemp(Ity_V128); 5244 IRTemp xAll9 = newTemp(Ity_V128); 5245 IRTemp xAll8 = newTemp(Ity_V128); 5246 IRTemp xAll7 = newTemp(Ity_V128); 5247 IRTemp xAll6 = newTemp(Ity_V128); 5248 IRTemp xAll5 = newTemp(Ity_V128); 5249 IRTemp xAll4 = newTemp(Ity_V128); 5250 IRTemp xAll3 = newTemp(Ity_V128); 5251 IRTemp xAll2 = newTemp(Ity_V128); 5252 IRTemp xAll1 = newTemp(Ity_V128); 5253 IRTemp xAll0 = newTemp(Ity_V128); 5254 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777)); 5255 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777)); 5256 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666)); 5257 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666)); 5258 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555)); 5259 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555)); 5260 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444)); 5261 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444)); 5262 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333)); 5263 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333)); 5264 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222)); 5265 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222)); 5266 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111)); 5267 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111)); 5268 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000)); 5269 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000)); 5270 IRTemp maxFE = newTemp(Ity_V128); 5271 IRTemp maxDC = newTemp(Ity_V128); 5272 IRTemp maxBA = newTemp(Ity_V128); 5273 IRTemp max98 = newTemp(Ity_V128); 5274 IRTemp max76 = newTemp(Ity_V128); 5275 IRTemp max54 = newTemp(Ity_V128); 5276 IRTemp max32 = newTemp(Ity_V128); 5277 IRTemp max10 = newTemp(Ity_V128); 5278 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE))); 5279 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC))); 5280 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA))); 5281 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8))); 5282 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6))); 5283 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4))); 5284 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2))); 5285 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0))); 5286 IRTemp maxFEDC = newTemp(Ity_V128); 5287 IRTemp maxBA98 = newTemp(Ity_V128); 5288 IRTemp max7654 = newTemp(Ity_V128); 5289 IRTemp max3210 = newTemp(Ity_V128); 5290 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC))); 5291 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98))); 5292 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 5293 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 5294 IRTemp maxFEDCBA98 = newTemp(Ity_V128); 5295 IRTemp max76543210 = newTemp(Ity_V128); 5296 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98))); 5297 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 5298 IRTemp maxAllLanes = newTemp(Ity_V128); 5299 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98), 5300 mkexpr(max76543210))); 5301 IRTemp res = newTemp(Ity_V128); 5302 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes))); 5303 return res; 5304 } 5305 case Iop_Min16Sx8: case Iop_Min16Ux8: 5306 case Iop_Max16Sx8: case Iop_Max16Ux8: { 5307 IRTemp x76543210 = src; 5308 IRTemp x76547654 = newTemp(Ity_V128); 5309 IRTemp x32103210 = newTemp(Ity_V128); 5310 
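         /* Same scheme as the 8-bit case above: clone each of the
            eight 16-bit lanes into a complete vector, then fold the
            clones pairwise with 'op'. */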
assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 5311 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); 5312 IRTemp x76767676 = newTemp(Ity_V128); 5313 IRTemp x54545454 = newTemp(Ity_V128); 5314 IRTemp x32323232 = newTemp(Ity_V128); 5315 IRTemp x10101010 = newTemp(Ity_V128); 5316 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 5317 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 5318 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 5319 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 5320 IRTemp x77777777 = newTemp(Ity_V128); 5321 IRTemp x66666666 = newTemp(Ity_V128); 5322 IRTemp x55555555 = newTemp(Ity_V128); 5323 IRTemp x44444444 = newTemp(Ity_V128); 5324 IRTemp x33333333 = newTemp(Ity_V128); 5325 IRTemp x22222222 = newTemp(Ity_V128); 5326 IRTemp x11111111 = newTemp(Ity_V128); 5327 IRTemp x00000000 = newTemp(Ity_V128); 5328 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 5329 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 5330 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 5331 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 5332 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 5333 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 5334 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 5335 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 5336 IRTemp max76 = newTemp(Ity_V128); 5337 IRTemp max54 = newTemp(Ity_V128); 5338 IRTemp max32 = newTemp(Ity_V128); 5339 IRTemp max10 = newTemp(Ity_V128); 5340 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666))); 5341 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444))); 5342 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222))); 5343 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000))); 5344 IRTemp max7654 = newTemp(Ity_V128); 5345 IRTemp max3210 = newTemp(Ity_V128); 5346 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 5347 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 5348 IRTemp max76543210 = newTemp(Ity_V128); 5349 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 5350 IRTemp res = newTemp(Ity_V128); 5351 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210))); 5352 return res; 5353 } 5354 case Iop_Min32Sx4: case Iop_Min32Ux4: 5355 case Iop_Max32Sx4: case Iop_Max32Ux4: { 5356 IRTemp x3210 = src; 5357 IRTemp x3232 = newTemp(Ity_V128); 5358 IRTemp x1010 = newTemp(Ity_V128); 5359 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210)); 5360 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210)); 5361 IRTemp x3333 = newTemp(Ity_V128); 5362 IRTemp x2222 = newTemp(Ity_V128); 5363 IRTemp x1111 = newTemp(Ity_V128); 5364 IRTemp x0000 = newTemp(Ity_V128); 5365 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232)); 5366 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232)); 5367 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010)); 5368 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010)); 5369 IRTemp max32 = newTemp(Ity_V128); 5370 IRTemp max10 = newTemp(Ity_V128); 5371 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222))); 5372 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000))); 5373 IRTemp max3210 = newTemp(Ity_V128); 5374 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 5375 IRTemp res = newTemp(Ity_V128); 5376 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210))); 5377 return res; 5378 } 5379 default: 5380 vassert(0); 5381 } 5382 } 5383 5384 5385 /* Generate IR for TBL and TBX. 
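   The table is in tab[0 .. len] ('len' is the number of table
   registers minus one), 'src' supplies the per-lane byte indices,
   and 'oor_values' supplies the result lanes for out-of-range
   indices -- zeroes for TBL, the old destination lanes for TBX.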
This deals with the 128 bit case 5386 only. */ 5387 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src, 5388 IRTemp oor_values ) 5389 { 5390 vassert(len >= 0 && len <= 3); 5391 5392 /* Generate some useful constants as concisely as possible. */ 5393 IRTemp half15 = newTemp(Ity_I64); 5394 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL)); 5395 IRTemp half16 = newTemp(Ity_I64); 5396 assign(half16, mkU64(0x1010101010101010ULL)); 5397 5398 /* A zero vector */ 5399 IRTemp allZero = newTemp(Ity_V128); 5400 assign(allZero, mkV128(0x0000)); 5401 /* A vector containing 15 in each 8-bit lane */ 5402 IRTemp all15 = newTemp(Ity_V128); 5403 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15))); 5404 /* A vector containing 16 in each 8-bit lane */ 5405 IRTemp all16 = newTemp(Ity_V128); 5406 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16))); 5407 /* A vector containing 32 in each 8-bit lane */ 5408 IRTemp all32 = newTemp(Ity_V128); 5409 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16))); 5410 /* A vector containing 48 in each 8-bit lane */ 5411 IRTemp all48 = newTemp(Ity_V128); 5412 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32))); 5413 /* A vector containing 64 in each 8-bit lane */ 5414 IRTemp all64 = newTemp(Ity_V128); 5415 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32))); 5416 5417 /* Group the 16/32/48/64 vectors so as to be indexable. */ 5418 IRTemp allXX[4] = { all16, all32, all48, all64 }; 5419 5420 /* Compute the result for each table vector, with zeroes in places 5421 where the index values are out of range, and OR them into the 5422 running vector. */ 5423 IRTemp running_result = newTemp(Ity_V128); 5424 assign(running_result, mkV128(0)); 5425 5426 UInt tabent; 5427 for (tabent = 0; tabent <= len; tabent++) { 5428 vassert(tabent >= 0 && tabent < 4); 5429 IRTemp bias = newTemp(Ity_V128); 5430 assign(bias, 5431 mkexpr(tabent == 0 ? allZero : allXX[tabent-1])); 5432 IRTemp biased_indices = newTemp(Ity_V128); 5433 assign(biased_indices, 5434 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias))); 5435 IRTemp valid_mask = newTemp(Ity_V128); 5436 assign(valid_mask, 5437 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices))); 5438 IRTemp safe_biased_indices = newTemp(Ity_V128); 5439 assign(safe_biased_indices, 5440 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15))); 5441 IRTemp results_or_junk = newTemp(Ity_V128); 5442 assign(results_or_junk, 5443 binop(Iop_Perm8x16, mkexpr(tab[tabent]), 5444 mkexpr(safe_biased_indices))); 5445 IRTemp results_or_zero = newTemp(Ity_V128); 5446 assign(results_or_zero, 5447 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask))); 5448 /* And OR that into the running result. */ 5449 IRTemp tmp = newTemp(Ity_V128); 5450 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero), 5451 mkexpr(running_result))); 5452 running_result = tmp; 5453 } 5454 5455 /* So now running_result holds the overall result where the indices 5456 are in range, and zero in out-of-range lanes. Now we need to 5457 compute an overall validity mask and use this to copy in the 5458 lanes in the oor_values for out of range indices. This is 5459 unnecessary for TBL but will get folded out by iropt, so we lean 5460 on that and generate the same code for TBL and TBX here. 
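      Concretely, in the iteration with tabent == 1 (indices 16 .. 31
      select tab[1]): biased_indices = src - 16 in each lane;
      valid_mask is all-ones in lanes where biased_indices <u 16; and
      the contribution ORed into the running result is
      Perm8x16(tab[1], biased_indices & 15) & valid_mask.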
*/ 5461 IRTemp overall_valid_mask = newTemp(Ity_V128); 5462 assign(overall_valid_mask, 5463 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src))); 5464 IRTemp result = newTemp(Ity_V128); 5465 assign(result, 5466 binop(Iop_OrV128, 5467 mkexpr(running_result), 5468 binop(Iop_AndV128, 5469 mkexpr(oor_values), 5470 unop(Iop_NotV128, mkexpr(overall_valid_mask))))); 5471 return result; 5472 } 5473 5474 5475 static 5476 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) 5477 { 5478 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 5479 5480 /* ---------------- FMOV (general) ---------------- */ 5481 /* case 30 23 20 18 15 9 4 5482 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn 5483 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn 5484 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn 5485 5486 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn 5487 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn 5488 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1] 5489 */ 5490 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0) 5491 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5492 UInt sf = INSN(31,31); 5493 UInt ty = INSN(23,22); // type 5494 UInt rm = INSN(20,19); // rmode 5495 UInt op = INSN(18,16); // opcode 5496 UInt nn = INSN(9,5); 5497 UInt dd = INSN(4,0); 5498 UInt ix = 0; // case 5499 if (sf == 0) { 5500 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 5501 ix = 1; 5502 else 5503 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 5504 ix = 4; 5505 } else { 5506 vassert(sf == 1); 5507 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 5508 ix = 2; 5509 else 5510 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 5511 ix = 5; 5512 else 5513 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1)) 5514 ix = 3; 5515 else 5516 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0)) 5517 ix = 6; 5518 } 5519 if (ix > 0) { 5520 switch (ix) { 5521 case 1: 5522 putQReg128(dd, mkV128(0)); 5523 putQRegLO(dd, getIReg32orZR(nn)); 5524 DIP("fmov s%u, w%u\n", dd, nn); 5525 break; 5526 case 2: 5527 putQReg128(dd, mkV128(0)); 5528 putQRegLO(dd, getIReg64orZR(nn)); 5529 DIP("fmov d%u, x%u\n", dd, nn); 5530 break; 5531 case 3: 5532 putQRegHI64(dd, getIReg64orZR(nn)); 5533 DIP("fmov v%u.d[1], x%u\n", dd, nn); 5534 break; 5535 case 4: 5536 putIReg32orZR(dd, getQRegLO(nn, Ity_I32)); 5537 DIP("fmov w%u, s%u\n", dd, nn); 5538 break; 5539 case 5: 5540 putIReg64orZR(dd, getQRegLO(nn, Ity_I64)); 5541 DIP("fmov x%u, d%u\n", dd, nn); 5542 break; 5543 case 6: 5544 putIReg64orZR(dd, getQRegHI64(nn)); 5545 DIP("fmov x%u, v%u.d[1]\n", dd, nn); 5546 break; 5547 default: 5548 vassert(0); 5549 } 5550 return True; 5551 } 5552 /* undecodable; fall through */ 5553 } 5554 5555 /* -------------- FMOV (scalar, immediate) -------------- */ 5556 /* 31 28 23 20 12 9 4 5557 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm 5558 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm 5559 */ 5560 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5561 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) { 5562 Bool isD = INSN(22,22) == 1; 5563 UInt imm8 = INSN(20,13); 5564 UInt dd = INSN(4,0); 5565 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32); 5566 if (!isD) { 5567 vassert(0 == (imm & 0xFFFFFFFF00000000ULL)); 5568 } 5569 putQReg128(dd, mkV128(0)); 5570 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL)); 5571 DIP("fmov %s, #0x%llx\n", 5572 nameQRegLO(dd, isD ? 
Ity_F64 : Ity_F32), imm);
      return True;
   }

   /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d  MOV Dd,   #imm (q=0)
                                              MOV Vd.2d #imm (q=1)
      Allowable op:cmode
         FMOV = 1:1111
         MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, x:1110
   */
   if (INSN(31,31) == 0
       && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
       && INSN(11,10) == BITS2(0,1)) {
      UInt  bitQ     = INSN(30,30);
      UInt  bitOP    = INSN(29,29);
      UInt  cmode    = INSN(15,12);
      UInt  imm8     = (INSN(18,16) << 5) | INSN(9,5);
      UInt  dd       = INSN(4,0);
      ULong imm64lo  = 0;
      UInt  op_cmode = (bitOP << 4) | cmode;
      Bool  ok       = False;
      switch (op_cmode) {
         case BITS5(1,1,1,1,1): // 1:1111
         case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
         case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
         case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
         case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
            ok = True; break;
         default:
            break;
      }
      if (ok) {
         ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
      }
      if (ok) {
         ULong imm64hi = (bitQ == 0 && bitOP == 0)  ? 0  : imm64lo;
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
         return True;
      }
      /* else fall through */
   }

   /* -------------- {S,U}CVTF (vector, integer, scalar) -------------- */
   /* 31  28       23 21     15     9 4  ix (u:sz)
      010 11110 00 100001 110110 n d  SCVTF Sd, Sn  0
      0         01                    SCVTF Dd, Dn  1
      1         00                    UCVTF Sd, Sn  2
      1         01                    UCVTF Dd, Dn  3
   */
   if (INSN(31,30) == BITS2(0,1) && INSN(28,23) == BITS6(1,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,1,1,0,1,1,0)) {
      Bool is64 = INSN(22,22) == 1;
      Bool isU  = INSN(29,29) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      UInt ix   = (isU ? 2 : 0) | (is64 ? 1 : 0);
      const IROp ops[4]
         = { Iop_I32StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I64UtoF64 };
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()),
                          getQRegLO(nn, is64 ? Ity_I64 : Ity_I32)));
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, is64 ? Ity_F64 : Ity_F32),
          nameQRegLO(nn, is64 ? Ity_I64 : Ity_I32));
      return True;
   }

   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* 31  28      23 21 20 18  15     9 4  ix
      000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn  0
      000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn  1
      100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn  2
      100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn  3

      000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn  4
      000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn  5
      100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn  6
      100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn  7

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      Bool isU   = INSN(16,16) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt ix    = (isU ? 4 : 0) | (isI64 ?
2 : 0) | (isF64 ? 1 : 0); 5671 const IROp ops[8] 5672 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64, 5673 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 }; 5674 IRExpr* src = getIRegOrZR(isI64, nn); 5675 IRExpr* res = (isF64 && !isI64) 5676 ? unop(ops[ix], src) 5677 : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src); 5678 putQReg128(dd, mkV128(0)); 5679 putQRegLO(dd, res); 5680 DIP("%ccvtf %s, %s\n", 5681 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32), 5682 nameIRegOrZR(isI64, nn)); 5683 return True; 5684 } 5685 5686 /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */ 5687 /* 31 23 20 15 11 9 4 5688 ---------------- 0000 ------ FMUL -------- 5689 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm 5690 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm 5691 ---------------- 0010 ------ FADD -------- 5692 ---------------- 0011 ------ FSUB -------- 5693 ---------------- 1000 ------ FNMUL -------- 5694 */ 5695 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5696 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5697 Bool isD = INSN(22,22) == 1; 5698 UInt mm = INSN(20,16); 5699 UInt op = INSN(15,12); 5700 UInt nn = INSN(9,5); 5701 UInt dd = INSN(4,0); 5702 IROp iop = Iop_INVALID; 5703 IRType ty = isD ? Ity_F64 : Ity_F32; 5704 Bool neg = False; 5705 const HChar* nm = "???"; 5706 switch (op) { 5707 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break; 5708 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break; 5709 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break; 5710 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break; 5711 case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty); 5712 neg = True; break; 5713 default: return False; 5714 } 5715 vassert(iop != Iop_INVALID); 5716 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()), 5717 getQRegLO(nn, ty), getQRegLO(mm, ty)); 5718 IRTemp res = newTemp(ty); 5719 assign(res, neg ? unop(mkNEGF(ty),resE) : resE); 5720 putQReg128(dd, mkV128(0)); 5721 putQRegLO(dd, mkexpr(res)); 5722 DIP("%s %s, %s, %s\n", 5723 nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); 5724 return True; 5725 } 5726 5727 /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */ 5728 /* 31 23 21 16 14 9 4 5729 000 11110 00 10000 00 10000 n d FMOV Sd, Sn 5730 000 11110 01 10000 00 10000 n d FMOV Dd, Dn 5731 ------------------ 01 --------- FABS ------ 5732 ------------------ 10 --------- FNEG ------ 5733 ------------------ 11 --------- FSQRT ----- 5734 */ 5735 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 5736 && INSN(21,17) == BITS5(1,0,0,0,0) 5737 && INSN(14,10) == BITS5(1,0,0,0,0)) { 5738 Bool isD = INSN(22,22) == 1; 5739 UInt opc = INSN(16,15); 5740 UInt nn = INSN(9,5); 5741 UInt dd = INSN(4,0); 5742 IRType ty = isD ? Ity_F64 : Ity_F32; 5743 IRTemp res = newTemp(ty); 5744 if (opc == BITS2(0,0)) { 5745 assign(res, getQRegLO(nn, ty)); 5746 putQReg128(dd, mkV128(0x0000)); 5747 putQRegLO(dd, mkexpr(res)); 5748 DIP("fmov %s, %s\n", 5749 nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5750 return True; 5751 } 5752 if (opc == BITS2(1,0) || opc == BITS2(0,1)) { 5753 Bool isAbs = opc == BITS2(0,1); 5754 IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty); 5755 assign(res, unop(op, getQRegLO(nn, ty))); 5756 putQReg128(dd, mkV128(0x0000)); 5757 putQRegLO(dd, mkexpr(res)); 5758 DIP("%s %s, %s\n", isAbs ? 
"fabs" : "fneg", 5759 nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5760 return True; 5761 } 5762 if (opc == BITS2(1,1)) { 5763 assign(res, 5764 binop(mkSQRTF(ty), 5765 mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty))); 5766 putQReg128(dd, mkV128(0x0000)); 5767 putQRegLO(dd, mkexpr(res)); 5768 DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 5769 return True; 5770 } 5771 /* else fall through; other cases are ATC */ 5772 } 5773 5774 /* ---------------- F{ABS,NEG} (vector) ---------------- */ 5775 /* 31 28 22 21 16 9 4 5776 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T 5777 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T 5778 */ 5779 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1) 5780 && INSN(21,17) == BITS5(1,0,0,0,0) 5781 && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) { 5782 UInt bitQ = INSN(30,30); 5783 UInt bitSZ = INSN(22,22); 5784 Bool isFNEG = INSN(29,29) == 1; 5785 UInt nn = INSN(9,5); 5786 UInt dd = INSN(4,0); 5787 const HChar* ar = "??"; 5788 IRType tyF = Ity_INVALID; 5789 Bool zeroHI = False; 5790 Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar, 5791 (Bool)bitQ, (Bool)bitSZ); 5792 if (ok) { 5793 vassert(tyF == Ity_F64 || tyF == Ity_F32); 5794 IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2) 5795 : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4); 5796 IRTemp res = newTemp(Ity_V128); 5797 assign(res, unop(op, getQReg128(nn))); 5798 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) 5799 : mkexpr(res)); 5800 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs", 5801 nameQReg128(dd), ar, nameQReg128(nn), ar); 5802 return True; 5803 } 5804 /* else fall through */ 5805 } 5806 5807 /* -------------------- FCMP,FCMPE -------------------- */ 5808 /* 31 23 20 15 9 4 5809 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm 5810 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0 5811 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm 5812 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0 5813 5814 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm 5815 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0 5816 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm 5817 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0 5818 5819 FCMPE generates Invalid Operation exn if either arg is any kind 5820 of NaN. FCMP generates Invalid Operation exn if either arg is a 5821 signalling NaN. We ignore this detail here and produce the same 5822 IR for both. 5823 */ 5824 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1 5825 && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) { 5826 Bool isD = INSN(22,22) == 1; 5827 UInt mm = INSN(20,16); 5828 UInt nn = INSN(9,5); 5829 Bool isCMPE = INSN(4,4) == 1; 5830 Bool cmpZero = INSN(3,3) == 1; 5831 IRType ty = isD ? Ity_F64 : Ity_F32; 5832 Bool valid = True; 5833 if (cmpZero && mm != 0) valid = False; 5834 if (valid) { 5835 IRTemp argL = newTemp(ty); 5836 IRTemp argR = newTemp(ty); 5837 IRTemp irRes = newTemp(Ity_I32); 5838 assign(argL, getQRegLO(nn, ty)); 5839 assign(argR, 5840 cmpZero 5841 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0))) 5842 : getQRegLO(mm, ty)); 5843 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 5844 mkexpr(argL), mkexpr(argR))); 5845 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes); 5846 IRTemp nzcv_28x0 = newTemp(Ity_I64); 5847 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28))); 5848 setFlags_COPY(nzcv_28x0); 5849 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty), 5850 cmpZero ? 
"#0.0" : nameQRegLO(mm, ty)); 5851 return True; 5852 } 5853 } 5854 5855 /* -------------------- F{N}M{ADD,SUB} -------------------- */ 5856 /* 31 22 20 15 14 9 4 ix 5857 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa 5858 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa 5859 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa 5860 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa 5861 where Fx=Dx when sz=1, Fx=Sx when sz=0 5862 5863 -----SPEC------ ----IMPL---- 5864 fmadd a + n * m a + n * m 5865 fmsub a + (-n) * m a - n * m 5866 fnmadd (-a) + (-n) * m -(a + n * m) 5867 fnmsub (-a) + n * m -(a - n * m) 5868 */ 5869 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) { 5870 Bool isD = INSN(22,22) == 1; 5871 UInt mm = INSN(20,16); 5872 UInt aa = INSN(14,10); 5873 UInt nn = INSN(9,5); 5874 UInt dd = INSN(4,0); 5875 UInt ix = (INSN(21,21) << 1) | INSN(15,15); 5876 IRType ty = isD ? Ity_F64 : Ity_F32; 5877 IROp opADD = mkADDF(ty); 5878 IROp opSUB = mkSUBF(ty); 5879 IROp opMUL = mkMULF(ty); 5880 IROp opNEG = mkNEGF(ty); 5881 IRTemp res = newTemp(ty); 5882 IRExpr* eA = getQRegLO(aa, ty); 5883 IRExpr* eN = getQRegLO(nn, ty); 5884 IRExpr* eM = getQRegLO(mm, ty); 5885 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); 5886 IRExpr* eNxM = triop(opMUL, rm, eN, eM); 5887 switch (ix) { 5888 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; 5889 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; 5890 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; 5891 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; 5892 default: vassert(0); 5893 } 5894 putQReg128(dd, mkV128(0x0000)); 5895 putQRegLO(dd, mkexpr(res)); 5896 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" }; 5897 DIP("%s %s, %s, %s, %s\n", 5898 names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty), 5899 nameQRegLO(mm, ty), nameQRegLO(aa, ty)); 5900 return True; 5901 } 5902 5903 /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */ 5904 /* 30 23 20 18 15 9 4 5905 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to 5906 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest) 5907 ---------------- 01 -------------- FCVTP-------- (round to +inf) 5908 ---------------- 10 -------------- FCVTM-------- (round to -inf) 5909 ---------------- 11 -------------- FCVTZ-------- (round to zero) 5910 5911 Rd is Xd when sf==1, Wd when sf==0 5912 Fn is Dn when x==1, Sn when x==0 5913 20:19 carry the rounding mode, using the same encoding as FPCR 5914 */ 5915 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1 5916 && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 5917 Bool isI64 = INSN(31,31) == 1; 5918 Bool isF64 = INSN(22,22) == 1; 5919 UInt rm = INSN(20,19); 5920 Bool isU = INSN(16,16) == 1; 5921 UInt nn = INSN(9,5); 5922 UInt dd = INSN(4,0); 5923 /* Decide on the IR rounding mode to use. */ 5924 IRRoundingMode irrm = 8; /*impossible*/ 5925 HChar ch = '?'; 5926 switch (rm) { 5927 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break; 5928 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break; 5929 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break; 5930 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break; 5931 default: vassert(0); 5932 } 5933 vassert(irrm != 8); 5934 /* Decide on the conversion primop, based on the source size, 5935 dest size and signedness (8 possibilities). Case coding: 5936 F32 ->s I32 0 5937 F32 ->u I32 1 5938 F32 ->s I64 2 5939 F32 ->u I64 3 5940 F64 ->s I32 4 5941 F64 ->u I32 5 5942 F64 ->s I64 6 5943 F64 ->u I64 7 5944 */ 5945 UInt ix = (isF64 ? 
4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0); 5946 vassert(ix < 8); 5947 const IROp ops[8] 5948 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U, 5949 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U }; 5950 IROp op = ops[ix]; 5951 // A bit of ATCery: bounce all cases we haven't seen an example of. 5952 if (/* F32toI32S */ 5953 (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ 5954 || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ 5955 || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ 5956 /* F32toI32U */ 5957 || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ 5958 || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ 5959 /* F32toI64S */ 5960 || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ 5961 /* F32toI64U */ 5962 || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ 5963 || (op == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */ 5964 /* F64toI32S */ 5965 || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ 5966 || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ 5967 || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ 5968 /* F64toI32U */ 5969 || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ 5970 || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ 5971 || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ 5972 /* F64toI64S */ 5973 || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ 5974 || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ 5975 || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ 5976 /* F64toI64U */ 5977 || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ 5978 || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ 5979 ) { 5980 /* validated */ 5981 } else { 5982 return False; 5983 } 5984 IRType srcTy = isF64 ? Ity_F64 : Ity_F32; 5985 IRType dstTy = isI64 ? Ity_I64 : Ity_I32; 5986 IRTemp src = newTemp(srcTy); 5987 IRTemp dst = newTemp(dstTy); 5988 assign(src, getQRegLO(nn, srcTy)); 5989 assign(dst, binop(op, mkU32(irrm), mkexpr(src))); 5990 putIRegOrZR(isI64, dd, mkexpr(dst)); 5991 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's', 5992 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); 5993 return True; 5994 } 5995 5996 /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */ 5997 /* 30 23 20 18 15 9 4 5998 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn 5999 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn 6000 Fn is Dn when x==1, Sn when x==0 6001 */ 6002 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) 6003 && INSN(21,16) == BITS6(1,0,0,1,0,0) 6004 && INSN(15,10) == BITS6(0,0,0,0,0,0)) { 6005 Bool isI64 = INSN(31,31) == 1; 6006 Bool isF64 = INSN(22,22) == 1; 6007 UInt nn = INSN(9,5); 6008 UInt dd = INSN(4,0); 6009 /* Decide on the IR rounding mode to use. */ 6010 /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */ 6011 IRRoundingMode irrm = Irrm_NEAREST; 6012 /* Decide on the conversion primop. */ 6013 IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S) 6014 : (isF64 ? Iop_F64toI32S : Iop_F32toI32S); 6015 IRType srcTy = isF64 ? Ity_F64 : Ity_F32; 6016 IRType dstTy = isI64 ? 
Ity_I64 : Ity_I32; 6017 IRTemp src = newTemp(srcTy); 6018 IRTemp dst = newTemp(dstTy); 6019 assign(src, getQRegLO(nn, srcTy)); 6020 assign(dst, binop(op, mkU32(irrm), mkexpr(src))); 6021 putIRegOrZR(isI64, dd, mkexpr(dst)); 6022 DIP("fcvtas %s, %s (KLUDGED)\n", 6023 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); 6024 return True; 6025 } 6026 6027 /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */ 6028 /* 31 23 21 17 14 9 4 6029 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR) 6030 rm 6031 x==0 => S-registers, x==1 => D-registers 6032 rm (17:15) encodings: 6033 111 per FPCR (FRINTI) 6034 001 +inf (FRINTP) 6035 010 -inf (FRINTM) 6036 011 zero (FRINTZ) 6037 000 tieeven 6038 100 tieaway (FRINTA) -- !! FIXME KLUDGED !! 6039 110 per FPCR + "exact = TRUE" 6040 101 unallocated 6041 */ 6042 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) 6043 && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) { 6044 Bool isD = INSN(22,22) == 1; 6045 UInt rm = INSN(17,15); 6046 UInt nn = INSN(9,5); 6047 UInt dd = INSN(4,0); 6048 IRType ty = isD ? Ity_F64 : Ity_F32; 6049 IRExpr* irrmE = NULL; 6050 UChar ch = '?'; 6051 switch (rm) { 6052 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; 6053 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; 6054 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; 6055 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 6056 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break; 6057 default: break; 6058 } 6059 if (irrmE) { 6060 IRTemp src = newTemp(ty); 6061 IRTemp dst = newTemp(ty); 6062 assign(src, getQRegLO(nn, ty)); 6063 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 6064 irrmE, mkexpr(src))); 6065 putQReg128(dd, mkV128(0x0000)); 6066 putQRegLO(dd, mkexpr(dst)); 6067 DIP("frint%c %s, %s\n", 6068 ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty)); 6069 return True; 6070 } 6071 /* else unhandled rounding mode case -- fall through */ 6072 } 6073 6074 /* ------------------ FCVT (scalar) ------------------ */ 6075 /* 31 23 21 16 14 9 4 6076 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp) 6077 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp) 6078 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp) 6079 --------- 00 ----- 01 --------- FCVT Dd, Sn 6080 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp) 6081 --------- 01 ----- 00 --------- FCVT Sd, Dn 6082 Rounding, when dst is smaller than src, is per the FPCR. 
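      For example, FCVT Dd, Sn (widening) is exact, so it is
      implemented below as a plain unop, while FCVT Sd, Dn (narrowing)
      takes the FPCR rounding mode as an extra argument.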
6083 */ 6084 if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0) 6085 && INSN(21,17) == BITS5(1,0,0,0,1) 6086 && INSN(14,10) == BITS5(1,0,0,0,0)) { 6087 UInt b2322 = INSN(23,22); 6088 UInt b1615 = INSN(16,15); 6089 UInt nn = INSN(9,5); 6090 UInt dd = INSN(4,0); 6091 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) { 6092 /* Convert S to D */ 6093 IRTemp res = newTemp(Ity_F64); 6094 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32))); 6095 putQReg128(dd, mkV128(0x0000)); 6096 putQRegLO(dd, mkexpr(res)); 6097 DIP("fcvt %s, %s\n", 6098 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32)); 6099 return True; 6100 } 6101 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) { 6102 /* Convert D to S */ 6103 IRTemp res = newTemp(Ity_F32); 6104 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()), 6105 getQRegLO(nn, Ity_F64))); 6106 putQReg128(dd, mkV128(0x0000)); 6107 putQRegLO(dd, mkexpr(res)); 6108 DIP("fcvt %s, %s\n", 6109 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64)); 6110 return True; 6111 } 6112 /* else unhandled */ 6113 } 6114 6115 /* ------------------ FABD (scalar) ------------------ */ 6116 /* 31 23 20 15 9 4 6117 011 11110 111 m 110101 n d FABD Dd, Dn, Dm 6118 011 11110 101 m 110101 n d FABD Sd, Sn, Sm 6119 */ 6120 if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1 6121 && INSN(15,10) == BITS6(1,1,0,1,0,1)) { 6122 Bool isD = INSN(22,22) == 1; 6123 UInt mm = INSN(20,16); 6124 UInt nn = INSN(9,5); 6125 UInt dd = INSN(4,0); 6126 IRType ty = isD ? Ity_F64 : Ity_F32; 6127 IRTemp res = newTemp(ty); 6128 assign(res, unop(mkABSF(ty), 6129 triop(mkSUBF(ty), 6130 mkexpr(mk_get_IR_rounding_mode()), 6131 getQRegLO(nn,ty), getQRegLO(mm,ty)))); 6132 putQReg128(dd, mkV128(0x0000)); 6133 putQRegLO(dd, mkexpr(res)); 6134 DIP("fabd %s, %s, %s\n", 6135 nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty)); 6136 return True; 6137 } 6138 6139 /* -------------- {S,U}CVTF (vector, integer) -------------- */ 6140 /* 31 28 22 21 15 9 4 6141 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn 6142 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn 6143 with laneage: 6144 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D 6145 */ 6146 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0) 6147 && INSN(21,16) == BITS6(1,0,0,0,0,1) 6148 && INSN(15,10) == BITS6(1,1,0,1,1,0)) { 6149 Bool isQ = INSN(30,30) == 1; 6150 Bool isU = INSN(29,29) == 1; 6151 Bool isF64 = INSN(22,22) == 1; 6152 UInt nn = INSN(9,5); 6153 UInt dd = INSN(4,0); 6154 if (isQ || !isF64) { 6155 IRType tyF = Ity_INVALID, tyI = Ity_INVALID; 6156 UInt nLanes = 0; 6157 Bool zeroHI = False; 6158 const HChar* arrSpec = NULL; 6159 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec, 6160 isQ, isF64 ); 6161 IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32) 6162 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32); 6163 IRTemp rm = mk_get_IR_rounding_mode(); 6164 UInt i; 6165 vassert(ok); /* the 'if' above should ensure this */ 6166 for (i = 0; i < nLanes; i++) { 6167 putQRegLane(dd, i, 6168 binop(op, mkexpr(rm), getQRegLane(nn, i, tyI))); 6169 } 6170 if (zeroHI) { 6171 putQRegLane(dd, 1, mkU64(0)); 6172 } 6173 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 
'u' : 's', 6174 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 6175 return True; 6176 } 6177 /* else fall through */ 6178 } 6179 6180 /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */ 6181 /* 31 28 22 21 20 15 9 4 case 6182 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1 6183 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2 6184 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3 6185 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4 6186 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5 6187 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6 6188 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7 6189 */ 6190 if (INSN(31,31) == 0 6191 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) { 6192 Bool isQ = INSN(30,30) == 1; 6193 UInt b29 = INSN(29,29); 6194 UInt b23 = INSN(23,23); 6195 Bool isF64 = INSN(22,22) == 1; 6196 UInt mm = INSN(20,16); 6197 UInt b1510 = INSN(15,10); 6198 UInt nn = INSN(9,5); 6199 UInt dd = INSN(4,0); 6200 UInt ix = 0; 6201 /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1; 6202 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2; 6203 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3; 6204 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4; 6205 else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5; 6206 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6; 6207 else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7; 6208 IRType laneTy = Ity_INVALID; 6209 Bool zeroHI = False; 6210 const HChar* arr = "??"; 6211 Bool ok 6212 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); 6213 /* Skip MLA/MLS for the time being */ 6214 if (ok && ix >= 1 && ix <= 4) { 6215 const IROp ops64[4] 6216 = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 }; 6217 const IROp ops32[4] 6218 = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 }; 6219 const HChar* names[4] 6220 = { "fadd", "fsub", "fmul", "fdiv" }; 6221 IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1]; 6222 IRTemp rm = mk_get_IR_rounding_mode(); 6223 IRTemp t1 = newTemp(Ity_V128); 6224 IRTemp t2 = newTemp(Ity_V128); 6225 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 6226 assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1)) 6227 : mkexpr(t1)); 6228 putQReg128(dd, mkexpr(t2)); 6229 DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1], 6230 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6231 return True; 6232 } 6233 if (ok && ix >= 5 && ix <= 6) { 6234 IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4; 6235 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 6236 IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 6237 IRTemp rm = mk_get_IR_rounding_mode(); 6238 IRTemp t1 = newTemp(Ity_V128); 6239 IRTemp t2 = newTemp(Ity_V128); 6240 // FIXME: double rounding; use FMA primops instead 6241 assign(t1, triop(opMUL, 6242 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 6243 assign(t2, triop(ix == 5 ? opADD : opSUB, 6244 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 6245 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) 6246 : mkexpr(t2)); 6247 DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls", 6248 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6249 return True; 6250 } 6251 if (ok && ix == 7) { 6252 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 6253 IROp opABS = laneTy==Ity_F64 ? 
Iop_Abs64Fx2 : Iop_Abs32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: use Abd primop instead?
         assign(t1, triop(opSUB,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, unop(opABS, mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("fabd %s.%s, %s.%s, %s.%s\n",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
   }

   /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
   /* 31  28      22   20 15     9 4  case
      0q1 01110 0 sz 1 m  111011 n d  FACGE Vd, Vn, Vm
      0q1 01110 1 sz 1 m  111011 n d  FACGT Vd, Vn, Vm
      0q0 01110 0 sz 1 m  111001 n d  FCMEQ Vd, Vn, Vm
      0q1 01110 0 sz 1 m  111001 n d  FCMGE Vd, Vn, Vm
      0q1 01110 1 sz 1 m  111001 n d  FCMGT Vd, Vn, Vm
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
       && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
      Bool isQ   = INSN(30,30) == 1;
      UInt U     = INSN(29,29);
      UInt E     = INSN(23,23);
      Bool isF64 = INSN(22,22) == 1;
      UInt ac    = INSN(11,11);
      UInt mm    = INSN(20,16);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt EUac  = (E << 2) | (U << 1) | ac;
      IROp opABS = Iop_INVALID;
      IROp opCMP = Iop_INVALID;
      IRType laneTy = Ity_INVALID;
      Bool zeroHI = False;
      Bool swap = True;
      const HChar* arr = "??";
      const HChar* nm  = "??";
      Bool ok
         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
      if (ok) {
         vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
         switch (EUac) {
            case BITS3(0,0,0):
               nm    = "fcmeq";
               opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
               swap  = False;
               break;
            case BITS3(0,1,0):
               nm    = "fcmge";
               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
               break;
            case BITS3(0,1,1):
               nm    = "facge";
               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
               break;
            case BITS3(1,1,0):
               nm    = "fcmgt";
               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
               break;
            case BITS3(1,1,1):
               nm    = "facgt";
               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
               break;
            default:
               break;
         }
      }
      if (opCMP != Iop_INVALID) {
         IRExpr* argN = getQReg128(nn);
         IRExpr* argM = getQReg128(mm);
         if (opABS != Iop_INVALID) {
            argN = unop(opABS, argN);
            argM = unop(opABS, argM);
         }
         IRExpr* res = swap ?
binop(opCMP, argM, argN) 6336 : binop(opCMP, argN, argM); 6337 if (zeroHI) { 6338 res = unop(Iop_ZeroHI64ofV128, res); 6339 } 6340 putQReg128(dd, res); 6341 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6342 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 6343 return True; 6344 } 6345 /* else fall through */ 6346 } 6347 6348 /* -------------------- FCVTN -------------------- */ 6349 /* 31 28 23 20 15 9 4 6350 0q0 01110 0s1 00001 011010 n d FCVTN Vd, Vn 6351 where case q:s of 00: 16Fx4(lo) <- 32Fx4 6352 01: 32Fx2(lo) <- 64Fx2 6353 10: 16Fx4(hi) <- 32Fx4 6354 11: 32Fx2(hi) <- 64Fx2 6355 Only deals with the 32Fx2 <- 64Fx2 version (s==1) 6356 */ 6357 if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0) 6358 && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) { 6359 UInt bQ = INSN(30,30); 6360 UInt bS = INSN(22,22); 6361 UInt nn = INSN(9,5); 6362 UInt dd = INSN(4,0); 6363 if (bS == 1) { 6364 IRTemp rm = mk_get_IR_rounding_mode(); 6365 IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64); 6366 IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64); 6367 putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo)); 6368 putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi)); 6369 if (bQ == 0) { 6370 putQRegLane(dd, 1, mkU64(0)); 6371 } 6372 DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "", 6373 nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn)); 6374 return True; 6375 } 6376 /* else fall through */ 6377 } 6378 6379 /* ---------------- ADD/SUB (vector) ---------------- */ 6380 /* 31 28 23 21 20 15 9 4 6381 0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T 6382 0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T 6383 */ 6384 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6385 && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) { 6386 Bool isQ = INSN(30,30) == 1; 6387 UInt szBlg2 = INSN(23,22); 6388 Bool isSUB = INSN(29,29) == 1; 6389 UInt mm = INSN(20,16); 6390 UInt nn = INSN(9,5); 6391 UInt dd = INSN(4,0); 6392 Bool zeroHI = False; 6393 const HChar* arrSpec = ""; 6394 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6395 if (ok) { 6396 const IROp opsADD[4] 6397 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; 6398 const IROp opsSUB[4] 6399 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 6400 vassert(szBlg2 < 4); 6401 IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2]; 6402 IRTemp t = newTemp(Ity_V128); 6403 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 6404 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) 6405 : mkexpr(t)); 6406 const HChar* nm = isSUB ? "sub" : "add"; 6407 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6408 nameQReg128(dd), arrSpec, 6409 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6410 return True; 6411 } 6412 /* else fall through */ 6413 } 6414 6415 /* ---------------- ADD/SUB (scalar) ---------------- */ 6416 /* 31 28 23 21 20 15 9 4 6417 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm 6418 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm 6419 */ 6420 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1) 6421 && INSN(15,10) == BITS6(1,0,0,0,0,1)) { 6422 Bool isSUB = INSN(29,29) == 1; 6423 UInt mm = INSN(20,16); 6424 UInt nn = INSN(9,5); 6425 UInt dd = INSN(4,0); 6426 IRTemp res = newTemp(Ity_I64); 6427 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64, 6428 getQRegLane(nn, 0, Ity_I64), 6429 getQRegLane(mm, 0, Ity_I64))); 6430 putQRegLane(dd, 0, mkexpr(res)); 6431 putQRegLane(dd, 1, mkU64(0)); 6432 DIP("%s %s, %s, %s\n", isSUB ? 
"sub" : "add", 6433 nameQRegLO(dd, Ity_I64), 6434 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 6435 return True; 6436 } 6437 6438 /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */ 6439 /* 31 28 23 21 20 15 9 4 6440 0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only 6441 0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only 6442 0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only 6443 0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only 6444 */ 6445 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6446 && INSN(21,21) == 1 6447 && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) { 6448 Bool isQ = INSN(30,30) == 1; 6449 UInt szBlg2 = INSN(23,22); 6450 UInt bit29 = INSN(29,29); 6451 UInt mm = INSN(20,16); 6452 UInt nn = INSN(9,5); 6453 UInt dd = INSN(4,0); 6454 Bool isMLAS = INSN(11,11) == 0; 6455 const IROp opsADD[4] 6456 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID }; 6457 const IROp opsSUB[4] 6458 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID }; 6459 const IROp opsMUL[4] 6460 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; 6461 const IROp opsPMUL[4] 6462 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 6463 /* Set opMUL and, if necessary, opACC. A result value of 6464 Iop_INVALID for opMUL indicates that the instruction is 6465 invalid. */ 6466 Bool zeroHI = False; 6467 const HChar* arrSpec = ""; 6468 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6469 vassert(szBlg2 < 4); 6470 IROp opACC = Iop_INVALID; 6471 IROp opMUL = Iop_INVALID; 6472 if (ok) { 6473 opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2] 6474 : opsMUL[szBlg2]; 6475 opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2]) 6476 : Iop_INVALID; 6477 } 6478 if (ok && opMUL != Iop_INVALID) { 6479 IRTemp t1 = newTemp(Ity_V128); 6480 assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm))); 6481 IRTemp t2 = newTemp(Ity_V128); 6482 assign(t2, opACC == Iop_INVALID 6483 ? mkexpr(t1) 6484 : binop(opACC, getQReg128(dd), mkexpr(t1))); 6485 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2)) 6486 : mkexpr(t2)); 6487 const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla") 6488 : (bit29 == 1 ? 
"pmul" : "mul"); 6489 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6490 nameQReg128(dd), arrSpec, 6491 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6492 return True; 6493 } 6494 /* else fall through */ 6495 } 6496 6497 /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */ 6498 /* 31 28 23 21 20 15 9 4 6499 0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T 6500 0q1 01110 size 1 m 011011 n d UMIN Vd.T, Vn.T, Vm.T 6501 0q0 01110 size 1 m 011001 n d SMAX Vd.T, Vn.T, Vm.T 6502 0q1 01110 size 1 m 011001 n d UMAX Vd.T, Vn.T, Vm.T 6503 */ 6504 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6505 && INSN(21,21) == 1 6506 && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) { 6507 Bool isQ = INSN(30,30) == 1; 6508 Bool isU = INSN(29,29) == 1; 6509 UInt szBlg2 = INSN(23,22); 6510 Bool isMAX = INSN(11,11) == 0; 6511 UInt mm = INSN(20,16); 6512 UInt nn = INSN(9,5); 6513 UInt dd = INSN(4,0); 6514 Bool zeroHI = False; 6515 const HChar* arrSpec = ""; 6516 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 ); 6517 if (ok) { 6518 const IROp opMINS[4] 6519 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; 6520 const IROp opMINU[4] 6521 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; 6522 const IROp opMAXS[4] 6523 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; 6524 const IROp opMAXU[4] 6525 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; 6526 vassert(szBlg2 < 4); 6527 IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2]) 6528 : (isU ? opMINU[szBlg2] : opMINS[szBlg2]); 6529 IRTemp t = newTemp(Ity_V128); 6530 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 6531 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t)) 6532 : mkexpr(t)); 6533 const HChar* nm = isMAX ? (isU ? "umax" : "smax") 6534 : (isU ? "umin" : "smin"); 6535 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 6536 nameQReg128(dd), arrSpec, 6537 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec); 6538 return True; 6539 } 6540 /* else fall through */ 6541 } 6542 6543 /* -------------------- {S,U}{MIN,MAX}V -------------------- */ 6544 /* 31 28 23 21 16 15 9 4 6545 0q0 01110 size 11000 1 101010 n d SMINV Vd, Vn.T 6546 0q1 01110 size 11000 1 101010 n d UMINV Vd, Vn.T 6547 0q0 01110 size 11000 0 101010 n d SMAXV Vd, Vn.T 6548 0q1 01110 size 11000 0 101010 n d UMAXV Vd, Vn.T 6549 */ 6550 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) 6551 && INSN(21,17) == BITS5(1,1,0,0,0) 6552 && INSN(15,10) == BITS6(1,0,1,0,1,0)) { 6553 Bool isQ = INSN(30,30) == 1; 6554 Bool isU = INSN(29,29) == 1; 6555 UInt szBlg2 = INSN(23,22); 6556 Bool isMAX = INSN(16,16) == 0; 6557 UInt nn = INSN(9,5); 6558 UInt dd = INSN(4,0); 6559 Bool zeroHI = False; 6560 const HChar* arrSpec = ""; 6561 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2); 6562 if (ok) { 6563 if (szBlg2 == 3) ok = False; 6564 if (szBlg2 == 2 && !isQ) ok = False; 6565 } 6566 if (ok) { 6567 const IROp opMINS[3] 6568 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; 6569 const IROp opMINU[3] 6570 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 }; 6571 const IROp opMAXS[3] 6572 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; 6573 const IROp opMAXU[3] 6574 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; 6575 vassert(szBlg2 < 3); 6576 IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2]) 6577 : (isU ? opMINU[szBlg2] : opMINS[szBlg2]); 6578 IRTemp tN1 = newTemp(Ity_V128); 6579 assign(tN1, getQReg128(nn)); 6580 /* If Q == 0, we're just folding lanes in the lower half of 6581 the value. 
In which case, copy the lower half of the 6582 source into the upper half, so we can then treat it the 6583 same as the full width case. */ 6584 IRTemp tN2 = newTemp(Ity_V128); 6585 assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1)); 6586 IRTemp res = math_MINMAXV(tN2, op); 6587 if (res == IRTemp_INVALID) 6588 return False; /* means math_MINMAXV 6589 doesn't handle this case yet */ 6590 putQReg128(dd, mkexpr(res)); 6591 const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv") 6592 : (isU ? "uminv" : "sminv"); 6593 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 }; 6594 IRType laneTy = tys[szBlg2]; 6595 DIP("%s %s, %s.%s\n", nm, 6596 nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec); 6597 return True; 6598 } 6599 /* else fall through */ 6600 } 6601 /* ------------ UMULL (vector) ------------ */ 6602 /* 31 28 23 21 20 15 9 4 6603 001 01110 sz 1 m 110000 n d UMULL Vd.Ta, Vn.Tb, Vm.Tb 6604 6605 */ 6606 if (INSN(31,24) == BITS8(0,0,1,0,1,1,1,0) && INSN(23,22) != BITS2(1,1) 6607 && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,1,0,0,0,0)) { 6608 UInt mm = INSN(20,16); 6609 UInt nn = INSN(9,5); 6610 UInt dd = INSN(4,0); 6611 UInt sz = INSN(23,22); 6612 6613 const HChar* nameTa[3] = { "8h", "4s", "2d" }; 6614 const HChar* nameTb[3] = { "8b", "4h", "2s" }; 6615 const IROp ops[3] = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2 }; 6616 6617 putQReg128(dd, binop(ops[sz], getQRegLO(nn, Ity_I64), getQRegLO(mm, Ity_I64))); 6618 6619 DIP("umull %s.%s, %s.%s, %s.%s\n", nameQReg128(dd), nameTa[sz], 6620 nameQReg128(nn), nameTb[sz], nameQReg128(mm), nameTb[sz]); 6621 return True; 6622 } 6623 6624 6625 /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */ 6626 /* 31 28 23 20 15 9 4 6627 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T 6628 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T 6629 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T 6630 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T 6631 T is 16b when q==1, 8b when q==0 6632 */ 6633 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 6634 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 6635 Bool isQ = INSN(30,30) == 1; 6636 Bool isORR = INSN(23,23) == 1; 6637 Bool invert = INSN(22,22) == 1; 6638 UInt mm = INSN(20,16); 6639 UInt nn = INSN(9,5); 6640 UInt dd = INSN(4,0); 6641 IRTemp res = newTemp(Ity_V128); 6642 assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128, 6643 getQReg128(nn), 6644 invert ? unop(Iop_NotV128, getQReg128(mm)) 6645 : getQReg128(mm))); 6646 putQReg128(dd, isQ ? mkexpr(res) 6647 : unop(Iop_ZeroHI64ofV128, mkexpr(res))); 6648 const HChar* names[4] = { "and", "bic", "orr", "orn" }; 6649 const HChar* ar = isQ ? 
"16b" : "8b"; 6650 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)], 6651 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar); 6652 return True; 6653 } 6654 6655 /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */ 6656 /* 31 28 23 21 15 9 4 ix 6657 0q1 01110 size 1 m 100011 n d CMEQ Vd.T, Vn.T, Vm.T (1) == 6658 0q0 01110 size 1 m 100011 n d CMTST Vd.T, Vn.T, Vm.T (2) &, != 0 6659 6660 0q1 01110 size 1 m 001101 n d CMHI Vd.T, Vn.T, Vm.T (3) >u 6661 0q0 01110 size 1 m 001101 n d CMGT Vd.T, Vn.T, Vm.T (4) >s 6662 6663 0q1 01110 size 1 m 001111 n d CMHS Vd.T, Vn.T, Vm.T (5) >=u 6664 0q0 01110 size 1 m 001111 n d CMGE Vd.T, Vn.T, Vm.T (6) >=s 6665 6666 0q1 01110 size 100000 100010 n d CMGE Vd.T, Vn.T, #0 (7) >=s 0 6667 0q0 01110 size 100000 100010 n d CMGT Vd.T, Vn.T, #0 (8) >s 0 6668 6669 0q1 01110 size 100000 100110 n d CMLE Vd.T, Vn.T, #0 (9) <=s 0 6670 0q0 01110 size 100000 100110 n d CMEQ Vd.T, Vn.T, #0 (10) == 0 6671 6672 0q0 01110 size 100000 101010 n d CMLT Vd.T, Vn.T, #0 (11) <s 0 6673 */ 6674 if (INSN(31,31) == 0 6675 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) { 6676 Bool isQ = INSN(30,30) == 1; 6677 UInt bit29 = INSN(29,29); 6678 UInt szBlg2 = INSN(23,22); 6679 UInt mm = INSN(20,16); 6680 UInt b1510 = INSN(15,10); 6681 UInt nn = INSN(9,5); 6682 UInt dd = INSN(4,0); 6683 const IROp opsEQ[4] 6684 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 }; 6685 const IROp opsGTS[4] 6686 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 }; 6687 const IROp opsGTU[4] 6688 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 }; 6689 Bool zeroHI = False; 6690 const HChar* arrSpec = "??"; 6691 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2); 6692 UInt ix = 0; 6693 if (ok) { 6694 switch (b1510) { 6695 case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break; 6696 case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break; 6697 case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break; 6698 case BITS6(1,0,0,0,1,0): 6699 if (mm == 0) { ix = bit29 ? 7 : 8; }; break; 6700 case BITS6(1,0,0,1,1,0): 6701 if (mm == 0) { ix = bit29 ? 9 : 10; }; break; 6702 case BITS6(1,0,1,0,1,0): 6703 if (mm == 0 && bit29 == 0) { ix = 11; }; break; 6704 default: break; 6705 } 6706 } 6707 if (ix != 0) { 6708 vassert(ok && szBlg2 < 4); 6709 IRExpr* argL = getQReg128(nn); 6710 IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000); 6711 IRExpr* res = NULL; 6712 /* Some useful identities: 6713 x > y can be expressed directly 6714 x < y == y > x 6715 x <= y == not (x > y) 6716 x >= y == not (y > x) 6717 */ 6718 switch (ix) { 6719 case 1: res = binop(opsEQ[szBlg2], argL, argR); break; 6720 case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2], 6721 binop(Iop_AndV128, argL, argR), 6722 mkV128(0x0000))); 6723 break; 6724 case 3: res = binop(opsGTU[szBlg2], argL, argR); break; 6725 case 4: res = binop(opsGTS[szBlg2], argL, argR); break; 6726 case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL)); 6727 break; 6728 case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); 6729 break; 6730 case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL)); 6731 break; 6732 case 8: res = binop(opsGTS[szBlg2], argL, argR); break; 6733 case 9: res = unop(Iop_NotV128, 6734 binop(opsGTS[szBlg2], argL, argR)); 6735 break; 6736 case 10: res = binop(opsEQ[szBlg2], argL, argR); break; 6737 case 11: res = binop(opsGTS[szBlg2], argR, argL); break; 6738 default: vassert(0); 6739 } 6740 vassert(res); 6741 putQReg128(dd, zeroHI ? 
unop(Iop_ZeroHI64ofV128, res) : res);
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31  28    23  20 15     9 4
      0q1 01110 00 1 m  000111 n d  EOR Vd.T, Vn.T, Vm.T
      0q1 01110 01 1 m  000111 n d  BSL Vd.T, Vn.T, Vm.T
      0q1 01110 10 1 m  000111 n d  BIT Vd.T, Vn.T, Vm.T
      0q1 01110 11 1 m  000111 n d  BIF Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt op  = INSN(23,22);
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      switch (op) {
         case BITS2(0,0): /* EOR: d = n ^ m */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: d = (d & n) | (~d & m) */
            res = binop(opXOR, mkexpr(argM),
                        binop(opAND,
                              binop(opXOR, mkexpr(argM), mkexpr(argN)),
                              mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: d = (m & n) | (~m & d) */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: d = (~m & n) | (m & d) */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   /* ------------ USHR (scalar, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      011 111110 immh immb 000001 n d  USHR Vd, Vn, #shift
      For D-sized lanes the shift amount is encoded as 128 - immh:immb;
      for example "ushr d0, d1, #3" has immh:immb = 1111:101, since
      128 - 125 == 3.
   */
   if (INSN(31,23) == BITS9(0,1,1, 1,1,1,1,1,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      UInt immh = INSN(22,19);
      UInt immb = INSN(18,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);

      UInt szBlg2 = 0;
      UInt shift  = 0;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);

      /* Only the D-sized case (immh = 1xxx) is handled here, and only
         when the immh/immb decode actually succeeded. */
      if (ok && szBlg2 == 3) {
         putQRegHI64(dd, mkU64(0x0));
         putQRegLO(dd, binop(Iop_Shr64, getQRegLO(nn, Ity_I64), mkU8(shift)));
         DIP("ushr %s, %s, #%u\n",
             nameQRegLO(dd, Ity_I64), nameQRegLO(nn, Ity_I64), shift);
         return True;
      }
   }

   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q1 011110 immh immb 010001 n d  SRI  Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      0q1 011110 immh immb 010101 n d  SLI  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx,     SHL:xxx
                         001x:xxx -> H, SHR:16-xxxx,   SHL:xxxx
                         01xx:xxx -> S, SHR:32-xxxxx,  SHL:xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx, SHL:xxxxxx
                         other    -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,1,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (                    INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         Bool isInsert = (ix == 3 && INSN(29,29) == 1)
                         || (INSN(29,29) == 1
                             && INSN(15,11) == BITS5(0,1,0,0,0));

         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift  = 0;
         Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case.  Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         if (ok && szBlg2 < 4 && shift <= (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = isInsert ? "sri" : "ushr";
                       break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = isInsert ? "sli" : "shl";
                       break;
               default: vassert(0);
            }

            IRTemp candidate = newTemp(Ity_V128);
            assign(candidate, binop(op, getQReg128(nn), mkU8(shift)));

            IRTemp res;
            if (isInsert) {
               /* SRI/SLI replace only the destination bits that the
                  shift actually writes; build a mask of those bits by
                  shifting an all-ones vector the same way. */
               IRTemp mask = newTemp(Ity_V128);
               assign(mask, binop(op,
                                  binop(Iop_64HLtoV128,
                                        mkU64(0xFFFFFFFFFFFFFFFFULL),
                                        mkU64(0xFFFFFFFFFFFFFFFFULL)),
                                  mkU8(shift)));
               res = newTemp(Ity_V128);
               assign(res, binop(Iop_OrV128,
                                 binop(Iop_AndV128,
                                       unop(Iop_NotV128, mkexpr(mask)),
                                       getQReg128(dd)),
                                 mkexpr(candidate)));
            } else {
               res = candidate;
            }

            putQReg128(dd, isQ ? mkexpr(res)
                               : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
            HChar laneCh = "bhsd"[szBlg2];
            UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }

   /* -------------------- SHRN{,2} -------------------- */
   /* 31  28     22   18   15     9 4
      0q0 011110 immh immb 100001 n d  SHRN Vd.Tb, Vn.Ta, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1),  64 - immh:immb (0..31)
                       001x -> 4s, 4h(q0)/8h(q1),  32 - immh:immb (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), 8 - immh:immb (0..7)
                       0000 -> AdvSIMD modified immediate (???)
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt immh = INSN(22,19);
      UInt immb = INSN(18,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp  src  = newTemp(Ity_V128);
      IRTemp  zero = newTemp(Ity_V128);
      IRExpr* res  = NULL;

      UInt szBlg2 = 0;
      UInt shift  = 0;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);

      if (ok && szBlg2 < 3 && shift <= (8 << szBlg2)) {
         const IROp opsSHR[3]   = { Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const HChar* tas[3]    = { "8h", "4s", "2d" };
         const HChar* tbs_q0[3] = { "8b", "4h", "2s" };
         const HChar* tbs_q1[3] = { "16b", "8h", "4s" };
         assign(src, binop(opsSHR[szBlg2], getQReg128(nn), mkU8(shift)));
         assign(zero, mkV128(0x0000));
         switch (szBlg2) {
            case 0:
               res = mk_CatEvenLanes8x16(zero, src);
               break;
            case 1:
               res = mk_CatEvenLanes16x8(zero, src);
               break;
            case 2:
               res = mk_CatEvenLanes32x4(zero, src);
               break;
            default:
               break;
         }

         if (res != NULL) {
            if (isQ) {
               putQRegHI64(dd, unop(Iop_V128to64, res));
            } else {
               putQReg128(dd, res);
            }
            DIP("shrn%s %s.%s, %s.%s, #%u\n",
                isQ ? "2" : "", nameQReg128(dd),
                isQ ? tbs_q1[szBlg2] : tbs_q0[szBlg2],
                nameQReg128(nn), tas[szBlg2], shift);
            return True;
         }
      }
   }

   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31  28     22   18   15     9 4
      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
                       0000 -> AdvSIMD modified immediate (???)
      Worked example, derived from the table above: "sshll v2.4s,
      v3.4h, #5" needs Ta=4s/Tb=4h, hence immh = 001x, so immh:immb
      = 16 + 5 = 21, ie immh = 0010 and immb = 101.
      The implementation below widens each source lane by interleaving
      it with zero lanes, which leaves the source lane in the top half
      of the widened lane, and then shifts right by laneWidth - sh,
      logically for USHLL and arithmetically for SSHLL.  The net
      effect is a zero- or sign-extending left shift by sh.
7002 */ 7003 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0) 7004 && INSN(15,10) == BITS6(1,0,1,0,0,1)) { 7005 Bool isQ = INSN(30,30) == 1; 7006 Bool isU = INSN(29,29) == 1; 7007 UInt immh = INSN(22,19); 7008 UInt immb = INSN(18,16); 7009 UInt nn = INSN(9,5); 7010 UInt dd = INSN(4,0); 7011 UInt immhb = (immh << 3) | immb; 7012 IRTemp src = newTemp(Ity_V128); 7013 IRTemp zero = newTemp(Ity_V128); 7014 IRExpr* res = NULL; 7015 UInt sh = 0; 7016 const HChar* ta = "??"; 7017 const HChar* tb = "??"; 7018 assign(src, getQReg128(nn)); 7019 assign(zero, mkV128(0x0000)); 7020 if (immh & 8) { 7021 /* invalid; don't assign to res */ 7022 } 7023 else if (immh & 4) { 7024 sh = immhb - 32; 7025 vassert(sh < 32); /* so 32-sh is 1..32 */ 7026 ta = "2d"; 7027 tb = isQ ? "4s" : "2s"; 7028 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero) 7029 : mk_InterleaveLO32x4(src, zero); 7030 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh)); 7031 } 7032 else if (immh & 2) { 7033 sh = immhb - 16; 7034 vassert(sh < 16); /* so 16-sh is 1..16 */ 7035 ta = "4s"; 7036 tb = isQ ? "8h" : "4h"; 7037 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero) 7038 : mk_InterleaveLO16x8(src, zero); 7039 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh)); 7040 } 7041 else if (immh & 1) { 7042 sh = immhb - 8; 7043 vassert(sh < 8); /* so 8-sh is 1..8 */ 7044 ta = "8h"; 7045 tb = isQ ? "16b" : "8b"; 7046 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero) 7047 : mk_InterleaveLO8x16(src, zero); 7048 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh)); 7049 } else { 7050 vassert(immh == 0); 7051 /* invalid; don't assign to res */ 7052 } 7053 /* */ 7054 if (res) { 7055 putQReg128(dd, res); 7056 DIP("%cshll%s %s.%s, %s.%s, #%d\n", 7057 isU ? 'u' : 's', isQ ? "2" : "", 7058 nameQReg128(dd), ta, nameQReg128(nn), tb, sh); 7059 return True; 7060 } 7061 /* else fall through */ 7062 } 7063 7064 /* -------------------- XTN{,2} -------------------- */ 7065 /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta 7066 0q0 01110 size 100001 001010 n d 7067 */ 7068 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 7069 && INSN(21,16) == BITS6(1,0,0,0,0,1) 7070 && INSN(15,10) == BITS6(0,0,1,0,1,0)) { 7071 Bool isQ = INSN(30,30) == 1; 7072 UInt size = INSN(23,22); 7073 UInt nn = INSN(9,5); 7074 UInt dd = INSN(4,0); 7075 IROp op = Iop_INVALID; 7076 const HChar* tb = NULL; 7077 const HChar* ta = NULL; 7078 switch ((size << 1) | (isQ ? 1 : 0)) { 7079 case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; 7080 case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break; 7081 case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; 7082 case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break; 7083 case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; 7084 case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break; 7085 case 6: break; 7086 case 7: break; 7087 default: vassert(0); 7088 } 7089 if (op != Iop_INVALID) { 7090 if (!isQ) { 7091 putQRegLane(dd, 1, mkU64(0)); 7092 } 7093 putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn))); 7094 DIP("xtn%s %s.%s, %s.%s\n", isQ ? 
"2" : "", 7095 nameQReg128(dd), tb, nameQReg128(nn), ta); 7096 return True; 7097 } 7098 /* else fall through */ 7099 } 7100 7101 /* ---------------- CNT (vector) ---------------- */ 7102 /* 31 29 23 21 9 4 7103 0q 001110 00 100000010110 n d CNT Vd.T, Vn.T 7104 */ 7105 7106 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0) 7107 && INSN(23,22) == BITS2(0,0) 7108 && INSN(21,10) == BITS12(1,0,0,0,0,0,0,1,0,1,1,0) ) { 7109 Bool isQ = INSN(30,30) == 1; 7110 UInt nn = INSN(9,5); 7111 UInt dd = INSN(4,0); 7112 const HChar* name = isQ ? "16b" : "8b"; 7113 7114 IRExpr* res = unop(Iop_Cnt8x16, getQReg128(nn)); 7115 putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res)); 7116 7117 DIP("cnt %s.%s, %s.%s\n", nameQReg128(dd), name, nameQReg128(nn), name); 7118 return True; 7119 } 7120 7121 7122 /* ---------------- DUP (element, vector) ---------------- */ 7123 /* 31 28 20 15 9 4 7124 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index] 7125 */ 7126 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7127 && INSN(15,10) == BITS6(0,0,0,0,0,1)) { 7128 Bool isQ = INSN(30,30) == 1; 7129 UInt imm5 = INSN(20,16); 7130 UInt nn = INSN(9,5); 7131 UInt dd = INSN(4,0); 7132 IRTemp w0 = newTemp(Ity_I64); 7133 const HChar* arT = "??"; 7134 const HChar* arTs = "??"; 7135 IRType laneTy = Ity_INVALID; 7136 UInt laneNo = 16; /* invalid */ 7137 if (imm5 & 1) { 7138 arT = isQ ? "16b" : "8b"; 7139 arTs = "b"; 7140 laneNo = (imm5 >> 1) & 15; 7141 laneTy = Ity_I8; 7142 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy))); 7143 } 7144 else if (imm5 & 2) { 7145 arT = isQ ? "8h" : "4h"; 7146 arTs = "h"; 7147 laneNo = (imm5 >> 2) & 7; 7148 laneTy = Ity_I16; 7149 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy))); 7150 } 7151 else if (imm5 & 4) { 7152 arT = isQ ? "4s" : "2s"; 7153 arTs = "s"; 7154 laneNo = (imm5 >> 3) & 3; 7155 laneTy = Ity_I32; 7156 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy))); 7157 } 7158 else if ((imm5 & 8) && isQ) { 7159 arT = "2d"; 7160 arTs = "d"; 7161 laneNo = (imm5 >> 4) & 1; 7162 laneTy = Ity_I64; 7163 assign(w0, getQRegLane(nn, laneNo, laneTy)); 7164 } 7165 else { 7166 /* invalid; leave laneTy unchanged. */ 7167 } 7168 /* */ 7169 if (laneTy != Ity_INVALID) { 7170 vassert(laneNo < 16); 7171 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 7172 putQReg128(dd, binop(Iop_64HLtoV128, 7173 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1))); 7174 DIP("dup %s.%s, %s.%s[%u]\n", 7175 nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo); 7176 return True; 7177 } 7178 /* else fall through */ 7179 } 7180 7181 /* ---------------- DUP (general, vector) ---------------- */ 7182 /* 31 28 23 20 15 9 4 7183 0q0 01110 000 imm5 000011 n d DUP Vd.T, Rn 7184 Q=0 writes 64, Q=1 writes 128 7185 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W 7186 xxx10 4H(q=0) or 8H(q=1), R=W 7187 xx100 2S(q=0) or 4S(q=1), R=W 7188 x1000 Invalid(q=0) or 2D(q=1), R=X 7189 x0000 Invalid(q=0) or Invalid(q=1) 7190 */ 7191 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7192 && INSN(15,10) == BITS6(0,0,0,0,1,1)) { 7193 Bool isQ = INSN(30,30) == 1; 7194 UInt imm5 = INSN(20,16); 7195 UInt nn = INSN(9,5); 7196 UInt dd = INSN(4,0); 7197 IRTemp w0 = newTemp(Ity_I64); 7198 const HChar* arT = "??"; 7199 IRType laneTy = Ity_INVALID; 7200 if (imm5 & 1) { 7201 arT = isQ ? "16b" : "8b"; 7202 laneTy = Ity_I8; 7203 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn)))); 7204 } 7205 else if (imm5 & 2) { 7206 arT = isQ ? 
"8h" : "4h"; 7207 laneTy = Ity_I16; 7208 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn)))); 7209 } 7210 else if (imm5 & 4) { 7211 arT = isQ ? "4s" : "2s"; 7212 laneTy = Ity_I32; 7213 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn)))); 7214 } 7215 else if ((imm5 & 8) && isQ) { 7216 arT = "2d"; 7217 laneTy = Ity_I64; 7218 assign(w0, getIReg64orZR(nn)); 7219 } 7220 else { 7221 /* invalid; leave laneTy unchanged. */ 7222 } 7223 /* */ 7224 if (laneTy != Ity_INVALID) { 7225 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 7226 putQReg128(dd, binop(Iop_64HLtoV128, 7227 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1))); 7228 DIP("dup %s.%s, %s\n", 7229 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn)); 7230 return True; 7231 } 7232 /* else fall through */ 7233 } 7234 7235 /* --------------------- {S,U}ADDLV --------------------- */ 7236 /* 31 28 23 21 9 4 7237 0qu 01110 sz 110000001110 n d {U,S}ADDLV Vd, Vn.T 7238 7239 sz V T(q=1/0) 7240 -- - ---- 7241 00 h 16/8b 7242 01 s 8/4h 7243 10 d 4s (q can't be 0) 7244 11 invalid 7245 */ 7246 if (INSN(31,31) == 0 && INSN(28, 24) == BITS5(0,1,1,1,0) 7247 && INSN(21, 10) == BITS12(1,1,0,0,0,0,0,0,1,1,1,0)) { 7248 UInt bitQ = INSN(30,30); 7249 UInt bitU = INSN(29,29); 7250 UInt sz = INSN(23,22); 7251 UInt nn = INSN(9,5); 7252 UInt dd = INSN(4,0); 7253 7254 Bool valid = !((sz == BITS2(1,1)) || (bitQ == 0 && sz == BITS2(1,0))); 7255 if (valid) { 7256 const IRType ddTypes[3] = { Ity_I16, Ity_I32, Ity_I64 }; 7257 const HChar* suffixesQ[3] = { "16b", "8h", "4s" }; 7258 const HChar* suffixesq[3] = { "8b", "4h", "invalid" }; 7259 7260 IRTemp src = newTemp(Ity_V128); 7261 IRExpr* half = mkU64(0xFFFFFFFFFFFFFFFFULL); 7262 IRExpr* zero = mkU64(0x0); 7263 7264 IRExpr* mask = binop(Iop_64HLtoV128, zero, half); 7265 assign(src, bitQ ? getQReg128(nn) : binop(Iop_AndV128, getQReg128(nn), mask)); 7266 7267 IROp op; 7268 switch (sz) { 7269 case BITS2(0,0): op = bitU ? Iop_AddLV8Ux16 : Iop_AddLV8Sx16; break; 7270 case BITS2(0,1): op = bitU ? Iop_AddLV16Ux8 : Iop_AddLV16Sx8; break; 7271 case BITS2(1,0): op = bitU ? Iop_AddLV32Ux4 : Iop_AddLV32Sx4; break; 7272 default: vassert(0); 7273 } 7274 7275 putQReg128(dd, unop(op, mkexpr(src))); 7276 7277 DIP("%saddlv %s,%s.%s\n", bitU ? "u" : "s", nameQRegLO(dd, ddTypes[sz]), 7278 nameQReg128(nn), bitQ ? suffixesQ[sz] : suffixesq[sz]); 7279 7280 return True; 7281 } 7282 /* else fall through */ 7283 } 7284 /* ---------------------- {S,U}MOV ---------------------- */ 7285 /* 31 28 20 15 9 4 7286 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index] 7287 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index] 7288 dest is Xd when q==1, Wd when q==0 7289 UMOV: 7290 Ts,index,ops = case q:imm5 of 7291 0:xxxx1 -> B, xxxx, 8Uto64 7292 1:xxxx1 -> invalid 7293 0:xxx10 -> H, xxx, 16Uto64 7294 1:xxx10 -> invalid 7295 0:xx100 -> S, xx, 32Uto64 7296 1:xx100 -> invalid 7297 1:x1000 -> D, x, copy64 7298 other -> invalid 7299 SMOV: 7300 Ts,index,ops = case q:imm5 of 7301 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32) 7302 1:xxxx1 -> B, xxxx, 8Sto64 7303 0:xxx10 -> H, xxx, (32Uto64 . 
16Sto32)
                       1:xxx10 -> H, xxx, 16Sto64
                       0:xx100 -> invalid
                       1:xx100 -> S, xx, 32Sto64
                       1:x1000 -> invalid
                       other   -> invalid
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
      UInt bitQ = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      Bool isU  = INSN(12,12) == 1;
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
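
   /* Note: the ARM ARM specifies "mov" as the preferred disassembly
      alias for the Wd <- Vn.S[] and Xd <- Vn.D[] forms of UMOV; the
      DIP above prints the architectural names umov/smov for all
      forms. */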

   /* -------------------- INS (general) -------------------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      UInt imm5   = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      HChar ts    = '?';
      UInt laneNo = 16;
      IRExpr* src = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         /* The general register is Wn for the B/H/S cases and Xn for
            the D case. */
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIRegOrZR(ts == 'd', nn));
         return True;
      }
      /* else invalid; fall through */
   }

   /* -------------------- INS (element) -------------------- */
   /* 31  28       20   15 14   10 9 4
      011 01110000 imm5 0  imm4 1  n d  INS Vd.Ts[ix1], Vn.Ts[ix2]

      where Ts, ix1, ix2 = case imm5 of xxxx1 -> B, imm5<4:1>, imm4<3:0>
                                        xxx10 -> H, imm5<4:2>, imm4<3:1>
                                        xx100 -> S, imm5<4:3>, imm4<3:2>
                                        x1000 -> D, imm5<4:4>, imm4<3:3>
   */
   if (INSN(31,21) == BITS11(0,1,1,0,1,1,1,0,0,0,0)
       && INSN(15,15) == 0 && INSN(10,10) == 1 ) {
      UInt imm5 = INSN(20,16);
      UInt imm4 = INSN(14,11);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      HChar ts  = '?';
      IRType ty = Ity_INVALID;
      UInt srcLaneNo = 16;
      UInt dstLaneNo = 16;

      if (imm5 & 1) {
         srcLaneNo = imm4;
         dstLaneNo = imm5 >> 1;
         ty = Ity_I8;
         ts = 'b';
      } else if (imm5 & 2) {
         srcLaneNo = imm4 >> 1;
         dstLaneNo = imm5 >> 2;
         ty = Ity_I16;
         ts = 'h';
      } else if (imm5 & 4) {
         srcLaneNo = imm4 >> 2;
         dstLaneNo = imm5 >> 3;
         ty = Ity_I32;
         ts = 's';
      } else if (imm5 & 8) {
         srcLaneNo = imm4 >> 3;
         dstLaneNo = imm5 >> 4;
         ty = Ity_I64;
         ts = 'd';
      }

      if (ty != Ity_INVALID) {
         vassert(srcLaneNo < 16);
         vassert(dstLaneNo < 16);
         putQRegLane(dd, dstLaneNo, getQRegLane(nn, srcLaneNo, ty));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, dstLaneNo, nameQReg128(nn), ts, srcLaneNo);
         return True;
      }
      /* else invalid; fall through */
   }

   /* -------------------- NEG (vector) -------------------- */
   /* 31  28    23 21    16      9 4
      0q1 01110 sz 10000 0101110 n d  NEG Vd, Vn
      sz is laneSz, q:sz == 011 is disallowed, as usual
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opSUB[4]
            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
         IRTemp res =
newTemp(Ity_V128); 7500 vassert(szBlg2 < 4); 7501 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn))); 7502 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res)) 7503 : mkexpr(res)); 7504 DIP("neg %s.%s, %s.%s\n", 7505 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 7506 return True; 7507 } 7508 /* else fall through */ 7509 } 7510 7511 /* -------------------- TBL, TBX -------------------- */ 7512 /* 31 28 20 15 14 12 9 4 7513 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 7514 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 7515 where Ta = 16b(q=1) or 8b(q=0) 7516 */ 7517 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0) 7518 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) { 7519 Bool isQ = INSN(30,30) == 1; 7520 Bool isTBX = INSN(12,12) == 1; 7521 UInt mm = INSN(20,16); 7522 UInt len = INSN(14,13); 7523 UInt nn = INSN(9,5); 7524 UInt dd = INSN(4,0); 7525 /* The out-of-range values to use. */ 7526 IRTemp oor_values = newTemp(Ity_V128); 7527 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0)); 7528 /* src value */ 7529 IRTemp src = newTemp(Ity_V128); 7530 assign(src, getQReg128(mm)); 7531 /* The table values */ 7532 IRTemp tab[4]; 7533 UInt i; 7534 for (i = 0; i <= len; i++) { 7535 vassert(i < 4); 7536 tab[i] = newTemp(Ity_V128); 7537 assign(tab[i], getQReg128((nn + i) % 32)); 7538 } 7539 IRTemp res = math_TBL_TBX(tab, len, src, oor_values); 7540 putQReg128(dd, isQ ? mkexpr(res) 7541 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) ); 7542 const HChar* Ta = isQ ? "16b" : "8b"; 7543 const HChar* nm = isTBX ? "tbx" : "tbl"; 7544 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n", 7545 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); 7546 return True; 7547 } 7548 /* FIXME Temporary hacks to get through ld.so FIXME */ 7549 7550 /* ------------------ movi vD.4s, #0x0 ------------------ */ 7551 /* 0x4F 0x00 0x04 000 vD */ 7552 if ((insn & 0xFFFFFFE0) == 0x4F000400) { 7553 UInt vD = INSN(4,0); 7554 putQReg128(vD, mkV128(0x0000)); 7555 DIP("movi v%u.4s, #0x0\n", vD); 7556 return True; 7557 } 7558 7559 /* ---------------- MOV vD.16b, vN.16b ---------------- */ 7560 /* 31 23 20 15 9 4 7561 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b 7562 This only handles the N == M case. 7563 */ 7564 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0) 7565 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) { 7566 UInt mm = INSN(20,16); 7567 UInt nn = INSN(9,5); 7568 UInt dd = INSN(4,0); 7569 if (mm == nn) { 7570 putQReg128(dd, getQReg128(nn)); 7571 DIP("mov v%u.16b, v%u.16b\n", dd, nn); 7572 return True; 7573 } 7574 /* else it's really an ORR; fall through. 
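      The general ORR (vector, register) case, including mm != nn, is
      decoded by the {AND,BIC,ORR,ORN} (vector) handler earlier in
      this function.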
*/
   }

   /* ---------------- CMEQ_d_d_#0 ---------------- */
   /*
      010 11110 11 10000 0100 110 n d  CMEQ Dd, Dn, #0
   */
   if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpEQ64x2, getQReg128(nn),
                                mkV128(0x0000))));
      DIP("cmeq d%u, d%u, #0\n", dd, nn);
      return True;
   }

   /* ---------------- SHL_d_d_#imm ---------------- */
   /* 31         21  18 15     9 4
      010 111110 1ih3 ib 010101 n d  SHL Dd, Dn, #(ih3:ib)
   */
   if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1)
       && INSN(15,10) == BITS6(0,1,0,1,0,1)) {
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      UInt sh = INSN(21,16);
      vassert(sh < 64);
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   vex_printf("ARM64 front end: simd_and_fp\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file).
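      Each special sequence occupies 16 + 4 == 20 bytes: the preamble
      plus one marker insn.  That is why the cases below compute
      continuation addresses as guest_PC_curr_instr + 20, and why the
      guest_NRADDR case sets dres->len to 20 before resuming normal
      decoding.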
*/ 7669 { 7670 UChar* code = (UChar*)guest_instr; 7671 /* Spot the 16-byte preamble: 7672 93CC0D8C ror x12, x12, #3 7673 93CC358C ror x12, x12, #13 7674 93CCCD8C ror x12, x12, #51 7675 93CCF58C ror x12, x12, #61 7676 */ 7677 UInt word1 = 0x93CC0D8C; 7678 UInt word2 = 0x93CC358C; 7679 UInt word3 = 0x93CCCD8C; 7680 UInt word4 = 0x93CCF58C; 7681 if (getUIntLittleEndianly(code+ 0) == word1 && 7682 getUIntLittleEndianly(code+ 4) == word2 && 7683 getUIntLittleEndianly(code+ 8) == word3 && 7684 getUIntLittleEndianly(code+12) == word4) { 7685 /* Got a "Special" instruction preamble. Which one is it? */ 7686 if (getUIntLittleEndianly(code+16) == 0xAA0A014A 7687 /* orr x10,x10,x10 */) { 7688 /* X3 = client_request ( X4 ) */ 7689 DIP("x3 = client_request ( x4 )\n"); 7690 putPC(mkU64( guest_PC_curr_instr + 20 )); 7691 dres->jk_StopHere = Ijk_ClientReq; 7692 dres->whatNext = Dis_StopHere; 7693 return True; 7694 } 7695 else 7696 if (getUIntLittleEndianly(code+16) == 0xAA0B016B 7697 /* orr x11,x11,x11 */) { 7698 /* X3 = guest_NRADDR */ 7699 DIP("x3 = guest_NRADDR\n"); 7700 dres->len = 20; 7701 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 7702 return True; 7703 } 7704 else 7705 if (getUIntLittleEndianly(code+16) == 0xAA0C018C 7706 /* orr x12,x12,x12 */) { 7707 /* branch-and-link-to-noredir X8 */ 7708 DIP("branch-and-link-to-noredir x8\n"); 7709 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20)); 7710 putPC(getIReg64orZR(8)); 7711 dres->jk_StopHere = Ijk_NoRedir; 7712 dres->whatNext = Dis_StopHere; 7713 return True; 7714 } 7715 else 7716 if (getUIntLittleEndianly(code+16) == 0xAA090129 7717 /* orr x9,x9,x9 */) { 7718 /* IR injection */ 7719 DIP("IR injection\n"); 7720 vex_inject_ir(irsb, Iend_LE); 7721 // Invalidate the current insn. The reason is that the IRop we're 7722 // injecting here can change. In which case the translation has to 7723 // be redone. For ease of handling, we simply invalidate all the 7724 // time. 7725 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr))); 7726 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20))); 7727 putPC(mkU64( guest_PC_curr_instr + 20 )); 7728 dres->whatNext = Dis_StopHere; 7729 dres->jk_StopHere = Ijk_InvalICache; 7730 return True; 7731 } 7732 /* We don't know what it is. */ 7733 return False; 7734 /*NOTREACHED*/ 7735 } 7736 } 7737 7738 /* ----------------------------------------------------------- */ 7739 7740 /* Main ARM64 instruction decoder starts here. */ 7741 7742 Bool ok = False; 7743 7744 /* insn[28:25] determines the top-level grouping, so let's start 7745 off with that. 7746 7747 For all of these dis_ARM64_ functions, we pass *dres with the 7748 normal default results "insn OK, 4 bytes long, keep decoding" so 7749 they don't need to change it. However, decodes of control-flow 7750 insns may cause *dres to change. 
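      Worked example: 0x91000421 is "add x1, x1, #1"; its
      insn[28:25] is 1000, so it is routed to
      dis_ARM64_data_processing_immediate below.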
7751 */ 7752 switch (INSN(28,25)) { 7753 case BITS4(1,0,0,0): case BITS4(1,0,0,1): 7754 // Data processing - immediate 7755 ok = dis_ARM64_data_processing_immediate(dres, insn); 7756 break; 7757 case BITS4(1,0,1,0): case BITS4(1,0,1,1): 7758 // Branch, exception generation and system instructions 7759 ok = dis_ARM64_branch_etc(dres, insn, archinfo); 7760 break; 7761 case BITS4(0,1,0,0): case BITS4(0,1,1,0): 7762 case BITS4(1,1,0,0): case BITS4(1,1,1,0): 7763 // Loads and stores 7764 ok = dis_ARM64_load_store(dres, insn); 7765 break; 7766 case BITS4(0,1,0,1): case BITS4(1,1,0,1): 7767 // Data processing - register 7768 ok = dis_ARM64_data_processing_register(dres, insn); 7769 break; 7770 case BITS4(0,1,1,1): case BITS4(1,1,1,1): 7771 // Data processing - SIMD and floating point 7772 ok = dis_ARM64_simd_and_fp(dres, insn); 7773 break; 7774 case BITS4(0,0,0,0): case BITS4(0,0,0,1): 7775 case BITS4(0,0,1,0): case BITS4(0,0,1,1): 7776 // UNALLOCATED 7777 break; 7778 default: 7779 vassert(0); /* Can't happen */ 7780 } 7781 7782 /* If the next-level down decoders failed, make sure |dres| didn't 7783 get changed. */ 7784 if (!ok) { 7785 vassert(dres->whatNext == Dis_Continue); 7786 vassert(dres->len == 4); 7787 vassert(dres->continueAt == 0); 7788 vassert(dres->jk_StopHere == Ijk_INVALID); 7789 } 7790 7791 return ok; 7792 7793 # undef INSN 7794 } 7795 7796 7797 /*------------------------------------------------------------*/ 7798 /*--- Top-level fn ---*/ 7799 /*------------------------------------------------------------*/ 7800 7801 /* Disassemble a single instruction into IR. The instruction 7802 is located in host memory at &guest_code[delta]. */ 7803 7804 DisResult disInstr_ARM64 ( IRSB* irsb_IN, 7805 Bool (*resteerOkFn) ( void*, Addr64 ), 7806 Bool resteerCisOk, 7807 void* callback_opaque, 7808 UChar* guest_code_IN, 7809 Long delta_IN, 7810 Addr64 guest_IP, 7811 VexArch guest_arch, 7812 VexArchInfo* archinfo, 7813 VexAbiInfo* abiinfo, 7814 Bool host_bigendian_IN, 7815 Bool sigill_diag_IN ) 7816 { 7817 DisResult dres; 7818 vex_bzero(&dres, sizeof(dres)); 7819 7820 /* Set globals (see top of this file) */ 7821 vassert(guest_arch == VexArchARM64); 7822 7823 irsb = irsb_IN; 7824 host_is_bigendian = host_bigendian_IN; 7825 guest_PC_curr_instr = (Addr64)guest_IP; 7826 7827 /* Sanity checks */ 7828 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */ 7829 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15); 7830 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15); 7831 7832 /* Try to decode */ 7833 Bool ok = disInstr_ARM64_WRK( &dres, 7834 resteerOkFn, resteerCisOk, callback_opaque, 7835 (UChar*)&guest_code_IN[delta_IN], 7836 archinfo, abiinfo ); 7837 if (ok) { 7838 /* All decode successes end up here. */ 7839 vassert(dres.len == 4 || dres.len == 20); 7840 switch (dres.whatNext) { 7841 case Dis_Continue: 7842 putPC( mkU64(dres.len + guest_PC_curr_instr) ); 7843 break; 7844 case Dis_ResteerU: 7845 case Dis_ResteerC: 7846 putPC(mkU64(dres.continueAt)); 7847 break; 7848 case Dis_StopHere: 7849 break; 7850 default: 7851 vassert(0); 7852 } 7853 DIP("\n"); 7854 } else { 7855 /* All decode failures end up here. */ 7856 if (sigill_diag_IN) { 7857 Int i, j; 7858 UChar buf[64]; 7859 UInt insn 7860 = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] ); 7861 vex_bzero(buf, sizeof(buf)); 7862 for (i = j = 0; i < 32; i++) { 7863 if (i > 0) { 7864 if ((i & 7) == 0) buf[j++] = ' '; 7865 else if ((i & 3) == 0) buf[j++] = '\''; 7866 } 7867 buf[j++] = (insn & (1<<(31-i))) ? 
'1' : '0'; 7868 } 7869 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn); 7870 vex_printf("disInstr(arm64): %s\n", buf); 7871 } 7872 7873 /* Tell the dispatcher that this insn cannot be decoded, and so 7874 has not been executed, and (is currently) the next to be 7875 executed. PC should be up-to-date since it is made so at the 7876 start of each insn, but nevertheless be paranoid and update 7877 it again right now. */ 7878 putPC( mkU64(guest_PC_curr_instr) ); 7879 dres.whatNext = Dis_StopHere; 7880 dres.len = 0; 7881 dres.continueAt = 0; 7882 dres.jk_StopHere = Ijk_NoDecode; 7883 } 7884 return dres; 7885 } 7886 7887 //////////////////////////////////////////////////////////////////////// 7888 //////////////////////////////////////////////////////////////////////// 7889 7890 /* Spare code for doing reference implementations of various 128-bit 7891 SIMD interleaves/deinterleaves/concatenation ops. For 64-bit 7892 equivalents see the end of guest_arm_toIR.c. */ 7893 7894 //////////////////////////////////////////////////////////////// 7895 // 64x2 operations 7896 // 7897 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) 7898 { 7899 // returns a0 b0 7900 return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)), 7901 unop(Iop_V128to64, mkexpr(b10))); 7902 } 7903 7904 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) 7905 { 7906 // returns a1 b1 7907 return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)), 7908 unop(Iop_V128HIto64, mkexpr(b10))); 7909 } 7910 7911 7912 //////////////////////////////////////////////////////////////// 7913 // 32x4 operations 7914 // 7915 7916 // Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with 7917 // the top halves guaranteed to be zero. 7918 static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1, 7919 IRTemp* out0, IRTemp v128 ) 7920 { 7921 if (out3) *out3 = newTemp(Ity_I64); 7922 if (out2) *out2 = newTemp(Ity_I64); 7923 if (out1) *out1 = newTemp(Ity_I64); 7924 if (out0) *out0 = newTemp(Ity_I64); 7925 IRTemp hi64 = newTemp(Ity_I64); 7926 IRTemp lo64 = newTemp(Ity_I64); 7927 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); 7928 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); 7929 if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32))); 7930 if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF))); 7931 if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32))); 7932 if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF))); 7933 } 7934 7935 // Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit 7936 // IRTemp. 
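// Note that in3 and in1 contribute only their low 32 bits (via the
// left shifts) and in2/in0 are masked explicitly, so stray bits in
// the upper halves of the inputs cannot leak into the result.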
7937 static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) 7938 { 7939 IRTemp hi64 = newTemp(Ity_I64); 7940 IRTemp lo64 = newTemp(Ity_I64); 7941 assign(hi64, 7942 binop(Iop_Or64, 7943 binop(Iop_Shl64, mkexpr(in3), mkU8(32)), 7944 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF)))); 7945 assign(lo64, 7946 binop(Iop_Or64, 7947 binop(Iop_Shl64, mkexpr(in1), mkU8(32)), 7948 binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF)))); 7949 IRTemp res = newTemp(Ity_V128); 7950 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64))); 7951 return res; 7952 } 7953 7954 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) 7955 { 7956 // returns a2 a0 b2 b0 7957 IRTemp a2, a0, b2, b0; 7958 breakV128to32s(NULL, &a2, NULL, &a0, a3210); 7959 breakV128to32s(NULL, &b2, NULL, &b0, b3210); 7960 return mkexpr(mkV128from32s(a2, a0, b2, b0)); 7961 } 7962 7963 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) 7964 { 7965 // returns a3 a1 b3 b1 7966 IRTemp a3, a1, b3, b1; 7967 breakV128to32s(&a3, NULL, &a1, NULL, a3210); 7968 breakV128to32s(&b3, NULL, &b1, NULL, b3210); 7969 return mkexpr(mkV128from32s(a3, a1, b3, b1)); 7970 } 7971 7972 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) 7973 { 7974 // returns a1 b1 a0 b0 7975 IRTemp a1, a0, b1, b0; 7976 breakV128to32s(NULL, NULL, &a1, &a0, a3210); 7977 breakV128to32s(NULL, NULL, &b1, &b0, b3210); 7978 return mkexpr(mkV128from32s(a1, b1, a0, b0)); 7979 } 7980 7981 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) 7982 { 7983 // returns a3 b3 a2 b2 7984 IRTemp a3, a2, b3, b2; 7985 breakV128to32s(&a3, &a2, NULL, NULL, a3210); 7986 breakV128to32s(&b3, &b2, NULL, NULL, b3210); 7987 return mkexpr(mkV128from32s(a3, b3, a2, b2)); 7988 } 7989 7990 //////////////////////////////////////////////////////////////// 7991 // 16x8 operations 7992 // 7993 7994 static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5, 7995 IRTemp* out4, IRTemp* out3, IRTemp* out2, 7996 IRTemp* out1,IRTemp* out0, IRTemp v128 ) 7997 { 7998 if (out7) *out7 = newTemp(Ity_I64); 7999 if (out6) *out6 = newTemp(Ity_I64); 8000 if (out5) *out5 = newTemp(Ity_I64); 8001 if (out4) *out4 = newTemp(Ity_I64); 8002 if (out3) *out3 = newTemp(Ity_I64); 8003 if (out2) *out2 = newTemp(Ity_I64); 8004 if (out1) *out1 = newTemp(Ity_I64); 8005 if (out0) *out0 = newTemp(Ity_I64); 8006 IRTemp hi64 = newTemp(Ity_I64); 8007 IRTemp lo64 = newTemp(Ity_I64); 8008 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); 8009 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); 8010 if (out7) 8011 assign(*out7, binop(Iop_And64, 8012 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)), 8013 mkU64(0xFFFF))); 8014 if (out6) 8015 assign(*out6, binop(Iop_And64, 8016 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)), 8017 mkU64(0xFFFF))); 8018 if (out5) 8019 assign(*out5, binop(Iop_And64, 8020 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)), 8021 mkU64(0xFFFF))); 8022 if (out4) 8023 assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF))); 8024 if (out3) 8025 assign(*out3, binop(Iop_And64, 8026 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)), 8027 mkU64(0xFFFF))); 8028 if (out2) 8029 assign(*out2, binop(Iop_And64, 8030 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)), 8031 mkU64(0xFFFF))); 8032 if (out1) 8033 assign(*out1, binop(Iop_And64, 8034 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)), 8035 mkU64(0xFFFF))); 8036 if (out0) 8037 assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF))); 8038 } 8039 8040 static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4, 8041 
IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) 8042 { 8043 IRTemp hi64 = newTemp(Ity_I64); 8044 IRTemp lo64 = newTemp(Ity_I64); 8045 assign(hi64, 8046 binop(Iop_Or64, 8047 binop(Iop_Or64, 8048 binop(Iop_Shl64, 8049 binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)), 8050 mkU8(48)), 8051 binop(Iop_Shl64, 8052 binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)), 8053 mkU8(32))), 8054 binop(Iop_Or64, 8055 binop(Iop_Shl64, 8056 binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)), 8057 mkU8(16)), 8058 binop(Iop_And64, 8059 mkexpr(in4), mkU64(0xFFFF))))); 8060 assign(lo64, 8061 binop(Iop_Or64, 8062 binop(Iop_Or64, 8063 binop(Iop_Shl64, 8064 binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)), 8065 mkU8(48)), 8066 binop(Iop_Shl64, 8067 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)), 8068 mkU8(32))), 8069 binop(Iop_Or64, 8070 binop(Iop_Shl64, 8071 binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)), 8072 mkU8(16)), 8073 binop(Iop_And64, 8074 mkexpr(in0), mkU64(0xFFFF))))); 8075 IRTemp res = newTemp(Ity_V128); 8076 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64))); 8077 return res; 8078 } 8079 8080 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) 8081 { 8082 // returns a6 a4 a2 a0 b6 b4 b2 b0 8083 IRTemp a6, a4, a2, a0, b6, b4, b2, b0; 8084 breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210); 8085 breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210); 8086 return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0)); 8087 } 8088 8089 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) 8090 { 8091 // returns a7 a5 a3 a1 b7 b5 b3 b1 8092 IRTemp a7, a5, a3, a1, b7, b5, b3, b1; 8093 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210); 8094 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210); 8095 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1)); 8096 } 8097 8098 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) 8099 { 8100 // returns a3 b3 a2 b2 a1 b1 a0 b0 8101 IRTemp a3, b3, a2, b2, a1, a0, b1, b0; 8102 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210); 8103 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210); 8104 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0)); 8105 } 8106 8107 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) 8108 { 8109 // returns a7 b7 a6 b6 a5 b5 a4 b4 8110 IRTemp a7, b7, a6, b6, a5, b5, a4, b4; 8111 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210); 8112 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210); 8113 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4)); 8114 } 8115 8116 //////////////////////////////////////////////////////////////// 8117 // 8x16 operations 8118 // 8119 8120 static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD, 8121 IRTemp* outC, IRTemp* outB, IRTemp* outA, 8122 IRTemp* out9, IRTemp* out8, 8123 IRTemp* out7, IRTemp* out6, IRTemp* out5, 8124 IRTemp* out4, IRTemp* out3, IRTemp* out2, 8125 IRTemp* out1,IRTemp* out0, IRTemp v128 ) 8126 { 8127 if (outF) *outF = newTemp(Ity_I64); 8128 if (outE) *outE = newTemp(Ity_I64); 8129 if (outD) *outD = newTemp(Ity_I64); 8130 if (outC) *outC = newTemp(Ity_I64); 8131 if (outB) *outB = newTemp(Ity_I64); 8132 if (outA) *outA = newTemp(Ity_I64); 8133 if (out9) *out9 = newTemp(Ity_I64); 8134 if (out8) *out8 = newTemp(Ity_I64); 8135 if (out7) *out7 = newTemp(Ity_I64); 8136 if (out6) *out6 = newTemp(Ity_I64); 8137 if (out5) *out5 = newTemp(Ity_I64); 8138 if 
(out4) *out4 = newTemp(Ity_I64); 8139 if (out3) *out3 = newTemp(Ity_I64); 8140 if (out2) *out2 = newTemp(Ity_I64); 8141 if (out1) *out1 = newTemp(Ity_I64); 8142 if (out0) *out0 = newTemp(Ity_I64); 8143 IRTemp hi64 = newTemp(Ity_I64); 8144 IRTemp lo64 = newTemp(Ity_I64); 8145 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) ); 8146 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) ); 8147 if (outF) 8148 assign(*outF, binop(Iop_And64, 8149 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)), 8150 mkU64(0xFF))); 8151 if (outE) 8152 assign(*outE, binop(Iop_And64, 8153 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)), 8154 mkU64(0xFF))); 8155 if (outD) 8156 assign(*outD, binop(Iop_And64, 8157 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)), 8158 mkU64(0xFF))); 8159 if (outC) 8160 assign(*outC, binop(Iop_And64, 8161 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)), 8162 mkU64(0xFF))); 8163 if (outB) 8164 assign(*outB, binop(Iop_And64, 8165 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)), 8166 mkU64(0xFF))); 8167 if (outA) 8168 assign(*outA, binop(Iop_And64, 8169 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)), 8170 mkU64(0xFF))); 8171 if (out9) 8172 assign(*out9, binop(Iop_And64, 8173 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)), 8174 mkU64(0xFF))); 8175 if (out8) 8176 assign(*out8, binop(Iop_And64, 8177 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)), 8178 mkU64(0xFF))); 8179 if (out7) 8180 assign(*out7, binop(Iop_And64, 8181 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)), 8182 mkU64(0xFF))); 8183 if (out6) 8184 assign(*out6, binop(Iop_And64, 8185 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)), 8186 mkU64(0xFF))); 8187 if (out5) 8188 assign(*out5, binop(Iop_And64, 8189 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)), 8190 mkU64(0xFF))); 8191 if (out4) 8192 assign(*out4, binop(Iop_And64, 8193 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)), 8194 mkU64(0xFF))); 8195 if (out3) 8196 assign(*out3, binop(Iop_And64, 8197 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)), 8198 mkU64(0xFF))); 8199 if (out2) 8200 assign(*out2, binop(Iop_And64, 8201 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)), 8202 mkU64(0xFF))); 8203 if (out1) 8204 assign(*out1, binop(Iop_And64, 8205 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)), 8206 mkU64(0xFF))); 8207 if (out0) 8208 assign(*out0, binop(Iop_And64, 8209 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)), 8210 mkU64(0xFF))); 8211 } 8212 8213 static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC, 8214 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8, 8215 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4, 8216 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 ) 8217 { 8218 IRTemp vFE = newTemp(Ity_I64); 8219 IRTemp vDC = newTemp(Ity_I64); 8220 IRTemp vBA = newTemp(Ity_I64); 8221 IRTemp v98 = newTemp(Ity_I64); 8222 IRTemp v76 = newTemp(Ity_I64); 8223 IRTemp v54 = newTemp(Ity_I64); 8224 IRTemp v32 = newTemp(Ity_I64); 8225 IRTemp v10 = newTemp(Ity_I64); 8226 assign(vFE, binop(Iop_Or64, 8227 binop(Iop_Shl64, 8228 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)), 8229 binop(Iop_And64, mkexpr(inE), mkU64(0xFF)))); 8230 assign(vDC, binop(Iop_Or64, 8231 binop(Iop_Shl64, 8232 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)), 8233 binop(Iop_And64, mkexpr(inC), mkU64(0xFF)))); 8234 assign(vBA, binop(Iop_Or64, 8235 binop(Iop_Shl64, 8236 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)), 8237 binop(Iop_And64, mkexpr(inA), mkU64(0xFF)))); 8238 assign(v98, binop(Iop_Or64, 8239 binop(Iop_Shl64, 8240 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)), 8241 binop(Iop_And64, mkexpr(in8), mkU64(0xFF)))); 8242 assign(v76, binop(Iop_Or64, 8243 binop(Iop_Shl64, 8244 binop(Iop_And64, 
mkexpr(in7), mkU64(0xFF)), mkU8(8)),
                     binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
   assign(v54, binop(Iop_Or64,
                     binop(Iop_Shl64,
                           binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
                     binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
   assign(v32, binop(Iop_Or64,
                     binop(Iop_Shl64,
                           binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
                     binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
   assign(v10, binop(Iop_Or64,
                     binop(Iop_Shl64,
                           binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
                     binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
   return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 )
{
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
   breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
                 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
                 aFEDCBA9876543210);
   breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
                 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
                 bFEDCBA9876543210);
   return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
                              bE, bC, bA, b8, b6, b4, b2, b0));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 )
{
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
   breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
                 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
                 aFEDCBA9876543210);
   breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
                 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
                 bFEDCBA9876543210);
   return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
                              bF, bD, bB, b9, b7, b5, b3, b1));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 )
{
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
                 aFEDCBA9876543210);
   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
                 bFEDCBA9876543210);
   return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
                              a3, b3, a2, b2, a1, b1, a0, b0));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 )
{
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
   breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 aFEDCBA9876543210);
   breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                 bFEDCBA9876543210);
   return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
                              aB, bB, aA, bA, a9, b9, a8, b8));
}

/*--------------------------------------------------------------------*/
/*--- end                                      guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/