/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, the "unordered" FP comparison is implemented as a normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least significant
     mantissa bit is sometimes incorrect.  Fix: use the IR multiply-add
     IROps instead.

   * FRINTA and FRINTN are kludged: they just round to nearest, with no
     special handling of the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  No idea what "round to odd" means.  This
     implementation just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means the
   value does not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host.
*/ 158 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p ) 159 //ZZ { 160 //ZZ UShort w = 0; 161 //ZZ w = (w << 8) | p[1]; 162 //ZZ w = (w << 8) | p[0]; 163 //ZZ return w; 164 //ZZ } 165 //ZZ 166 //ZZ static UInt ROR32 ( UInt x, UInt sh ) { 167 //ZZ vassert(sh >= 0 && sh < 32); 168 //ZZ if (sh == 0) 169 //ZZ return x; 170 //ZZ else 171 //ZZ return (x << (32-sh)) | (x >> sh); 172 //ZZ } 173 //ZZ 174 //ZZ static Int popcount32 ( UInt x ) 175 //ZZ { 176 //ZZ Int res = 0, i; 177 //ZZ for (i = 0; i < 32; i++) { 178 //ZZ res += (x & 1); 179 //ZZ x >>= 1; 180 //ZZ } 181 //ZZ return res; 182 //ZZ } 183 //ZZ 184 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b ) 185 //ZZ { 186 //ZZ UInt mask = 1 << ix; 187 //ZZ x &= ~mask; 188 //ZZ x |= ((b << ix) & mask); 189 //ZZ return x; 190 //ZZ } 191 192 #define BITS2(_b1,_b0) \ 193 (((_b1) << 1) | (_b0)) 194 195 #define BITS3(_b2,_b1,_b0) \ 196 (((_b2) << 2) | ((_b1) << 1) | (_b0)) 197 198 #define BITS4(_b3,_b2,_b1,_b0) \ 199 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) 200 201 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 202 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ 203 | BITS4((_b3),(_b2),(_b1),(_b0))) 204 205 #define BITS5(_b4,_b3,_b2,_b1,_b0) \ 206 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) 207 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ 208 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 209 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 210 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 211 212 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 213 (((_b8) << 8) \ 214 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 215 216 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 217 (((_b9) << 9) | ((_b8) << 8) \ 218 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 219 220 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 221 (((_b10) << 10) \ 222 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 223 224 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 225 (((_b11) << 11) \ 226 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 227 228 #define X00 BITS2(0,0) 229 #define X01 BITS2(0,1) 230 #define X10 BITS2(1,0) 231 #define X11 BITS2(1,1) 232 233 // produces _uint[_bMax:_bMin] 234 #define SLICE_UInt(_uint,_bMax,_bMin) \ 235 (( ((UInt)(_uint)) >> (_bMin)) \ 236 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) 237 238 239 /*------------------------------------------------------------*/ 240 /*--- Helper bits and pieces for creating IR fragments. 
---*/ 241 /*------------------------------------------------------------*/ 242 243 static IRExpr* mkV128 ( UShort w ) 244 { 245 return IRExpr_Const(IRConst_V128(w)); 246 } 247 248 static IRExpr* mkU64 ( ULong i ) 249 { 250 return IRExpr_Const(IRConst_U64(i)); 251 } 252 253 static IRExpr* mkU32 ( UInt i ) 254 { 255 return IRExpr_Const(IRConst_U32(i)); 256 } 257 258 static IRExpr* mkU16 ( UInt i ) 259 { 260 vassert(i < 65536); 261 return IRExpr_Const(IRConst_U16(i)); 262 } 263 264 static IRExpr* mkU8 ( UInt i ) 265 { 266 vassert(i < 256); 267 return IRExpr_Const(IRConst_U8( (UChar)i )); 268 } 269 270 static IRExpr* mkexpr ( IRTemp tmp ) 271 { 272 return IRExpr_RdTmp(tmp); 273 } 274 275 static IRExpr* unop ( IROp op, IRExpr* a ) 276 { 277 return IRExpr_Unop(op, a); 278 } 279 280 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 281 { 282 return IRExpr_Binop(op, a1, a2); 283 } 284 285 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 286 { 287 return IRExpr_Triop(op, a1, a2, a3); 288 } 289 290 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 291 { 292 return IRExpr_Load(Iend_LE, ty, addr); 293 } 294 295 /* Add a statement to the list held by "irbb". */ 296 static void stmt ( IRStmt* st ) 297 { 298 addStmtToIRSB( irsb, st ); 299 } 300 301 static void assign ( IRTemp dst, IRExpr* e ) 302 { 303 stmt( IRStmt_WrTmp(dst, e) ); 304 } 305 306 static void storeLE ( IRExpr* addr, IRExpr* data ) 307 { 308 stmt( IRStmt_Store(Iend_LE, addr, data) ); 309 } 310 311 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT ) 312 //ZZ { 313 //ZZ if (guardT == IRTemp_INVALID) { 314 //ZZ /* unconditional */ 315 //ZZ storeLE(addr, data); 316 //ZZ } else { 317 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data, 318 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); 319 //ZZ } 320 //ZZ } 321 //ZZ 322 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt, 323 //ZZ IRExpr* addr, IRExpr* alt, 324 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 325 //ZZ { 326 //ZZ if (guardT == IRTemp_INVALID) { 327 //ZZ /* unconditional */ 328 //ZZ IRExpr* loaded = NULL; 329 //ZZ switch (cvt) { 330 //ZZ case ILGop_Ident32: 331 //ZZ loaded = loadLE(Ity_I32, addr); break; 332 //ZZ case ILGop_8Uto32: 333 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break; 334 //ZZ case ILGop_8Sto32: 335 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break; 336 //ZZ case ILGop_16Uto32: 337 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break; 338 //ZZ case ILGop_16Sto32: 339 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break; 340 //ZZ default: 341 //ZZ vassert(0); 342 //ZZ } 343 //ZZ vassert(loaded != NULL); 344 //ZZ assign(dst, loaded); 345 //ZZ } else { 346 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the 347 //ZZ loaded data before putting the data in 'dst'. If the load 348 //ZZ does not take place, 'alt' is placed directly in 'dst'. */ 349 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt, 350 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); 351 //ZZ } 352 //ZZ } 353 354 /* Generate a new temporary of the given type. */ 355 static IRTemp newTemp ( IRType ty ) 356 { 357 vassert(isPlausibleIRType(ty)); 358 return newIRTemp( irsb->tyenv, ty ); 359 } 360 361 /* This is used in many places, so the brevity is an advantage. */ 362 static IRTemp newTempV128(void) 363 { 364 return newTemp(Ity_V128); 365 } 366 367 /* Initialise V128 temporaries en masse. 
*/ 368 static 369 void newTempsV128_2(IRTemp* t1, IRTemp* t2) 370 { 371 vassert(t1 && *t1 == IRTemp_INVALID); 372 vassert(t2 && *t2 == IRTemp_INVALID); 373 *t1 = newTempV128(); 374 *t2 = newTempV128(); 375 } 376 377 static 378 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3) 379 { 380 vassert(t1 && *t1 == IRTemp_INVALID); 381 vassert(t2 && *t2 == IRTemp_INVALID); 382 vassert(t3 && *t3 == IRTemp_INVALID); 383 *t1 = newTempV128(); 384 *t2 = newTempV128(); 385 *t3 = newTempV128(); 386 } 387 388 static 389 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4) 390 { 391 vassert(t1 && *t1 == IRTemp_INVALID); 392 vassert(t2 && *t2 == IRTemp_INVALID); 393 vassert(t3 && *t3 == IRTemp_INVALID); 394 vassert(t4 && *t4 == IRTemp_INVALID); 395 *t1 = newTempV128(); 396 *t2 = newTempV128(); 397 *t3 = newTempV128(); 398 *t4 = newTempV128(); 399 } 400 401 static 402 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3, 403 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7) 404 { 405 vassert(t1 && *t1 == IRTemp_INVALID); 406 vassert(t2 && *t2 == IRTemp_INVALID); 407 vassert(t3 && *t3 == IRTemp_INVALID); 408 vassert(t4 && *t4 == IRTemp_INVALID); 409 vassert(t5 && *t5 == IRTemp_INVALID); 410 vassert(t6 && *t6 == IRTemp_INVALID); 411 vassert(t7 && *t7 == IRTemp_INVALID); 412 *t1 = newTempV128(); 413 *t2 = newTempV128(); 414 *t3 = newTempV128(); 415 *t4 = newTempV128(); 416 *t5 = newTempV128(); 417 *t6 = newTempV128(); 418 *t7 = newTempV128(); 419 } 420 421 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type 422 //ZZ IRRoundingMode. */ 423 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 424 //ZZ { 425 //ZZ return mkU32(Irrm_NEAREST); 426 //ZZ } 427 //ZZ 428 //ZZ /* Generate an expression for SRC rotated right by ROT. */ 429 //ZZ static IRExpr* genROR32( IRTemp src, Int rot ) 430 //ZZ { 431 //ZZ vassert(rot >= 0 && rot < 32); 432 //ZZ if (rot == 0) 433 //ZZ return mkexpr(src); 434 //ZZ return 435 //ZZ binop(Iop_Or32, 436 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)), 437 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot))); 438 //ZZ } 439 //ZZ 440 //ZZ static IRExpr* mkU128 ( ULong i ) 441 //ZZ { 442 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i)); 443 //ZZ } 444 //ZZ 445 //ZZ /* Generate a 4-aligned version of the given expression if 446 //ZZ the given condition is true. Else return it unchanged. */ 447 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b ) 448 //ZZ { 449 //ZZ if (b) 450 //ZZ return binop(Iop_And32, e, mkU32(~3)); 451 //ZZ else 452 //ZZ return e; 453 //ZZ } 454 455 /* Other IR construction helpers. 
*/
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2
}; 609 vassert(size < 4); 610 return ops[size]; 611 } 612 613 static IROp mkVecQADDEXTUSSATSS ( UInt size ) { 614 const IROp ops[4] 615 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8, 616 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 }; 617 vassert(size < 4); 618 return ops[size]; 619 } 620 621 static IROp mkVecSUB ( UInt size ) { 622 const IROp ops[4] 623 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 624 vassert(size < 4); 625 return ops[size]; 626 } 627 628 static IROp mkVecQSUBU ( UInt size ) { 629 const IROp ops[4] 630 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 }; 631 vassert(size < 4); 632 return ops[size]; 633 } 634 635 static IROp mkVecQSUBS ( UInt size ) { 636 const IROp ops[4] 637 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 }; 638 vassert(size < 4); 639 return ops[size]; 640 } 641 642 static IROp mkVecSARN ( UInt size ) { 643 const IROp ops[4] 644 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 }; 645 vassert(size < 4); 646 return ops[size]; 647 } 648 649 static IROp mkVecSHRN ( UInt size ) { 650 const IROp ops[4] 651 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 }; 652 vassert(size < 4); 653 return ops[size]; 654 } 655 656 static IROp mkVecSHLN ( UInt size ) { 657 const IROp ops[4] 658 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 }; 659 vassert(size < 4); 660 return ops[size]; 661 } 662 663 static IROp mkVecCATEVENLANES ( UInt size ) { 664 const IROp ops[4] 665 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, 666 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 }; 667 vassert(size < 4); 668 return ops[size]; 669 } 670 671 static IROp mkVecCATODDLANES ( UInt size ) { 672 const IROp ops[4] 673 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, 674 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 }; 675 vassert(size < 4); 676 return ops[size]; 677 } 678 679 static IROp mkVecINTERLEAVELO ( UInt size ) { 680 const IROp ops[4] 681 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 682 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 }; 683 vassert(size < 4); 684 return ops[size]; 685 } 686 687 static IROp mkVecINTERLEAVEHI ( UInt size ) { 688 const IROp ops[4] 689 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 690 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 }; 691 vassert(size < 4); 692 return ops[size]; 693 } 694 695 static IROp mkVecMAXU ( UInt size ) { 696 const IROp ops[4] 697 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; 698 vassert(size < 4); 699 return ops[size]; 700 } 701 702 static IROp mkVecMAXS ( UInt size ) { 703 const IROp ops[4] 704 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; 705 vassert(size < 4); 706 return ops[size]; 707 } 708 709 static IROp mkVecMINU ( UInt size ) { 710 const IROp ops[4] 711 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; 712 vassert(size < 4); 713 return ops[size]; 714 } 715 716 static IROp mkVecMINS ( UInt size ) { 717 const IROp ops[4] 718 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; 719 vassert(size < 4); 720 return ops[size]; 721 } 722 723 static IROp mkVecMUL ( UInt size ) { 724 const IROp ops[4] 725 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; 726 vassert(size < 3); 727 return ops[size]; 728 } 729 730 static IROp mkVecMULLU ( UInt sizeNarrow ) { 731 const IROp ops[4] 732 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID }; 733 vassert(sizeNarrow < 3); 734 return ops[sizeNarrow]; 735 } 736 737 static IROp mkVecMULLS ( UInt sizeNarrow ) { 738 const IROp ops[4] 739 = { Iop_Mull8Sx8, 
Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID }; 740 vassert(sizeNarrow < 3); 741 return ops[sizeNarrow]; 742 } 743 744 static IROp mkVecQDMULLS ( UInt sizeNarrow ) { 745 const IROp ops[4] 746 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID }; 747 vassert(sizeNarrow < 3); 748 return ops[sizeNarrow]; 749 } 750 751 static IROp mkVecCMPEQ ( UInt size ) { 752 const IROp ops[4] 753 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 }; 754 vassert(size < 4); 755 return ops[size]; 756 } 757 758 static IROp mkVecCMPGTU ( UInt size ) { 759 const IROp ops[4] 760 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 }; 761 vassert(size < 4); 762 return ops[size]; 763 } 764 765 static IROp mkVecCMPGTS ( UInt size ) { 766 const IROp ops[4] 767 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 }; 768 vassert(size < 4); 769 return ops[size]; 770 } 771 772 static IROp mkVecABS ( UInt size ) { 773 const IROp ops[4] 774 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 }; 775 vassert(size < 4); 776 return ops[size]; 777 } 778 779 static IROp mkVecZEROHIxxOFV128 ( UInt size ) { 780 const IROp ops[4] 781 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128, 782 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 }; 783 vassert(size < 4); 784 return ops[size]; 785 } 786 787 static IRExpr* mkU ( IRType ty, ULong imm ) { 788 switch (ty) { 789 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL)); 790 case Ity_I64: return mkU64(imm); 791 default: vpanic("mkU"); 792 } 793 } 794 795 static IROp mkVecQDMULHIS ( UInt size ) { 796 const IROp ops[4] 797 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID }; 798 vassert(size < 4); 799 return ops[size]; 800 } 801 802 static IROp mkVecQRDMULHIS ( UInt size ) { 803 const IROp ops[4] 804 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID }; 805 vassert(size < 4); 806 return ops[size]; 807 } 808 809 static IROp mkVecQANDUQSH ( UInt size ) { 810 const IROp ops[4] 811 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8, 812 Iop_QandUQsh32x4, Iop_QandUQsh64x2 }; 813 vassert(size < 4); 814 return ops[size]; 815 } 816 817 static IROp mkVecQANDSQSH ( UInt size ) { 818 const IROp ops[4] 819 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8, 820 Iop_QandSQsh32x4, Iop_QandSQsh64x2 }; 821 vassert(size < 4); 822 return ops[size]; 823 } 824 825 static IROp mkVecQANDUQRSH ( UInt size ) { 826 const IROp ops[4] 827 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8, 828 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 }; 829 vassert(size < 4); 830 return ops[size]; 831 } 832 833 static IROp mkVecQANDSQRSH ( UInt size ) { 834 const IROp ops[4] 835 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8, 836 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 }; 837 vassert(size < 4); 838 return ops[size]; 839 } 840 841 static IROp mkVecSHU ( UInt size ) { 842 const IROp ops[4] 843 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 }; 844 vassert(size < 4); 845 return ops[size]; 846 } 847 848 static IROp mkVecSHS ( UInt size ) { 849 const IROp ops[4] 850 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 }; 851 vassert(size < 4); 852 return ops[size]; 853 } 854 855 static IROp mkVecRSHU ( UInt size ) { 856 const IROp ops[4] 857 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 }; 858 vassert(size < 4); 859 return ops[size]; 860 } 861 862 static IROp mkVecRSHS ( UInt size ) { 863 const IROp ops[4] 864 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 }; 865 vassert(size < 4); 866 return ops[size]; 867 } 868 869 static IROp mkVecNARROWUN ( UInt sizeNarrow ) { 870 const IROp 
ops[4] 871 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, 872 Iop_NarrowUn64to32x2, Iop_INVALID }; 873 vassert(sizeNarrow < 4); 874 return ops[sizeNarrow]; 875 } 876 877 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) { 878 const IROp ops[4] 879 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, 880 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID }; 881 vassert(sizeNarrow < 4); 882 return ops[sizeNarrow]; 883 } 884 885 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) { 886 const IROp ops[4] 887 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, 888 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID }; 889 vassert(sizeNarrow < 4); 890 return ops[sizeNarrow]; 891 } 892 893 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) { 894 const IROp ops[4] 895 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, 896 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID }; 897 vassert(sizeNarrow < 4); 898 return ops[sizeNarrow]; 899 } 900 901 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) { 902 const IROp ops[4] 903 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4, 904 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID }; 905 vassert(sizeNarrow < 4); 906 return ops[sizeNarrow]; 907 } 908 909 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) { 910 const IROp ops[4] 911 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4, 912 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID }; 913 vassert(sizeNarrow < 4); 914 return ops[sizeNarrow]; 915 } 916 917 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) { 918 const IROp ops[4] 919 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4, 920 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID }; 921 vassert(sizeNarrow < 4); 922 return ops[sizeNarrow]; 923 } 924 925 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) { 926 const IROp ops[4] 927 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4, 928 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID }; 929 vassert(sizeNarrow < 4); 930 return ops[sizeNarrow]; 931 } 932 933 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) { 934 const IROp ops[4] 935 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4, 936 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID }; 937 vassert(sizeNarrow < 4); 938 return ops[sizeNarrow]; 939 } 940 941 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) { 942 const IROp ops[4] 943 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4, 944 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID }; 945 vassert(sizeNarrow < 4); 946 return ops[sizeNarrow]; 947 } 948 949 static IROp mkVecQSHLNSATUU ( UInt size ) { 950 const IROp ops[4] 951 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8, 952 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 }; 953 vassert(size < 4); 954 return ops[size]; 955 } 956 957 static IROp mkVecQSHLNSATSS ( UInt size ) { 958 const IROp ops[4] 959 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8, 960 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 }; 961 vassert(size < 4); 962 return ops[size]; 963 } 964 965 static IROp mkVecQSHLNSATSU ( UInt size ) { 966 const IROp ops[4] 967 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8, 968 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 }; 969 vassert(size < 4); 970 return ops[size]; 971 } 972 973 static IROp mkVecADDF ( UInt size ) { 974 const IROp ops[4] 975 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 }; 976 vassert(size < 4); 977 return ops[size]; 978 } 979 980 static IROp mkVecMAXF ( UInt size ) { 981 const IROp ops[4] 982 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 }; 983 vassert(size < 4); 984 return ops[size]; 985 } 986 987 
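/* Example (illustrative only, hence disabled): how the size-indexed IROp
   selectors in this file are typically combined with the IR construction
   helpers defined earlier.  The ARM64 SIMD "size" field encodes a lane
   width of 8 << size bits, so e.g. ADD Vd.4S, Vn.4S, Vm.4S has size == 2
   and mkVecADD(2) == Iop_Add32x4.  The function name below is not part of
   the decoder. */
#if 0
static IRTemp example_vector_add ( UInt size, IRTemp argL, IRTemp argR )
{
   vassert(size < 4);
   IRTemp res = newTempV128();
   assign(res, binop(mkVecADD(size), mkexpr(argL), mkexpr(argR)));
   return res;
}
#endif
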
static IROp mkVecMINF ( UInt size ) { 988 const IROp ops[4] 989 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 }; 990 vassert(size < 4); 991 return ops[size]; 992 } 993 994 /* Generate IR to create 'arg rotated right by imm', for sane values 995 of 'ty' and 'imm'. */ 996 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm ) 997 { 998 UInt w = 0; 999 if (ty == Ity_I64) { 1000 w = 64; 1001 } else { 1002 vassert(ty == Ity_I32); 1003 w = 32; 1004 } 1005 vassert(w != 0); 1006 vassert(imm < w); 1007 if (imm == 0) { 1008 return arg; 1009 } 1010 IRTemp res = newTemp(ty); 1011 assign(res, binop(mkOR(ty), 1012 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)), 1013 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) )); 1014 return res; 1015 } 1016 1017 /* Generate IR to set the returned temp to either all-zeroes or 1018 all ones, as a copy of arg<imm>. */ 1019 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm ) 1020 { 1021 UInt w = 0; 1022 if (ty == Ity_I64) { 1023 w = 64; 1024 } else { 1025 vassert(ty == Ity_I32); 1026 w = 32; 1027 } 1028 vassert(w != 0); 1029 vassert(imm < w); 1030 IRTemp res = newTemp(ty); 1031 assign(res, binop(mkSAR(ty), 1032 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)), 1033 mkU8(w - 1))); 1034 return res; 1035 } 1036 1037 /* U-widen 8/16/32/64 bit int expr to 64. */ 1038 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e ) 1039 { 1040 switch (srcTy) { 1041 case Ity_I64: return e; 1042 case Ity_I32: return unop(Iop_32Uto64, e); 1043 case Ity_I16: return unop(Iop_16Uto64, e); 1044 case Ity_I8: return unop(Iop_8Uto64, e); 1045 default: vpanic("widenUto64(arm64)"); 1046 } 1047 } 1048 1049 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some 1050 of these combinations make sense. */ 1051 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e ) 1052 { 1053 switch (dstTy) { 1054 case Ity_I64: return e; 1055 case Ity_I32: return unop(Iop_64to32, e); 1056 case Ity_I16: return unop(Iop_64to16, e); 1057 case Ity_I8: return unop(Iop_64to8, e); 1058 default: vpanic("narrowFrom64(arm64)"); 1059 } 1060 } 1061 1062 1063 /*------------------------------------------------------------*/ 1064 /*--- Helpers for accessing guest registers. 
---*/ 1065 /*------------------------------------------------------------*/ 1066 1067 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0) 1068 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1) 1069 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2) 1070 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3) 1071 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4) 1072 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5) 1073 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6) 1074 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7) 1075 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8) 1076 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9) 1077 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10) 1078 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11) 1079 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12) 1080 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13) 1081 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14) 1082 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15) 1083 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16) 1084 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17) 1085 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18) 1086 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19) 1087 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20) 1088 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21) 1089 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22) 1090 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23) 1091 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24) 1092 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25) 1093 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26) 1094 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27) 1095 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28) 1096 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29) 1097 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30) 1098 1099 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP) 1100 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC) 1101 1102 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP) 1103 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1) 1104 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2) 1105 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP) 1106 1107 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0) 1108 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR) 1109 1110 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0) 1111 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1) 1112 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2) 1113 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3) 1114 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4) 1115 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5) 1116 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6) 1117 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7) 1118 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8) 1119 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9) 1120 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10) 1121 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11) 1122 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12) 1123 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13) 1124 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14) 1125 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15) 1126 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16) 1127 #define OFFB_Q17 
offsetof(VexGuestARM64State,guest_Q17) 1128 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18) 1129 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19) 1130 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20) 1131 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21) 1132 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22) 1133 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23) 1134 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24) 1135 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25) 1136 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26) 1137 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27) 1138 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28) 1139 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29) 1140 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30) 1141 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31) 1142 1143 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR) 1144 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG) 1145 1146 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) 1147 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) 1148 1149 1150 /* ---------------- Integer registers ---------------- */ 1151 1152 static Int offsetIReg64 ( UInt iregNo ) 1153 { 1154 /* Do we care about endianness here? We do if sub-parts of integer 1155 registers are accessed. */ 1156 switch (iregNo) { 1157 case 0: return OFFB_X0; 1158 case 1: return OFFB_X1; 1159 case 2: return OFFB_X2; 1160 case 3: return OFFB_X3; 1161 case 4: return OFFB_X4; 1162 case 5: return OFFB_X5; 1163 case 6: return OFFB_X6; 1164 case 7: return OFFB_X7; 1165 case 8: return OFFB_X8; 1166 case 9: return OFFB_X9; 1167 case 10: return OFFB_X10; 1168 case 11: return OFFB_X11; 1169 case 12: return OFFB_X12; 1170 case 13: return OFFB_X13; 1171 case 14: return OFFB_X14; 1172 case 15: return OFFB_X15; 1173 case 16: return OFFB_X16; 1174 case 17: return OFFB_X17; 1175 case 18: return OFFB_X18; 1176 case 19: return OFFB_X19; 1177 case 20: return OFFB_X20; 1178 case 21: return OFFB_X21; 1179 case 22: return OFFB_X22; 1180 case 23: return OFFB_X23; 1181 case 24: return OFFB_X24; 1182 case 25: return OFFB_X25; 1183 case 26: return OFFB_X26; 1184 case 27: return OFFB_X27; 1185 case 28: return OFFB_X28; 1186 case 29: return OFFB_X29; 1187 case 30: return OFFB_X30; 1188 /* but not 31 */ 1189 default: vassert(0); 1190 } 1191 } 1192 1193 static Int offsetIReg64orSP ( UInt iregNo ) 1194 { 1195 return iregNo == 31 ? 
OFFB_XSP : offsetIReg64(iregNo); 1196 } 1197 1198 static const HChar* nameIReg64orZR ( UInt iregNo ) 1199 { 1200 vassert(iregNo < 32); 1201 static const HChar* names[32] 1202 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 1203 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 1204 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 1205 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" }; 1206 return names[iregNo]; 1207 } 1208 1209 static const HChar* nameIReg64orSP ( UInt iregNo ) 1210 { 1211 if (iregNo == 31) { 1212 return "sp"; 1213 } 1214 vassert(iregNo < 31); 1215 return nameIReg64orZR(iregNo); 1216 } 1217 1218 static IRExpr* getIReg64orSP ( UInt iregNo ) 1219 { 1220 vassert(iregNo < 32); 1221 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1222 } 1223 1224 static IRExpr* getIReg64orZR ( UInt iregNo ) 1225 { 1226 if (iregNo == 31) { 1227 return mkU64(0); 1228 } 1229 vassert(iregNo < 31); 1230 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1231 } 1232 1233 static void putIReg64orSP ( UInt iregNo, IRExpr* e ) 1234 { 1235 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1236 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1237 } 1238 1239 static void putIReg64orZR ( UInt iregNo, IRExpr* e ) 1240 { 1241 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1242 if (iregNo == 31) { 1243 return; 1244 } 1245 vassert(iregNo < 31); 1246 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1247 } 1248 1249 static const HChar* nameIReg32orZR ( UInt iregNo ) 1250 { 1251 vassert(iregNo < 32); 1252 static const HChar* names[32] 1253 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 1254 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 1255 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 1256 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" }; 1257 return names[iregNo]; 1258 } 1259 1260 static const HChar* nameIReg32orSP ( UInt iregNo ) 1261 { 1262 if (iregNo == 31) { 1263 return "wsp"; 1264 } 1265 vassert(iregNo < 31); 1266 return nameIReg32orZR(iregNo); 1267 } 1268 1269 static IRExpr* getIReg32orSP ( UInt iregNo ) 1270 { 1271 vassert(iregNo < 32); 1272 return unop(Iop_64to32, 1273 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1274 } 1275 1276 static IRExpr* getIReg32orZR ( UInt iregNo ) 1277 { 1278 if (iregNo == 31) { 1279 return mkU32(0); 1280 } 1281 vassert(iregNo < 31); 1282 return unop(Iop_64to32, 1283 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1284 } 1285 1286 static void putIReg32orSP ( UInt iregNo, IRExpr* e ) 1287 { 1288 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1290 } 1291 1292 static void putIReg32orZR ( UInt iregNo, IRExpr* e ) 1293 { 1294 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1295 if (iregNo == 31) { 1296 return; 1297 } 1298 vassert(iregNo < 31); 1299 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1300 } 1301 1302 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo ) 1303 { 1304 vassert(is64 == True || is64 == False); 1305 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo); 1306 } 1307 1308 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo ) 1309 { 1310 vassert(is64 == True || is64 == False); 1311 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo); 1312 } 1313 1314 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo ) 1315 { 1316 vassert(is64 == True || is64 == False); 1317 return is64 ? 
getIReg64orZR(iregNo) : getIReg32orZR(iregNo); 1318 } 1319 1320 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e ) 1321 { 1322 vassert(is64 == True || is64 == False); 1323 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e); 1324 } 1325 1326 static void putPC ( IRExpr* e ) 1327 { 1328 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1329 stmt( IRStmt_Put(OFFB_PC, e) ); 1330 } 1331 1332 1333 /* ---------------- Vector (Q) registers ---------------- */ 1334 1335 static Int offsetQReg128 ( UInt qregNo ) 1336 { 1337 /* We don't care about endianness at this point. It only becomes 1338 relevant when dealing with sections of these registers.*/ 1339 switch (qregNo) { 1340 case 0: return OFFB_Q0; 1341 case 1: return OFFB_Q1; 1342 case 2: return OFFB_Q2; 1343 case 3: return OFFB_Q3; 1344 case 4: return OFFB_Q4; 1345 case 5: return OFFB_Q5; 1346 case 6: return OFFB_Q6; 1347 case 7: return OFFB_Q7; 1348 case 8: return OFFB_Q8; 1349 case 9: return OFFB_Q9; 1350 case 10: return OFFB_Q10; 1351 case 11: return OFFB_Q11; 1352 case 12: return OFFB_Q12; 1353 case 13: return OFFB_Q13; 1354 case 14: return OFFB_Q14; 1355 case 15: return OFFB_Q15; 1356 case 16: return OFFB_Q16; 1357 case 17: return OFFB_Q17; 1358 case 18: return OFFB_Q18; 1359 case 19: return OFFB_Q19; 1360 case 20: return OFFB_Q20; 1361 case 21: return OFFB_Q21; 1362 case 22: return OFFB_Q22; 1363 case 23: return OFFB_Q23; 1364 case 24: return OFFB_Q24; 1365 case 25: return OFFB_Q25; 1366 case 26: return OFFB_Q26; 1367 case 27: return OFFB_Q27; 1368 case 28: return OFFB_Q28; 1369 case 29: return OFFB_Q29; 1370 case 30: return OFFB_Q30; 1371 case 31: return OFFB_Q31; 1372 default: vassert(0); 1373 } 1374 } 1375 1376 /* Write to a complete Qreg. */ 1377 static void putQReg128 ( UInt qregNo, IRExpr* e ) 1378 { 1379 vassert(qregNo < 32); 1380 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); 1381 stmt( IRStmt_Put(offsetQReg128(qregNo), e) ); 1382 } 1383 1384 /* Read a complete Qreg. */ 1385 static IRExpr* getQReg128 ( UInt qregNo ) 1386 { 1387 vassert(qregNo < 32); 1388 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128); 1389 } 1390 1391 /* Produce the IR type for some sub-part of a vector. For 32- and 64- 1392 bit sub-parts we can choose either integer or float types, and 1393 choose float on the basis that that is the common use case and so 1394 will give least interference with Put-to-Get forwarding later 1395 on. */ 1396 static IRType preferredVectorSubTypeFromSize ( UInt szB ) 1397 { 1398 switch (szB) { 1399 case 1: return Ity_I8; 1400 case 2: return Ity_I16; 1401 case 4: return Ity_I32; //Ity_F32; 1402 case 8: return Ity_F64; 1403 case 16: return Ity_V128; 1404 default: vassert(0); 1405 } 1406 } 1407 1408 /* Find the offset of the laneNo'th lane of type laneTy in the given 1409 Qreg. Since the host is little-endian, the least significant lane 1410 has the lowest offset. */ 1411 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo ) 1412 { 1413 vassert(host_endness == VexEndnessLE); 1414 Int base = offsetQReg128(qregNo); 1415 /* Since the host is little-endian, the least significant lane 1416 will be at the lowest address. */ 1417 /* Restrict this to known types, so as to avoid silently accepting 1418 stupid types. 
*/ 1419 UInt laneSzB = 0; 1420 switch (laneTy) { 1421 case Ity_I8: laneSzB = 1; break; 1422 case Ity_F16: case Ity_I16: laneSzB = 2; break; 1423 case Ity_F32: case Ity_I32: laneSzB = 4; break; 1424 case Ity_F64: case Ity_I64: laneSzB = 8; break; 1425 case Ity_V128: laneSzB = 16; break; 1426 default: break; 1427 } 1428 vassert(laneSzB > 0); 1429 UInt minOff = laneNo * laneSzB; 1430 UInt maxOff = minOff + laneSzB - 1; 1431 vassert(maxOff < 16); 1432 return base + minOff; 1433 } 1434 1435 /* Put to the least significant lane of a Qreg. */ 1436 static void putQRegLO ( UInt qregNo, IRExpr* e ) 1437 { 1438 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1439 Int off = offsetQRegLane(qregNo, ty, 0); 1440 switch (ty) { 1441 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: 1442 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128: 1443 break; 1444 default: 1445 vassert(0); // Other cases are probably invalid 1446 } 1447 stmt(IRStmt_Put(off, e)); 1448 } 1449 1450 /* Get from the least significant lane of a Qreg. */ 1451 static IRExpr* getQRegLO ( UInt qregNo, IRType ty ) 1452 { 1453 Int off = offsetQRegLane(qregNo, ty, 0); 1454 switch (ty) { 1455 case Ity_I8: 1456 case Ity_F16: case Ity_I16: 1457 case Ity_I32: case Ity_I64: 1458 case Ity_F32: case Ity_F64: case Ity_V128: 1459 break; 1460 default: 1461 vassert(0); // Other cases are ATC 1462 } 1463 return IRExpr_Get(off, ty); 1464 } 1465 1466 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy ) 1467 { 1468 static const HChar* namesQ[32] 1469 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 1470 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", 1471 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", 1472 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" }; 1473 static const HChar* namesD[32] 1474 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", 1475 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", 1476 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", 1477 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" }; 1478 static const HChar* namesS[32] 1479 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", 1480 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", 1481 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", 1482 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" }; 1483 static const HChar* namesH[32] 1484 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", 1485 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", 1486 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", 1487 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" }; 1488 static const HChar* namesB[32] 1489 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", 1490 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", 1491 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", 1492 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" }; 1493 vassert(qregNo < 32); 1494 switch (sizeofIRType(laneTy)) { 1495 case 1: return namesB[qregNo]; 1496 case 2: return namesH[qregNo]; 1497 case 4: return namesS[qregNo]; 1498 case 8: return namesD[qregNo]; 1499 case 16: return namesQ[qregNo]; 1500 default: vassert(0); 1501 } 1502 /*NOTREACHED*/ 1503 } 1504 1505 static const HChar* nameQReg128 ( UInt qregNo ) 1506 { 1507 return nameQRegLO(qregNo, Ity_V128); 1508 } 1509 1510 /* Find the offset of the most significant half (8 bytes) of the given 1511 Qreg. This requires knowing the endianness of the host. 
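   Since only little-endian hosts are supported here (offsetQRegLane
   insists on that), this is simply the Ity_I64 lane at index 1.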
*/ 1512 static Int offsetQRegHI64 ( UInt qregNo ) 1513 { 1514 return offsetQRegLane(qregNo, Ity_I64, 1); 1515 } 1516 1517 static IRExpr* getQRegHI64 ( UInt qregNo ) 1518 { 1519 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64); 1520 } 1521 1522 static void putQRegHI64 ( UInt qregNo, IRExpr* e ) 1523 { 1524 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1525 Int off = offsetQRegHI64(qregNo); 1526 switch (ty) { 1527 case Ity_I64: case Ity_F64: 1528 break; 1529 default: 1530 vassert(0); // Other cases are plain wrong 1531 } 1532 stmt(IRStmt_Put(off, e)); 1533 } 1534 1535 /* Put to a specified lane of a Qreg. */ 1536 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e ) 1537 { 1538 IRType laneTy = typeOfIRExpr(irsb->tyenv, e); 1539 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1540 switch (laneTy) { 1541 case Ity_F64: case Ity_I64: 1542 case Ity_I32: case Ity_F32: 1543 case Ity_I16: case Ity_F16: 1544 case Ity_I8: 1545 break; 1546 default: 1547 vassert(0); // Other cases are ATC 1548 } 1549 stmt(IRStmt_Put(off, e)); 1550 } 1551 1552 /* Get from a specified lane of a Qreg. */ 1553 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy ) 1554 { 1555 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1556 switch (laneTy) { 1557 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 1558 case Ity_F64: case Ity_F32: case Ity_F16: 1559 break; 1560 default: 1561 vassert(0); // Other cases are ATC 1562 } 1563 return IRExpr_Get(off, laneTy); 1564 } 1565 1566 1567 //ZZ /* ---------------- Misc registers ---------------- */ 1568 //ZZ 1569 //ZZ static void putMiscReg32 ( UInt gsoffset, 1570 //ZZ IRExpr* e, /* :: Ity_I32 */ 1571 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */) 1572 //ZZ { 1573 //ZZ switch (gsoffset) { 1574 //ZZ case OFFB_FPSCR: break; 1575 //ZZ case OFFB_QFLAG32: break; 1576 //ZZ case OFFB_GEFLAG0: break; 1577 //ZZ case OFFB_GEFLAG1: break; 1578 //ZZ case OFFB_GEFLAG2: break; 1579 //ZZ case OFFB_GEFLAG3: break; 1580 //ZZ default: vassert(0); /* awaiting more cases */ 1581 //ZZ } 1582 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1583 //ZZ 1584 //ZZ if (guardT == IRTemp_INVALID) { 1585 //ZZ /* unconditional write */ 1586 //ZZ stmt(IRStmt_Put(gsoffset, e)); 1587 //ZZ } else { 1588 //ZZ stmt(IRStmt_Put( 1589 //ZZ gsoffset, 1590 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)), 1591 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) ) 1592 //ZZ )); 1593 //ZZ } 1594 //ZZ } 1595 //ZZ 1596 //ZZ static IRTemp get_ITSTATE ( void ) 1597 //ZZ { 1598 //ZZ ASSERT_IS_THUMB; 1599 //ZZ IRTemp t = newTemp(Ity_I32); 1600 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32)); 1601 //ZZ return t; 1602 //ZZ } 1603 //ZZ 1604 //ZZ static void put_ITSTATE ( IRTemp t ) 1605 //ZZ { 1606 //ZZ ASSERT_IS_THUMB; 1607 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) ); 1608 //ZZ } 1609 //ZZ 1610 //ZZ static IRTemp get_QFLAG32 ( void ) 1611 //ZZ { 1612 //ZZ IRTemp t = newTemp(Ity_I32); 1613 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32)); 1614 //ZZ return t; 1615 //ZZ } 1616 //ZZ 1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT ) 1618 //ZZ { 1619 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT ); 1620 //ZZ } 1621 //ZZ 1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program 1623 //ZZ Status Register) to indicate that overflow or saturation occurred. 1624 //ZZ Nb: t must be zero to denote no saturation, and any nonzero 1625 //ZZ value to indicate saturation. 
*/ 1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) 1627 //ZZ { 1628 //ZZ IRTemp old = get_QFLAG32(); 1629 //ZZ IRTemp nyu = newTemp(Ity_I32); 1630 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); 1631 //ZZ put_QFLAG32(nyu, condT); 1632 //ZZ } 1633 1634 1635 /* ---------------- FPCR stuff ---------------- */ 1636 1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and 1638 convert them to IR format. Bind the final result to the 1639 returned temp. */ 1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void ) 1641 { 1642 /* The ARMvfp encoding for rounding mode bits is: 1643 00 to nearest 1644 01 to +infinity 1645 10 to -infinity 1646 11 to zero 1647 We need to convert that to the IR encoding: 1648 00 to nearest (the default) 1649 10 to +infinity 1650 01 to -infinity 1651 11 to zero 1652 Which can be done by swapping bits 0 and 1. 1653 The rmode bits are at 23:22 in FPSCR. 1654 */ 1655 IRTemp armEncd = newTemp(Ity_I32); 1656 IRTemp swapped = newTemp(Ity_I32); 1657 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that 1658 we don't zero out bits 24 and above, since the assignment to 1659 'swapped' will mask them out anyway. */ 1660 assign(armEncd, 1661 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22))); 1662 /* Now swap them. */ 1663 assign(swapped, 1664 binop(Iop_Or32, 1665 binop(Iop_And32, 1666 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)), 1667 mkU32(2)), 1668 binop(Iop_And32, 1669 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)), 1670 mkU32(1)) 1671 )); 1672 return swapped; 1673 } 1674 1675 1676 /*------------------------------------------------------------*/ 1677 /*--- Helpers for flag handling and conditional insns ---*/ 1678 /*------------------------------------------------------------*/ 1679 1680 static const HChar* nameARM64Condcode ( ARM64Condcode cond ) 1681 { 1682 switch (cond) { 1683 case ARM64CondEQ: return "eq"; 1684 case ARM64CondNE: return "ne"; 1685 case ARM64CondCS: return "cs"; // or 'hs' 1686 case ARM64CondCC: return "cc"; // or 'lo' 1687 case ARM64CondMI: return "mi"; 1688 case ARM64CondPL: return "pl"; 1689 case ARM64CondVS: return "vs"; 1690 case ARM64CondVC: return "vc"; 1691 case ARM64CondHI: return "hi"; 1692 case ARM64CondLS: return "ls"; 1693 case ARM64CondGE: return "ge"; 1694 case ARM64CondLT: return "lt"; 1695 case ARM64CondGT: return "gt"; 1696 case ARM64CondLE: return "le"; 1697 case ARM64CondAL: return "al"; 1698 case ARM64CondNV: return "nv"; 1699 default: vpanic("name_ARM64Condcode"); 1700 } 1701 } 1702 1703 /* and a handy shorthand for it */ 1704 static const HChar* nameCC ( ARM64Condcode cond ) { 1705 return nameARM64Condcode(cond); 1706 } 1707 1708 1709 /* Build IR to calculate some particular condition from stored 1710 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1711 Ity_I64, suitable for narrowing. Although the return type is 1712 Ity_I64, the returned value is either 0 or 1. 'cond' must be 1713 :: Ity_I64 and must denote the condition to compute in 1714 bits 7:4, and be zero everywhere else. 1715 */ 1716 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond ) 1717 { 1718 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64); 1719 /* And 'cond' had better produce a value in which only bits 7:4 are 1720 nonzero. However, obviously we can't assert for that. */ 1721 1722 /* So what we're constructing for the first argument is 1723 "(cond << 4) | stored-operation". 1724 However, as per comments above, 'cond' must be supplied 1725 pre-shifted to this function. 
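      For example, if the thunk's stored operation is ARM64G_CC_OP_SUB64
      and the condition required is EQ, the value presented to the helper
      in the low 8 bits is (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64.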
1726 1727 This pairing scheme requires that the ARM64_CC_OP_ values all fit 1728 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest 1729 8 bits of the first argument. */ 1730 IRExpr** args 1731 = mkIRExprVec_4( 1732 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond), 1733 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1734 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1735 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) 1736 ); 1737 IRExpr* call 1738 = mkIRExprCCall( 1739 Ity_I64, 1740 0/*regparm*/, 1741 "arm64g_calculate_condition", &arm64g_calculate_condition, 1742 args 1743 ); 1744 1745 /* Exclude the requested condition, OP and NDEP from definedness 1746 checking. We're only interested in DEP1 and DEP2. */ 1747 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1748 return call; 1749 } 1750 1751 1752 /* Build IR to calculate some particular condition from stored 1753 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1754 Ity_I64, suitable for narrowing. Although the return type is 1755 Ity_I64, the returned value is either 0 or 1. 1756 */ 1757 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond ) 1758 { 1759 /* First arg is "(cond << 4) | condition". This requires that the 1760 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a 1761 (COND, OP) pair in the lowest 8 bits of the first argument. */ 1762 vassert(cond >= 0 && cond <= 15); 1763 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) ); 1764 } 1765 1766 1767 /* Build IR to calculate just the carry flag from stored 1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1769 Ity_I64. */ 1770 static IRExpr* mk_arm64g_calculate_flag_c ( void ) 1771 { 1772 IRExpr** args 1773 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1774 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1775 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1776 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1777 IRExpr* call 1778 = mkIRExprCCall( 1779 Ity_I64, 1780 0/*regparm*/, 1781 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c, 1782 args 1783 ); 1784 /* Exclude OP and NDEP from definedness checking. We're only 1785 interested in DEP1 and DEP2. */ 1786 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1787 return call; 1788 } 1789 1790 1791 //ZZ /* Build IR to calculate just the overflow flag from stored 1792 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1793 //ZZ Ity_I32. */ 1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void ) 1795 //ZZ { 1796 //ZZ IRExpr** args 1797 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 1798 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1799 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1800 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 1801 //ZZ IRExpr* call 1802 //ZZ = mkIRExprCCall( 1803 //ZZ Ity_I32, 1804 //ZZ 0/*regparm*/, 1805 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v, 1806 //ZZ args 1807 //ZZ ); 1808 //ZZ /* Exclude OP and NDEP from definedness checking. We're only 1809 //ZZ interested in DEP1 and DEP2. */ 1810 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1811 //ZZ return call; 1812 //ZZ } 1813 1814 1815 /* Build IR to calculate N Z C V in bits 31:28 of the 1816 returned word. 
*/
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value.
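   (This is the behaviour required by the conditional compare
   instructions CCMP and CCMN, where the flags either reflect the
   comparison or are set directly from the instruction's nzcv field.)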
*/ 1911 static 1912 void setFlags_ADD_SUB_conditionally ( 1913 Bool is64, Bool isSUB, 1914 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv 1915 ) 1916 { 1917 /* Generate IR as follows: 1918 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY) 1919 CC_DEP1 = ITE(cond, argL64, nzcv << 28) 1920 CC_DEP2 = ITE(cond, argR64, 0) 1921 CC_NDEP = 0 1922 */ 1923 1924 IRTemp z64 = newTemp(Ity_I64); 1925 assign(z64, mkU64(0)); 1926 1927 /* Establish the operation and operands for the True case. */ 1928 IRTemp t_dep1 = IRTemp_INVALID; 1929 IRTemp t_dep2 = IRTemp_INVALID; 1930 UInt t_op = ARM64G_CC_OP_NUMBER; 1931 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; } 1932 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; } 1933 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; } 1934 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; } 1935 else { vassert(0); } 1936 /* */ 1937 if (is64) { 1938 t_dep1 = argL; 1939 t_dep2 = argR; 1940 } else { 1941 t_dep1 = newTemp(Ity_I64); 1942 t_dep2 = newTemp(Ity_I64); 1943 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL))); 1944 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR))); 1945 } 1946 1947 /* Establish the operation and operands for the False case. */ 1948 IRTemp f_dep1 = newTemp(Ity_I64); 1949 IRTemp f_dep2 = z64; 1950 UInt f_op = ARM64G_CC_OP_COPY; 1951 assign(f_dep1, mkU64(nzcv << 28)); 1952 1953 /* Final thunk values */ 1954 IRTemp dep1 = newTemp(Ity_I64); 1955 IRTemp dep2 = newTemp(Ity_I64); 1956 IRTemp op = newTemp(Ity_I64); 1957 1958 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op))); 1959 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1))); 1960 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2))); 1961 1962 /* finally .. */ 1963 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) )); 1964 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) )); 1965 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) )); 1966 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) )); 1967 } 1968 1969 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */ 1970 static 1971 void setFlags_LOGIC ( Bool is64, IRTemp res ) 1972 { 1973 IRTemp res64 = IRTemp_INVALID; 1974 IRTemp z64 = newTemp(Ity_I64); 1975 UInt cc_op = ARM64G_CC_OP_NUMBER; 1976 if (is64) { 1977 res64 = res; 1978 cc_op = ARM64G_CC_OP_LOGIC64; 1979 } else { 1980 res64 = newTemp(Ity_I64); 1981 assign(res64, unop(Iop_32Uto64, mkexpr(res))); 1982 cc_op = ARM64G_CC_OP_LOGIC32; 1983 } 1984 assign(z64, mkU64(0)); 1985 setFlags_D1_D2_ND(cc_op, res64, z64, z64); 1986 } 1987 1988 /* Build IR to set the flags thunk to a given NZCV value. NZCV is 1989 located in bits 31:28 of the supplied value. 
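   (A natural producer of such a value is an FP compare: the helper
   mk_convert_IRCmpF64Result_to_NZCV below returns NZCV in the lowest 4
   bits, so, as an illustrative sketch only, a caller would be expected
   to shift it into place first, along the lines of

      binop(Iop_Shl64, mkexpr(nzcv_3x0), mkU8(28))

   with 'nzcv_3x0' standing for such a low-4-bit temp, before passing the
   result to this function.)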
*/ 1990 static 1991 void setFlags_COPY ( IRTemp nzcv_28x0 ) 1992 { 1993 IRTemp z64 = newTemp(Ity_I64); 1994 assign(z64, mkU64(0)); 1995 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64); 1996 } 1997 1998 1999 //ZZ /* Minor variant of the above that sets NDEP to zero (if it 2000 //ZZ sets it at all) */ 2001 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, 2002 //ZZ IRTemp t_dep2, 2003 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2004 //ZZ { 2005 //ZZ IRTemp z32 = newTemp(Ity_I32); 2006 //ZZ assign( z32, mkU32(0) ); 2007 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); 2008 //ZZ } 2009 //ZZ 2010 //ZZ 2011 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it 2012 //ZZ sets it at all) */ 2013 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, 2014 //ZZ IRTemp t_ndep, 2015 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2016 //ZZ { 2017 //ZZ IRTemp z32 = newTemp(Ity_I32); 2018 //ZZ assign( z32, mkU32(0) ); 2019 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); 2020 //ZZ } 2021 //ZZ 2022 //ZZ 2023 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it 2024 //ZZ sets them at all) */ 2025 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, 2026 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2027 //ZZ { 2028 //ZZ IRTemp z32 = newTemp(Ity_I32); 2029 //ZZ assign( z32, mkU32(0) ); 2030 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT ); 2031 //ZZ } 2032 2033 2034 /*------------------------------------------------------------*/ 2035 /*--- Misc math helpers ---*/ 2036 /*------------------------------------------------------------*/ 2037 2038 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */ 2039 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh ) 2040 { 2041 IRTemp maskT = newTemp(Ity_I64); 2042 IRTemp res = newTemp(Ity_I64); 2043 vassert(sh >= 1 && sh <= 63); 2044 assign(maskT, mkU64(mask)); 2045 assign( res, 2046 binop(Iop_Or64, 2047 binop(Iop_Shr64, 2048 binop(Iop_And64,mkexpr(x),mkexpr(maskT)), 2049 mkU8(sh)), 2050 binop(Iop_And64, 2051 binop(Iop_Shl64,mkexpr(x),mkU8(sh)), 2052 mkexpr(maskT)) 2053 ) 2054 ); 2055 return res; 2056 } 2057 2058 /* Generates byte swaps within 32-bit lanes. */ 2059 static IRTemp math_UINTSWAP64 ( IRTemp src ) 2060 { 2061 IRTemp res; 2062 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2063 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 2064 return res; 2065 } 2066 2067 /* Generates byte swaps within 16-bit lanes. */ 2068 static IRTemp math_USHORTSWAP64 ( IRTemp src ) 2069 { 2070 IRTemp res; 2071 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2072 return res; 2073 } 2074 2075 /* Generates a 64-bit byte swap. */ 2076 static IRTemp math_BYTESWAP64 ( IRTemp src ) 2077 { 2078 IRTemp res; 2079 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2080 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 2081 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32); 2082 return res; 2083 } 2084 2085 /* Generates a 64-bit bit swap. */ 2086 static IRTemp math_BITSWAP64 ( IRTemp src ) 2087 { 2088 IRTemp res; 2089 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1); 2090 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2); 2091 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4); 2092 return math_BYTESWAP64(res); 2093 } 2094 2095 /* Duplicates the bits at the bottom of the given word to fill the 2096 whole word. src :: Ity_I64 is assumed to have zeroes everywhere 2097 except for the bottom bits. 
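   (The replication below is the usual doubling trick.  As a standalone
   illustration, assuming an 8-bit payload in the low bits of x:

      ULong dup8_to_64 ( ULong x ) {
         x |= x << 8;    // low 16 bits now populated
         x |= x << 16;   // low 32 bits
         x |= x << 32;   // all 64 bits
         return x;
      }

   so for example 0x5A becomes 0x5A5A5A5A5A5A5A5A.)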
*/ 2098 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy ) 2099 { 2100 if (srcTy == Ity_I8) { 2101 IRTemp t16 = newTemp(Ity_I64); 2102 assign(t16, binop(Iop_Or64, mkexpr(src), 2103 binop(Iop_Shl64, mkexpr(src), mkU8(8)))); 2104 IRTemp t32 = newTemp(Ity_I64); 2105 assign(t32, binop(Iop_Or64, mkexpr(t16), 2106 binop(Iop_Shl64, mkexpr(t16), mkU8(16)))); 2107 IRTemp t64 = newTemp(Ity_I64); 2108 assign(t64, binop(Iop_Or64, mkexpr(t32), 2109 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 2110 return t64; 2111 } 2112 if (srcTy == Ity_I16) { 2113 IRTemp t32 = newTemp(Ity_I64); 2114 assign(t32, binop(Iop_Or64, mkexpr(src), 2115 binop(Iop_Shl64, mkexpr(src), mkU8(16)))); 2116 IRTemp t64 = newTemp(Ity_I64); 2117 assign(t64, binop(Iop_Or64, mkexpr(t32), 2118 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 2119 return t64; 2120 } 2121 if (srcTy == Ity_I32) { 2122 IRTemp t64 = newTemp(Ity_I64); 2123 assign(t64, binop(Iop_Or64, mkexpr(src), 2124 binop(Iop_Shl64, mkexpr(src), mkU8(32)))); 2125 return t64; 2126 } 2127 if (srcTy == Ity_I64) { 2128 return src; 2129 } 2130 vassert(0); 2131 } 2132 2133 2134 /* Duplicates the src element exactly so as to fill a V128 value. */ 2135 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy ) 2136 { 2137 IRTemp res = newTempV128(); 2138 if (srcTy == Ity_F64) { 2139 IRTemp i64 = newTemp(Ity_I64); 2140 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src))); 2141 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64))); 2142 return res; 2143 } 2144 if (srcTy == Ity_F32) { 2145 IRTemp i64a = newTemp(Ity_I64); 2146 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src)))); 2147 IRTemp i64b = newTemp(Ity_I64); 2148 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)), 2149 mkexpr(i64a))); 2150 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b))); 2151 return res; 2152 } 2153 if (srcTy == Ity_I64) { 2154 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src))); 2155 return res; 2156 } 2157 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) { 2158 IRTemp t1 = newTemp(Ity_I64); 2159 assign(t1, widenUto64(srcTy, mkexpr(src))); 2160 IRTemp t2 = math_DUP_TO_64(t1, srcTy); 2161 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2))); 2162 return res; 2163 } 2164 vassert(0); 2165 } 2166 2167 2168 /* |fullWidth| is a full V128 width result. Depending on bitQ, 2169 zero out the upper half. */ 2170 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth ) 2171 { 2172 if (bitQ == 1) return mkexpr(fullWidth); 2173 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth)); 2174 vassert(0); 2175 } 2176 2177 /* The same, but from an expression instead. */ 2178 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth ) 2179 { 2180 IRTemp fullWidthT = newTempV128(); 2181 assign(fullWidthT, fullWidth); 2182 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT); 2183 } 2184 2185 2186 /*------------------------------------------------------------*/ 2187 /*--- FP comparison helpers ---*/ 2188 /*------------------------------------------------------------*/ 2189 2190 /* irRes :: Ity_I32 holds a floating point comparison result encoded 2191 as an IRCmpF64Result. Generate code to convert it to an 2192 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value. 2193 Assign a new temp to hold that value, and return the temp. 
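   (For cross-checking the bit-twiddling below, the same computation in
   plain C, illustrative only and not used by the translator, is:

      UInt IRCmpF64Result_to_nzcv ( UInt irRes ) {
         ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
         ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
         ULong termR = ix & (ix >> 1) & 1;
         return (UInt)(termL - termR);
      }

   which maps 0x45 (UN) to 0011, 0x01 (LT) to 1000, 0x00 (GT) to 0010
   and 0x40 (EQ) to 0110, agreeing with the table inside the function.)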
*/ 2194 static 2195 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 ) 2196 { 2197 IRTemp ix = newTemp(Ity_I64); 2198 IRTemp termL = newTemp(Ity_I64); 2199 IRTemp termR = newTemp(Ity_I64); 2200 IRTemp nzcv = newTemp(Ity_I64); 2201 IRTemp irRes = newTemp(Ity_I64); 2202 2203 /* This is where the fun starts. We have to convert 'irRes' from 2204 an IR-convention return result (IRCmpF64Result) to an 2205 ARM-encoded (N,Z,C,V) group. The final result is in the bottom 2206 4 bits of 'nzcv'. */ 2207 /* Map compare result from IR to ARM(nzcv) */ 2208 /* 2209 FP cmp result | IR | ARM(nzcv) 2210 -------------------------------- 2211 UN 0x45 0011 2212 LT 0x01 1000 2213 GT 0x00 0010 2214 EQ 0x40 0110 2215 */ 2216 /* Now since you're probably wondering WTF .. 2217 2218 ix fishes the useful bits out of the IR value, bits 6 and 0, and 2219 places them side by side, giving a number which is 0, 1, 2 or 3. 2220 2221 termL is a sequence cooked up by GNU superopt. It converts ix 2222 into an almost correct NZCV value (incredibly), except 2223 for the case of UN, where it produces 0100 instead of the 2224 required 0011. 2225 2226 termR is therefore a correction term, also computed from ix. It 2227 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get 2228 the final correct value, we subtract termR from termL. 2229 2230 Don't take my word for it. There's a test program at the bottom 2231 of guest_arm_toIR.c, to try this out with. 2232 */ 2233 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32))); 2234 2235 assign( 2236 ix, 2237 binop(Iop_Or64, 2238 binop(Iop_And64, 2239 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)), 2240 mkU64(3)), 2241 binop(Iop_And64, mkexpr(irRes), mkU64(1)))); 2242 2243 assign( 2244 termL, 2245 binop(Iop_Add64, 2246 binop(Iop_Shr64, 2247 binop(Iop_Sub64, 2248 binop(Iop_Shl64, 2249 binop(Iop_Xor64, mkexpr(ix), mkU64(1)), 2250 mkU8(62)), 2251 mkU64(1)), 2252 mkU8(61)), 2253 mkU64(1))); 2254 2255 assign( 2256 termR, 2257 binop(Iop_And64, 2258 binop(Iop_And64, 2259 mkexpr(ix), 2260 binop(Iop_Shr64, mkexpr(ix), mkU8(1))), 2261 mkU64(1))); 2262 2263 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR))); 2264 return nzcv; 2265 } 2266 2267 2268 /*------------------------------------------------------------*/ 2269 /*--- Data processing (immediate) ---*/ 2270 /*------------------------------------------------------------*/ 2271 2272 /* Helper functions for supporting "DecodeBitMasks" */ 2273 2274 static ULong dbm_ROR ( Int width, ULong x, Int rot ) 2275 { 2276 vassert(width > 0 && width <= 64); 2277 vassert(rot >= 0 && rot < width); 2278 if (rot == 0) return x; 2279 ULong res = x >> rot; 2280 res |= (x << (width - rot)); 2281 if (width < 64) 2282 res &= ((1ULL << width) - 1); 2283 return res; 2284 } 2285 2286 static ULong dbm_RepTo64( Int esize, ULong x ) 2287 { 2288 switch (esize) { 2289 case 64: 2290 return x; 2291 case 32: 2292 x &= 0xFFFFFFFF; x |= (x << 32); 2293 return x; 2294 case 16: 2295 x &= 0xFFFF; x |= (x << 16); x |= (x << 32); 2296 return x; 2297 case 8: 2298 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32); 2299 return x; 2300 case 4: 2301 x &= 0xF; x |= (x << 4); x |= (x << 8); 2302 x |= (x << 16); x |= (x << 32); 2303 return x; 2304 case 2: 2305 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8); 2306 x |= (x << 16); x |= (x << 32); 2307 return x; 2308 default: 2309 break; 2310 } 2311 vpanic("dbm_RepTo64"); 2312 /*NOTREACHED*/ 2313 return 0; 2314 } 2315 2316 static Int dbm_highestSetBit ( ULong x ) 2317 { 2318 Int i; 2319 for (i = 63; i >= 0; i--) { 2320
if (x & (1ULL << i)) 2321 return i; 2322 } 2323 vassert(x == 0); 2324 return -1; 2325 } 2326 2327 static 2328 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask, 2329 ULong immN, ULong imms, ULong immr, Bool immediate, 2330 UInt M /*32 or 64*/) 2331 { 2332 vassert(immN < (1ULL << 1)); 2333 vassert(imms < (1ULL << 6)); 2334 vassert(immr < (1ULL << 6)); 2335 vassert(immediate == False || immediate == True); 2336 vassert(M == 32 || M == 64); 2337 2338 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) ); 2339 if (len < 1) { /* printf("fail1\n"); */ return False; } 2340 vassert(len <= 6); 2341 vassert(M >= (1 << len)); 2342 2343 vassert(len >= 1 && len <= 6); 2344 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len); 2345 (1 << len) - 1; 2346 vassert(levels >= 1 && levels <= 63); 2347 2348 if (immediate && ((imms & levels) == levels)) { 2349 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */ 2350 return False; 2351 } 2352 2353 ULong S = imms & levels; 2354 ULong R = immr & levels; 2355 Int diff = S - R; 2356 diff &= 63; 2357 Int esize = 1 << len; 2358 vassert(2 <= esize && esize <= 64); 2359 2360 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the 2361 same below with d. S can be 63 in which case we have an out of 2362 range and hence undefined shift. */ 2363 vassert(S >= 0 && S <= 63); 2364 vassert(esize >= (S+1)); 2365 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1) 2366 //(1ULL << (S+1)) - 1; 2367 ((1ULL << S) - 1) + (1ULL << S); 2368 2369 Int d = // diff<len-1:0> 2370 diff & ((1 << len)-1); 2371 vassert(esize >= (d+1)); 2372 vassert(d >= 0 && d <= 63); 2373 2374 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1) 2375 //(1ULL << (d+1)) - 1; 2376 ((1ULL << d) - 1) + (1ULL << d); 2377 2378 if (esize != 64) vassert(elem_s < (1ULL << esize)); 2379 if (esize != 64) vassert(elem_d < (1ULL << esize)); 2380 2381 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R)); 2382 if (tmask) *tmask = dbm_RepTo64(esize, elem_d); 2383 2384 return True; 2385 } 2386 2387 2388 static 2389 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres, 2390 UInt insn) 2391 { 2392 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 2393 2394 /* insn[28:23] 2395 10000x PC-rel addressing 2396 10001x Add/subtract (immediate) 2397 100100 Logical (immediate) 2398 100101 Move Wide (immediate) 2399 100110 Bitfield 2400 100111 Extract 2401 */ 2402 2403 /* ------------------ ADD/SUB{,S} imm12 ------------------ */ 2404 if (INSN(28,24) == BITS5(1,0,0,0,1)) { 2405 Bool is64 = INSN(31,31) == 1; 2406 Bool isSub = INSN(30,30) == 1; 2407 Bool setCC = INSN(29,29) == 1; 2408 UInt sh = INSN(23,22); 2409 UInt uimm12 = INSN(21,10); 2410 UInt nn = INSN(9,5); 2411 UInt dd = INSN(4,0); 2412 const HChar* nm = isSub ? "sub" : "add"; 2413 if (sh >= 2) { 2414 /* Invalid; fall through */ 2415 } else { 2416 vassert(sh <= 1); 2417 uimm12 <<= (12 * sh); 2418 if (is64) { 2419 IRTemp argL = newTemp(Ity_I64); 2420 IRTemp argR = newTemp(Ity_I64); 2421 IRTemp res = newTemp(Ity_I64); 2422 assign(argL, getIReg64orSP(nn)); 2423 assign(argR, mkU64(uimm12)); 2424 assign(res, binop(isSub ? 
Iop_Sub64 : Iop_Add64, 2425 mkexpr(argL), mkexpr(argR))); 2426 if (setCC) { 2427 putIReg64orZR(dd, mkexpr(res)); 2428 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 2429 DIP("%ss %s, %s, 0x%x\n", 2430 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12); 2431 } else { 2432 putIReg64orSP(dd, mkexpr(res)); 2433 DIP("%s %s, %s, 0x%x\n", 2434 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12); 2435 } 2436 } else { 2437 IRTemp argL = newTemp(Ity_I32); 2438 IRTemp argR = newTemp(Ity_I32); 2439 IRTemp res = newTemp(Ity_I32); 2440 assign(argL, getIReg32orSP(nn)); 2441 assign(argR, mkU32(uimm12)); 2442 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32, 2443 mkexpr(argL), mkexpr(argR))); 2444 if (setCC) { 2445 putIReg32orZR(dd, mkexpr(res)); 2446 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR); 2447 DIP("%ss %s, %s, 0x%x\n", 2448 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12); 2449 } else { 2450 putIReg32orSP(dd, mkexpr(res)); 2451 DIP("%s %s, %s, 0x%x\n", 2452 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12); 2453 } 2454 } 2455 return True; 2456 } 2457 } 2458 2459 /* -------------------- ADR/ADRP -------------------- */ 2460 if (INSN(28,24) == BITS5(1,0,0,0,0)) { 2461 UInt bP = INSN(31,31); 2462 UInt immLo = INSN(30,29); 2463 UInt immHi = INSN(23,5); 2464 UInt rD = INSN(4,0); 2465 ULong uimm = (immHi << 2) | immLo; 2466 ULong simm = sx_to_64(uimm, 21); 2467 ULong val; 2468 if (bP) { 2469 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12); 2470 } else { 2471 val = guest_PC_curr_instr + simm; 2472 } 2473 putIReg64orZR(rD, mkU64(val)); 2474 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val); 2475 return True; 2476 } 2477 2478 /* -------------------- LOGIC(imm) -------------------- */ 2479 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) { 2480 /* 31 30 28 22 21 15 9 4 2481 sf op 100100 N immr imms Rn Rd 2482 op=00: AND Rd|SP, Rn, #imm 2483 op=01: ORR Rd|SP, Rn, #imm 2484 op=10: EOR Rd|SP, Rn, #imm 2485 op=11: ANDS Rd|ZR, Rn, #imm 2486 */ 2487 Bool is64 = INSN(31,31) == 1; 2488 UInt op = INSN(30,29); 2489 UInt N = INSN(22,22); 2490 UInt immR = INSN(21,16); 2491 UInt immS = INSN(15,10); 2492 UInt nn = INSN(9,5); 2493 UInt dd = INSN(4,0); 2494 ULong imm = 0; 2495 Bool ok; 2496 if (N == 1 && !is64) 2497 goto after_logic_imm; /* not allowed; fall through */ 2498 ok = dbm_DecodeBitMasks(&imm, NULL, 2499 N, immS, immR, True, is64 ? 
64 : 32); 2500 if (!ok) 2501 goto after_logic_imm; 2502 2503 const HChar* names[4] = { "and", "orr", "eor", "ands" }; 2504 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 }; 2505 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 }; 2506 2507 vassert(op < 4); 2508 if (is64) { 2509 IRExpr* argL = getIReg64orZR(nn); 2510 IRExpr* argR = mkU64(imm); 2511 IRTemp res = newTemp(Ity_I64); 2512 assign(res, binop(ops64[op], argL, argR)); 2513 if (op < 3) { 2514 putIReg64orSP(dd, mkexpr(res)); 2515 DIP("%s %s, %s, 0x%llx\n", names[op], 2516 nameIReg64orSP(dd), nameIReg64orZR(nn), imm); 2517 } else { 2518 putIReg64orZR(dd, mkexpr(res)); 2519 setFlags_LOGIC(True/*is64*/, res); 2520 DIP("%s %s, %s, 0x%llx\n", names[op], 2521 nameIReg64orZR(dd), nameIReg64orZR(nn), imm); 2522 } 2523 } else { 2524 IRExpr* argL = getIReg32orZR(nn); 2525 IRExpr* argR = mkU32((UInt)imm); 2526 IRTemp res = newTemp(Ity_I32); 2527 assign(res, binop(ops32[op], argL, argR)); 2528 if (op < 3) { 2529 putIReg32orSP(dd, mkexpr(res)); 2530 DIP("%s %s, %s, 0x%x\n", names[op], 2531 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm); 2532 } else { 2533 putIReg32orZR(dd, mkexpr(res)); 2534 setFlags_LOGIC(False/*!is64*/, res); 2535 DIP("%s %s, %s, 0x%x\n", names[op], 2536 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); 2537 } 2538 } 2539 return True; 2540 } 2541 after_logic_imm: 2542 2543 /* -------------------- MOV{Z,N,K} -------------------- */ 2544 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { 2545 /* 31 30 28 22 20 4 2546 | | | | | | 2547 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) 2548 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) 2549 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) 2550 */ 2551 Bool is64 = INSN(31,31) == 1; 2552 UInt subopc = INSN(30,29); 2553 UInt hw = INSN(22,21); 2554 UInt imm16 = INSN(20,5); 2555 UInt dd = INSN(4,0); 2556 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { 2557 /* invalid; fall through */ 2558 } else { 2559 ULong imm64 = ((ULong)imm16) << (16 * hw); 2560 if (!is64) 2561 vassert(imm64 < 0x100000000ULL); 2562 switch (subopc) { 2563 case BITS2(1,0): // MOVZ 2564 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2565 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2566 break; 2567 case BITS2(0,0): // MOVN 2568 imm64 = ~imm64; 2569 if (!is64) 2570 imm64 &= 0xFFFFFFFFULL; 2571 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2572 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2573 break; 2574 case BITS2(1,1): // MOVK 2575 /* This is more complex. We are inserting a slice into 2576 the destination register, so we need to have the old 2577 value of it. 
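   In scalar terms the update generated below is simply (illustrative
   only):

      mask = 0xFFFFULL << (16 * hw);
      res  = (old & ~mask) | ((ULong)imm16 << (16 * hw));

   that is, one 16-bit lane of the destination is replaced and the
   remaining lanes are left untouched.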
*/ 2578 if (is64) { 2579 IRTemp old = newTemp(Ity_I64); 2580 assign(old, getIReg64orZR(dd)); 2581 ULong mask = 0xFFFFULL << (16 * hw); 2582 IRExpr* res 2583 = binop(Iop_Or64, 2584 binop(Iop_And64, mkexpr(old), mkU64(~mask)), 2585 mkU64(imm64)); 2586 putIReg64orZR(dd, res); 2587 DIP("movk %s, 0x%x, lsl %u\n", 2588 nameIReg64orZR(dd), imm16, 16*hw); 2589 } else { 2590 IRTemp old = newTemp(Ity_I32); 2591 assign(old, getIReg32orZR(dd)); 2592 vassert(hw <= 1); 2593 UInt mask = 0xFFFF << (16 * hw); 2594 IRExpr* res 2595 = binop(Iop_Or32, 2596 binop(Iop_And32, mkexpr(old), mkU32(~mask)), 2597 mkU32((UInt)imm64)); 2598 putIReg32orZR(dd, res); 2599 DIP("movk %s, 0x%x, lsl %u\n", 2600 nameIReg32orZR(dd), imm16, 16*hw); 2601 } 2602 break; 2603 default: 2604 vassert(0); 2605 } 2606 return True; 2607 } 2608 } 2609 2610 /* -------------------- {U,S,}BFM -------------------- */ 2611 /* 30 28 22 21 15 9 4 2612 2613 sf 10 100110 N immr imms nn dd 2614 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2615 UBFM Xd, Xn, #immr, #imms when sf=1, N=1 2616 2617 sf 00 100110 N immr imms nn dd 2618 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2619 SBFM Xd, Xn, #immr, #imms when sf=1, N=1 2620 2621 sf 01 100110 N immr imms nn dd 2622 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2623 BFM Xd, Xn, #immr, #imms when sf=1, N=1 2624 */ 2625 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { 2626 UInt sf = INSN(31,31); 2627 UInt opc = INSN(30,29); 2628 UInt N = INSN(22,22); 2629 UInt immR = INSN(21,16); 2630 UInt immS = INSN(15,10); 2631 UInt nn = INSN(9,5); 2632 UInt dd = INSN(4,0); 2633 Bool inZero = False; 2634 Bool extend = False; 2635 const HChar* nm = "???"; 2636 /* skip invalid combinations */ 2637 switch (opc) { 2638 case BITS2(0,0): 2639 inZero = True; extend = True; nm = "sbfm"; break; 2640 case BITS2(0,1): 2641 inZero = False; extend = False; nm = "bfm"; break; 2642 case BITS2(1,0): 2643 inZero = True; extend = False; nm = "ubfm"; break; 2644 case BITS2(1,1): 2645 goto after_bfm; /* invalid */ 2646 default: 2647 vassert(0); 2648 } 2649 if (sf == 1 && N != 1) goto after_bfm; 2650 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 2651 || ((immS >> 5) & 1) != 0)) goto after_bfm; 2652 ULong wmask = 0, tmask = 0; 2653 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 2654 N, immS, immR, False, sf == 1 ? 64 : 32); 2655 if (!ok) goto after_bfm; /* hmmm */ 2656 2657 Bool is64 = sf == 1; 2658 IRType ty = is64 ? Ity_I64 : Ity_I32; 2659 2660 IRTemp dst = newTemp(ty); 2661 IRTemp src = newTemp(ty); 2662 IRTemp bot = newTemp(ty); 2663 IRTemp top = newTemp(ty); 2664 IRTemp res = newTemp(ty); 2665 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); 2666 assign(src, getIRegOrZR(is64, nn)); 2667 /* perform bitfield move on low bits */ 2668 assign(bot, binop(mkOR(ty), 2669 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), 2670 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), 2671 mkU(ty, wmask)))); 2672 /* determine extension bits (sign, zero or dest register) */ 2673 assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); 2674 /* combine extension bits and result bits */ 2675 assign(res, binop(mkOR(ty), 2676 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), 2677 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); 2678 putIRegOrZR(is64, dd, mkexpr(res)); 2679 DIP("%s %s, %s, immR=%u, immS=%u\n", 2680 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); 2681 return True; 2682 } 2683 after_bfm: 2684 2685 /* ---------------------- EXTR ---------------------- */ 2686 /* 30 28 22 20 15 9 4 2687 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 2688 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 2689 */ 2690 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { 2691 Bool is64 = INSN(31,31) == 1; 2692 UInt mm = INSN(20,16); 2693 UInt imm6 = INSN(15,10); 2694 UInt nn = INSN(9,5); 2695 UInt dd = INSN(4,0); 2696 Bool valid = True; 2697 if (INSN(31,31) != INSN(22,22)) 2698 valid = False; 2699 if (!is64 && imm6 >= 32) 2700 valid = False; 2701 if (!valid) goto after_extr; 2702 IRType ty = is64 ? Ity_I64 : Ity_I32; 2703 IRTemp srcHi = newTemp(ty); 2704 IRTemp srcLo = newTemp(ty); 2705 IRTemp res = newTemp(ty); 2706 assign(srcHi, getIRegOrZR(is64, nn)); 2707 assign(srcLo, getIRegOrZR(is64, mm)); 2708 if (imm6 == 0) { 2709 assign(res, mkexpr(srcLo)); 2710 } else { 2711 UInt szBits = 8 * sizeofIRType(ty); 2712 vassert(imm6 > 0 && imm6 < szBits); 2713 assign(res, binop(mkOR(ty), 2714 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)), 2715 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6)))); 2716 } 2717 putIRegOrZR(is64, dd, mkexpr(res)); 2718 DIP("extr %s, %s, %s, #%u\n", 2719 nameIRegOrZR(is64,dd), 2720 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6); 2721 return True; 2722 } 2723 after_extr: 2724 2725 vex_printf("ARM64 front end: data_processing_immediate\n"); 2726 return False; 2727 # undef INSN 2728 } 2729 2730 2731 /*------------------------------------------------------------*/ 2732 /*--- Data processing (register) instructions ---*/ 2733 /*------------------------------------------------------------*/ 2734 2735 static const HChar* nameSH ( UInt sh ) { 2736 switch (sh) { 2737 case 0: return "lsl"; 2738 case 1: return "lsr"; 2739 case 2: return "asr"; 2740 case 3: return "ror"; 2741 default: vassert(0); 2742 } 2743 } 2744 2745 /* Generate IR to get a register value, possibly shifted by an 2746 immediate. Returns either a 32- or 64-bit temporary holding the 2747 result. After the shift, the value can optionally be NOT-ed 2748 too. 2749 2750 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be 2751 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR 2752 isn't allowed, but it's the job of the caller to check that. 2753 */ 2754 static IRTemp getShiftedIRegOrZR ( Bool is64, 2755 UInt sh_how, UInt sh_amt, UInt regNo, 2756 Bool invert ) 2757 { 2758 vassert(sh_how < 4); 2759 vassert(sh_amt < (is64 ? 64 : 32)); 2760 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2761 IRTemp t0 = newTemp(ty); 2762 assign(t0, getIRegOrZR(is64, regNo)); 2763 IRTemp t1 = newTemp(ty); 2764 switch (sh_how) { 2765 case BITS2(0,0): 2766 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt))); 2767 break; 2768 case BITS2(0,1): 2769 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt))); 2770 break; 2771 case BITS2(1,0): 2772 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt))); 2773 break; 2774 case BITS2(1,1): 2775 assign(t1, mkexpr(mathROR(ty, t0, sh_amt))); 2776 break; 2777 default: 2778 vassert(0); 2779 } 2780 if (invert) { 2781 IRTemp t2 = newTemp(ty); 2782 assign(t2, unop(mkNOT(ty), mkexpr(t1))); 2783 return t2; 2784 } else { 2785 return t1; 2786 } 2787 } 2788 2789 2790 static 2791 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres, 2792 UInt insn) 2793 { 2794 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 2795 2796 /* ------------------- ADD/SUB(reg) ------------------- */ 2797 /* x==0 => 32 bit op x==1 => 64 bit op 2798 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED) 2799 2800 31 30 29 28 23 21 20 15 9 4 2801 | | | | | | | | | | 2802 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6) 2803 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6) 2804 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6) 2805 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6) 2806 */ 2807 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) { 2808 UInt bX = INSN(31,31); 2809 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */ 2810 UInt bS = INSN(29, 29); /* set flags? */ 2811 UInt sh = INSN(23,22); 2812 UInt rM = INSN(20,16); 2813 UInt imm6 = INSN(15,10); 2814 UInt rN = INSN(9,5); 2815 UInt rD = INSN(4,0); 2816 Bool isSUB = bOP == 1; 2817 Bool is64 = bX == 1; 2818 IRType ty = is64 ? Ity_I64 : Ity_I32; 2819 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) { 2820 /* invalid; fall through */ 2821 } else { 2822 IRTemp argL = newTemp(ty); 2823 assign(argL, getIRegOrZR(is64, rN)); 2824 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False); 2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2826 IRTemp res = newTemp(ty); 2827 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2828 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2829 if (bS) { 2830 setFlags_ADD_SUB(is64, isSUB, argL, argR); 2831 } 2832 DIP("%s%s %s, %s, %s, %s #%u\n", 2833 bOP ? "sub" : "add", bS ? "s" : "", 2834 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2835 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2836 return True; 2837 } 2838 } 2839 2840 /* ------------------- ADC/SBC(reg) ------------------- */ 2841 /* x==0 => 32 bit op x==1 => 64 bit op 2842 2843 31 30 29 28 23 21 20 15 9 4 2844 | | | | | | | | | | 2845 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm 2846 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm 2847 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm 2848 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm 2849 */ 2850 2851 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) { 2852 UInt bX = INSN(31,31); 2853 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */ 2854 UInt bS = INSN(29,29); /* set flags */ 2855 UInt rM = INSN(20,16); 2856 UInt rN = INSN(9,5); 2857 UInt rD = INSN(4,0); 2858 2859 Bool isSUB = bOP == 1; 2860 Bool is64 = bX == 1; 2861 IRType ty = is64 ? Ity_I64 : Ity_I32; 2862 2863 IRTemp oldC = newTemp(ty); 2864 assign(oldC, 2865 is64 ? 
mk_arm64g_calculate_flag_c() 2866 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) ); 2867 2868 IRTemp argL = newTemp(ty); 2869 assign(argL, getIRegOrZR(is64, rN)); 2870 IRTemp argR = newTemp(ty); 2871 assign(argR, getIRegOrZR(is64, rM)); 2872 2873 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2874 IRTemp res = newTemp(ty); 2875 if (isSUB) { 2876 IRExpr* one = is64 ? mkU64(1) : mkU32(1); 2877 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32; 2878 assign(res, 2879 binop(op, 2880 binop(op, mkexpr(argL), mkexpr(argR)), 2881 binop(xorOp, mkexpr(oldC), one))); 2882 } else { 2883 assign(res, 2884 binop(op, 2885 binop(op, mkexpr(argL), mkexpr(argR)), 2886 mkexpr(oldC))); 2887 } 2888 2889 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2890 2891 if (bS) { 2892 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC); 2893 } 2894 2895 DIP("%s%s %s, %s, %s\n", 2896 bOP ? "sbc" : "adc", bS ? "s" : "", 2897 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2898 nameIRegOrZR(is64, rM)); 2899 return True; 2900 } 2901 2902 /* -------------------- LOGIC(reg) -------------------- */ 2903 /* x==0 => 32 bit op x==1 => 64 bit op 2904 N==0 => inv? is no-op (no inversion) 2905 N==1 => inv? is NOT 2906 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR 2907 2908 31 30 28 23 21 20 15 9 4 2909 | | | | | | | | | 2910 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6)) 2911 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6)) 2912 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6)) 2913 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6)) 2914 With N=1, the names are: BIC ORN EON BICS 2915 */ 2916 if (INSN(28,24) == BITS5(0,1,0,1,0)) { 2917 UInt bX = INSN(31,31); 2918 UInt sh = INSN(23,22); 2919 UInt bN = INSN(21,21); 2920 UInt rM = INSN(20,16); 2921 UInt imm6 = INSN(15,10); 2922 UInt rN = INSN(9,5); 2923 UInt rD = INSN(4,0); 2924 Bool is64 = bX == 1; 2925 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2926 if (!is64 && imm6 > 31) { 2927 /* invalid; fall through */ 2928 } else { 2929 IRTemp argL = newTemp(ty); 2930 assign(argL, getIRegOrZR(is64, rN)); 2931 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1); 2932 IROp op = Iop_INVALID; 2933 switch (INSN(30,29)) { 2934 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break; 2935 case BITS2(0,1): op = mkOR(ty); break; 2936 case BITS2(1,0): op = mkXOR(ty); break; 2937 default: vassert(0); 2938 } 2939 IRTemp res = newTemp(ty); 2940 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2941 if (INSN(30,29) == BITS2(1,1)) { 2942 setFlags_LOGIC(is64, res); 2943 } 2944 putIRegOrZR(is64, rD, mkexpr(res)); 2945 2946 static const HChar* names_op[8] 2947 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" }; 2948 vassert(((bN << 2) | INSN(30,29)) < 8); 2949 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)]; 2950 /* Special-case the printing of "MOV" */ 2951 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) { 2952 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD), 2953 nameIRegOrZR(is64, rM)); 2954 } else { 2955 DIP("%s %s, %s, %s, %s #%u\n", nm_op, 2956 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2957 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2958 } 2959 return True; 2960 } 2961 } 2962 2963 /* -------------------- {U,S}MULH -------------------- */ 2964 /* 31 23 22 20 15 9 4 2965 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm 2966 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm 2967 */ 2968 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) 2969 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) { 2970 Bool isU = INSN(23,23) == 1; 2971 UInt mm = INSN(20,16); 2972 UInt nn = INSN(9,5); 2973 UInt dd = INSN(4,0); 2974 putIReg64orZR(dd, unop(Iop_128HIto64, 2975 binop(isU ? Iop_MullU64 : Iop_MullS64, 2976 getIReg64orZR(nn), getIReg64orZR(mm)))); 2977 DIP("%cmulh %s, %s, %s\n", 2978 isU ? 'u' : 's', 2979 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm)); 2980 return True; 2981 } 2982 2983 /* -------------------- M{ADD,SUB} -------------------- */ 2984 /* 31 30 20 15 14 9 4 2985 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n 2986 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n 2987 */ 2988 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) { 2989 Bool is64 = INSN(31,31) == 1; 2990 UInt mm = INSN(20,16); 2991 Bool isAdd = INSN(15,15) == 0; 2992 UInt aa = INSN(14,10); 2993 UInt nn = INSN(9,5); 2994 UInt dd = INSN(4,0); 2995 if (is64) { 2996 putIReg64orZR( 2997 dd, 2998 binop(isAdd ? Iop_Add64 : Iop_Sub64, 2999 getIReg64orZR(aa), 3000 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn)))); 3001 } else { 3002 putIReg32orZR( 3003 dd, 3004 binop(isAdd ? Iop_Add32 : Iop_Sub32, 3005 getIReg32orZR(aa), 3006 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn)))); 3007 } 3008 DIP("%s %s, %s, %s, %s\n", 3009 isAdd ?
"madd" : "msub", 3010 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3011 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa)); 3012 return True; 3013 } 3014 3015 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */ 3016 /* 31 30 28 20 15 11 9 4 3017 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm 3018 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm 3019 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm 3020 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm 3021 In all cases, the operation is: Rd = if cond then Rn else OP(Rm) 3022 */ 3023 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) { 3024 Bool is64 = INSN(31,31) == 1; 3025 UInt b30 = INSN(30,30); 3026 UInt mm = INSN(20,16); 3027 UInt cond = INSN(15,12); 3028 UInt b10 = INSN(10,10); 3029 UInt nn = INSN(9,5); 3030 UInt dd = INSN(4,0); 3031 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */ 3032 IRType ty = is64 ? Ity_I64 : Ity_I32; 3033 IRExpr* argL = getIRegOrZR(is64, nn); 3034 IRExpr* argR = getIRegOrZR(is64, mm); 3035 switch (op) { 3036 case BITS2(0,0): 3037 break; 3038 case BITS2(0,1): 3039 argR = binop(mkADD(ty), argR, mkU(ty,1)); 3040 break; 3041 case BITS2(1,0): 3042 argR = unop(mkNOT(ty), argR); 3043 break; 3044 case BITS2(1,1): 3045 argR = binop(mkSUB(ty), mkU(ty,0), argR); 3046 break; 3047 default: 3048 vassert(0); 3049 } 3050 putIRegOrZR( 3051 is64, dd, 3052 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 3053 argL, argR) 3054 ); 3055 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" }; 3056 DIP("%s %s, %s, %s, %s\n", op_nm[op], 3057 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3058 nameIRegOrZR(is64, mm), nameCC(cond)); 3059 return True; 3060 } 3061 3062 /* -------------- ADD/SUB(extended reg) -------------- */ 3063 /* 28 20 15 12 9 4 3064 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld 3065 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld 3066 3067 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld 3068 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld 3069 3070 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld 3071 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld 3072 3073 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld 3074 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld 3075 3076 The 'm' operand is extended per opt, thusly: 3077 3078 000 Xm & 0xFF UXTB 3079 001 Xm & 0xFFFF UXTH 3080 010 Xm & (2^32)-1 UXTW 3081 011 Xm UXTX 3082 3083 100 Xm sx from bit 7 SXTB 3084 101 Xm sx from bit 15 SXTH 3085 110 Xm sx from bit 31 SXTW 3086 111 Xm SXTX 3087 3088 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity 3089 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX 3090 are the identity operation on Wm. 3091 3092 After extension, the value is shifted left by imm3 bits, which 3093 may only be in the range 0 .. 4 inclusive. 3094 */ 3095 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) { 3096 Bool is64 = INSN(31,31) == 1; 3097 Bool isSub = INSN(30,30) == 1; 3098 Bool setCC = INSN(29,29) == 1; 3099 UInt mm = INSN(20,16); 3100 UInt opt = INSN(15,13); 3101 UInt imm3 = INSN(12,10); 3102 UInt nn = INSN(9,5); 3103 UInt dd = INSN(4,0); 3104 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx", 3105 "sxtb", "sxth", "sxtw", "sxtx" }; 3106 /* Do almost the same thing in the 32- and 64-bit cases. 
*/ 3107 IRTemp xN = newTemp(Ity_I64); 3108 IRTemp xM = newTemp(Ity_I64); 3109 assign(xN, getIReg64orSP(nn)); 3110 assign(xM, getIReg64orZR(mm)); 3111 IRExpr* xMw = mkexpr(xM); /* "xM widened" */ 3112 Int shSX = 0; 3113 /* widen Xm .. */ 3114 switch (opt) { 3115 case BITS3(0,0,0): // UXTB 3116 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break; 3117 case BITS3(0,0,1): // UXTH 3118 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break; 3119 case BITS3(0,1,0): // UXTW -- noop for the 32bit case 3120 if (is64) { 3121 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw)); 3122 } 3123 break; 3124 case BITS3(0,1,1): // UXTX -- always a noop 3125 break; 3126 case BITS3(1,0,0): // SXTB 3127 shSX = 56; goto sxTo64; 3128 case BITS3(1,0,1): // SXTH 3129 shSX = 48; goto sxTo64; 3130 case BITS3(1,1,0): // SXTW -- noop for the 32bit case 3131 if (is64) { 3132 shSX = 32; goto sxTo64; 3133 } 3134 break; 3135 case BITS3(1,1,1): // SXTX -- always a noop 3136 break; 3137 sxTo64: 3138 vassert(shSX >= 32); 3139 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)), 3140 mkU8(shSX)); 3141 break; 3142 default: 3143 vassert(0); 3144 } 3145 /* and now shift */ 3146 IRTemp argL = xN; 3147 IRTemp argR = newTemp(Ity_I64); 3148 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3))); 3149 IRTemp res = newTemp(Ity_I64); 3150 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64, 3151 mkexpr(argL), mkexpr(argR))); 3152 if (is64) { 3153 if (setCC) { 3154 putIReg64orZR(dd, mkexpr(res)); 3155 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 3156 } else { 3157 putIReg64orSP(dd, mkexpr(res)); 3158 } 3159 } else { 3160 if (setCC) { 3161 IRTemp argL32 = newTemp(Ity_I32); 3162 IRTemp argR32 = newTemp(Ity_I32); 3163 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res))); 3164 assign(argL32, unop(Iop_64to32, mkexpr(argL))); 3165 assign(argR32, unop(Iop_64to32, mkexpr(argR))); 3166 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32); 3167 } else { 3168 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res))); 3169 } 3170 } 3171 DIP("%s%s %s, %s, %s %s lsl %u\n", 3172 isSub ? "sub" : "add", setCC ? "s" : "", 3173 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd), 3174 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm), 3175 nameExt[opt], imm3); 3176 return True; 3177 } 3178 3179 /* ---------------- CCMP/CCMN(imm) ---------------- */ 3180 /* Bizarrely, these appear in the "data processing register" 3181 category, even though they are operations against an 3182 immediate. */ 3183 /* 31 29 20 15 11 9 3 3184 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond 3185 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond 3186 3187 Operation is: 3188 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv 3189 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv 3190 */ 3191 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3192 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) { 3193 Bool is64 = INSN(31,31) == 1; 3194 Bool isSUB = INSN(30,30) == 1; 3195 UInt imm5 = INSN(20,16); 3196 UInt cond = INSN(15,12); 3197 UInt nn = INSN(9,5); 3198 UInt nzcv = INSN(3,0); 3199 3200 IRTemp condT = newTemp(Ity_I1); 3201 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3202 3203 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 3204 IRTemp argL = newTemp(ty); 3205 IRTemp argR = newTemp(ty); 3206 3207 if (is64) { 3208 assign(argL, getIReg64orZR(nn)); 3209 assign(argR, mkU64(imm5)); 3210 } else { 3211 assign(argL, getIReg32orZR(nn)); 3212 assign(argR, mkU32(imm5)); 3213 } 3214 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3215 3216 DIP("ccm%c %s, #%u, #%u, %s\n", 3217 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 3218 imm5, nzcv, nameCC(cond)); 3219 return True; 3220 } 3221 3222 /* ---------------- CCMP/CCMN(reg) ---------------- */ 3223 /* 31 29 20 15 11 9 3 3224 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond 3225 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond 3226 Operation is: 3227 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv 3228 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv 3229 */ 3230 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3231 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) { 3232 Bool is64 = INSN(31,31) == 1; 3233 Bool isSUB = INSN(30,30) == 1; 3234 UInt mm = INSN(20,16); 3235 UInt cond = INSN(15,12); 3236 UInt nn = INSN(9,5); 3237 UInt nzcv = INSN(3,0); 3238 3239 IRTemp condT = newTemp(Ity_I1); 3240 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3241 3242 IRType ty = is64 ? Ity_I64 : Ity_I32; 3243 IRTemp argL = newTemp(ty); 3244 IRTemp argR = newTemp(ty); 3245 3246 if (is64) { 3247 assign(argL, getIReg64orZR(nn)); 3248 assign(argR, getIReg64orZR(mm)); 3249 } else { 3250 assign(argL, getIReg32orZR(nn)); 3251 assign(argR, getIReg32orZR(mm)); 3252 } 3253 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3254 3255 DIP("ccm%c %s, %s, #%u, %s\n", 3256 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 3257 nameIRegOrZR(is64, mm), nzcv, nameCC(cond)); 3258 return True; 3259 } 3260 3261 3262 /* -------------- REV/REV16/REV32/RBIT -------------- */ 3263 /* 31 30 28 20 15 11 9 4 3264 3265 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn 3266 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn 3267 3268 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn 3269 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn 3270 3271 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn 3272 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn 3273 3274 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn 3275 */ 3276 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3277 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) { 3278 UInt b31 = INSN(31,31); 3279 UInt opc = INSN(11,10); 3280 3281 UInt ix = 0; 3282 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1; 3283 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2; 3284 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3; 3285 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4; 3286 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5; 3287 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6; 3288 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7; 3289 if (ix >= 1 && ix <= 7) { 3290 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7; 3291 UInt nn = INSN(9,5); 3292 UInt dd = INSN(4,0); 3293 IRTemp src = newTemp(Ity_I64); 3294 IRTemp dst = IRTemp_INVALID; 3295 IRTemp (*math)(IRTemp) = NULL; 3296 switch (ix) { 3297 case 1: case 2: math = math_BYTESWAP64; break; 3298 case 3: case 4: math = math_BITSWAP64; break; 3299 case 5: case 6: math = math_USHORTSWAP64; break; 3300 case 7: math = math_UINTSWAP64; break; 3301 default: vassert(0); 3302 } 3303 const HChar* names[7] 3304 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" }; 3305 const HChar* nm = names[ix-1]; 3306 vassert(math); 3307 if (ix == 6) { 3308 
/* This has to be special cased, since the logic below doesn't 3309 handle it correctly. */ 3310 assign(src, getIReg64orZR(nn)); 3311 dst = math(src); 3312 putIReg64orZR(dd, 3313 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst)))); 3314 } else if (is64) { 3315 assign(src, getIReg64orZR(nn)); 3316 dst = math(src); 3317 putIReg64orZR(dd, mkexpr(dst)); 3318 } else { 3319 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32))); 3320 dst = math(src); 3321 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3322 } 3323 DIP("%s %s, %s\n", nm, 3324 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn)); 3325 return True; 3326 } 3327 /* else fall through */ 3328 } 3329 3330 /* -------------------- CLZ/CLS -------------------- */ 3331 /* 30 28 24 20 15 9 4 3332 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn 3333 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn 3334 */ 3335 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3336 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) { 3337 Bool is64 = INSN(31,31) == 1; 3338 Bool isCLS = INSN(10,10) == 1; 3339 UInt nn = INSN(9,5); 3340 UInt dd = INSN(4,0); 3341 IRTemp src = newTemp(Ity_I64); 3342 IRTemp srcZ = newTemp(Ity_I64); 3343 IRTemp dst = newTemp(Ity_I64); 3344 /* Get the argument, widened out to 64 bit */ 3345 if (is64) { 3346 assign(src, getIReg64orZR(nn)); 3347 } else { 3348 assign(src, binop(Iop_Shl64, 3349 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32))); 3350 } 3351 /* If this is CLS, mash the arg around accordingly */ 3352 if (isCLS) { 3353 IRExpr* one = mkU8(1); 3354 assign(srcZ, 3355 binop(Iop_Xor64, 3356 binop(Iop_Shl64, mkexpr(src), one), 3357 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one))); 3358 } else { 3359 assign(srcZ, mkexpr(src)); 3360 } 3361 /* And compute CLZ. */ 3362 if (is64) { 3363 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3364 mkU64(isCLS ? 63 : 64), 3365 unop(Iop_Clz64, mkexpr(srcZ)))); 3366 putIReg64orZR(dd, mkexpr(dst)); 3367 } else { 3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3369 mkU64(isCLS ? 31 : 32), 3370 unop(Iop_Clz64, mkexpr(srcZ)))); 3371 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3372 } 3373 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z', 3374 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn)); 3375 return True; 3376 } 3377 3378 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */ 3379 /* 30 28 20 15 11 9 4 3380 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm 3381 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm 3382 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm 3383 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm 3384 */ 3385 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3386 && INSN(15,12) == BITS4(0,0,1,0)) { 3387 Bool is64 = INSN(31,31) == 1; 3388 UInt mm = INSN(20,16); 3389 UInt op = INSN(11,10); 3390 UInt nn = INSN(9,5); 3391 UInt dd = INSN(4,0); 3392 IRType ty = is64 ? Ity_I64 : Ity_I32; 3393 IRTemp srcL = newTemp(ty); 3394 IRTemp srcR = newTemp(Ity_I64); 3395 IRTemp res = newTemp(ty); 3396 IROp iop = Iop_INVALID; 3397 assign(srcL, getIRegOrZR(is64, nn)); 3398 assign(srcR, binop(Iop_And64, getIReg64orZR(mm), 3399 mkU64(is64 ? 
63 : 31))); 3400 if (op < 3) { 3401 // LSLV, LSRV, ASRV 3402 switch (op) { 3403 case BITS2(0,0): iop = mkSHL(ty); break; 3404 case BITS2(0,1): iop = mkSHR(ty); break; 3405 case BITS2(1,0): iop = mkSAR(ty); break; 3406 default: vassert(0); 3407 } 3408 assign(res, binop(iop, mkexpr(srcL), 3409 unop(Iop_64to8, mkexpr(srcR)))); 3410 } else { 3411 // RORV 3412 IROp opSHL = mkSHL(ty); 3413 IROp opSHR = mkSHR(ty); 3414 IROp opOR = mkOR(ty); 3415 IRExpr* width = mkU64(is64 ? 64: 32); 3416 assign( 3417 res, 3418 IRExpr_ITE( 3419 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)), 3420 mkexpr(srcL), 3421 binop(opOR, 3422 binop(opSHL, 3423 mkexpr(srcL), 3424 unop(Iop_64to8, binop(Iop_Sub64, width, 3425 mkexpr(srcR)))), 3426 binop(opSHR, 3427 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR)))) 3428 )); 3429 } 3430 putIRegOrZR(is64, dd, mkexpr(res)); 3431 vassert(op < 4); 3432 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" }; 3433 DIP("%s %s, %s, %s\n", 3434 names[op], nameIRegOrZR(is64,dd), 3435 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm)); 3436 return True; 3437 } 3438 3439 /* -------------------- SDIV/UDIV -------------------- */ 3440 /* 30 28 20 15 10 9 4 3441 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm 3442 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm 3443 */ 3444 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3445 && INSN(15,11) == BITS5(0,0,0,0,1)) { 3446 Bool is64 = INSN(31,31) == 1; 3447 UInt mm = INSN(20,16); 3448 Bool isS = INSN(10,10) == 1; 3449 UInt nn = INSN(9,5); 3450 UInt dd = INSN(4,0); 3451 if (isS) { 3452 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32, 3453 getIRegOrZR(is64, nn), 3454 getIRegOrZR(is64, mm))); 3455 } else { 3456 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32, 3457 getIRegOrZR(is64, nn), 3458 getIRegOrZR(is64, mm))); 3459 } 3460 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u', 3461 nameIRegOrZR(is64, dd), 3462 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm)); 3463 return True; 3464 } 3465 3466 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */ 3467 /* 31 23 20 15 14 9 4 3468 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa 3469 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa 3470 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa 3471 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa 3472 with operation 3473 Xd = Xa +/- (Wn *u/s Wm) 3474 */ 3475 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) { 3476 Bool isU = INSN(23,23) == 1; 3477 UInt mm = INSN(20,16); 3478 Bool isAdd = INSN(15,15) == 0; 3479 UInt aa = INSN(14,10); 3480 UInt nn = INSN(9,5); 3481 UInt dd = INSN(4,0); 3482 IRTemp wN = newTemp(Ity_I32); 3483 IRTemp wM = newTemp(Ity_I32); 3484 IRTemp xA = newTemp(Ity_I64); 3485 IRTemp muld = newTemp(Ity_I64); 3486 IRTemp res = newTemp(Ity_I64); 3487 assign(wN, getIReg32orZR(nn)); 3488 assign(wM, getIReg32orZR(mm)); 3489 assign(xA, getIReg64orZR(aa)); 3490 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32, 3491 mkexpr(wN), mkexpr(wM))); 3492 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64, 3493 mkexpr(xA), mkexpr(muld))); 3494 putIReg64orZR(dd, mkexpr(res)); 3495 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? 
"add" : "sub", 3496 nameIReg64orZR(dd), nameIReg32orZR(nn), 3497 nameIReg32orZR(mm), nameIReg64orZR(aa)); 3498 return True; 3499 } 3500 vex_printf("ARM64 front end: data_processing_register\n"); 3501 return False; 3502 # undef INSN 3503 } 3504 3505 3506 /*------------------------------------------------------------*/ 3507 /*--- Math helpers for vector interleave/deinterleave ---*/ 3508 /*------------------------------------------------------------*/ 3509 3510 #define EX(_tmp) \ 3511 mkexpr(_tmp) 3512 #define SL(_hi128,_lo128,_nbytes) \ 3513 ( (_nbytes) == 0 \ 3514 ? (_lo128) \ 3515 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) ) 3516 #define ROR(_v128,_nbytes) \ 3517 SL((_v128),(_v128),(_nbytes)) 3518 #define ROL(_v128,_nbytes) \ 3519 SL((_v128),(_v128),16-(_nbytes)) 3520 #define SHR(_v128,_nbytes) \ 3521 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes))) 3522 #define SHL(_v128,_nbytes) \ 3523 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes))) 3524 #define ILO64x2(_argL,_argR) \ 3525 binop(Iop_InterleaveLO64x2,(_argL),(_argR)) 3526 #define IHI64x2(_argL,_argR) \ 3527 binop(Iop_InterleaveHI64x2,(_argL),(_argR)) 3528 #define ILO32x4(_argL,_argR) \ 3529 binop(Iop_InterleaveLO32x4,(_argL),(_argR)) 3530 #define IHI32x4(_argL,_argR) \ 3531 binop(Iop_InterleaveHI32x4,(_argL),(_argR)) 3532 #define ILO16x8(_argL,_argR) \ 3533 binop(Iop_InterleaveLO16x8,(_argL),(_argR)) 3534 #define IHI16x8(_argL,_argR) \ 3535 binop(Iop_InterleaveHI16x8,(_argL),(_argR)) 3536 #define ILO8x16(_argL,_argR) \ 3537 binop(Iop_InterleaveLO8x16,(_argL),(_argR)) 3538 #define IHI8x16(_argL,_argR) \ 3539 binop(Iop_InterleaveHI8x16,(_argL),(_argR)) 3540 #define CEV32x4(_argL,_argR) \ 3541 binop(Iop_CatEvenLanes32x4,(_argL),(_argR)) 3542 #define COD32x4(_argL,_argR) \ 3543 binop(Iop_CatOddLanes32x4,(_argL),(_argR)) 3544 #define COD16x8(_argL,_argR) \ 3545 binop(Iop_CatOddLanes16x8,(_argL),(_argR)) 3546 #define COD8x16(_argL,_argR) \ 3547 binop(Iop_CatOddLanes8x16,(_argL),(_argR)) 3548 #define CEV8x16(_argL,_argR) \ 3549 binop(Iop_CatEvenLanes8x16,(_argL),(_argR)) 3550 #define AND(_arg1,_arg2) \ 3551 binop(Iop_AndV128,(_arg1),(_arg2)) 3552 #define OR2(_arg1,_arg2) \ 3553 binop(Iop_OrV128,(_arg1),(_arg2)) 3554 #define OR3(_arg1,_arg2,_arg3) \ 3555 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3))) 3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \ 3557 binop(Iop_OrV128, \ 3558 binop(Iop_OrV128,(_arg1),(_arg2)), \ 3559 binop(Iop_OrV128,(_arg3),(_arg4))) 3560 3561 3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */ 3563 static 3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0, 3565 UInt laneSzBlg2, IRTemp u0 ) 3566 { 3567 assign(*i0, mkexpr(u0)); 3568 } 3569 3570 3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */ 3572 static 3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1, 3574 UInt laneSzBlg2, IRTemp u0, IRTemp u1 ) 3575 { 3576 /* This is pretty easy, since we have primitives directly to 3577 hand. 
*/ 3578 if (laneSzBlg2 == 3) { 3579 // 64x2 3580 // u1 == B1 B0, u0 == A1 A0 3581 // i1 == B1 A1, i0 == B0 A0 3582 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0))); 3583 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0))); 3584 return; 3585 } 3586 if (laneSzBlg2 == 2) { 3587 // 32x4 3588 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0, 3589 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0 3590 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0))); 3591 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0))); 3592 return; 3593 } 3594 if (laneSzBlg2 == 1) { 3595 // 16x8 3596 // u1 == B{7..0}, u0 == A{7..0} 3597 // i0 == B3 A3 B2 A2 B1 A1 B0 A0 3598 // i1 == B7 A7 B6 A6 B5 A5 B4 A4 3599 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0))); 3600 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0))); 3601 return; 3602 } 3603 if (laneSzBlg2 == 0) { 3604 // 8x16 3605 // u1 == B{f..0}, u0 == A{f..0} 3606 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0 3607 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8 3608 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0))); 3609 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0))); 3610 return; 3611 } 3612 /*NOTREACHED*/ 3613 vassert(0); 3614 } 3615 3616 3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */ 3618 static 3619 void math_INTERLEAVE3_128( 3620 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, 3621 UInt laneSzBlg2, 3622 IRTemp u0, IRTemp u1, IRTemp u2 ) 3623 { 3624 if (laneSzBlg2 == 3) { 3625 // 64x2 3626 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0 3627 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0, 3628 assign(*i2, IHI64x2( EX(u2), EX(u1) )); 3629 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) )); 3630 assign(*i0, ILO64x2( EX(u1), EX(u0) )); 3631 return; 3632 } 3633 3634 if (laneSzBlg2 == 2) { 3635 // 32x4 3636 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0 3637 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0 3638 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0 3639 IRTemp p0 = newTempV128(); 3640 IRTemp p1 = newTempV128(); 3641 IRTemp p2 = newTempV128(); 3642 IRTemp c1100 = newTempV128(); 3643 IRTemp c0011 = newTempV128(); 3644 IRTemp c0110 = newTempV128(); 3645 assign(c1100, mkV128(0xFF00)); 3646 assign(c0011, mkV128(0x00FF)); 3647 assign(c0110, mkV128(0x0FF0)); 3648 // First interleave them at 64x2 granularity, 3649 // generating partial ("p") values.
3650 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2); 3651 // And more shuffling around for the final answer 3652 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ), 3653 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) )); 3654 assign(*i1, OR3( SHL(EX(p2),12), 3655 AND(EX(p1),EX(c0110)), 3656 SHR(EX(p0),12) )); 3657 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ), 3658 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) )); 3659 return; 3660 } 3661 3662 if (laneSzBlg2 == 1) { 3663 // 16x8 3664 // u2 == C7 C6 C5 C4 C3 C2 C1 C0 3665 // u1 == B7 B6 B5 B4 B3 B2 B1 B0 3666 // u0 == A7 A6 A5 A4 A3 A2 A1 A0 3667 // 3668 // p2 == C7 C6 B7 B6 A7 A6 C5 C4 3669 // p1 == B5 B4 A5 A4 C3 C2 B3 B2 3670 // p0 == A3 A2 C1 C0 B1 B0 A1 A0 3671 // 3672 // i2 == C7 B7 A7 C6 B6 A6 C5 B5 3673 // i1 == A5 C4 B4 A4 C4 B3 A3 C2 3674 // i0 == B2 A2 C1 B1 A1 C0 B0 A0 3675 IRTemp p0 = newTempV128(); 3676 IRTemp p1 = newTempV128(); 3677 IRTemp p2 = newTempV128(); 3678 IRTemp c1000 = newTempV128(); 3679 IRTemp c0100 = newTempV128(); 3680 IRTemp c0010 = newTempV128(); 3681 IRTemp c0001 = newTempV128(); 3682 assign(c1000, mkV128(0xF000)); 3683 assign(c0100, mkV128(0x0F00)); 3684 assign(c0010, mkV128(0x00F0)); 3685 assign(c0001, mkV128(0x000F)); 3686 // First interleave them at 32x4 granularity, 3687 // generating partial ("p") values. 3688 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2); 3689 // And more shuffling around for the final answer 3690 assign(*i2, 3691 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ), 3692 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ), 3693 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ), 3694 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) ) 3695 )); 3696 assign(*i1, 3697 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ), 3698 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ), 3699 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ), 3700 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) ) 3701 )); 3702 assign(*i0, 3703 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ), 3704 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ), 3705 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ), 3706 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) ) 3707 )); 3708 return; 3709 } 3710 3711 if (laneSzBlg2 == 0) { 3712 // 8x16. It doesn't seem worth the hassle of first doing a 3713 // 16x8 interleave, so just generate all 24 partial results 3714 // directly :-( 3715 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0 3716 // i2 == Cf Bf Af Ce .. Bb Ab Ca 3717 // i1 == Ba Aa C9 B9 .. A6 C5 B5 3718 // i0 == A5 C4 B4 A4 .. 
C0 B0 A0 3719 3720 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128(); 3721 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128(); 3722 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128(); 3723 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128(); 3724 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128(); 3725 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128(); 3726 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128(); 3727 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128(); 3728 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128(); 3729 3730 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector 3731 // of the form 14 bytes junk : CC[0xF] : BB[0xA] 3732 // 3733 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \ 3734 IRTemp t_##_tempName = newTempV128(); \ 3735 assign(t_##_tempName, \ 3736 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \ 3737 ROR(EX(_srcVec2),(_srcShift2)) ) ) 3738 3739 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively 3740 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0; 3741 3742 // The slicing and reassembly are done as interleavedly as possible, 3743 // so as to minimise the demand for registers in the back end, which 3744 // was observed to be a problem in testing. 3745 3746 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14] 3747 XXXX(AfCe, AA, 0xf, CC, 0xe); 3748 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe))); 3749 3750 XXXX(BeAe, BB, 0xe, AA, 0xe); 3751 XXXX(CdBd, CC, 0xd, BB, 0xd); 3752 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd))); 3753 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98))); 3754 3755 XXXX(AdCc, AA, 0xd, CC, 0xc); 3756 XXXX(BcAc, BB, 0xc, AA, 0xc); 3757 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc))); 3758 3759 XXXX(CbBb, CC, 0xb, BB, 0xb); 3760 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0] 3761 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa))); 3762 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210))); 3763 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64))); 3764 3765 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14] 3766 XXXX(C9B9, CC, 0x9, BB, 0x9); 3767 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9))); 3768 3769 XXXX(A9C8, AA, 0x9, CC, 0x8); 3770 XXXX(B8A8, BB, 0x8, AA, 0x8); 3771 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8))); 3772 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98))); 3773 3774 XXXX(C7B7, CC, 0x7, BB, 0x7); 3775 XXXX(A7C6, AA, 0x7, CC, 0x6); 3776 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6))); 3777 3778 XXXX(B6A6, BB, 0x6, AA, 0x6); 3779 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0] 3780 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5))); 3781 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210))); 3782 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64))); 3783 3784 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14] 3785 XXXX(B4A4, BB, 0x4, AA, 0x4); 3786 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4))); 3787 3788 XXXX(C3B3, CC, 0x3, BB, 0x3); 3789 XXXX(A3C2, AA, 0x3, CC, 0x2); 3790 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2))); 3791 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98))); 3792 3793 XXXX(B2A2, BB, 0x2, AA, 0x2); 3794 XXXX(C1B1, CC, 0x1, BB, 0x1); 3795 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1))); 3796 3797 XXXX(A1C0, AA, 0x1, CC, 0x0); 3798 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0] 3799 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0))); 3800 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210))); 3801 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64))); 3802 3803 # undef XXXX 3804 return; 3805 } 3806 3807 /*NOTREACHED*/ 3808 vassert(0); 3809 } 
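/* Illustrative reference model for the 3-register interleave above
   (a sketch only -- it is not compiled, and the array view of the
   registers is hypothetical).  For ST3 with N lanes per source
   register, the memory-order result represented jointly by i0, i1
   and i2 is:

      for (k = 0; k < N; k++) {
         out[3*k + 0] = A[k];   // lane k of u0
         out[3*k + 1] = B[k];   // lane k of u1
         out[3*k + 2] = C[k];   // lane k of u2
      }

   with out[0 .. N-1] ending up in i0, out[N .. 2N-1] in i1, and
   out[2N .. 3N-1] in i2.  The code above computes the same
   permutation purely with 128-bit shuffles, so the data never has
   to leave the vector registers. */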
3810 3811 3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */ 3813 static 3814 void math_INTERLEAVE4_128( 3815 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3, 3816 UInt laneSzBlg2, 3817 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 ) 3818 { 3819 if (laneSzBlg2 == 3) { 3820 // 64x2 3821 assign(*i0, ILO64x2(EX(u1), EX(u0))); 3822 assign(*i1, ILO64x2(EX(u3), EX(u2))); 3823 assign(*i2, IHI64x2(EX(u1), EX(u0))); 3824 assign(*i3, IHI64x2(EX(u3), EX(u2))); 3825 return; 3826 } 3827 if (laneSzBlg2 == 2) { 3828 // 32x4 3829 // First, interleave at the 64-bit lane size. 3830 IRTemp p0 = newTempV128(); 3831 IRTemp p1 = newTempV128(); 3832 IRTemp p2 = newTempV128(); 3833 IRTemp p3 = newTempV128(); 3834 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3); 3835 // And interleave (cat) at the 32 bit size. 3836 assign(*i0, CEV32x4(EX(p1), EX(p0))); 3837 assign(*i1, COD32x4(EX(p1), EX(p0))); 3838 assign(*i2, CEV32x4(EX(p3), EX(p2))); 3839 assign(*i3, COD32x4(EX(p3), EX(p2))); 3840 return; 3841 } 3842 if (laneSzBlg2 == 1) { 3843 // 16x8 3844 // First, interleave at the 32-bit lane size. 3845 IRTemp p0 = newTempV128(); 3846 IRTemp p1 = newTempV128(); 3847 IRTemp p2 = newTempV128(); 3848 IRTemp p3 = newTempV128(); 3849 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3); 3850 // And rearrange within each vector, to get the right 16 bit lanes. 3851 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2))); 3852 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2))); 3853 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2))); 3854 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2))); 3855 return; 3856 } 3857 if (laneSzBlg2 == 0) { 3858 // 8x16 3859 // First, interleave at the 16-bit lane size. 3860 IRTemp p0 = newTempV128(); 3861 IRTemp p1 = newTempV128(); 3862 IRTemp p2 = newTempV128(); 3863 IRTemp p3 = newTempV128(); 3864 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3); 3865 // And rearrange within each vector, to get the right 8 bit lanes. 3866 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0)))); 3867 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1)))); 3868 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2)))); 3869 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3)))); 3870 return; 3871 } 3872 /*NOTREACHED*/ 3873 vassert(0); 3874 } 3875 3876 3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */ 3878 static 3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0, 3880 UInt laneSzBlg2, IRTemp i0 ) 3881 { 3882 assign(*u0, mkexpr(i0)); 3883 } 3884 3885 3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */ 3887 static 3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1, 3889 UInt laneSzBlg2, IRTemp i0, IRTemp i1 ) 3890 { 3891 /* This is pretty easy, since we have primitives directly to 3892 hand. 
*/ 3893 if (laneSzBlg2 == 3) { 3894 // 64x2 3895 // i1 == B1 A1, i0 == B0 A0 3896 // u1 == B1 B0, u0 == A1 A0 3897 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0))); 3898 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0))); 3899 return; 3900 } 3901 if (laneSzBlg2 == 2) { 3902 // 32x4 3903 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0 3904 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0, 3905 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0))); 3906 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0))); 3907 return; 3908 } 3909 if (laneSzBlg2 == 1) { 3910 // 16x8 3911 // i0 == B3 A3 B2 A2 B1 A1 B0 A0 3912 // i1 == B7 A7 B6 A6 B5 A5 B4 A4 3913 // u1 == B{7..0}, u0 == A{7..0} 3914 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0))); 3915 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0))); 3916 return; 3917 } 3918 if (laneSzBlg2 == 0) { 3919 // 8x16 3920 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0 3921 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8 3922 // u1 == B{f..0}, u0 == A{f..0} 3923 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0))); 3924 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0))); 3925 return; 3926 } 3927 /*NOTREACHED*/ 3928 vassert(0); 3929 } 3930 3931 3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */ 3933 static 3934 void math_DEINTERLEAVE3_128( 3935 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 3936 UInt laneSzBlg2, 3937 IRTemp i0, IRTemp i1, IRTemp i2 ) 3938 { 3939 if (laneSzBlg2 == 3) { 3940 // 64x2 3941 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0, 3942 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0 3943 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) )); 3944 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) )); 3945 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) )); 3946 return; 3947 } 3948 3949 if (laneSzBlg2 == 2) { 3950 // 32x4 3951 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0 3952 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0 3953 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0 3954 IRTemp t_a1c0b0a0 = newTempV128(); 3955 IRTemp t_a2c1b1a1 = newTempV128(); 3956 IRTemp t_a3c2b2a2 = newTempV128(); 3957 IRTemp t_a0c3b3a3 = newTempV128(); 3958 IRTemp p0 = newTempV128(); 3959 IRTemp p1 = newTempV128(); 3960 IRTemp p2 = newTempV128(); 3961 // Compute some intermediate values. 3962 assign(t_a1c0b0a0, EX(i0)); 3963 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4)); 3964 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4)); 3965 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4)); 3966 // First deinterleave into lane-pairs 3967 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0))); 3968 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)), 3969 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0)))); 3970 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4))); 3971 // Then deinterleave at 64x2 granularity. 
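// (p0/p1/p2 now have exactly the layout that a 64x2 interleave of
//  u0/u1/u2 would produce -- compare math_INTERLEAVE3_128 above --
//  so a single recursive deinterleave at lane size 64 recovers the
//  original register values.)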
3972 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
3973 return;
3974 }
3975
3976 if (laneSzBlg2 == 1) {
3977 // 16x8
3978 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3979 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3980 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3981 //
3982 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3983 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3984 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3985 //
3986 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3987 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3988 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3989
3990 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
3991 s0 = s1 = s2 = s3
3992 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
3993 newTempsV128_4(&s0, &s1, &s2, &s3);
3994 newTempsV128_4(&t0, &t1, &t2, &t3);
3995 newTempsV128_4(&p0, &p1, &p2, &c00111111);
3996
3997 // s0 == b2a2 c1b1a1 c0b0a0
3998 // s1 == b4a4 c3b3a3 c2b2a2
3999 // s2 == b6a6 c5b5a5 c4b4a4
4000 // s3 == b0a0 c7b7a7 c6b6a6
4001 assign(s0, EX(i0));
4002 assign(s1, SL(EX(i1),EX(i0),6*2));
4003 assign(s2, SL(EX(i2),EX(i1),4*2));
4004 assign(s3, SL(EX(i0),EX(i2),2*2));
4005
4006 // t0 == 0 0 c1c0 b1b0 a1a0
4007 // t1 == 0 0 c3c2 b3b2 a3a2
4008 // t2 == 0 0 c5c4 b5b4 a5a4
4009 // t3 == 0 0 c7c6 b7b6 a7a6
4010 assign(c00111111, mkV128(0x0FFF));
4011 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4012 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4013 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4014 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4015
4016 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4017 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4018 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4019
4020 // Then deinterleave at 32x4 granularity.
4021 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4022 return;
4023 }
4024
4025 if (laneSzBlg2 == 0) {
4026 // 8x16. This is the same scheme as for 16x8, with twice the
4027 // number of intermediate values.
4028 // 4029 // u2 == C{f..0} 4030 // u1 == B{f..0} 4031 // u0 == A{f..0} 4032 // 4033 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a} 4034 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5} 4035 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4036 // 4037 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba} 4038 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54} 4039 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10} 4040 // 4041 IRTemp s0, s1, s2, s3, s4, s5, s6, s7, 4042 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK; 4043 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 4044 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK 4045 = IRTemp_INVALID; 4046 newTempsV128_4(&s0, &s1, &s2, &s3); 4047 newTempsV128_4(&s4, &s5, &s6, &s7); 4048 newTempsV128_4(&t0, &t1, &t2, &t3); 4049 newTempsV128_4(&t4, &t5, &t6, &t7); 4050 newTempsV128_4(&p0, &p1, &p2, &cMASK); 4051 4052 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4053 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2} 4054 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4} 4055 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6} 4056 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8} 4057 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a} 4058 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c} 4059 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e} 4060 assign(s0, SL(EX(i1),EX(i0), 0)); 4061 assign(s1, SL(EX(i1),EX(i0), 6)); 4062 assign(s2, SL(EX(i1),EX(i0),12)); 4063 assign(s3, SL(EX(i2),EX(i1), 2)); 4064 assign(s4, SL(EX(i2),EX(i1), 8)); 4065 assign(s5, SL(EX(i2),EX(i1),14)); 4066 assign(s6, SL(EX(i0),EX(i2), 4)); 4067 assign(s7, SL(EX(i0),EX(i2),10)); 4068 4069 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0 4070 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2 4071 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4 4072 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6 4073 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8 4074 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa 4075 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac 4076 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae 4077 assign(cMASK, mkV128(0x003F)); 4078 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK))); 4079 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK))); 4080 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK))); 4081 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK))); 4082 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK))); 4083 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK))); 4084 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK))); 4085 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK))); 4086 4087 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) )); 4088 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8), 4089 SHL(EX(t3),2), SHR(EX(t2),4) )); 4090 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) )); 4091 4092 // Then deinterleave at 16x8 granularity. 4093 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2); 4094 return; 4095 } 4096 4097 /*NOTREACHED*/ 4098 vassert(0); 4099 } 4100 4101 4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. 
*/ 4103 static 4104 void math_DEINTERLEAVE4_128( 4105 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3, 4106 UInt laneSzBlg2, 4107 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 ) 4108 { 4109 if (laneSzBlg2 == 3) { 4110 // 64x2 4111 assign(*u0, ILO64x2(EX(i2), EX(i0))); 4112 assign(*u1, IHI64x2(EX(i2), EX(i0))); 4113 assign(*u2, ILO64x2(EX(i3), EX(i1))); 4114 assign(*u3, IHI64x2(EX(i3), EX(i1))); 4115 return; 4116 } 4117 if (laneSzBlg2 == 2) { 4118 // 32x4 4119 IRTemp p0 = newTempV128(); 4120 IRTemp p2 = newTempV128(); 4121 IRTemp p1 = newTempV128(); 4122 IRTemp p3 = newTempV128(); 4123 assign(p0, ILO32x4(EX(i1), EX(i0))); 4124 assign(p1, IHI32x4(EX(i1), EX(i0))); 4125 assign(p2, ILO32x4(EX(i3), EX(i2))); 4126 assign(p3, IHI32x4(EX(i3), EX(i2))); 4127 // And now do what we did for the 64-bit case. 4128 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3); 4129 return; 4130 } 4131 if (laneSzBlg2 == 1) { 4132 // 16x8 4133 // Deinterleave into 32-bit chunks, then do as the 32-bit case. 4134 IRTemp p0 = newTempV128(); 4135 IRTemp p1 = newTempV128(); 4136 IRTemp p2 = newTempV128(); 4137 IRTemp p3 = newTempV128(); 4138 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8))); 4139 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8))); 4140 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8))); 4141 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8))); 4142 // From here on is like the 32 bit case. 4143 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3); 4144 return; 4145 } 4146 if (laneSzBlg2 == 0) { 4147 // 8x16 4148 // Deinterleave into 16-bit chunks, then do as the 16-bit case. 4149 IRTemp p0 = newTempV128(); 4150 IRTemp p1 = newTempV128(); 4151 IRTemp p2 = newTempV128(); 4152 IRTemp p3 = newTempV128(); 4153 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)), 4154 ILO8x16(EX(i0),ROL(EX(i0),4)) )); 4155 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)), 4156 ILO8x16(EX(i1),ROL(EX(i1),4)) )); 4157 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)), 4158 ILO8x16(EX(i2),ROL(EX(i2),4)) )); 4159 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)), 4160 ILO8x16(EX(i3),ROL(EX(i3),4)) )); 4161 // From here on is like the 16 bit case. 4162 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3); 4163 return; 4164 } 4165 /*NOTREACHED*/ 4166 vassert(0); 4167 } 4168 4169 4170 /* Wrappers that use the full-width (de)interleavers to do half-width 4171 (de)interleaving. The scheme is to clone each input lane in the 4172 lower half of each incoming value, do a full width (de)interleave 4173 at the next lane size up, and remove every other lane of the the 4174 result. The returned values may have any old junk in the upper 4175 64 bits -- the caller must ignore that. */ 4176 4177 /* Helper function -- get doubling and narrowing operations. */ 4178 static 4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler, 4180 /*OUT*/IROp* halver, 4181 UInt laneSzBlg2 ) 4182 { 4183 switch (laneSzBlg2) { 4184 case 2: 4185 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4; 4186 break; 4187 case 1: 4188 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8; 4189 break; 4190 case 0: 4191 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16; 4192 break; 4193 default: 4194 vassert(0); 4195 } 4196 } 4197 4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */ 4199 static 4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0, 4201 UInt laneSzBlg2, IRTemp u0 ) 4202 { 4203 assign(*i0, mkexpr(u0)); 4204 } 4205 4206 4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. 
*/ 4208 static 4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1, 4210 UInt laneSzBlg2, IRTemp u0, IRTemp u1 ) 4211 { 4212 if (laneSzBlg2 == 3) { 4213 // 1x64, degenerate case 4214 assign(*i0, EX(u0)); 4215 assign(*i1, EX(u1)); 4216 return; 4217 } 4218 4219 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4220 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4221 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4222 4223 IRTemp du0 = newTempV128(); 4224 IRTemp du1 = newTempV128(); 4225 assign(du0, binop(doubler, EX(u0), EX(u0))); 4226 assign(du1, binop(doubler, EX(u1), EX(u1))); 4227 IRTemp di0 = newTempV128(); 4228 IRTemp di1 = newTempV128(); 4229 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1); 4230 assign(*i0, binop(halver, EX(di0), EX(di0))); 4231 assign(*i1, binop(halver, EX(di1), EX(di1))); 4232 } 4233 4234 4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */ 4236 static 4237 void math_INTERLEAVE3_64( 4238 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, 4239 UInt laneSzBlg2, 4240 IRTemp u0, IRTemp u1, IRTemp u2 ) 4241 { 4242 if (laneSzBlg2 == 3) { 4243 // 1x64, degenerate case 4244 assign(*i0, EX(u0)); 4245 assign(*i1, EX(u1)); 4246 assign(*i2, EX(u2)); 4247 return; 4248 } 4249 4250 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4251 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4252 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4253 4254 IRTemp du0 = newTempV128(); 4255 IRTemp du1 = newTempV128(); 4256 IRTemp du2 = newTempV128(); 4257 assign(du0, binop(doubler, EX(u0), EX(u0))); 4258 assign(du1, binop(doubler, EX(u1), EX(u1))); 4259 assign(du2, binop(doubler, EX(u2), EX(u2))); 4260 IRTemp di0 = newTempV128(); 4261 IRTemp di1 = newTempV128(); 4262 IRTemp di2 = newTempV128(); 4263 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2); 4264 assign(*i0, binop(halver, EX(di0), EX(di0))); 4265 assign(*i1, binop(halver, EX(di1), EX(di1))); 4266 assign(*i2, binop(halver, EX(di2), EX(di2))); 4267 } 4268 4269 4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */ 4271 static 4272 void math_INTERLEAVE4_64( 4273 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3, 4274 UInt laneSzBlg2, 4275 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 ) 4276 { 4277 if (laneSzBlg2 == 3) { 4278 // 1x64, degenerate case 4279 assign(*i0, EX(u0)); 4280 assign(*i1, EX(u1)); 4281 assign(*i2, EX(u2)); 4282 assign(*i3, EX(u3)); 4283 return; 4284 } 4285 4286 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4287 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4288 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4289 4290 IRTemp du0 = newTempV128(); 4291 IRTemp du1 = newTempV128(); 4292 IRTemp du2 = newTempV128(); 4293 IRTemp du3 = newTempV128(); 4294 assign(du0, binop(doubler, EX(u0), EX(u0))); 4295 assign(du1, binop(doubler, EX(u1), EX(u1))); 4296 assign(du2, binop(doubler, EX(u2), EX(u2))); 4297 assign(du3, binop(doubler, EX(u3), EX(u3))); 4298 IRTemp di0 = newTempV128(); 4299 IRTemp di1 = newTempV128(); 4300 IRTemp di2 = newTempV128(); 4301 IRTemp di3 = newTempV128(); 4302 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3, 4303 laneSzBlg2 + 1, du0, du1, du2, du3); 4304 assign(*i0, binop(halver, EX(di0), EX(di0))); 4305 assign(*i1, binop(halver, EX(di1), EX(di1))); 4306 assign(*i2, binop(halver, EX(di2), EX(di2))); 4307 assign(*i3, binop(halver, EX(di3), EX(di3))); 4308 } 4309 4310 4311 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. 
*/ 4312 static 4313 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0, 4314 UInt laneSzBlg2, IRTemp i0 ) 4315 { 4316 assign(*u0, mkexpr(i0)); 4317 } 4318 4319 4320 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */ 4321 static 4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1, 4323 UInt laneSzBlg2, IRTemp i0, IRTemp i1 ) 4324 { 4325 if (laneSzBlg2 == 3) { 4326 // 1x64, degenerate case 4327 assign(*u0, EX(i0)); 4328 assign(*u1, EX(i1)); 4329 return; 4330 } 4331 4332 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4333 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4334 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4335 4336 IRTemp di0 = newTempV128(); 4337 IRTemp di1 = newTempV128(); 4338 assign(di0, binop(doubler, EX(i0), EX(i0))); 4339 assign(di1, binop(doubler, EX(i1), EX(i1))); 4340 4341 IRTemp du0 = newTempV128(); 4342 IRTemp du1 = newTempV128(); 4343 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1); 4344 assign(*u0, binop(halver, EX(du0), EX(du0))); 4345 assign(*u1, binop(halver, EX(du1), EX(du1))); 4346 } 4347 4348 4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */ 4350 static 4351 void math_DEINTERLEAVE3_64( 4352 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 4353 UInt laneSzBlg2, 4354 IRTemp i0, IRTemp i1, IRTemp i2 ) 4355 { 4356 if (laneSzBlg2 == 3) { 4357 // 1x64, degenerate case 4358 assign(*u0, EX(i0)); 4359 assign(*u1, EX(i1)); 4360 assign(*u2, EX(i2)); 4361 return; 4362 } 4363 4364 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4365 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4366 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4367 4368 IRTemp di0 = newTempV128(); 4369 IRTemp di1 = newTempV128(); 4370 IRTemp di2 = newTempV128(); 4371 assign(di0, binop(doubler, EX(i0), EX(i0))); 4372 assign(di1, binop(doubler, EX(i1), EX(i1))); 4373 assign(di2, binop(doubler, EX(i2), EX(i2))); 4374 IRTemp du0 = newTempV128(); 4375 IRTemp du1 = newTempV128(); 4376 IRTemp du2 = newTempV128(); 4377 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2); 4378 assign(*u0, binop(halver, EX(du0), EX(du0))); 4379 assign(*u1, binop(halver, EX(du1), EX(du1))); 4380 assign(*u2, binop(halver, EX(du2), EX(du2))); 4381 } 4382 4383 4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. 
*/
4385 static
4386 void math_DEINTERLEAVE4_64(
4387 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4388 UInt laneSzBlg2,
4389 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4390 {
4391 if (laneSzBlg2 == 3) {
4392 // 1x64, degenerate case
4393 assign(*u0, EX(i0));
4394 assign(*u1, EX(i1));
4395 assign(*u2, EX(i2));
4396 assign(*u3, EX(i3));
4397 return;
4398 }
4399
4400 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4401 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4402 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4403
4404 IRTemp di0 = newTempV128();
4405 IRTemp di1 = newTempV128();
4406 IRTemp di2 = newTempV128();
4407 IRTemp di3 = newTempV128();
4408 assign(di0, binop(doubler, EX(i0), EX(i0)));
4409 assign(di1, binop(doubler, EX(i1), EX(i1)));
4410 assign(di2, binop(doubler, EX(i2), EX(i2)));
4411 assign(di3, binop(doubler, EX(i3), EX(i3)));
4412 IRTemp du0 = newTempV128();
4413 IRTemp du1 = newTempV128();
4414 IRTemp du2 = newTempV128();
4415 IRTemp du3 = newTempV128();
4416 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4417 laneSzBlg2 + 1, di0, di1, di2, di3);
4418 assign(*u0, binop(halver, EX(du0), EX(du0)));
4419 assign(*u1, binop(halver, EX(du1), EX(du1)));
4420 assign(*u2, binop(halver, EX(du2), EX(du2)));
4421 assign(*u3, binop(halver, EX(du3), EX(du3)));
4422 }
4423
4424
4425 #undef EX
4426 #undef SL
4427 #undef ROR
4428 #undef ROL
4429 #undef SHR
4430 #undef SHL
4431 #undef ILO64x2
4432 #undef IHI64x2
4433 #undef ILO32x4
4434 #undef IHI32x4
4435 #undef ILO16x8
4436 #undef IHI16x8
4437 #undef ILO8x16
4438 #undef IHI8x16
4439 #undef CEV32x4
4440 #undef COD32x4
4441 #undef COD16x8
4442 #undef COD8x16
4443 #undef CEV8x16
4444 #undef AND
4445 #undef OR2
4446 #undef OR3
4447 #undef OR4
4448
4449
4450 /*------------------------------------------------------------*/
4451 /*--- Load and Store instructions ---*/
4452 /*------------------------------------------------------------*/
4453
4454 /* Generate the EA for a "reg + reg" style amode. This is done from
4455 parts of the insn, but for sanity checking's sake it takes the whole
4456 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4457 and S=insn[12]:
4458
4459 The possible forms, along with their opt:S values, are:
4460 011:0 Xn|SP + Xm
4461 111:0 Xn|SP + Xm
4462 011:1 Xn|SP + Xm * transfer_szB
4463 111:1 Xn|SP + Xm * transfer_szB
4464 010:0 Xn|SP + 32Uto64(Wm)
4465 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4466 110:0 Xn|SP + 32Sto64(Wm)
4467 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4468
4469 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4470 the transfer size is insn[23,31,30]. For integer loads/stores,
4471 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4472
4473 If the decoding fails, it returns IRTemp_INVALID.
4474
4475 isInt is True iff this decoding is for transfers to/from integer
4476 registers. If False it is for transfers to/from vector registers.
4477 */
4478 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4479 {
4480 UInt optS = SLICE_UInt(insn, 15, 12);
4481 UInt mm = SLICE_UInt(insn, 20, 16);
4482 UInt nn = SLICE_UInt(insn, 9, 5);
4483 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4484 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4485
4486 buf[0] = 0;
4487
4488 /* Sanity checks, that this really is a load/store insn.
*/ 4489 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0)) 4490 goto fail; 4491 4492 if (isInt 4493 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/ 4494 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/ 4495 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/ 4496 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/ 4497 goto fail; 4498 4499 if (!isInt 4500 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/ 4501 goto fail; 4502 4503 /* Throw out non-verified but possibly valid cases. */ 4504 switch (szLg2) { 4505 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec 4506 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec 4507 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec 4508 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec 4509 case BITS3(1,0,0): // can only ever be valid for the vector case 4510 if (isInt) goto fail; else break; 4511 case BITS3(1,0,1): // these sizes are never valid 4512 case BITS3(1,1,0): 4513 case BITS3(1,1,1): goto fail; 4514 4515 default: vassert(0); 4516 } 4517 4518 IRExpr* rhs = NULL; 4519 switch (optS) { 4520 case BITS4(1,1,1,0): goto fail; //ATC 4521 case BITS4(0,1,1,0): 4522 rhs = getIReg64orZR(mm); 4523 vex_sprintf(buf, "[%s, %s]", 4524 nameIReg64orZR(nn), nameIReg64orZR(mm)); 4525 break; 4526 case BITS4(1,1,1,1): goto fail; //ATC 4527 case BITS4(0,1,1,1): 4528 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2)); 4529 vex_sprintf(buf, "[%s, %s lsl %u]", 4530 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2); 4531 break; 4532 case BITS4(0,1,0,0): 4533 rhs = unop(Iop_32Uto64, getIReg32orZR(mm)); 4534 vex_sprintf(buf, "[%s, %s uxtx]", 4535 nameIReg64orZR(nn), nameIReg32orZR(mm)); 4536 break; 4537 case BITS4(0,1,0,1): 4538 rhs = binop(Iop_Shl64, 4539 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2)); 4540 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]", 4541 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 4542 break; 4543 case BITS4(1,1,0,0): 4544 rhs = unop(Iop_32Sto64, getIReg32orZR(mm)); 4545 vex_sprintf(buf, "[%s, %s sxtx]", 4546 nameIReg64orZR(nn), nameIReg32orZR(mm)); 4547 break; 4548 case BITS4(1,1,0,1): 4549 rhs = binop(Iop_Shl64, 4550 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2)); 4551 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", 4552 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 4553 break; 4554 default: 4555 /* The rest appear to be genuinely invalid */ 4556 goto fail; 4557 } 4558 4559 vassert(rhs); 4560 IRTemp res = newTemp(Ity_I64); 4561 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); 4562 return res; 4563 4564 fail: 4565 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); 4566 return IRTemp_INVALID; 4567 } 4568 4569 4570 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest 4571 bits of DATAE :: Ity_I64. */ 4572 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE ) 4573 { 4574 IRExpr* addrE = mkexpr(addr); 4575 switch (szB) { 4576 case 8: 4577 storeLE(addrE, dataE); 4578 break; 4579 case 4: 4580 storeLE(addrE, unop(Iop_64to32, dataE)); 4581 break; 4582 case 2: 4583 storeLE(addrE, unop(Iop_64to16, dataE)); 4584 break; 4585 case 1: 4586 storeLE(addrE, unop(Iop_64to8, dataE)); 4587 break; 4588 default: 4589 vassert(0); 4590 } 4591 } 4592 4593 4594 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR, 4595 placing the result in an Ity_I64 temporary. 
*/ 4596 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr ) 4597 { 4598 IRTemp res = newTemp(Ity_I64); 4599 IRExpr* addrE = mkexpr(addr); 4600 switch (szB) { 4601 case 8: 4602 assign(res, loadLE(Ity_I64,addrE)); 4603 break; 4604 case 4: 4605 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE))); 4606 break; 4607 case 2: 4608 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE))); 4609 break; 4610 case 1: 4611 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE))); 4612 break; 4613 default: 4614 vassert(0); 4615 } 4616 return res; 4617 } 4618 4619 4620 /* Generate a "standard 7" name, from bitQ and size. But also 4621 allow ".1d" since that's occasionally useful. */ 4622 static 4623 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size ) 4624 { 4625 vassert(bitQ <= 1 && size <= 3); 4626 const HChar* nms[8] 4627 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" }; 4628 UInt ix = (bitQ << 2) | size; 4629 vassert(ix < 8); 4630 return nms[ix]; 4631 } 4632 4633 4634 static 4635 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn) 4636 { 4637 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 4638 4639 /* ------------ LDR,STR (immediate, uimm12) ----------- */ 4640 /* uimm12 is scaled by the transfer size 4641 4642 31 29 26 21 9 4 4643 | | | | | | 4644 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8] 4645 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8] 4646 4647 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4] 4648 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4] 4649 4650 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2] 4651 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2] 4652 4653 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1] 4654 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1] 4655 */ 4656 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) { 4657 UInt szLg2 = INSN(31,30); 4658 UInt szB = 1 << szLg2; 4659 Bool isLD = INSN(22,22) == 1; 4660 UInt offs = INSN(21,10) * szB; 4661 UInt nn = INSN(9,5); 4662 UInt tt = INSN(4,0); 4663 IRTemp ta = newTemp(Ity_I64); 4664 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs))); 4665 if (nn == 31) { /* FIXME generate stack alignment check */ } 4666 vassert(szLg2 < 4); 4667 if (isLD) { 4668 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta))); 4669 } else { 4670 gen_narrowing_store(szB, ta, getIReg64orZR(tt)); 4671 } 4672 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" }; 4673 const HChar* st_name[4] = { "strb", "strh", "str", "str" }; 4674 DIP("%s %s, [%s, #%u]\n", 4675 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt), 4676 nameIReg64orSP(nn), offs); 4677 return True; 4678 } 4679 4680 /* ------------ LDUR,STUR (immediate, simm9) ----------- */ 4681 /* 4682 31 29 26 20 11 9 4 4683 | | | | | | | 4684 (at-Rn-then-Rn=EA) | | | 4685 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9 4686 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9 4687 4688 (at-EA-then-Rn=EA) 4689 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]! 4690 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]! 4691 4692 (at-EA) 4693 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9] 4694 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9] 4695 4696 simm9 is unscaled. 4697 4698 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the 4699 load case this is because would create two competing values for 4700 Rt. In the store case the reason is unclear, but the spec 4701 disallows it anyway. 4702 4703 Stores are narrowing, loads are unsigned widening. 
sz encodes 4704 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8. 4705 */ 4706 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1)) 4707 == BITS9(1,1,1, 0,0,0,0,0, 0)) { 4708 UInt szLg2 = INSN(31,30); 4709 UInt szB = 1 << szLg2; 4710 Bool isLoad = INSN(22,22) == 1; 4711 UInt imm9 = INSN(20,12); 4712 UInt nn = INSN(9,5); 4713 UInt tt = INSN(4,0); 4714 Bool wBack = INSN(10,10) == 1; 4715 UInt how = INSN(11,10); 4716 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) { 4717 /* undecodable; fall through */ 4718 } else { 4719 if (nn == 31) { /* FIXME generate stack alignment check */ } 4720 4721 // Compute the transfer address TA and the writeback address WA. 4722 IRTemp tRN = newTemp(Ity_I64); 4723 assign(tRN, getIReg64orSP(nn)); 4724 IRTemp tEA = newTemp(Ity_I64); 4725 Long simm9 = (Long)sx_to_64(imm9, 9); 4726 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 4727 4728 IRTemp tTA = newTemp(Ity_I64); 4729 IRTemp tWA = newTemp(Ity_I64); 4730 switch (how) { 4731 case BITS2(0,1): 4732 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4733 case BITS2(1,1): 4734 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4735 case BITS2(0,0): 4736 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4737 default: 4738 vassert(0); /* NOTREACHED */ 4739 } 4740 4741 /* Normally rN would be updated after the transfer. However, in 4742 the special case typifed by 4743 str x30, [sp,#-16]! 4744 it is necessary to update SP before the transfer, (1) 4745 because Memcheck will otherwise complain about a write 4746 below the stack pointer, and (2) because the segfault 4747 stack extension mechanism will otherwise extend the stack 4748 only down to SP before the instruction, which might not be 4749 far enough, if the -16 bit takes the actual access 4750 address to the next page. 4751 */ 4752 Bool earlyWBack 4753 = wBack && simm9 < 0 && szB == 8 4754 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn; 4755 4756 if (wBack && earlyWBack) 4757 putIReg64orSP(nn, mkexpr(tEA)); 4758 4759 if (isLoad) { 4760 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA))); 4761 } else { 4762 gen_narrowing_store(szB, tTA, getIReg64orZR(tt)); 4763 } 4764 4765 if (wBack && !earlyWBack) 4766 putIReg64orSP(nn, mkexpr(tEA)); 4767 4768 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" }; 4769 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" }; 4770 const HChar* fmt_str = NULL; 4771 switch (how) { 4772 case BITS2(0,1): 4773 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4774 break; 4775 case BITS2(1,1): 4776 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4777 break; 4778 case BITS2(0,0): 4779 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n"; 4780 break; 4781 default: 4782 vassert(0); 4783 } 4784 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2], 4785 nameIRegOrZR(szB == 8, tt), 4786 nameIReg64orSP(nn), simm9); 4787 return True; 4788 } 4789 } 4790 4791 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */ 4792 /* L==1 => mm==LD 4793 L==0 => mm==ST 4794 x==0 => 32 bit transfers, and zero extended loads 4795 x==1 => 64 bit transfers 4796 simm7 is scaled by the (single-register) transfer size 4797 4798 (at-Rn-then-Rn=EA) 4799 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm 4800 4801 (at-EA-then-Rn=EA) 4802 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]! 
4803 4804 (at-EA) 4805 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm] 4806 */ 4807 4808 UInt insn_30_23 = INSN(30,23); 4809 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1) 4810 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1) 4811 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) { 4812 UInt bL = INSN(22,22); 4813 UInt bX = INSN(31,31); 4814 UInt bWBack = INSN(23,23); 4815 UInt rT1 = INSN(4,0); 4816 UInt rN = INSN(9,5); 4817 UInt rT2 = INSN(14,10); 4818 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 4819 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31) 4820 || (bL && rT1 == rT2)) { 4821 /* undecodable; fall through */ 4822 } else { 4823 if (rN == 31) { /* FIXME generate stack alignment check */ } 4824 4825 // Compute the transfer address TA and the writeback address WA. 4826 IRTemp tRN = newTemp(Ity_I64); 4827 assign(tRN, getIReg64orSP(rN)); 4828 IRTemp tEA = newTemp(Ity_I64); 4829 simm7 = (bX ? 8 : 4) * simm7; 4830 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 4831 4832 IRTemp tTA = newTemp(Ity_I64); 4833 IRTemp tWA = newTemp(Ity_I64); 4834 switch (INSN(24,23)) { 4835 case BITS2(0,1): 4836 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4837 case BITS2(1,1): 4838 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4839 case BITS2(1,0): 4840 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4841 default: 4842 vassert(0); /* NOTREACHED */ 4843 } 4844 4845 /* Normally rN would be updated after the transfer. However, in 4846 the special case typifed by 4847 stp x29, x30, [sp,#-112]! 4848 it is necessary to update SP before the transfer, (1) 4849 because Memcheck will otherwise complain about a write 4850 below the stack pointer, and (2) because the segfault 4851 stack extension mechanism will otherwise extend the stack 4852 only down to SP before the instruction, which might not be 4853 far enough, if the -112 bit takes the actual access 4854 address to the next page. 4855 */ 4856 Bool earlyWBack 4857 = bWBack && simm7 < 0 4858 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0; 4859 4860 if (bWBack && earlyWBack) 4861 putIReg64orSP(rN, mkexpr(tEA)); 4862 4863 /**/ if (bL == 1 && bX == 1) { 4864 // 64 bit load 4865 putIReg64orZR(rT1, loadLE(Ity_I64, 4866 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4867 putIReg64orZR(rT2, loadLE(Ity_I64, 4868 binop(Iop_Add64,mkexpr(tTA),mkU64(8)))); 4869 } else if (bL == 1 && bX == 0) { 4870 // 32 bit load 4871 putIReg32orZR(rT1, loadLE(Ity_I32, 4872 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4873 putIReg32orZR(rT2, loadLE(Ity_I32, 4874 binop(Iop_Add64,mkexpr(tTA),mkU64(4)))); 4875 } else if (bL == 0 && bX == 1) { 4876 // 64 bit store 4877 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4878 getIReg64orZR(rT1)); 4879 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)), 4880 getIReg64orZR(rT2)); 4881 } else { 4882 vassert(bL == 0 && bX == 0); 4883 // 32 bit store 4884 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4885 getIReg32orZR(rT1)); 4886 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)), 4887 getIReg32orZR(rT2)); 4888 } 4889 4890 if (bWBack && !earlyWBack) 4891 putIReg64orSP(rN, mkexpr(tEA)); 4892 4893 const HChar* fmt_str = NULL; 4894 switch (INSN(24,23)) { 4895 case BITS2(0,1): 4896 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4897 break; 4898 case BITS2(1,1): 4899 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4900 break; 4901 case BITS2(1,0): 4902 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 4903 break; 4904 default: 4905 vassert(0); 4906 } 4907 DIP(fmt_str, bL == 0 ? 
"st" : "ld", 4908 nameIRegOrZR(bX == 1, rT1), 4909 nameIRegOrZR(bX == 1, rT2), 4910 nameIReg64orSP(rN), simm7); 4911 return True; 4912 } 4913 } 4914 4915 /* ---------------- LDR (literal, int reg) ---------------- */ 4916 /* 31 29 23 4 4917 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] 4918 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] 4919 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] 4920 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] 4921 Just handles the first two cases for now. 4922 */ 4923 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { 4924 UInt imm19 = INSN(23,5); 4925 UInt rT = INSN(4,0); 4926 UInt bX = INSN(30,30); 4927 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 4928 if (bX) { 4929 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); 4930 } else { 4931 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); 4932 } 4933 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); 4934 return True; 4935 } 4936 4937 /* -------------- {LD,ST}R (integer register) --------------- */ 4938 /* 31 29 20 15 12 11 9 4 4939 | | | | | | | | 4940 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}] 4941 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}] 4942 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}] 4943 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}] 4944 4945 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}] 4946 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}] 4947 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}] 4948 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}] 4949 */ 4950 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) 4951 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 4952 HChar dis_buf[64]; 4953 UInt szLg2 = INSN(31,30); 4954 Bool isLD = INSN(22,22) == 1; 4955 UInt tt = INSN(4,0); 4956 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 4957 if (ea != IRTemp_INVALID) { 4958 switch (szLg2) { 4959 case 3: /* 64 bit */ 4960 if (isLD) { 4961 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); 4962 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); 4963 } else { 4964 storeLE(mkexpr(ea), getIReg64orZR(tt)); 4965 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); 4966 } 4967 break; 4968 case 2: /* 32 bit */ 4969 if (isLD) { 4970 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); 4971 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); 4972 } else { 4973 storeLE(mkexpr(ea), getIReg32orZR(tt)); 4974 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); 4975 } 4976 break; 4977 case 1: /* 16 bit */ 4978 if (isLD) { 4979 putIReg64orZR(tt, unop(Iop_16Uto64, 4980 loadLE(Ity_I16, mkexpr(ea)))); 4981 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4982 } else { 4983 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); 4984 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4985 } 4986 break; 4987 case 0: /* 8 bit */ 4988 if (isLD) { 4989 putIReg64orZR(tt, unop(Iop_8Uto64, 4990 loadLE(Ity_I8, mkexpr(ea)))); 4991 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); 4992 } else { 4993 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); 4994 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); 4995 } 4996 break; 4997 default: 4998 vassert(0); 4999 } 5000 return True; 5001 } 5002 } 5003 5004 /* -------------- LDRS{B,H,W} (uimm12) -------------- */ 5005 /* 31 29 26 23 21 9 4 5006 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] 5007 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] 5008 00 111 001 1x imm12 n 
t LDRSB Rt, [Xn|SP, #pimm12 * 1] 5009 where 5010 Rt is Wt when x==1, Xt when x==0 5011 */ 5012 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) { 5013 /* Further checks on bits 31:30 and 22 */ 5014 Bool valid = False; 5015 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5016 case BITS3(1,0,0): 5017 case BITS3(0,1,0): case BITS3(0,1,1): 5018 case BITS3(0,0,0): case BITS3(0,0,1): 5019 valid = True; 5020 break; 5021 } 5022 if (valid) { 5023 UInt szLg2 = INSN(31,30); 5024 UInt bitX = INSN(22,22); 5025 UInt imm12 = INSN(21,10); 5026 UInt nn = INSN(9,5); 5027 UInt tt = INSN(4,0); 5028 UInt szB = 1 << szLg2; 5029 IRExpr* ea = binop(Iop_Add64, 5030 getIReg64orSP(nn), mkU64(imm12 * szB)); 5031 switch (szB) { 5032 case 4: 5033 vassert(bitX == 0); 5034 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea))); 5035 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt), 5036 nameIReg64orSP(nn), imm12 * szB); 5037 break; 5038 case 2: 5039 if (bitX == 1) { 5040 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea))); 5041 } else { 5042 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea))); 5043 } 5044 DIP("ldrsh %s, [%s, #%u]\n", 5045 nameIRegOrZR(bitX == 0, tt), 5046 nameIReg64orSP(nn), imm12 * szB); 5047 break; 5048 case 1: 5049 if (bitX == 1) { 5050 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea))); 5051 } else { 5052 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea))); 5053 } 5054 DIP("ldrsb %s, [%s, #%u]\n", 5055 nameIRegOrZR(bitX == 0, tt), 5056 nameIReg64orSP(nn), imm12 * szB); 5057 break; 5058 default: 5059 vassert(0); 5060 } 5061 return True; 5062 } 5063 /* else fall through */ 5064 } 5065 5066 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */ 5067 /* (at-Rn-then-Rn=EA) 5068 31 29 23 21 20 11 9 4 5069 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9 5070 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9 5071 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9 5072 5073 (at-EA-then-Rn=EA) 5074 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]! 5075 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]! 5076 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]! 5077 where 5078 Rt is Wt when x==1, Xt when x==0 5079 transfer-at-Rn when [11]==0, at EA when [11]==1 5080 */ 5081 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5082 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5083 /* Further checks on bits 31:30 and 22 */ 5084 Bool valid = False; 5085 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5086 case BITS3(1,0,0): // LDRSW Xt 5087 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt 5088 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt 5089 valid = True; 5090 break; 5091 } 5092 if (valid) { 5093 UInt szLg2 = INSN(31,30); 5094 UInt imm9 = INSN(20,12); 5095 Bool atRN = INSN(11,11) == 0; 5096 UInt nn = INSN(9,5); 5097 UInt tt = INSN(4,0); 5098 IRTemp tRN = newTemp(Ity_I64); 5099 IRTemp tEA = newTemp(Ity_I64); 5100 IRTemp tTA = IRTemp_INVALID; 5101 ULong simm9 = sx_to_64(imm9, 9); 5102 Bool is64 = INSN(22,22) == 0; 5103 assign(tRN, getIReg64orSP(nn)); 5104 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5105 tTA = atRN ? tRN : tEA; 5106 HChar ch = '?'; 5107 /* There are 5 cases: 5108 byte load, SX to 64 5109 byte load, SX to 32, ZX to 64 5110 halfword load, SX to 64 5111 halfword load, SX to 32, ZX to 64 5112 word load, SX to 64 5113 The ifs below handle them in the listed order. 
5114 */ 5115 if (szLg2 == 0) { 5116 ch = 'b'; 5117 if (is64) { 5118 putIReg64orZR(tt, unop(Iop_8Sto64, 5119 loadLE(Ity_I8, mkexpr(tTA)))); 5120 } else { 5121 putIReg32orZR(tt, unop(Iop_8Sto32, 5122 loadLE(Ity_I8, mkexpr(tTA)))); 5123 } 5124 } 5125 else if (szLg2 == 1) { 5126 ch = 'h'; 5127 if (is64) { 5128 putIReg64orZR(tt, unop(Iop_16Sto64, 5129 loadLE(Ity_I16, mkexpr(tTA)))); 5130 } else { 5131 putIReg32orZR(tt, unop(Iop_16Sto32, 5132 loadLE(Ity_I16, mkexpr(tTA)))); 5133 } 5134 } 5135 else if (szLg2 == 2 && is64) { 5136 ch = 'w'; 5137 putIReg64orZR(tt, unop(Iop_32Sto64, 5138 loadLE(Ity_I32, mkexpr(tTA)))); 5139 } 5140 else { 5141 vassert(0); 5142 } 5143 putIReg64orSP(nn, mkexpr(tEA)); 5144 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!", 5145 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 5146 return True; 5147 } 5148 /* else fall through */ 5149 } 5150 5151 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */ 5152 /* 31 29 23 21 20 11 9 4 5153 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9] 5154 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9] 5155 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9] 5156 where 5157 Rt is Wt when x==1, Xt when x==0 5158 */ 5159 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5160 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5161 /* Further checks on bits 31:30 and 22 */ 5162 Bool valid = False; 5163 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5164 case BITS3(1,0,0): // LDURSW Xt 5165 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt 5166 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt 5167 valid = True; 5168 break; 5169 } 5170 if (valid) { 5171 UInt szLg2 = INSN(31,30); 5172 UInt imm9 = INSN(20,12); 5173 UInt nn = INSN(9,5); 5174 UInt tt = INSN(4,0); 5175 IRTemp tRN = newTemp(Ity_I64); 5176 IRTemp tEA = newTemp(Ity_I64); 5177 ULong simm9 = sx_to_64(imm9, 9); 5178 Bool is64 = INSN(22,22) == 0; 5179 assign(tRN, getIReg64orSP(nn)); 5180 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5181 HChar ch = '?'; 5182 /* There are 5 cases: 5183 byte load, SX to 64 5184 byte load, SX to 32, ZX to 64 5185 halfword load, SX to 64 5186 halfword load, SX to 32, ZX to 64 5187 word load, SX to 64 5188 The ifs below handle them in the listed order. 
5189 */ 5190 if (szLg2 == 0) { 5191 ch = 'b'; 5192 if (is64) { 5193 putIReg64orZR(tt, unop(Iop_8Sto64, 5194 loadLE(Ity_I8, mkexpr(tEA)))); 5195 } else { 5196 putIReg32orZR(tt, unop(Iop_8Sto32, 5197 loadLE(Ity_I8, mkexpr(tEA)))); 5198 } 5199 } 5200 else if (szLg2 == 1) { 5201 ch = 'h'; 5202 if (is64) { 5203 putIReg64orZR(tt, unop(Iop_16Sto64, 5204 loadLE(Ity_I16, mkexpr(tEA)))); 5205 } else { 5206 putIReg32orZR(tt, unop(Iop_16Sto32, 5207 loadLE(Ity_I16, mkexpr(tEA)))); 5208 } 5209 } 5210 else if (szLg2 == 2 && is64) { 5211 ch = 'w'; 5212 putIReg64orZR(tt, unop(Iop_32Sto64, 5213 loadLE(Ity_I32, mkexpr(tEA)))); 5214 } 5215 else { 5216 vassert(0); 5217 } 5218 DIP("ldurs%c %s, [%s, #%lld]", 5219 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 5220 return True; 5221 } 5222 /* else fall through */ 5223 } 5224 5225 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */ 5226 /* L==1 => mm==LD 5227 L==0 => mm==ST 5228 sz==00 => 32 bit (S) transfers 5229 sz==01 => 64 bit (D) transfers 5230 sz==10 => 128 bit (Q) transfers 5231 sz==11 isn't allowed 5232 simm7 is scaled by the (single-register) transfer size 5233 5234 31 29 26 22 21 14 9 4 5235 5236 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm] 5237 (at-EA, with nontemporal hint) 5238 5239 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm 5240 (at-Rn-then-Rn=EA) 5241 5242 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm] 5243 (at-EA) 5244 5245 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]! 5246 (at-EA-then-Rn=EA) 5247 */ 5248 if (INSN(29,25) == BITS5(1,0,1,1,0)) { 5249 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units 5250 Bool isLD = INSN(22,22) == 1; 5251 Bool wBack = INSN(23,23) == 1; 5252 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 5253 UInt tt2 = INSN(14,10); 5254 UInt nn = INSN(9,5); 5255 UInt tt1 = INSN(4,0); 5256 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) { 5257 /* undecodable; fall through */ 5258 } else { 5259 if (nn == 31) { /* FIXME generate stack alignment check */ } 5260 5261 // Compute the transfer address TA and the writeback address WA. 5262 UInt szB = 4 << szSlg2; /* szB is the per-register size */ 5263 IRTemp tRN = newTemp(Ity_I64); 5264 assign(tRN, getIReg64orSP(nn)); 5265 IRTemp tEA = newTemp(Ity_I64); 5266 simm7 = szB * simm7; 5267 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 5268 5269 IRTemp tTA = newTemp(Ity_I64); 5270 IRTemp tWA = newTemp(Ity_I64); 5271 switch (INSN(24,23)) { 5272 case BITS2(0,1): 5273 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 5274 case BITS2(1,1): 5275 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 5276 case BITS2(1,0): 5277 case BITS2(0,0): 5278 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 5279 default: 5280 vassert(0); /* NOTREACHED */ 5281 } 5282 5283 IRType ty = Ity_INVALID; 5284 switch (szB) { 5285 case 4: ty = Ity_F32; break; 5286 case 8: ty = Ity_F64; break; 5287 case 16: ty = Ity_V128; break; 5288 default: vassert(0); 5289 } 5290 5291 /* Normally rN would be updated after the transfer. However, in 5292 the special cases typifed by 5293 stp q0, q1, [sp,#-512]! 5294 stp d0, d1, [sp,#-512]! 5295 stp s0, s1, [sp,#-512]! 
5296 it is necessary to update SP before the transfer, (1) 5297 because Memcheck will otherwise complain about a write 5298 below the stack pointer, and (2) because the segfault 5299 stack extension mechanism will otherwise extend the stack 5300 only down to SP before the instruction, which might not be 5301 far enough, if the -512 bit takes the actual access 5302 address to the next page. 5303 */ 5304 Bool earlyWBack 5305 = wBack && simm7 < 0 5306 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD; 5307 5308 if (wBack && earlyWBack) 5309 putIReg64orSP(nn, mkexpr(tEA)); 5310 5311 if (isLD) { 5312 if (szB < 16) { 5313 putQReg128(tt1, mkV128(0x0000)); 5314 } 5315 putQRegLO(tt1, 5316 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0)))); 5317 if (szB < 16) { 5318 putQReg128(tt2, mkV128(0x0000)); 5319 } 5320 putQRegLO(tt2, 5321 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB)))); 5322 } else { 5323 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)), 5324 getQRegLO(tt1, ty)); 5325 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)), 5326 getQRegLO(tt2, ty)); 5327 } 5328 5329 if (wBack && !earlyWBack) 5330 putIReg64orSP(nn, mkexpr(tEA)); 5331 5332 const HChar* fmt_str = NULL; 5333 switch (INSN(24,23)) { 5334 case BITS2(0,1): 5335 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 5336 break; 5337 case BITS2(1,1): 5338 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 5339 break; 5340 case BITS2(1,0): 5341 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 5342 break; 5343 case BITS2(0,0): 5344 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n"; 5345 break; 5346 default: 5347 vassert(0); 5348 } 5349 DIP(fmt_str, isLD ? "ld" : "st", 5350 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty), 5351 nameIReg64orSP(nn), simm7); 5352 return True; 5353 } 5354 } 5355 5356 /* -------------- {LD,ST}R (vector register) --------------- */ 5357 /* 31 29 23 20 15 12 11 9 4 5358 | | | | | | | | | 5359 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}] 5360 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}] 5361 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}] 5362 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}] 5363 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}] 5364 5365 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}] 5366 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}] 5367 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}] 5368 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}] 5369 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}] 5370 */ 5371 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5372 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5373 HChar dis_buf[64]; 5374 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5375 Bool isLD = INSN(22,22) == 1; 5376 UInt tt = INSN(4,0); 5377 if (szLg2 > 4) goto after_LDR_STR_vector_register; 5378 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/); 5379 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register; 5380 switch (szLg2) { 5381 case 0: /* 8 bit */ 5382 if (isLD) { 5383 putQReg128(tt, mkV128(0x0000)); 5384 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea))); 5385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5386 } else { 5387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8)); 5388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5389 } 5390 break; 5391 case 1: 5392 if (isLD) { 5393 putQReg128(tt, mkV128(0x0000)); 5394 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea))); 5395 DIP("ldr %s, %s\n", nameQRegLO(tt, 
Ity_I16), dis_buf); 5396 } else { 5397 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16)); 5398 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 5399 } 5400 break; 5401 case 2: /* 32 bit */ 5402 if (isLD) { 5403 putQReg128(tt, mkV128(0x0000)); 5404 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea))); 5405 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5406 } else { 5407 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32)); 5408 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5409 } 5410 break; 5411 case 3: /* 64 bit */ 5412 if (isLD) { 5413 putQReg128(tt, mkV128(0x0000)); 5414 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea))); 5415 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5416 } else { 5417 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64)); 5418 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5419 } 5420 break; 5421 case 4: 5422 if (isLD) { 5423 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea))); 5424 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf); 5425 } else { 5426 storeLE(mkexpr(ea), getQReg128(tt)); 5427 DIP("str %s, %s\n", nameQReg128(tt), dis_buf); 5428 } 5429 break; 5430 default: 5431 vassert(0); 5432 } 5433 return True; 5434 } 5435 after_LDR_STR_vector_register: 5436 5437 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */ 5438 /* 31 29 22 20 15 12 11 9 4 5439 | | | | | | | | | 5440 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}] 5441 5442 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}] 5443 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}] 5444 5445 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}] 5446 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}] 5447 */ 5448 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5449 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5450 HChar dis_buf[64]; 5451 UInt szLg2 = INSN(31,30); 5452 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64 5453 UInt tt = INSN(4,0); 5454 if (szLg2 == 3) goto after_LDRS_integer_register; 5455 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 5456 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register; 5457 /* Enumerate the 5 variants explicitly. */ 5458 if (szLg2 == 2/*32 bit*/ && sxTo64) { 5459 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea)))); 5460 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf); 5461 return True; 5462 } 5463 else 5464 if (szLg2 == 1/*16 bit*/) { 5465 if (sxTo64) { 5466 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea)))); 5467 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf); 5468 } else { 5469 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea)))); 5470 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf); 5471 } 5472 return True; 5473 } 5474 else 5475 if (szLg2 == 0/*8 bit*/) { 5476 if (sxTo64) { 5477 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea)))); 5478 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf); 5479 } else { 5480 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea)))); 5481 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf); 5482 } 5483 return True; 5484 } 5485 /* else it's an invalid combination */ 5486 } 5487 after_LDRS_integer_register: 5488 5489 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */ 5490 /* This is the Unsigned offset variant only. The Post-Index and 5491 Pre-Index variants are below. 
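      Note that the 12-bit immediate is scaled by the transfer size, as
      shown in the table below; the Q-register form, for example, can
      reach byte offsets 0 .. 4095*16 in steps of 16.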
5492 5493 31 29 23 21 9 4 5494 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1] 5495 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2] 5496 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4] 5497 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8] 5498 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16] 5499 5500 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1] 5501 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2] 5502 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4] 5503 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8] 5504 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16] 5505 */ 5506 if (INSN(29,24) == BITS6(1,1,1,1,0,1) 5507 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) { 5508 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5509 Bool isLD = INSN(22,22) == 1; 5510 UInt pimm12 = INSN(21,10) << szLg2; 5511 UInt nn = INSN(9,5); 5512 UInt tt = INSN(4,0); 5513 IRTemp tEA = newTemp(Ity_I64); 5514 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5515 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12))); 5516 if (isLD) { 5517 if (szLg2 < 4) { 5518 putQReg128(tt, mkV128(0x0000)); 5519 } 5520 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5521 } else { 5522 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5523 } 5524 DIP("%s %s, [%s, #%u]\n", 5525 isLD ? "ldr" : "str", 5526 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12); 5527 return True; 5528 } 5529 5530 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */ 5531 /* These are the Post-Index and Pre-Index variants. 5532 5533 31 29 23 20 11 9 4 5534 (at-Rn-then-Rn=EA) 5535 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm 5536 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm 5537 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm 5538 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm 5539 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm 5540 5541 (at-EA-then-Rn=EA) 5542 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]! 5543 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]! 5544 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]! 5545 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]! 5546 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]! 5547 5548 Stores are the same except with bit 22 set to 0. 5549 */ 5550 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5551 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5552 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5553 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5554 Bool isLD = INSN(22,22) == 1; 5555 UInt imm9 = INSN(20,12); 5556 Bool atRN = INSN(11,11) == 0; 5557 UInt nn = INSN(9,5); 5558 UInt tt = INSN(4,0); 5559 IRTemp tRN = newTemp(Ity_I64); 5560 IRTemp tEA = newTemp(Ity_I64); 5561 IRTemp tTA = IRTemp_INVALID; 5562 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5563 ULong simm9 = sx_to_64(imm9, 9); 5564 assign(tRN, getIReg64orSP(nn)); 5565 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5566 tTA = atRN ? tRN : tEA; 5567 if (isLD) { 5568 if (szLg2 < 4) { 5569 putQReg128(tt, mkV128(0x0000)); 5570 } 5571 putQRegLO(tt, loadLE(ty, mkexpr(tTA))); 5572 } else { 5573 storeLE(mkexpr(tTA), getQRegLO(tt, ty)); 5574 } 5575 putIReg64orSP(nn, mkexpr(tEA)); 5576 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n", 5577 isLD ? 
"ldr" : "str", 5578 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9); 5579 return True; 5580 } 5581 5582 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ 5583 /* 31 29 23 20 11 9 4 5584 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] 5585 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] 5586 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] 5587 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] 5588 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] 5589 5590 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] 5591 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] 5592 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] 5593 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] 5594 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] 5595 */ 5596 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5597 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5598 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5599 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5600 Bool isLD = INSN(22,22) == 1; 5601 UInt imm9 = INSN(20,12); 5602 UInt nn = INSN(9,5); 5603 UInt tt = INSN(4,0); 5604 ULong simm9 = sx_to_64(imm9, 9); 5605 IRTemp tEA = newTemp(Ity_I64); 5606 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5607 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); 5608 if (isLD) { 5609 if (szLg2 < 4) { 5610 putQReg128(tt, mkV128(0x0000)); 5611 } 5612 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5613 } else { 5614 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5615 } 5616 DIP("%s %s, [%s, #%lld]\n", 5617 isLD ? "ldur" : "stur", 5618 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 5619 return True; 5620 } 5621 5622 /* ---------------- LDR (literal, SIMD&FP) ---------------- */ 5623 /* 31 29 23 4 5624 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] 5625 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] 5626 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] 5627 */ 5628 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { 5629 UInt szB = 4 << INSN(31,30); 5630 UInt imm19 = INSN(23,5); 5631 UInt tt = INSN(4,0); 5632 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 5633 IRType ty = preferredVectorSubTypeFromSize(szB); 5634 putQReg128(tt, mkV128(0x0000)); 5635 putQRegLO(tt, loadLE(ty, mkU64(ea))); 5636 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); 5637 return True; 5638 } 5639 5640 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */ 5641 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */ 5642 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */ 5643 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */ 5644 /* 31 29 26 22 21 20 15 11 9 4 5645 5646 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP] 5647 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step 5648 5649 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP] 5650 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step 5651 5652 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP] 5653 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step 5654 5655 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP] 5656 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step 5657 5658 T = defined by Q and sz in the normal way 5659 step = if m == 11111 then transfer-size else Xm 5660 xx = case L of 1 -> LD ; 0 -> ST 5661 */ 5662 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5663 && INSN(21,21) == 0) { 5664 Bool bitQ = INSN(30,30); 5665 Bool isPX = INSN(23,23) == 1; 5666 Bool isLD = INSN(22,22) == 1; 
5667 UInt mm = INSN(20,16); 5668 UInt opc = INSN(15,12); 5669 UInt sz = INSN(11,10); 5670 UInt nn = INSN(9,5); 5671 UInt tt = INSN(4,0); 5672 Bool isQ = bitQ == 1; 5673 Bool is1d = sz == BITS2(1,1) && !isQ; 5674 UInt nRegs = 0; 5675 switch (opc) { 5676 case BITS4(0,0,0,0): nRegs = 4; break; 5677 case BITS4(0,1,0,0): nRegs = 3; break; 5678 case BITS4(1,0,0,0): nRegs = 2; break; 5679 case BITS4(0,1,1,1): nRegs = 1; break; 5680 default: break; 5681 } 5682 5683 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5684 If we see it, set nRegs to 0 so as to cause the next conditional 5685 to fail. */ 5686 if (!isPX && mm != 0) 5687 nRegs = 0; 5688 5689 if (nRegs == 1 /* .1d is allowed */ 5690 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) { 5691 5692 UInt xferSzB = (isQ ? 16 : 8) * nRegs; 5693 5694 /* Generate the transfer address (TA) and if necessary the 5695 writeback address (WB) */ 5696 IRTemp tTA = newTemp(Ity_I64); 5697 assign(tTA, getIReg64orSP(nn)); 5698 if (nn == 31) { /* FIXME generate stack alignment check */ } 5699 IRTemp tWB = IRTemp_INVALID; 5700 if (isPX) { 5701 tWB = newTemp(Ity_I64); 5702 assign(tWB, binop(Iop_Add64, 5703 mkexpr(tTA), 5704 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 5705 : getIReg64orZR(mm))); 5706 } 5707 5708 /* -- BEGIN generate the transfers -- */ 5709 5710 IRTemp u0, u1, u2, u3, i0, i1, i2, i3; 5711 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID; 5712 switch (nRegs) { 5713 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */ 5714 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */ 5715 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */ 5716 case 1: u0 = newTempV128(); i0 = newTempV128(); break; 5717 default: vassert(0); 5718 } 5719 5720 /* -- Multiple 128 or 64 bit stores -- */ 5721 if (!isLD) { 5722 switch (nRegs) { 5723 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5724 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5725 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */ 5726 case 1: assign(u0, getQReg128((tt+0) % 32)); break; 5727 default: vassert(0); 5728 } 5729 switch (nRegs) { 5730 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64) 5731 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3); 5732 break; 5733 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64) 5734 (&i0, &i1, &i2, sz, u0, u1, u2); 5735 break; 5736 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64) 5737 (&i0, &i1, sz, u0, u1); 5738 break; 5739 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64) 5740 (&i0, sz, u0); 5741 break; 5742 default: vassert(0); 5743 } 5744 # define MAYBE_NARROW_TO_64(_expr) \ 5745 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5746 UInt step = isQ ? 16 : 8; 5747 switch (nRegs) { 5748 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5749 MAYBE_NARROW_TO_64(mkexpr(i3)) ); 5750 /* fallthru */ 5751 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5752 MAYBE_NARROW_TO_64(mkexpr(i2)) ); 5753 /* fallthru */ 5754 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5755 MAYBE_NARROW_TO_64(mkexpr(i1)) ); 5756 /* fallthru */ 5757 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5758 MAYBE_NARROW_TO_64(mkexpr(i0)) ); 5759 break; 5760 default: vassert(0); 5761 } 5762 # undef MAYBE_NARROW_TO_64 5763 } 5764 5765 /* -- Multiple 128 or 64 bit loads -- */ 5766 else /* isLD */ { 5767 UInt step = isQ ? 16 : 8; 5768 IRType loadTy = isQ ? Ity_V128 : Ity_I64; 5769 # define MAYBE_WIDEN_FROM_64(_expr) \ 5770 (isQ ? 
(_expr) : unop(Iop_64UtoV128,(_expr))) 5771 switch (nRegs) { 5772 case 4: 5773 assign(i3, MAYBE_WIDEN_FROM_64( 5774 loadLE(loadTy, 5775 binop(Iop_Add64, mkexpr(tTA), 5776 mkU64(3 * step))))); 5777 /* fallthru */ 5778 case 3: 5779 assign(i2, MAYBE_WIDEN_FROM_64( 5780 loadLE(loadTy, 5781 binop(Iop_Add64, mkexpr(tTA), 5782 mkU64(2 * step))))); 5783 /* fallthru */ 5784 case 2: 5785 assign(i1, MAYBE_WIDEN_FROM_64( 5786 loadLE(loadTy, 5787 binop(Iop_Add64, mkexpr(tTA), 5788 mkU64(1 * step))))); 5789 /* fallthru */ 5790 case 1: 5791 assign(i0, MAYBE_WIDEN_FROM_64( 5792 loadLE(loadTy, 5793 binop(Iop_Add64, mkexpr(tTA), 5794 mkU64(0 * step))))); 5795 break; 5796 default: 5797 vassert(0); 5798 } 5799 # undef MAYBE_WIDEN_FROM_64 5800 switch (nRegs) { 5801 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64) 5802 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3); 5803 break; 5804 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64) 5805 (&u0, &u1, &u2, sz, i0, i1, i2); 5806 break; 5807 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64) 5808 (&u0, &u1, sz, i0, i1); 5809 break; 5810 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64) 5811 (&u0, sz, i0); 5812 break; 5813 default: vassert(0); 5814 } 5815 switch (nRegs) { 5816 case 4: putQReg128( (tt+3) % 32, 5817 math_MAYBE_ZERO_HI64(bitQ, u3)); 5818 /* fallthru */ 5819 case 3: putQReg128( (tt+2) % 32, 5820 math_MAYBE_ZERO_HI64(bitQ, u2)); 5821 /* fallthru */ 5822 case 2: putQReg128( (tt+1) % 32, 5823 math_MAYBE_ZERO_HI64(bitQ, u1)); 5824 /* fallthru */ 5825 case 1: putQReg128( (tt+0) % 32, 5826 math_MAYBE_ZERO_HI64(bitQ, u0)); 5827 break; 5828 default: vassert(0); 5829 } 5830 } 5831 5832 /* -- END generate the transfers -- */ 5833 5834 /* Do the writeback, if necessary */ 5835 if (isPX) { 5836 putIReg64orSP(nn, mkexpr(tWB)); 5837 } 5838 5839 HChar pxStr[20]; 5840 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 5841 if (isPX) { 5842 if (mm == BITS5(1,1,1,1,1)) 5843 vex_sprintf(pxStr, ", #%u", xferSzB); 5844 else 5845 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 5846 } 5847 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 5848 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n", 5849 isLD ? 
"ld" : "st", nRegs, 5850 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 5851 pxStr); 5852 5853 return True; 5854 } 5855 /* else fall through */ 5856 } 5857 5858 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */ 5859 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */ 5860 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */ 5861 /* 31 29 26 22 21 20 15 11 9 4 5862 5863 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP] 5864 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step 5865 5866 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP] 5867 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step 5868 5869 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP] 5870 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step 5871 5872 T = defined by Q and sz in the normal way 5873 step = if m == 11111 then transfer-size else Xm 5874 xx = case L of 1 -> LD ; 0 -> ST 5875 */ 5876 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5877 && INSN(21,21) == 0) { 5878 Bool bitQ = INSN(30,30); 5879 Bool isPX = INSN(23,23) == 1; 5880 Bool isLD = INSN(22,22) == 1; 5881 UInt mm = INSN(20,16); 5882 UInt opc = INSN(15,12); 5883 UInt sz = INSN(11,10); 5884 UInt nn = INSN(9,5); 5885 UInt tt = INSN(4,0); 5886 Bool isQ = bitQ == 1; 5887 UInt nRegs = 0; 5888 switch (opc) { 5889 case BITS4(0,0,1,0): nRegs = 4; break; 5890 case BITS4(0,1,1,0): nRegs = 3; break; 5891 case BITS4(1,0,1,0): nRegs = 2; break; 5892 default: break; 5893 } 5894 5895 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5896 If we see it, set nRegs to 0 so as to cause the next conditional 5897 to fail. */ 5898 if (!isPX && mm != 0) 5899 nRegs = 0; 5900 5901 if (nRegs >= 2 && nRegs <= 4) { 5902 5903 UInt xferSzB = (isQ ? 16 : 8) * nRegs; 5904 5905 /* Generate the transfer address (TA) and if necessary the 5906 writeback address (WB) */ 5907 IRTemp tTA = newTemp(Ity_I64); 5908 assign(tTA, getIReg64orSP(nn)); 5909 if (nn == 31) { /* FIXME generate stack alignment check */ } 5910 IRTemp tWB = IRTemp_INVALID; 5911 if (isPX) { 5912 tWB = newTemp(Ity_I64); 5913 assign(tWB, binop(Iop_Add64, 5914 mkexpr(tTA), 5915 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 5916 : getIReg64orZR(mm))); 5917 } 5918 5919 /* -- BEGIN generate the transfers -- */ 5920 5921 IRTemp u0, u1, u2, u3; 5922 u0 = u1 = u2 = u3 = IRTemp_INVALID; 5923 switch (nRegs) { 5924 case 4: u3 = newTempV128(); /* fallthru */ 5925 case 3: u2 = newTempV128(); /* fallthru */ 5926 case 2: u1 = newTempV128(); 5927 u0 = newTempV128(); break; 5928 default: vassert(0); 5929 } 5930 5931 /* -- Multiple 128 or 64 bit stores -- */ 5932 if (!isLD) { 5933 switch (nRegs) { 5934 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5935 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5936 case 2: assign(u1, getQReg128((tt+1) % 32)); 5937 assign(u0, getQReg128((tt+0) % 32)); break; 5938 default: vassert(0); 5939 } 5940 # define MAYBE_NARROW_TO_64(_expr) \ 5941 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5942 UInt step = isQ ? 
16 : 8; 5943 switch (nRegs) { 5944 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5945 MAYBE_NARROW_TO_64(mkexpr(u3)) ); 5946 /* fallthru */ 5947 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5948 MAYBE_NARROW_TO_64(mkexpr(u2)) ); 5949 /* fallthru */ 5950 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5951 MAYBE_NARROW_TO_64(mkexpr(u1)) ); 5952 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5953 MAYBE_NARROW_TO_64(mkexpr(u0)) ); 5954 break; 5955 default: vassert(0); 5956 } 5957 # undef MAYBE_NARROW_TO_64 5958 } 5959 5960 /* -- Multiple 128 or 64 bit loads -- */ 5961 else /* isLD */ { 5962 UInt step = isQ ? 16 : 8; 5963 IRType loadTy = isQ ? Ity_V128 : Ity_I64; 5964 # define MAYBE_WIDEN_FROM_64(_expr) \ 5965 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr))) 5966 switch (nRegs) { 5967 case 4: 5968 assign(u3, MAYBE_WIDEN_FROM_64( 5969 loadLE(loadTy, 5970 binop(Iop_Add64, mkexpr(tTA), 5971 mkU64(3 * step))))); 5972 /* fallthru */ 5973 case 3: 5974 assign(u2, MAYBE_WIDEN_FROM_64( 5975 loadLE(loadTy, 5976 binop(Iop_Add64, mkexpr(tTA), 5977 mkU64(2 * step))))); 5978 /* fallthru */ 5979 case 2: 5980 assign(u1, MAYBE_WIDEN_FROM_64( 5981 loadLE(loadTy, 5982 binop(Iop_Add64, mkexpr(tTA), 5983 mkU64(1 * step))))); 5984 assign(u0, MAYBE_WIDEN_FROM_64( 5985 loadLE(loadTy, 5986 binop(Iop_Add64, mkexpr(tTA), 5987 mkU64(0 * step))))); 5988 break; 5989 default: 5990 vassert(0); 5991 } 5992 # undef MAYBE_WIDEN_FROM_64 5993 switch (nRegs) { 5994 case 4: putQReg128( (tt+3) % 32, 5995 math_MAYBE_ZERO_HI64(bitQ, u3)); 5996 /* fallthru */ 5997 case 3: putQReg128( (tt+2) % 32, 5998 math_MAYBE_ZERO_HI64(bitQ, u2)); 5999 /* fallthru */ 6000 case 2: putQReg128( (tt+1) % 32, 6001 math_MAYBE_ZERO_HI64(bitQ, u1)); 6002 putQReg128( (tt+0) % 32, 6003 math_MAYBE_ZERO_HI64(bitQ, u0)); 6004 break; 6005 default: vassert(0); 6006 } 6007 } 6008 6009 /* -- END generate the transfers -- */ 6010 6011 /* Do the writeback, if necessary */ 6012 if (isPX) { 6013 putIReg64orSP(nn, mkexpr(tWB)); 6014 } 6015 6016 HChar pxStr[20]; 6017 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6018 if (isPX) { 6019 if (mm == BITS5(1,1,1,1,1)) 6020 vex_sprintf(pxStr, ", #%u", xferSzB); 6021 else 6022 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6023 } 6024 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6025 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n", 6026 isLD ? 
"ld" : "st", 6027 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6028 pxStr); 6029 6030 return True; 6031 } 6032 /* else fall through */ 6033 } 6034 6035 /* ---------- LD1R (single structure, replicate) ---------- */ 6036 /* ---------- LD2R (single structure, replicate) ---------- */ 6037 /* ---------- LD3R (single structure, replicate) ---------- */ 6038 /* ---------- LD4R (single structure, replicate) ---------- */ 6039 /* 31 29 22 20 15 11 9 4 6040 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP] 6041 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step 6042 6043 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP] 6044 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step 6045 6046 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP] 6047 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step 6048 6049 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP] 6050 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step 6051 6052 step = if m == 11111 then transfer-size else Xm 6053 */ 6054 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1) 6055 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1) 6056 && INSN(12,12) == 0) { 6057 UInt bitQ = INSN(30,30); 6058 Bool isPX = INSN(23,23) == 1; 6059 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6060 UInt mm = INSN(20,16); 6061 UInt sz = INSN(11,10); 6062 UInt nn = INSN(9,5); 6063 UInt tt = INSN(4,0); 6064 6065 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */ 6066 if (isPX || mm == 0) { 6067 6068 IRType ty = integerIRTypeOfSize(1 << sz); 6069 6070 UInt laneSzB = 1 << sz; 6071 UInt xferSzB = laneSzB * nRegs; 6072 6073 /* Generate the transfer address (TA) and if necessary the 6074 writeback address (WB) */ 6075 IRTemp tTA = newTemp(Ity_I64); 6076 assign(tTA, getIReg64orSP(nn)); 6077 if (nn == 31) { /* FIXME generate stack alignment check */ } 6078 IRTemp tWB = IRTemp_INVALID; 6079 if (isPX) { 6080 tWB = newTemp(Ity_I64); 6081 assign(tWB, binop(Iop_Add64, 6082 mkexpr(tTA), 6083 mm == BITS5(1,1,1,1,1) ? 
mkU64(xferSzB) 6084 : getIReg64orZR(mm))); 6085 } 6086 6087 /* Do the writeback, if necessary */ 6088 if (isPX) { 6089 putIReg64orSP(nn, mkexpr(tWB)); 6090 } 6091 6092 IRTemp e0, e1, e2, e3, v0, v1, v2, v3; 6093 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID; 6094 switch (nRegs) { 6095 case 4: 6096 e3 = newTemp(ty); 6097 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6098 mkU64(3 * laneSzB)))); 6099 v3 = math_DUP_TO_V128(e3, ty); 6100 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3)); 6101 /* fallthrough */ 6102 case 3: 6103 e2 = newTemp(ty); 6104 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6105 mkU64(2 * laneSzB)))); 6106 v2 = math_DUP_TO_V128(e2, ty); 6107 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2)); 6108 /* fallthrough */ 6109 case 2: 6110 e1 = newTemp(ty); 6111 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6112 mkU64(1 * laneSzB)))); 6113 v1 = math_DUP_TO_V128(e1, ty); 6114 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1)); 6115 /* fallthrough */ 6116 case 1: 6117 e0 = newTemp(ty); 6118 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6119 mkU64(0 * laneSzB)))); 6120 v0 = math_DUP_TO_V128(e0, ty); 6121 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0)); 6122 break; 6123 default: 6124 vassert(0); 6125 } 6126 6127 HChar pxStr[20]; 6128 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6129 if (isPX) { 6130 if (mm == BITS5(1,1,1,1,1)) 6131 vex_sprintf(pxStr, ", #%u", xferSzB); 6132 else 6133 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6134 } 6135 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6136 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n", 6137 nRegs, 6138 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6139 pxStr); 6140 6141 return True; 6142 } 6143 /* else fall through */ 6144 } 6145 6146 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */ 6147 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */ 6148 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */ 6149 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */ 6150 /* 31 29 22 21 20 15 11 9 4 6151 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP] 6152 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step 6153 6154 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP] 6155 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step 6156 6157 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP] 6158 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step 6159 6160 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP] 6161 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step 6162 6163 step = if m == 11111 then transfer-size else Xm 6164 op = case L of 1 -> LD ; 0 -> ST 6165 6166 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb 6167 01:b:b:b0 -> 2, bbb 6168 10:b:b:00 -> 4, bb 6169 10:b:0:01 -> 8, b 6170 */ 6171 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) { 6172 UInt bitQ = INSN(30,30); 6173 Bool isPX = INSN(23,23) == 1; 6174 Bool isLD = INSN(22,22) == 1; 6175 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6176 UInt mm = INSN(20,16); 6177 UInt xx = INSN(15,14); 6178 UInt bitS = INSN(12,12); 6179 UInt sz = INSN(11,10); 6180 UInt nn = INSN(9,5); 6181 UInt tt = INSN(4,0); 6182 6183 Bool valid = True; 6184 6185 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 
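      When there is no writeback (insn[23] == 0) the Rm field serves no
      purpose and must be all zeroes; the check just below treats any
      other value as undecodable.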
*/ 6186 if (!isPX && mm != 0) 6187 valid = False; 6188 6189 UInt laneSzB = 0; /* invalid */ 6190 UInt ix = 16; /* invalid */ 6191 6192 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz; 6193 switch (xx_q_S_sz) { 6194 case 0x00: case 0x01: case 0x02: case 0x03: 6195 case 0x04: case 0x05: case 0x06: case 0x07: 6196 case 0x08: case 0x09: case 0x0A: case 0x0B: 6197 case 0x0C: case 0x0D: case 0x0E: case 0x0F: 6198 laneSzB = 1; ix = xx_q_S_sz & 0xF; 6199 break; 6200 case 0x10: case 0x12: case 0x14: case 0x16: 6201 case 0x18: case 0x1A: case 0x1C: case 0x1E: 6202 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7; 6203 break; 6204 case 0x20: case 0x24: case 0x28: case 0x2C: 6205 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3; 6206 break; 6207 case 0x21: case 0x29: 6208 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1; 6209 break; 6210 default: 6211 break; 6212 } 6213 6214 if (valid && laneSzB != 0) { 6215 6216 IRType ty = integerIRTypeOfSize(laneSzB); 6217 UInt xferSzB = laneSzB * nRegs; 6218 6219 /* Generate the transfer address (TA) and if necessary the 6220 writeback address (WB) */ 6221 IRTemp tTA = newTemp(Ity_I64); 6222 assign(tTA, getIReg64orSP(nn)); 6223 if (nn == 31) { /* FIXME generate stack alignment check */ } 6224 IRTemp tWB = IRTemp_INVALID; 6225 if (isPX) { 6226 tWB = newTemp(Ity_I64); 6227 assign(tWB, binop(Iop_Add64, 6228 mkexpr(tTA), 6229 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 6230 : getIReg64orZR(mm))); 6231 } 6232 6233 /* Do the writeback, if necessary */ 6234 if (isPX) { 6235 putIReg64orSP(nn, mkexpr(tWB)); 6236 } 6237 6238 switch (nRegs) { 6239 case 4: { 6240 IRExpr* addr 6241 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB)); 6242 if (isLD) { 6243 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr)); 6244 } else { 6245 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty)); 6246 } 6247 /* fallthrough */ 6248 } 6249 case 3: { 6250 IRExpr* addr 6251 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB)); 6252 if (isLD) { 6253 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr)); 6254 } else { 6255 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty)); 6256 } 6257 /* fallthrough */ 6258 } 6259 case 2: { 6260 IRExpr* addr 6261 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB)); 6262 if (isLD) { 6263 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr)); 6264 } else { 6265 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty)); 6266 } 6267 /* fallthrough */ 6268 } 6269 case 1: { 6270 IRExpr* addr 6271 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB)); 6272 if (isLD) { 6273 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr)); 6274 } else { 6275 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty)); 6276 } 6277 break; 6278 } 6279 default: 6280 vassert(0); 6281 } 6282 6283 HChar pxStr[20]; 6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6285 if (isPX) { 6286 if (mm == BITS5(1,1,1,1,1)) 6287 vex_sprintf(pxStr, ", #%u", xferSzB); 6288 else 6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6290 } 6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6292 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n", 6293 isLD ? 
"ld" : "st", nRegs, 6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, 6295 ix, nameIReg64orSP(nn), pxStr); 6296 6297 return True; 6298 } 6299 /* else fall through */ 6300 } 6301 6302 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ 6303 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ 6304 /* 31 29 23 20 14 9 4 6305 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] 6306 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] 6307 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] 6308 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] 6309 */ 6310 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) 6311 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) 6312 && INSN(14,10) == BITS5(1,1,1,1,1)) { 6313 UInt szBlg2 = INSN(31,30); 6314 Bool isLD = INSN(22,22) == 1; 6315 Bool isAcqOrRel = INSN(15,15) == 1; 6316 UInt ss = INSN(20,16); 6317 UInt nn = INSN(9,5); 6318 UInt tt = INSN(4,0); 6319 6320 vassert(szBlg2 < 4); 6321 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6322 IRType ty = integerIRTypeOfSize(szB); 6323 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6324 6325 IRTemp ea = newTemp(Ity_I64); 6326 assign(ea, getIReg64orSP(nn)); 6327 /* FIXME generate check that ea is szB-aligned */ 6328 6329 if (isLD && ss == BITS5(1,1,1,1,1)) { 6330 IRTemp res = newTemp(ty); 6331 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); 6332 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6333 if (isAcqOrRel) { 6334 stmt(IRStmt_MBE(Imbe_Fence)); 6335 } 6336 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 6337 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6338 return True; 6339 } 6340 if (!isLD) { 6341 if (isAcqOrRel) { 6342 stmt(IRStmt_MBE(Imbe_Fence)); 6343 } 6344 IRTemp res = newTemp(Ity_I1); 6345 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6346 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); 6347 /* IR semantics: res is 1 if store succeeds, 0 if it fails. 6348 Need to set rS to 1 on failure, 0 on success. */ 6349 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), 6350 mkU64(1))); 6351 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? 
"a" : "", suffix[szBlg2], 6352 nameIRegOrZR(False, ss), 6353 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6354 return True; 6355 } 6356 /* else fall through */ 6357 } 6358 6359 /* ------------------ LDA{R,RH,RB} ------------------ */ 6360 /* ------------------ STL{R,RH,RB} ------------------ */ 6361 /* 31 29 23 20 14 9 4 6362 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP] 6363 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP] 6364 */ 6365 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) 6366 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { 6367 UInt szBlg2 = INSN(31,30); 6368 Bool isLD = INSN(22,22) == 1; 6369 UInt nn = INSN(9,5); 6370 UInt tt = INSN(4,0); 6371 6372 vassert(szBlg2 < 4); 6373 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6374 IRType ty = integerIRTypeOfSize(szB); 6375 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6376 6377 IRTemp ea = newTemp(Ity_I64); 6378 assign(ea, getIReg64orSP(nn)); 6379 /* FIXME generate check that ea is szB-aligned */ 6380 6381 if (isLD) { 6382 IRTemp res = newTemp(ty); 6383 assign(res, loadLE(ty, mkexpr(ea))); 6384 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6385 stmt(IRStmt_MBE(Imbe_Fence)); 6386 DIP("lda%s %s, [%s]\n", suffix[szBlg2], 6387 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6388 } else { 6389 stmt(IRStmt_MBE(Imbe_Fence)); 6390 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6391 storeLE(mkexpr(ea), data); 6392 DIP("stl%s %s, [%s]\n", suffix[szBlg2], 6393 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6394 } 6395 return True; 6396 } 6397 6398 /* ------------------ PRFM (immediate) ------------------ */ 6399 /* 31 21 9 4 6400 11 111 00110 imm12 n t PRFM pfrop=Rt, [Xn|SP, #pimm] 6401 */ 6402 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) { 6403 UInt imm12 = INSN(21,10); 6404 UInt nn = INSN(9,5); 6405 UInt tt = INSN(4,0); 6406 /* Generating any IR here is pointless, except for documentation 6407 purposes, as it will get optimised away later. 
*/ 6408 IRTemp ea = newTemp(Ity_I64); 6409 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8))); 6410 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8); 6411 return True; 6412 } 6413 6414 vex_printf("ARM64 front end: load_store\n"); 6415 return False; 6416 # undef INSN 6417 } 6418 6419 6420 /*------------------------------------------------------------*/ 6421 /*--- Control flow and misc instructions ---*/ 6422 /*------------------------------------------------------------*/ 6423 6424 static 6425 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn, 6426 const VexArchInfo* archinfo) 6427 { 6428 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 6429 6430 /* ---------------------- B cond ----------------------- */ 6431 /* 31 24 4 3 6432 0101010 0 imm19 0 cond */ 6433 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) { 6434 UInt cond = INSN(3,0); 6435 ULong uimm64 = INSN(23,5) << 2; 6436 Long simm64 = (Long)sx_to_64(uimm64, 21); 6437 vassert(dres->whatNext == Dis_Continue); 6438 vassert(dres->len == 4); 6439 vassert(dres->continueAt == 0); 6440 vassert(dres->jk_StopHere == Ijk_INVALID); 6441 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 6442 Ijk_Boring, 6443 IRConst_U64(guest_PC_curr_instr + simm64), 6444 OFFB_PC) ); 6445 putPC(mkU64(guest_PC_curr_instr + 4)); 6446 dres->whatNext = Dis_StopHere; 6447 dres->jk_StopHere = Ijk_Boring; 6448 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64); 6449 return True; 6450 } 6451 6452 /* -------------------- B{L} uncond -------------------- */ 6453 if (INSN(30,26) == BITS5(0,0,1,0,1)) { 6454 /* 000101 imm26 B (PC + sxTo64(imm26 << 2)) 6455 100101 imm26 B (PC + sxTo64(imm26 << 2)) 6456 */ 6457 UInt bLink = INSN(31,31); 6458 ULong uimm64 = INSN(25,0) << 2; 6459 Long simm64 = (Long)sx_to_64(uimm64, 28); 6460 if (bLink) { 6461 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 6462 } 6463 putPC(mkU64(guest_PC_curr_instr + simm64)); 6464 dres->whatNext = Dis_StopHere; 6465 dres->jk_StopHere = Ijk_Call; 6466 DIP("b%s 0x%llx\n", bLink == 1 ? 
"l" : "", 6467 guest_PC_curr_instr + simm64); 6468 return True; 6469 } 6470 6471 /* --------------------- B{L} reg --------------------- */ 6472 /* 31 24 22 20 15 9 4 6473 1101011 00 10 11111 000000 nn 00000 RET Rn 6474 1101011 00 01 11111 000000 nn 00000 CALL Rn 6475 1101011 00 00 11111 000000 nn 00000 JMP Rn 6476 */ 6477 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0) 6478 && INSN(20,16) == BITS5(1,1,1,1,1) 6479 && INSN(15,10) == BITS6(0,0,0,0,0,0) 6480 && INSN(4,0) == BITS5(0,0,0,0,0)) { 6481 UInt branch_type = INSN(22,21); 6482 UInt nn = INSN(9,5); 6483 if (branch_type == BITS2(1,0) /* RET */) { 6484 putPC(getIReg64orZR(nn)); 6485 dres->whatNext = Dis_StopHere; 6486 dres->jk_StopHere = Ijk_Ret; 6487 DIP("ret %s\n", nameIReg64orZR(nn)); 6488 return True; 6489 } 6490 if (branch_type == BITS2(0,1) /* CALL */) { 6491 IRTemp dst = newTemp(Ity_I64); 6492 assign(dst, getIReg64orZR(nn)); 6493 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 6494 putPC(mkexpr(dst)); 6495 dres->whatNext = Dis_StopHere; 6496 dres->jk_StopHere = Ijk_Call; 6497 DIP("blr %s\n", nameIReg64orZR(nn)); 6498 return True; 6499 } 6500 if (branch_type == BITS2(0,0) /* JMP */) { 6501 putPC(getIReg64orZR(nn)); 6502 dres->whatNext = Dis_StopHere; 6503 dres->jk_StopHere = Ijk_Boring; 6504 DIP("jmp %s\n", nameIReg64orZR(nn)); 6505 return True; 6506 } 6507 } 6508 6509 /* -------------------- CB{N}Z -------------------- */ 6510 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 6511 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 6512 */ 6513 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) { 6514 Bool is64 = INSN(31,31) == 1; 6515 Bool bIfZ = INSN(24,24) == 0; 6516 ULong uimm64 = INSN(23,5) << 2; 6517 UInt rT = INSN(4,0); 6518 Long simm64 = (Long)sx_to_64(uimm64, 21); 6519 IRExpr* cond = NULL; 6520 if (is64) { 6521 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 6522 getIReg64orZR(rT), mkU64(0)); 6523 } else { 6524 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32, 6525 getIReg32orZR(rT), mkU32(0)); 6526 } 6527 stmt( IRStmt_Exit(cond, 6528 Ijk_Boring, 6529 IRConst_U64(guest_PC_curr_instr + simm64), 6530 OFFB_PC) ); 6531 putPC(mkU64(guest_PC_curr_instr + 4)); 6532 dres->whatNext = Dis_StopHere; 6533 dres->jk_StopHere = Ijk_Boring; 6534 DIP("cb%sz %s, 0x%llx\n", 6535 bIfZ ? "" : "n", nameIRegOrZR(is64, rT), 6536 guest_PC_curr_instr + simm64); 6537 return True; 6538 } 6539 6540 /* -------------------- TB{N}Z -------------------- */ 6541 /* 31 30 24 23 18 5 4 6542 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 6543 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 6544 */ 6545 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) { 6546 UInt b5 = INSN(31,31); 6547 Bool bIfZ = INSN(24,24) == 0; 6548 UInt b40 = INSN(23,19); 6549 UInt imm14 = INSN(18,5); 6550 UInt tt = INSN(4,0); 6551 UInt bitNo = (b5 << 5) | b40; 6552 ULong uimm64 = imm14 << 2; 6553 Long simm64 = sx_to_64(uimm64, 16); 6554 IRExpr* cond 6555 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 6556 binop(Iop_And64, 6557 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)), 6558 mkU64(1)), 6559 mkU64(0)); 6560 stmt( IRStmt_Exit(cond, 6561 Ijk_Boring, 6562 IRConst_U64(guest_PC_curr_instr + simm64), 6563 OFFB_PC) ); 6564 putPC(mkU64(guest_PC_curr_instr + 4)); 6565 dres->whatNext = Dis_StopHere; 6566 dres->jk_StopHere = Ijk_Boring; 6567 DIP("tb%sz %s, #%u, 0x%llx\n", 6568 bIfZ ? 
"" : "n", nameIReg64orZR(tt), bitNo, 6569 guest_PC_curr_instr + simm64); 6570 return True; 6571 } 6572 6573 /* -------------------- SVC -------------------- */ 6574 /* 11010100 000 imm16 000 01 6575 Don't bother with anything except the imm16==0 case. 6576 */ 6577 if (INSN(31,0) == 0xD4000001) { 6578 putPC(mkU64(guest_PC_curr_instr + 4)); 6579 dres->whatNext = Dis_StopHere; 6580 dres->jk_StopHere = Ijk_Sys_syscall; 6581 DIP("svc #0\n"); 6582 return True; 6583 } 6584 6585 /* ------------------ M{SR,RS} ------------------ */ 6586 /* ---- Cases for TPIDR_EL0 ---- 6587 0xD51BD0 010 Rt MSR tpidr_el0, rT 6588 0xD53BD0 010 Rt MRS rT, tpidr_el0 6589 */ 6590 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/ 6591 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) { 6592 Bool toSys = INSN(21,21) == 0; 6593 UInt tt = INSN(4,0); 6594 if (toSys) { 6595 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) ); 6596 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt)); 6597 } else { 6598 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 )); 6599 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt)); 6600 } 6601 return True; 6602 } 6603 /* ---- Cases for FPCR ---- 6604 0xD51B44 000 Rt MSR fpcr, rT 6605 0xD53B44 000 Rt MSR rT, fpcr 6606 */ 6607 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/ 6608 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) { 6609 Bool toSys = INSN(21,21) == 0; 6610 UInt tt = INSN(4,0); 6611 if (toSys) { 6612 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) ); 6613 DIP("msr fpcr, %s\n", nameIReg64orZR(tt)); 6614 } else { 6615 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32)); 6616 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt)); 6617 } 6618 return True; 6619 } 6620 /* ---- Cases for FPSR ---- 6621 0xD51B44 001 Rt MSR fpsr, rT 6622 0xD53B44 001 Rt MSR rT, fpsr 6623 The only part of this we model is FPSR.QC. All other bits 6624 are ignored when writing to it and RAZ when reading from it. 6625 */ 6626 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/ 6627 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) { 6628 Bool toSys = INSN(21,21) == 0; 6629 UInt tt = INSN(4,0); 6630 if (toSys) { 6631 /* Just deal with FPSR.QC. Make up a V128 value which is 6632 zero if Xt[27] is zero and any other value if Xt[27] is 6633 nonzero. */ 6634 IRTemp qc64 = newTemp(Ity_I64); 6635 assign(qc64, binop(Iop_And64, 6636 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)), 6637 mkU64(1))); 6638 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64)); 6639 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) ); 6640 DIP("msr fpsr, %s\n", nameIReg64orZR(tt)); 6641 } else { 6642 /* Generate a value which is all zeroes except for bit 27, 6643 which must be zero if QCFLAG is all zeroes and one otherwise. */ 6644 IRTemp qcV128 = newTempV128(); 6645 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 )); 6646 IRTemp qc64 = newTemp(Ity_I64); 6647 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)), 6648 unop(Iop_V128to64, mkexpr(qcV128)))); 6649 IRExpr* res = binop(Iop_Shl64, 6650 unop(Iop_1Uto64, 6651 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))), 6652 mkU8(27)); 6653 putIReg64orZR(tt, res); 6654 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt)); 6655 } 6656 return True; 6657 } 6658 /* ---- Cases for NZCV ---- 6659 D51B42 000 Rt MSR nzcv, rT 6660 D53B42 000 Rt MRS rT, nzcv 6661 The only parts of NZCV that actually exist are bits 31:28, which 6662 are the N Z C and V bits themselves. Hence the flags thunk provides 6663 all the state we need. 
6664 */ 6665 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/ 6666 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) { 6667 Bool toSys = INSN(21,21) == 0; 6668 UInt tt = INSN(4,0); 6669 if (toSys) { 6670 IRTemp t = newTemp(Ity_I64); 6671 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL))); 6672 setFlags_COPY(t); 6673 DIP("msr %s, nzcv\n", nameIReg32orZR(tt)); 6674 } else { 6675 IRTemp res = newTemp(Ity_I64); 6676 assign(res, mk_arm64g_calculate_flags_nzcv()); 6677 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res))); 6678 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt)); 6679 } 6680 return True; 6681 } 6682 /* ---- Cases for DCZID_EL0 ---- 6683 Don't support arbitrary reads and writes to this register. Just 6684 return the value 16, which indicates that the DC ZVA instruction 6685 is not permitted, so we don't have to emulate it. 6686 D5 3B 00 111 Rt MRS rT, dczid_el0 6687 */ 6688 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) { 6689 UInt tt = INSN(4,0); 6690 putIReg64orZR(tt, mkU64(1<<4)); 6691 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt)); 6692 return True; 6693 } 6694 /* ---- Cases for CTR_EL0 ---- 6695 We just handle reads, and make up a value from the D and I line 6696 sizes in the VexArchInfo we are given, and patch in the following 6697 fields that the Foundation model gives ("natively"): 6698 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11 6699 D5 3B 00 001 Rt MRS rT, dczid_el0 6700 */ 6701 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) { 6702 UInt tt = INSN(4,0); 6703 /* Need to generate a value from dMinLine_lg2_szB and 6704 dMinLine_lg2_szB. The value in the register is in 32-bit 6705 units, so need to subtract 2 from the values in the 6706 VexArchInfo. We can assume that the values here are valid -- 6707 disInstr_ARM64 checks them -- so there's no need to deal with 6708 out-of-range cases. */ 6709 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2 6710 && archinfo->arm64_dMinLine_lg2_szB <= 17 6711 && archinfo->arm64_iMinLine_lg2_szB >= 2 6712 && archinfo->arm64_iMinLine_lg2_szB <= 17); 6713 UInt val 6714 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16) 6715 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0); 6716 putIReg64orZR(tt, mkU64(val)); 6717 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt)); 6718 return True; 6719 } 6720 /* ---- Cases for CNTVCT_EL0 ---- 6721 This is a timestamp counter of some sort. Support reads of it only 6722 by passing through to the host. 6723 D5 3B E0 010 Rt MRS Xt, cntvct_el0 6724 */ 6725 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) { 6726 UInt tt = INSN(4,0); 6727 IRTemp val = newTemp(Ity_I64); 6728 IRExpr** args = mkIRExprVec_0(); 6729 IRDirty* d = unsafeIRDirty_1_N ( 6730 val, 6731 0/*regparms*/, 6732 "arm64g_dirtyhelper_MRS_CNTVCT_EL0", 6733 &arm64g_dirtyhelper_MRS_CNTVCT_EL0, 6734 args 6735 ); 6736 /* execute the dirty call, dumping the result in val. */ 6737 stmt( IRStmt_Dirty(d) ); 6738 putIReg64orZR(tt, mkexpr(val)); 6739 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt)); 6740 return True; 6741 } 6742 6743 /* ------------------ IC_IVAU ------------------ */ 6744 /* D5 0B 75 001 Rt ic ivau, rT 6745 */ 6746 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) { 6747 /* We will always be provided with a valid iMinLine value. */ 6748 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2 6749 && archinfo->arm64_iMinLine_lg2_szB <= 17); 6750 /* Round the requested address, in rT, down to the start of the 6751 containing block. 
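      For example, if arm64_iMinLine_lg2_szB is 6 (64-byte lines), the
      mask below is ~(64-1) == ~0x3F, so a request for address 0x1234
      invalidates the block starting at 0x1200.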
*/ 6752 UInt tt = INSN(4,0); 6753 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB; 6754 IRTemp addr = newTemp(Ity_I64); 6755 assign( addr, binop( Iop_And64, 6756 getIReg64orZR(tt), 6757 mkU64(~(lineszB - 1))) ); 6758 /* Set the invalidation range, request exit-and-invalidate, with 6759 continuation at the next instruction. */ 6760 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); 6761 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); 6762 /* be paranoid ... */ 6763 stmt( IRStmt_MBE(Imbe_Fence) ); 6764 putPC(mkU64( guest_PC_curr_instr + 4 )); 6765 dres->whatNext = Dis_StopHere; 6766 dres->jk_StopHere = Ijk_InvalICache; 6767 DIP("ic ivau, %s\n", nameIReg64orZR(tt)); 6768 return True; 6769 } 6770 6771 /* ------------------ DC_CVAU ------------------ */ 6772 /* D5 0B 7B 001 Rt dc cvau, rT 6773 */ 6774 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) { 6775 /* Exactly the same scheme as for IC IVAU, except we observe the 6776 dMinLine size, and request an Ijk_FlushDCache instead of 6777 Ijk_InvalICache. */ 6778 /* We will always be provided with a valid dMinLine value. */ 6779 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2 6780 && archinfo->arm64_dMinLine_lg2_szB <= 17); 6781 /* Round the requested address, in rT, down to the start of the 6782 containing block. */ 6783 UInt tt = INSN(4,0); 6784 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB; 6785 IRTemp addr = newTemp(Ity_I64); 6786 assign( addr, binop( Iop_And64, 6787 getIReg64orZR(tt), 6788 mkU64(~(lineszB - 1))) ); 6789 /* Set the flush range, request exit-and-flush, with 6790 continuation at the next instruction. */ 6791 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); 6792 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); 6793 /* be paranoid ... */ 6794 stmt( IRStmt_MBE(Imbe_Fence) ); 6795 putPC(mkU64( guest_PC_curr_instr + 4 )); 6796 dres->whatNext = Dis_StopHere; 6797 dres->jk_StopHere = Ijk_FlushDCache; 6798 DIP("dc cvau, %s\n", nameIReg64orZR(tt)); 6799 return True; 6800 } 6801 6802 /* ------------------ ISB, DMB, DSB ------------------ */ 6803 /* 31 21 11 7 6 4 6804 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt 6805 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt 6806 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt 6807 */ 6808 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0) 6809 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1) 6810 && INSN(7,7) == 1 6811 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) { 6812 UInt opc = INSN(6,5); 6813 UInt CRm = INSN(11,8); 6814 vassert(opc <= 2 && CRm <= 15); 6815 stmt(IRStmt_MBE(Imbe_Fence)); 6816 const HChar* opNames[3] 6817 = { "dsb", "dmb", "isb" }; 6818 const HChar* howNames[16] 6819 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh", 6820 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" }; 6821 DIP("%s %s\n", opNames[opc], howNames[CRm]); 6822 return True; 6823 } 6824 6825 /* -------------------- NOP -------------------- */ 6826 if (INSN(31,0) == 0xD503201F) { 6827 DIP("nop\n"); 6828 return True; 6829 } 6830 6831 /* -------------------- BRK -------------------- */ 6832 /* 31 23 20 4 6833 1101 0100 001 imm16 00000 BRK #imm16 6834 */ 6835 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0) 6836 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) { 6837 UInt imm16 = INSN(20,5); 6838 /* Request SIGTRAP and then restart of this insn. 
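      Hence the continuation PC below is this instruction's own address
      (the "+ 0"), not the next one, so the trap is reported at the BRK
      itself.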
*/ 6839 putPC(mkU64(guest_PC_curr_instr + 0)); 6840 dres->whatNext = Dis_StopHere; 6841 dres->jk_StopHere = Ijk_SigTRAP; 6842 DIP("brk #%u\n", imm16); 6843 return True; 6844 } 6845 6846 //fail: 6847 vex_printf("ARM64 front end: branch_etc\n"); 6848 return False; 6849 # undef INSN 6850 } 6851 6852 6853 /*------------------------------------------------------------*/ 6854 /*--- SIMD and FP instructions: helper functions ---*/ 6855 /*------------------------------------------------------------*/ 6856 6857 /* Some constructors for interleave/deinterleave expressions. */ 6858 6859 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) { 6860 // returns a0 b0 6861 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10)); 6862 } 6863 6864 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) { 6865 // returns a1 b1 6866 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10)); 6867 } 6868 6869 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) { 6870 // returns a2 a0 b2 b0 6871 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210)); 6872 } 6873 6874 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) { 6875 // returns a3 a1 b3 b1 6876 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210)); 6877 } 6878 6879 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) { 6880 // returns a1 b1 a0 b0 6881 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210)); 6882 } 6883 6884 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) { 6885 // returns a3 b3 a2 b2 6886 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210)); 6887 } 6888 6889 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6890 // returns a6 a4 a2 a0 b6 b4 b2 b0 6891 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210)); 6892 } 6893 6894 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6895 // returns a7 a5 a3 a1 b7 b5 b3 b1 6896 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210)); 6897 } 6898 6899 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6900 // returns a3 b3 a2 b2 a1 b1 a0 b0 6901 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210)); 6902 } 6903 6904 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6905 // returns a7 b7 a6 b6 a5 b5 a4 b4 6906 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210)); 6907 } 6908 6909 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210, 6910 IRTemp bFEDCBA9876543210 ) { 6911 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0 6912 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210), 6913 mkexpr(bFEDCBA9876543210)); 6914 } 6915 6916 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210, 6917 IRTemp bFEDCBA9876543210 ) { 6918 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1 6919 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210), 6920 mkexpr(bFEDCBA9876543210)); 6921 } 6922 6923 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210, 6924 IRTemp bFEDCBA9876543210 ) { 6925 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0 6926 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210), 6927 mkexpr(bFEDCBA9876543210)); 6928 } 6929 6930 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210, 6931 IRTemp bFEDCBA9876543210 ) { 6932 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8 6933 return binop(Iop_InterleaveHI8x16, 
mkexpr(aFEDCBA9876543210), 6934 mkexpr(bFEDCBA9876543210)); 6935 } 6936 6937 /* Generate N copies of |bit| in the bottom of a ULong. */ 6938 static ULong Replicate ( ULong bit, Int N ) 6939 { 6940 vassert(bit <= 1 && N >= 1 && N < 64); 6941 if (bit == 0) { 6942 return 0; 6943 } else { 6944 /* Careful. This won't work for N == 64. */ 6945 return (1ULL << N) - 1; 6946 } 6947 } 6948 6949 static ULong Replicate32x2 ( ULong bits32 ) 6950 { 6951 vassert(0 == (bits32 & ~0xFFFFFFFFULL)); 6952 return (bits32 << 32) | bits32; 6953 } 6954 6955 static ULong Replicate16x4 ( ULong bits16 ) 6956 { 6957 vassert(0 == (bits16 & ~0xFFFFULL)); 6958 return Replicate32x2((bits16 << 16) | bits16); 6959 } 6960 6961 static ULong Replicate8x8 ( ULong bits8 ) 6962 { 6963 vassert(0 == (bits8 & ~0xFFULL)); 6964 return Replicate16x4((bits8 << 8) | bits8); 6965 } 6966 6967 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of 6968 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N 6969 is 64. In the former case, the upper 32 bits of the returned value 6970 are guaranteed to be zero. */ 6971 static ULong VFPExpandImm ( ULong imm8, Int N ) 6972 { 6973 vassert(imm8 <= 0xFF); 6974 vassert(N == 32 || N == 64); 6975 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2. 6976 Int F = N - E - 1; 6977 ULong imm8_6 = (imm8 >> 6) & 1; 6978 /* sign: 1 bit */ 6979 /* exp: E bits */ 6980 /* frac: F bits */ 6981 ULong sign = (imm8 >> 7) & 1; 6982 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1); 6983 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6); 6984 vassert(sign < (1ULL << 1)); 6985 vassert(exp < (1ULL << E)); 6986 vassert(frac < (1ULL << F)); 6987 vassert(1 + E + F == N); 6988 ULong res = (sign << (E+F)) | (exp << F) | frac; 6989 return res; 6990 } 6991 6992 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value. 6993 This might fail, as indicated by the returned Bool. Page 2530 of 6994 the manual. */ 6995 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res, 6996 UInt op, UInt cmode, UInt imm8 ) 6997 { 6998 vassert(op <= 1); 6999 vassert(cmode <= 15); 7000 vassert(imm8 <= 255); 7001 7002 *res = 0; /* will overwrite iff returning True */ 7003 7004 ULong imm64 = 0; 7005 Bool testimm8 = False; 7006 7007 switch (cmode >> 1) { 7008 case 0: 7009 testimm8 = False; imm64 = Replicate32x2(imm8); break; 7010 case 1: 7011 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break; 7012 case 2: 7013 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break; 7014 case 3: 7015 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break; 7016 case 4: 7017 testimm8 = False; imm64 = Replicate16x4(imm8); break; 7018 case 5: 7019 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break; 7020 case 6: 7021 testimm8 = True; 7022 if ((cmode & 1) == 0) 7023 imm64 = Replicate32x2((imm8 << 8) | 0xFF); 7024 else 7025 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF); 7026 break; 7027 case 7: 7028 testimm8 = False; 7029 if ((cmode & 1) == 0 && op == 0) 7030 imm64 = Replicate8x8(imm8); 7031 if ((cmode & 1) == 0 && op == 1) { 7032 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00; 7033 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00; 7034 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00; 7035 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00; 7036 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00; 7037 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00; 7038 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00; 7039 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 
0xFF : 0x00; 7040 } 7041 if ((cmode & 1) == 1 && op == 0) { 7042 ULong imm8_7 = (imm8 >> 7) & 1; 7043 ULong imm8_6 = (imm8 >> 6) & 1; 7044 ULong imm8_50 = imm8 & 63; 7045 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19)) 7046 | ((imm8_6 ^ 1) << (5 + 6 + 19)) 7047 | (Replicate(imm8_6, 5) << (6 + 19)) 7048 | (imm8_50 << 19); 7049 imm64 = Replicate32x2(imm32); 7050 } 7051 if ((cmode & 1) == 1 && op == 1) { 7052 // imm64 = imm8<7>:NOT(imm8<6>) 7053 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48); 7054 ULong imm8_7 = (imm8 >> 7) & 1; 7055 ULong imm8_6 = (imm8 >> 6) & 1; 7056 ULong imm8_50 = imm8 & 63; 7057 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62) 7058 | (Replicate(imm8_6, 8) << 54) 7059 | (imm8_50 << 48); 7060 } 7061 break; 7062 default: 7063 vassert(0); 7064 } 7065 7066 if (testimm8 && imm8 == 0) 7067 return False; 7068 7069 *res = imm64; 7070 return True; 7071 } 7072 7073 /* Help a bit for decoding laneage for vector operations that can be 7074 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q 7075 and SZ bits, typically for vector floating point. */ 7076 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF, 7077 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper, 7078 /*OUT*/const HChar** arrSpec, 7079 Bool bitQ, Bool bitSZ ) 7080 { 7081 vassert(bitQ == True || bitQ == False); 7082 vassert(bitSZ == True || bitSZ == False); 7083 if (bitQ && bitSZ) { // 2x64 7084 if (tyI) *tyI = Ity_I64; 7085 if (tyF) *tyF = Ity_F64; 7086 if (nLanes) *nLanes = 2; 7087 if (zeroUpper) *zeroUpper = False; 7088 if (arrSpec) *arrSpec = "2d"; 7089 return True; 7090 } 7091 if (bitQ && !bitSZ) { // 4x32 7092 if (tyI) *tyI = Ity_I32; 7093 if (tyF) *tyF = Ity_F32; 7094 if (nLanes) *nLanes = 4; 7095 if (zeroUpper) *zeroUpper = False; 7096 if (arrSpec) *arrSpec = "4s"; 7097 return True; 7098 } 7099 if (!bitQ && !bitSZ) { // 2x32 7100 if (tyI) *tyI = Ity_I32; 7101 if (tyF) *tyF = Ity_F32; 7102 if (nLanes) *nLanes = 2; 7103 if (zeroUpper) *zeroUpper = True; 7104 if (arrSpec) *arrSpec = "2s"; 7105 return True; 7106 } 7107 // Else impliedly 1x64, which isn't allowed. 7108 return False; 7109 } 7110 7111 /* Helper for decoding laneage for shift-style vector operations 7112 that involve an immediate shift amount. */ 7113 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, 7114 UInt immh, UInt immb ) 7115 { 7116 vassert(immh < (1<<4)); 7117 vassert(immb < (1<<3)); 7118 UInt immhb = (immh << 3) | immb; 7119 if (immh & 8) { 7120 if (shift) *shift = 128 - immhb; 7121 if (szBlg2) *szBlg2 = 3; 7122 return True; 7123 } 7124 if (immh & 4) { 7125 if (shift) *shift = 64 - immhb; 7126 if (szBlg2) *szBlg2 = 2; 7127 return True; 7128 } 7129 if (immh & 2) { 7130 if (shift) *shift = 32 - immhb; 7131 if (szBlg2) *szBlg2 = 1; 7132 return True; 7133 } 7134 if (immh & 1) { 7135 if (shift) *shift = 16 - immhb; 7136 if (szBlg2) *szBlg2 = 0; 7137 return True; 7138 } 7139 return False; 7140 } 7141 7142 /* Generate IR to fold all lanes of the V128 value in 'src' as 7143 characterised by the operator 'op', and return the result in the 7144 bottom bits of a V128, with all other bits set to zero. */ 7145 static IRTemp math_FOLDV ( IRTemp src, IROp op ) 7146 { 7147 /* The basic idea is to use repeated applications of Iop_CatEven* 7148 and Iop_CatOdd* operators to 'src' so as to clone each lane into 7149 a complete vector. Then fold all those vectors with 'op' and 7150 zero out all but the least significant lane. 
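      In the 16-lanes-of-8-bits case, for example, this means building 16
      single-lane-replicated vectors (the xAll* temps below) and then
      combining them with 15 applications of 'op' arranged as a balanced
      tree, before masking off everything except the bottom lane with
      Iop_ZeroHI120ofV128.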
*/ 7151 switch (op) { 7152 case Iop_Min8Sx16: case Iop_Min8Ux16: 7153 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: { 7154 /* NB: temp naming here is misleading -- the naming is for 8 7155 lanes of 16 bit, whereas what is being operated on is 16 7156 lanes of 8 bits. */ 7157 IRTemp x76543210 = src; 7158 IRTemp x76547654 = newTempV128(); 7159 IRTemp x32103210 = newTempV128(); 7160 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 7161 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); 7162 IRTemp x76767676 = newTempV128(); 7163 IRTemp x54545454 = newTempV128(); 7164 IRTemp x32323232 = newTempV128(); 7165 IRTemp x10101010 = newTempV128(); 7166 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 7167 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 7168 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 7169 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 7170 IRTemp x77777777 = newTempV128(); 7171 IRTemp x66666666 = newTempV128(); 7172 IRTemp x55555555 = newTempV128(); 7173 IRTemp x44444444 = newTempV128(); 7174 IRTemp x33333333 = newTempV128(); 7175 IRTemp x22222222 = newTempV128(); 7176 IRTemp x11111111 = newTempV128(); 7177 IRTemp x00000000 = newTempV128(); 7178 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 7179 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 7180 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 7181 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 7182 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 7183 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 7184 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 7185 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 7186 /* Naming not misleading after here. 
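         Each xAll<N> temp below holds 8-bit lane N (hex) of the original
         value, cloned into all 16 lanes.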
*/ 7187 IRTemp xAllF = newTempV128(); 7188 IRTemp xAllE = newTempV128(); 7189 IRTemp xAllD = newTempV128(); 7190 IRTemp xAllC = newTempV128(); 7191 IRTemp xAllB = newTempV128(); 7192 IRTemp xAllA = newTempV128(); 7193 IRTemp xAll9 = newTempV128(); 7194 IRTemp xAll8 = newTempV128(); 7195 IRTemp xAll7 = newTempV128(); 7196 IRTemp xAll6 = newTempV128(); 7197 IRTemp xAll5 = newTempV128(); 7198 IRTemp xAll4 = newTempV128(); 7199 IRTemp xAll3 = newTempV128(); 7200 IRTemp xAll2 = newTempV128(); 7201 IRTemp xAll1 = newTempV128(); 7202 IRTemp xAll0 = newTempV128(); 7203 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777)); 7204 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777)); 7205 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666)); 7206 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666)); 7207 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555)); 7208 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555)); 7209 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444)); 7210 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444)); 7211 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333)); 7212 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333)); 7213 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222)); 7214 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222)); 7215 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111)); 7216 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111)); 7217 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000)); 7218 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000)); 7219 IRTemp maxFE = newTempV128(); 7220 IRTemp maxDC = newTempV128(); 7221 IRTemp maxBA = newTempV128(); 7222 IRTemp max98 = newTempV128(); 7223 IRTemp max76 = newTempV128(); 7224 IRTemp max54 = newTempV128(); 7225 IRTemp max32 = newTempV128(); 7226 IRTemp max10 = newTempV128(); 7227 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE))); 7228 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC))); 7229 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA))); 7230 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8))); 7231 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6))); 7232 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4))); 7233 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2))); 7234 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0))); 7235 IRTemp maxFEDC = newTempV128(); 7236 IRTemp maxBA98 = newTempV128(); 7237 IRTemp max7654 = newTempV128(); 7238 IRTemp max3210 = newTempV128(); 7239 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC))); 7240 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98))); 7241 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 7242 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7243 IRTemp maxFEDCBA98 = newTempV128(); 7244 IRTemp max76543210 = newTempV128(); 7245 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98))); 7246 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 7247 IRTemp maxAllLanes = newTempV128(); 7248 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98), 7249 mkexpr(max76543210))); 7250 IRTemp res = newTempV128(); 7251 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes))); 7252 return res; 7253 } 7254 case Iop_Min16Sx8: case Iop_Min16Ux8: 7255 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: { 7256 IRTemp x76543210 = src; 7257 IRTemp x76547654 = newTempV128(); 7258 IRTemp x32103210 = newTempV128(); 7259 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 7260 assign(x32103210, mk_CatEvenLanes64x2(x76543210, 
x76543210)); 7261 IRTemp x76767676 = newTempV128(); 7262 IRTemp x54545454 = newTempV128(); 7263 IRTemp x32323232 = newTempV128(); 7264 IRTemp x10101010 = newTempV128(); 7265 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 7266 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 7267 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 7268 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 7269 IRTemp x77777777 = newTempV128(); 7270 IRTemp x66666666 = newTempV128(); 7271 IRTemp x55555555 = newTempV128(); 7272 IRTemp x44444444 = newTempV128(); 7273 IRTemp x33333333 = newTempV128(); 7274 IRTemp x22222222 = newTempV128(); 7275 IRTemp x11111111 = newTempV128(); 7276 IRTemp x00000000 = newTempV128(); 7277 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 7278 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 7279 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 7280 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 7281 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 7282 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 7283 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 7284 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 7285 IRTemp max76 = newTempV128(); 7286 IRTemp max54 = newTempV128(); 7287 IRTemp max32 = newTempV128(); 7288 IRTemp max10 = newTempV128(); 7289 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666))); 7290 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444))); 7291 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222))); 7292 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000))); 7293 IRTemp max7654 = newTempV128(); 7294 IRTemp max3210 = newTempV128(); 7295 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 7296 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7297 IRTemp max76543210 = newTempV128(); 7298 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 7299 IRTemp res = newTempV128(); 7300 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210))); 7301 return res; 7302 } 7303 case Iop_Max32Fx4: case Iop_Min32Fx4: 7304 case Iop_Min32Sx4: case Iop_Min32Ux4: 7305 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: { 7306 IRTemp x3210 = src; 7307 IRTemp x3232 = newTempV128(); 7308 IRTemp x1010 = newTempV128(); 7309 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210)); 7310 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210)); 7311 IRTemp x3333 = newTempV128(); 7312 IRTemp x2222 = newTempV128(); 7313 IRTemp x1111 = newTempV128(); 7314 IRTemp x0000 = newTempV128(); 7315 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232)); 7316 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232)); 7317 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010)); 7318 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010)); 7319 IRTemp max32 = newTempV128(); 7320 IRTemp max10 = newTempV128(); 7321 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222))); 7322 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000))); 7323 IRTemp max3210 = newTempV128(); 7324 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7325 IRTemp res = newTempV128(); 7326 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210))); 7327 return res; 7328 } 7329 case Iop_Add64x2: { 7330 IRTemp x10 = src; 7331 IRTemp x00 = newTempV128(); 7332 IRTemp x11 = newTempV128(); 7333 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10))); 7334 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10))); 7335 IRTemp max10 = newTempV128(); 
7336 assign(max10, binop(op, mkexpr(x11), mkexpr(x00))); 7337 IRTemp res = newTempV128(); 7338 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10))); 7339 return res; 7340 } 7341 default: 7342 vassert(0); 7343 } 7344 } 7345 7346 7347 /* Generate IR for TBL and TBX. This deals with the 128 bit case 7348 only. */ 7349 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src, 7350 IRTemp oor_values ) 7351 { 7352 vassert(len >= 0 && len <= 3); 7353 7354 /* Generate some useful constants as concisely as possible. */ 7355 IRTemp half15 = newTemp(Ity_I64); 7356 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL)); 7357 IRTemp half16 = newTemp(Ity_I64); 7358 assign(half16, mkU64(0x1010101010101010ULL)); 7359 7360 /* A zero vector */ 7361 IRTemp allZero = newTempV128(); 7362 assign(allZero, mkV128(0x0000)); 7363 /* A vector containing 15 in each 8-bit lane */ 7364 IRTemp all15 = newTempV128(); 7365 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15))); 7366 /* A vector containing 16 in each 8-bit lane */ 7367 IRTemp all16 = newTempV128(); 7368 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16))); 7369 /* A vector containing 32 in each 8-bit lane */ 7370 IRTemp all32 = newTempV128(); 7371 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16))); 7372 /* A vector containing 48 in each 8-bit lane */ 7373 IRTemp all48 = newTempV128(); 7374 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32))); 7375 /* A vector containing 64 in each 8-bit lane */ 7376 IRTemp all64 = newTempV128(); 7377 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32))); 7378 7379 /* Group the 16/32/48/64 vectors so as to be indexable. */ 7380 IRTemp allXX[4] = { all16, all32, all48, all64 }; 7381 7382 /* Compute the result for each table vector, with zeroes in places 7383 where the index values are out of range, and OR them into the 7384 running vector. */ 7385 IRTemp running_result = newTempV128(); 7386 assign(running_result, mkV128(0)); 7387 7388 UInt tabent; 7389 for (tabent = 0; tabent <= len; tabent++) { 7390 vassert(tabent >= 0 && tabent < 4); 7391 IRTemp bias = newTempV128(); 7392 assign(bias, 7393 mkexpr(tabent == 0 ? allZero : allXX[tabent-1])); 7394 IRTemp biased_indices = newTempV128(); 7395 assign(biased_indices, 7396 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias))); 7397 IRTemp valid_mask = newTempV128(); 7398 assign(valid_mask, 7399 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices))); 7400 IRTemp safe_biased_indices = newTempV128(); 7401 assign(safe_biased_indices, 7402 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15))); 7403 IRTemp results_or_junk = newTempV128(); 7404 assign(results_or_junk, 7405 binop(Iop_Perm8x16, mkexpr(tab[tabent]), 7406 mkexpr(safe_biased_indices))); 7407 IRTemp results_or_zero = newTempV128(); 7408 assign(results_or_zero, 7409 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask))); 7410 /* And OR that into the running result. */ 7411 IRTemp tmp = newTempV128(); 7412 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero), 7413 mkexpr(running_result))); 7414 running_result = tmp; 7415 } 7416 7417 /* So now running_result holds the overall result where the indices 7418 are in range, and zero in out-of-range lanes. Now we need to 7419 compute an overall validity mask and use this to copy in the 7420 lanes in the oor_values for out of range indices. This is 7421 unnecessary for TBL but will get folded out by iropt, so we lean 7422 on that and generate the same code for TBL and TBX here. 
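      For example, with len == 1 (a two-register table) any index of 32 or
      more is out of range: the CmpGT8Ux16 against allXX[1], whose lanes all
      hold 32, gives a zero mask lane there, so that lane of the final result
      is taken from |oor_values| instead (zero for TBL, the old destination
      for TBX).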
*/ 7423 IRTemp overall_valid_mask = newTempV128(); 7424 assign(overall_valid_mask, 7425 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src))); 7426 IRTemp result = newTempV128(); 7427 assign(result, 7428 binop(Iop_OrV128, 7429 mkexpr(running_result), 7430 binop(Iop_AndV128, 7431 mkexpr(oor_values), 7432 unop(Iop_NotV128, mkexpr(overall_valid_mask))))); 7433 return result; 7434 } 7435 7436 7437 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be 7438 an op which takes two I64s and produces a V128. That is, a widening 7439 operator. Generate IR which applies |opI64x2toV128| to either the 7440 lower (if |is2| is False) or upper (if |is2| is True) halves of 7441 |argL| and |argR|, and return the value in a new IRTemp. 7442 */ 7443 static 7444 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128, 7445 IRExpr* argL, IRExpr* argR ) 7446 { 7447 IRTemp res = newTempV128(); 7448 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64; 7449 assign(res, binop(opI64x2toV128, unop(slice, argL), 7450 unop(slice, argR))); 7451 return res; 7452 } 7453 7454 7455 /* Generate signed/unsigned absolute difference vector IR. */ 7456 static 7457 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE ) 7458 { 7459 vassert(size <= 3); 7460 IRTemp argL = newTempV128(); 7461 IRTemp argR = newTempV128(); 7462 IRTemp msk = newTempV128(); 7463 IRTemp res = newTempV128(); 7464 assign(argL, argLE); 7465 assign(argR, argRE); 7466 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size), 7467 mkexpr(argL), mkexpr(argR))); 7468 assign(res, 7469 binop(Iop_OrV128, 7470 binop(Iop_AndV128, 7471 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)), 7472 mkexpr(msk)), 7473 binop(Iop_AndV128, 7474 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)), 7475 unop(Iop_NotV128, mkexpr(msk))))); 7476 return res; 7477 } 7478 7479 7480 /* Generate IR that takes a V128 and sign- or zero-widens 7481 either the lower or upper set of lanes to twice-as-wide, 7482 resulting in a new V128 value. */ 7483 static 7484 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf, 7485 UInt sizeNarrow, IRExpr* srcE ) 7486 { 7487 IRTemp src = newTempV128(); 7488 IRTemp res = newTempV128(); 7489 assign(src, srcE); 7490 switch (sizeNarrow) { 7491 case X10: 7492 assign(res, 7493 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2, 7494 binop(fromUpperHalf ? Iop_InterleaveHI32x4 7495 : Iop_InterleaveLO32x4, 7496 mkexpr(src), 7497 mkexpr(src)), 7498 mkU8(32))); 7499 break; 7500 case X01: 7501 assign(res, 7502 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4, 7503 binop(fromUpperHalf ? Iop_InterleaveHI16x8 7504 : Iop_InterleaveLO16x8, 7505 mkexpr(src), 7506 mkexpr(src)), 7507 mkU8(16))); 7508 break; 7509 case X00: 7510 assign(res, 7511 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8, 7512 binop(fromUpperHalf ? Iop_InterleaveHI8x16 7513 : Iop_InterleaveLO8x16, 7514 mkexpr(src), 7515 mkexpr(src)), 7516 mkU8(8))); 7517 break; 7518 default: 7519 vassert(0); 7520 } 7521 return res; 7522 } 7523 7524 7525 /* Generate IR that takes a V128 and sign- or zero-widens 7526 either the even or odd lanes to twice-as-wide, 7527 resulting in a new V128 value. */ 7528 static 7529 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd, 7530 UInt sizeNarrow, IRExpr* srcE ) 7531 { 7532 IRTemp src = newTempV128(); 7533 IRTemp res = newTempV128(); 7534 IROp opSAR = mkVecSARN(sizeNarrow+1); 7535 IROp opSHR = mkVecSHRN(sizeNarrow+1); 7536 IROp opSHL = mkVecSHLN(sizeNarrow+1); 7537 IROp opSxR = zWiden ? 
opSHR : opSAR; 7538 UInt amt = 0; 7539 switch (sizeNarrow) { 7540 case X10: amt = 32; break; 7541 case X01: amt = 16; break; 7542 case X00: amt = 8; break; 7543 default: vassert(0); 7544 } 7545 assign(src, srcE); 7546 if (fromOdd) { 7547 assign(res, binop(opSxR, mkexpr(src), mkU8(amt))); 7548 } else { 7549 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)), 7550 mkU8(amt))); 7551 } 7552 return res; 7553 } 7554 7555 7556 /* Generate IR that takes two V128s and narrows (takes lower half) 7557 of each lane, producing a single V128 value. */ 7558 static 7559 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow ) 7560 { 7561 IRTemp res = newTempV128(); 7562 assign(res, binop(mkVecCATEVENLANES(sizeNarrow), 7563 mkexpr(argHi), mkexpr(argLo))); 7564 return res; 7565 } 7566 7567 7568 /* Return a temp which holds the vector dup of the lane of width 7569 (1 << size) obtained from src[laneNo]. */ 7570 static 7571 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo ) 7572 { 7573 vassert(size <= 3); 7574 /* Normalise |laneNo| so it is of the form 7575 x000 for D, xx00 for S, xxx0 for H, and xxxx for B. 7576 This puts the bits we want to inspect at constant offsets 7577 regardless of the value of |size|. 7578 */ 7579 UInt ix = laneNo << size; 7580 vassert(ix <= 15); 7581 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 7582 switch (size) { 7583 case 0: /* B */ 7584 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16; 7585 /* fallthrough */ 7586 case 1: /* H */ 7587 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8; 7588 /* fallthrough */ 7589 case 2: /* S */ 7590 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4; 7591 /* fallthrough */ 7592 case 3: /* D */ 7593 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2; 7594 break; 7595 default: 7596 vassert(0); 7597 } 7598 IRTemp res = newTempV128(); 7599 assign(res, src); 7600 Int i; 7601 for (i = 3; i >= 0; i--) { 7602 if (ops[i] == Iop_INVALID) 7603 break; 7604 IRTemp tmp = newTempV128(); 7605 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res))); 7606 res = tmp; 7607 } 7608 return res; 7609 } 7610 7611 7612 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size 7613 selector encoded as shown below. Return a new V128 holding the 7614 selected lane from |srcV| dup'd out to V128, and also return the 7615 lane number, log2 of the lane size in bytes, and width-character via 7616 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5 7617 is an invalid selector, in which case return 7618 IRTemp_INVALID, 0, 0 and '?' respectively. 
7619 7620 imm5 = xxxx1 signifies .b[xxxx] 7621 = xxx10 .h[xxx] 7622 = xx100 .s[xx] 7623 = x1000 .d[x] 7624 otherwise invalid 7625 */ 7626 static 7627 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo, 7628 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh, 7629 IRExpr* srcV, UInt imm5 ) 7630 { 7631 *laneNo = 0; 7632 *laneSzLg2 = 0; 7633 *laneCh = '?'; 7634 7635 if (imm5 & 1) { 7636 *laneNo = (imm5 >> 1) & 15; 7637 *laneSzLg2 = 0; 7638 *laneCh = 'b'; 7639 } 7640 else if (imm5 & 2) { 7641 *laneNo = (imm5 >> 2) & 7; 7642 *laneSzLg2 = 1; 7643 *laneCh = 'h'; 7644 } 7645 else if (imm5 & 4) { 7646 *laneNo = (imm5 >> 3) & 3; 7647 *laneSzLg2 = 2; 7648 *laneCh = 's'; 7649 } 7650 else if (imm5 & 8) { 7651 *laneNo = (imm5 >> 4) & 1; 7652 *laneSzLg2 = 3; 7653 *laneCh = 'd'; 7654 } 7655 else { 7656 /* invalid */ 7657 return IRTemp_INVALID; 7658 } 7659 7660 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo); 7661 } 7662 7663 7664 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */ 7665 static 7666 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm ) 7667 { 7668 IRType ty = Ity_INVALID; 7669 IRTemp rcS = IRTemp_INVALID; 7670 switch (size) { 7671 case X01: 7672 vassert(imm <= 0xFFFFULL); 7673 ty = Ity_I16; 7674 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm )); 7675 break; 7676 case X10: 7677 vassert(imm <= 0xFFFFFFFFULL); 7678 ty = Ity_I32; 7679 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm )); 7680 break; 7681 case X11: 7682 ty = Ity_I64; 7683 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break; 7684 default: 7685 vassert(0); 7686 } 7687 IRTemp rcV = math_DUP_TO_V128(rcS, ty); 7688 return rcV; 7689 } 7690 7691 7692 /* Let |new64| be a V128 in which only the lower 64 bits are interesting, 7693 and the upper can contain any value -- it is ignored. If |is2| is False, 7694 generate IR to put |new64| in the lower half of vector reg |dd| and zero 7695 the upper half. If |is2| is True, generate IR to put |new64| in the upper 7696 half of vector reg |dd| and leave the lower half unchanged. This 7697 simulates the behaviour of the "foo/foo2" instructions in which the 7698 destination is half the width of sources, for example addhn/addhn2. 7699 */ 7700 static 7701 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 ) 7702 { 7703 if (is2) { 7704 /* Get the old contents of Vdd, zero the upper half, and replace 7705 it with 'x'. */ 7706 IRTemp t_zero_oldLO = newTempV128(); 7707 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd))); 7708 IRTemp t_newHI_zero = newTempV128(); 7709 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64), 7710 mkV128(0x0000))); 7711 IRTemp res = newTempV128(); 7712 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO), 7713 mkexpr(t_newHI_zero))); 7714 putQReg128(dd, mkexpr(res)); 7715 } else { 7716 /* This is simple. */ 7717 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64))); 7718 } 7719 } 7720 7721 7722 /* Compute vector SQABS at lane size |size| for |srcE|, returning 7723 the q result in |*qabs| and the normal result in |*nabs|. 
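   Both are computed by selecting, per lane, between the source value and its
   negation (plain negation for *nabs, saturating negation for *qabs) under a
   lane-is-negative mask; the two can differ only for the most negative
   representable value, where the saturating negation clips to the largest
   positive value instead of wrapping.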
*/ 7724 static 7725 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs, 7726 IRExpr* srcE, UInt size ) 7727 { 7728 IRTemp src, mask, maskn, nsub, qsub; 7729 src = mask = maskn = nsub = qsub = IRTemp_INVALID; 7730 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs); 7731 assign(src, srcE); 7732 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src))); 7733 assign(maskn, unop(Iop_NotV128, mkexpr(mask))); 7734 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src))); 7735 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src))); 7736 assign(*nabs, binop(Iop_OrV128, 7737 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)), 7738 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn)))); 7739 assign(*qabs, binop(Iop_OrV128, 7740 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)), 7741 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn)))); 7742 } 7743 7744 7745 /* Compute vector SQNEG at lane size |size| for |srcE|, returning 7746 the q result in |*qneg| and the normal result in |*nneg|. */ 7747 static 7748 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg, 7749 IRExpr* srcE, UInt size ) 7750 { 7751 IRTemp src = IRTemp_INVALID; 7752 newTempsV128_3(&src, nneg, qneg); 7753 assign(src, srcE); 7754 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src))); 7755 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src))); 7756 } 7757 7758 7759 /* Zero all except the least significant lane of |srcE|, where |size| 7760 indicates the lane size in the usual way. */ 7761 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE ) 7762 { 7763 vassert(size < 4); 7764 IRTemp t = newTempV128(); 7765 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE)); 7766 return t; 7767 } 7768 7769 7770 /* Generate IR to compute vector widening MULL from either the lower 7771 (is2==False) or upper (is2==True) halves of vecN and vecM. The 7772 widening multiplies are unsigned when isU==True and signed when 7773 isU==False. |size| is the narrow lane size indication. Optionally, 7774 the product may be added to or subtracted from vecD, at the wide lane 7775 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas| 7776 is 'm' (only multiply) then the accumulate part does not happen, and 7777 |vecD| is expected to == IRTemp_INVALID. 7778 7779 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants 7780 are allowed. The result is returned in a new IRTemp, which is 7781 returned in *res. */ 7782 static 7783 void math_MULL_ACC ( /*OUT*/IRTemp* res, 7784 Bool is2, Bool isU, UInt size, HChar mas, 7785 IRTemp vecN, IRTemp vecM, IRTemp vecD ) 7786 { 7787 vassert(res && *res == IRTemp_INVALID); 7788 vassert(size <= 2); 7789 vassert(mas == 'm' || mas == 'a' || mas == 's'); 7790 if (mas == 'm') vassert(vecD == IRTemp_INVALID); 7791 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size); 7792 IROp accOp = (mas == 'a') ? mkVecADD(size+1) 7793 : (mas == 's' ? mkVecSUB(size+1) 7794 : Iop_INVALID); 7795 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp, 7796 mkexpr(vecN), mkexpr(vecM)); 7797 *res = newTempV128(); 7798 assign(*res, mas == 'm' ? mkexpr(mul) 7799 : binop(accOp, mkexpr(vecD), mkexpr(mul))); 7800 } 7801 7802 7803 /* Same as math_MULL_ACC, except the multiply is signed widening, 7804 the multiplied value is then doubled, before being added to or 7805 subtracted from the accumulated value. And everything is 7806 saturated. In all cases, saturation residuals are returned 7807 via (sat1q, sat1n), and in the accumulate cases, 7808 via (sat2q, sat2n) too. 
All results are returned in new temporaries. 7809 In the no-accumulate case, *sat2q and *sat2n are never instantiated, 7810 so the caller can tell this has happened. */ 7811 static 7812 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res, 7813 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n, 7814 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n, 7815 Bool is2, UInt size, HChar mas, 7816 IRTemp vecN, IRTemp vecM, IRTemp vecD ) 7817 { 7818 vassert(size <= 2); 7819 vassert(mas == 'm' || mas == 'a' || mas == 's'); 7820 /* Compute 7821 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2 7822 sat1n = vecN.D[is2] *s vecM.d[is2] * 2 7823 IOW take either the low or high halves of vecN and vecM, signed widen, 7824 multiply, double that, and signedly saturate. Also compute the same 7825 but without saturation. 7826 */ 7827 vassert(sat2q && *sat2q == IRTemp_INVALID); 7828 vassert(sat2n && *sat2n == IRTemp_INVALID); 7829 newTempsV128_3(sat1q, sat1n, res); 7830 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size), 7831 mkexpr(vecN), mkexpr(vecM)); 7832 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size), 7833 mkexpr(vecN), mkexpr(vecM)); 7834 assign(*sat1q, mkexpr(tq)); 7835 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn))); 7836 7837 /* If there is no accumulation, the final result is sat1q, 7838 and there's no assignment to sat2q or sat2n. */ 7839 if (mas == 'm') { 7840 assign(*res, mkexpr(*sat1q)); 7841 return; 7842 } 7843 7844 /* Compute 7845 sat2q = vecD +sq/-sq sat1q 7846 sat2n = vecD +/- sat1n 7847 result = sat2q 7848 */ 7849 newTempsV128_2(sat2q, sat2n); 7850 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1), 7851 mkexpr(vecD), mkexpr(*sat1q))); 7852 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1), 7853 mkexpr(vecD), mkexpr(*sat1n))); 7854 assign(*res, mkexpr(*sat2q)); 7855 } 7856 7857 7858 /* Generate IR for widening signed vector multiplies. The operands 7859 have their lane width signedly widened, and they are then multiplied 7860 at the wider width, returning results in two new IRTemps. */ 7861 static 7862 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO, 7863 UInt sizeNarrow, IRTemp argL, IRTemp argR ) 7864 { 7865 vassert(sizeNarrow <= 2); 7866 newTempsV128_2(resHI, resLO); 7867 IRTemp argLhi = newTemp(Ity_I64); 7868 IRTemp argLlo = newTemp(Ity_I64); 7869 IRTemp argRhi = newTemp(Ity_I64); 7870 IRTemp argRlo = newTemp(Ity_I64); 7871 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL))); 7872 assign(argLlo, unop(Iop_V128to64, mkexpr(argL))); 7873 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR))); 7874 assign(argRlo, unop(Iop_V128to64, mkexpr(argR))); 7875 IROp opMulls = mkVecMULLS(sizeNarrow); 7876 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi))); 7877 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo))); 7878 } 7879 7880 7881 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply, 7882 double that, possibly add a rounding constant (R variants), and take 7883 the high half. 
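   In other words, for lane width W each result lane is the top W bits of
   (2*n*m + rc) computed at width 2*W, where rc is 1 << (W-1) for the
   rounding (R) variant and 0 otherwise. Both a saturating (sat1q) and a
   non-saturating (sat1n) rendering of this are returned, so the caller can
   compare them when updating QCFLAG.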
*/ 7884 static 7885 void math_SQDMULH ( /*OUT*/IRTemp* res, 7886 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n, 7887 Bool isR, UInt size, IRTemp vN, IRTemp vM ) 7888 { 7889 vassert(size == X01 || size == X10); /* s or h only */ 7890 7891 newTempsV128_3(res, sat1q, sat1n); 7892 7893 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID; 7894 math_MULLS(&mullsHI, &mullsLO, size, vN, vM); 7895 7896 IRTemp addWide = mkVecADD(size+1); 7897 7898 if (isR) { 7899 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7900 7901 Int rcShift = size == X01 ? 15 : 31; 7902 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift); 7903 assign(*sat1n, 7904 binop(mkVecCATODDLANES(size), 7905 binop(addWide, 7906 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7907 mkexpr(roundConst)), 7908 binop(addWide, 7909 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)), 7910 mkexpr(roundConst)))); 7911 } else { 7912 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7913 7914 assign(*sat1n, 7915 binop(mkVecCATODDLANES(size), 7916 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7917 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)))); 7918 } 7919 7920 assign(*res, mkexpr(*sat1q)); 7921 } 7922 7923 7924 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in 7925 a new temp in *res, and the Q difference pair in new temps in 7926 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the 7927 three operations it is. */ 7928 static 7929 void math_QSHL_IMM ( /*OUT*/IRTemp* res, 7930 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2, 7931 IRTemp src, UInt size, UInt shift, const HChar* nm ) 7932 { 7933 vassert(size <= 3); 7934 UInt laneBits = 8 << size; 7935 vassert(shift < laneBits); 7936 newTempsV128_3(res, qDiff1, qDiff2); 7937 IRTemp z128 = newTempV128(); 7938 assign(z128, mkV128(0x0000)); 7939 7940 /* UQSHL */ 7941 if (vex_streq(nm, "uqshl")) { 7942 IROp qop = mkVecQSHLNSATUU(size); 7943 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7944 if (shift == 0) { 7945 /* No shift means no saturation. */ 7946 assign(*qDiff1, mkexpr(z128)); 7947 assign(*qDiff2, mkexpr(z128)); 7948 } else { 7949 /* Saturation has occurred if any of the shifted-out bits are 7950 nonzero. We get the shifted-out bits by right-shifting the 7951 original value. */ 7952 UInt rshift = laneBits - shift; 7953 vassert(rshift >= 1 && rshift < laneBits); 7954 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 7955 assign(*qDiff2, mkexpr(z128)); 7956 } 7957 return; 7958 } 7959 7960 /* SQSHL */ 7961 if (vex_streq(nm, "sqshl")) { 7962 IROp qop = mkVecQSHLNSATSS(size); 7963 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7964 if (shift == 0) { 7965 /* No shift means no saturation. */ 7966 assign(*qDiff1, mkexpr(z128)); 7967 assign(*qDiff2, mkexpr(z128)); 7968 } else { 7969 /* Saturation has occurred if any of the shifted-out bits are 7970 different from the top bit of the original value. */ 7971 UInt rshift = laneBits - 1 - shift; 7972 vassert(rshift >= 0 && rshift < laneBits-1); 7973 /* qDiff1 is the shifted out bits, and the top bit of the original 7974 value, preceded by zeroes. */ 7975 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 7976 /* qDiff2 is the top bit of the original value, cloned the 7977 correct number of times. 
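            For example, with 8-bit lanes and shift == 3, rshift is 4:
            qDiff1 holds bits 7..4 of each lane and qDiff2 holds four copies
            of bit 7, so the two are equal exactly when the top four bits of
            the lane all match the sign bit, that is, when shifting left by
            3 cannot overflow.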
*/ 7978 assign(*qDiff2, binop(mkVecSHRN(size), 7979 binop(mkVecSARN(size), mkexpr(src), 7980 mkU8(laneBits-1)), 7981 mkU8(rshift))); 7982 /* This also succeeds in comparing the top bit of the original 7983 value to itself, which is a bit stupid, but not wrong. */ 7984 } 7985 return; 7986 } 7987 7988 /* SQSHLU */ 7989 if (vex_streq(nm, "sqshlu")) { 7990 IROp qop = mkVecQSHLNSATSU(size); 7991 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7992 if (shift == 0) { 7993 /* If there's no shift, saturation depends on the top bit 7994 of the source. */ 7995 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1))); 7996 assign(*qDiff2, mkexpr(z128)); 7997 } else { 7998 /* Saturation has occurred if any of the shifted-out bits are 7999 nonzero. We get the shifted-out bits by right-shifting the 8000 original value. */ 8001 UInt rshift = laneBits - shift; 8002 vassert(rshift >= 1 && rshift < laneBits); 8003 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 8004 assign(*qDiff2, mkexpr(z128)); 8005 } 8006 return; 8007 } 8008 8009 vassert(0); 8010 } 8011 8012 8013 /* Generate IR to do SRHADD and URHADD. */ 8014 static 8015 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb ) 8016 { 8017 /* Generate this: 8018 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) 8019 */ 8020 vassert(size <= 3); 8021 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size); 8022 IROp opADD = mkVecADD(size); 8023 /* The only tricky bit is to generate the correct vector 1 constant. */ 8024 const ULong ones64[4] 8025 = { 0x0101010101010101ULL, 0x0001000100010001ULL, 8026 0x0000000100000001ULL, 0x0000000000000001ULL }; 8027 IRTemp imm64 = newTemp(Ity_I64); 8028 assign(imm64, mkU64(ones64[size])); 8029 IRTemp vecOne = newTempV128(); 8030 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64))); 8031 IRTemp scaOne = newTemp(Ity_I8); 8032 assign(scaOne, mkU8(1)); 8033 IRTemp res = newTempV128(); 8034 assign(res, 8035 binop(opADD, 8036 binop(opSHR, mkexpr(aa), mkexpr(scaOne)), 8037 binop(opADD, 8038 binop(opSHR, mkexpr(bb), mkexpr(scaOne)), 8039 binop(opSHR, 8040 binop(opADD, 8041 binop(opADD, 8042 binop(Iop_AndV128, mkexpr(aa), 8043 mkexpr(vecOne)), 8044 binop(Iop_AndV128, mkexpr(bb), 8045 mkexpr(vecOne)) 8046 ), 8047 mkexpr(vecOne) 8048 ), 8049 mkexpr(scaOne) 8050 ) 8051 ) 8052 ) 8053 ); 8054 return res; 8055 } 8056 8057 8058 /* QCFLAG tracks the SIMD sticky saturation status. Update the status 8059 thusly: if, after application of |opZHI| to both |qres| and |nres|, 8060 they have the same value, leave QCFLAG unchanged. Otherwise, set it 8061 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128 8062 operators, or Iop_INVALID, in which case |qres| and |nres| are used 8063 unmodified. The presence |opZHI| means this function can be used to 8064 generate QCFLAG update code for both scalar and vector SIMD operations. 
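   For a scalar operation whose meaningful result lives only in the low lane,
   passing e.g. Iop_ZeroHI64ofV128 restricts the comparison to that lane, so
   junk in the unused upper parts of |qres| and |nres| cannot cause a
   spurious QCFLAG update. For whole-vector operations pass Iop_INVALID, or
   use updateQCFLAGwithDifference just below, and the values are compared in
   full.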
8065 */ 8066 static 8067 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI ) 8068 { 8069 IRTemp diff = newTempV128(); 8070 IRTemp oldQCFLAG = newTempV128(); 8071 IRTemp newQCFLAG = newTempV128(); 8072 if (opZHI == Iop_INVALID) { 8073 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))); 8074 } else { 8075 vassert(opZHI == Iop_ZeroHI64ofV128 8076 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128); 8077 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)))); 8078 } 8079 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128)); 8080 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff))); 8081 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG))); 8082 } 8083 8084 8085 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres| 8086 are used unmodified, hence suitable for QCFLAG updates for whole-vector 8087 operations. */ 8088 static 8089 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres ) 8090 { 8091 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID); 8092 } 8093 8094 8095 /* Generate IR to rearrange two vector values in a way which is useful 8096 for doing S/D add-pair etc operations. There are 3 cases: 8097 8098 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0] 8099 8100 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0] 8101 8102 2s: [m2 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0] 8103 8104 The cases are distinguished as follows: 8105 isD == True, bitQ == 1 => 2d 8106 isD == False, bitQ == 1 => 4s 8107 isD == False, bitQ == 0 => 2s 8108 */ 8109 static 8110 void math_REARRANGE_FOR_FLOATING_PAIRWISE ( 8111 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR, 8112 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ 8113 ) 8114 { 8115 vassert(rearrL && *rearrL == IRTemp_INVALID); 8116 vassert(rearrR && *rearrR == IRTemp_INVALID); 8117 *rearrL = newTempV128(); 8118 *rearrR = newTempV128(); 8119 if (isD) { 8120 // 2d case 8121 vassert(bitQ == 1); 8122 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN))); 8123 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN))); 8124 } 8125 else if (!isD && bitQ == 1) { 8126 // 4s case 8127 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN))); 8128 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN))); 8129 } else { 8130 // 2s case 8131 vassert(!isD && bitQ == 0); 8132 IRTemp m1n1m0n0 = newTempV128(); 8133 IRTemp m0n0m1n1 = newTempV128(); 8134 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4, 8135 mkexpr(vecM), mkexpr(vecN))); 8136 assign(m0n0m1n1, triop(Iop_SliceV128, 8137 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8))); 8138 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0))); 8139 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1))); 8140 } 8141 } 8142 8143 8144 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */ 8145 static Double two_to_the_minus ( Int n ) 8146 { 8147 if (n == 1) return 0.5; 8148 vassert(n >= 2 && n <= 64); 8149 Int half = n / 2; 8150 return two_to_the_minus(half) * two_to_the_minus(n - half); 8151 } 8152 8153 8154 /* Returns 2.0 ^ n for n in 1 .. 
64 */ 8155 static Double two_to_the_plus ( Int n ) 8156 { 8157 if (n == 1) return 2.0; 8158 vassert(n >= 2 && n <= 64); 8159 Int half = n / 2; 8160 return two_to_the_plus(half) * two_to_the_plus(n - half); 8161 } 8162 8163 8164 /*------------------------------------------------------------*/ 8165 /*--- SIMD and FP instructions ---*/ 8166 /*------------------------------------------------------------*/ 8167 8168 static 8169 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn) 8170 { 8171 /* 31 29 23 21 20 15 14 10 9 4 8172 0 q 101110 op2 0 m 0 imm4 0 n d 8173 Decode fields: op2 8174 */ 8175 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8176 if (INSN(31,31) != 0 8177 || INSN(29,24) != BITS6(1,0,1,1,1,0) 8178 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) { 8179 return False; 8180 } 8181 UInt bitQ = INSN(30,30); 8182 UInt op2 = INSN(23,22); 8183 UInt mm = INSN(20,16); 8184 UInt imm4 = INSN(14,11); 8185 UInt nn = INSN(9,5); 8186 UInt dd = INSN(4,0); 8187 8188 if (op2 == BITS2(0,0)) { 8189 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */ 8190 IRTemp sHi = newTempV128(); 8191 IRTemp sLo = newTempV128(); 8192 IRTemp res = newTempV128(); 8193 assign(sHi, getQReg128(mm)); 8194 assign(sLo, getQReg128(nn)); 8195 if (bitQ == 1) { 8196 if (imm4 == 0) { 8197 assign(res, mkexpr(sLo)); 8198 } else { 8199 vassert(imm4 >= 1 && imm4 <= 15); 8200 assign(res, triop(Iop_SliceV128, 8201 mkexpr(sHi), mkexpr(sLo), mkU8(imm4))); 8202 } 8203 putQReg128(dd, mkexpr(res)); 8204 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4); 8205 } else { 8206 if (imm4 >= 8) return False; 8207 if (imm4 == 0) { 8208 assign(res, mkexpr(sLo)); 8209 } else { 8210 vassert(imm4 >= 1 && imm4 <= 7); 8211 IRTemp hi64lo64 = newTempV128(); 8212 assign(hi64lo64, binop(Iop_InterleaveLO64x2, 8213 mkexpr(sHi), mkexpr(sLo))); 8214 assign(res, triop(Iop_SliceV128, 8215 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4))); 8216 } 8217 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 8218 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4); 8219 } 8220 return True; 8221 } 8222 8223 return False; 8224 # undef INSN 8225 } 8226 8227 8228 static 8229 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn) 8230 { 8231 /* 31 29 23 21 20 15 14 12 11 9 4 8232 0 q 001110 op2 0 m 0 len op 00 n d 8233 Decode fields: op2,len,op 8234 */ 8235 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8236 if (INSN(31,31) != 0 8237 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8238 || INSN(21,21) != 0 8239 || INSN(15,15) != 0 8240 || INSN(11,10) != BITS2(0,0)) { 8241 return False; 8242 } 8243 UInt bitQ = INSN(30,30); 8244 UInt op2 = INSN(23,22); 8245 UInt mm = INSN(20,16); 8246 UInt len = INSN(14,13); 8247 UInt bitOP = INSN(12,12); 8248 UInt nn = INSN(9,5); 8249 UInt dd = INSN(4,0); 8250 8251 if (op2 == X00) { 8252 /* -------- 00,xx,0 TBL, xx register table -------- */ 8253 /* -------- 00,xx,1 TBX, xx register table -------- */ 8254 /* 31 28 20 15 14 12 9 4 8255 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8256 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8257 where Ta = 16b(q=1) or 8b(q=0) 8258 */ 8259 Bool isTBX = bitOP == 1; 8260 /* The out-of-range values to use. */ 8261 IRTemp oor_values = newTempV128(); 8262 assign(oor_values, isTBX ? 
getQReg128(dd) : mkV128(0)); 8263 /* src value */ 8264 IRTemp src = newTempV128(); 8265 assign(src, getQReg128(mm)); 8266 /* The table values */ 8267 IRTemp tab[4]; 8268 UInt i; 8269 for (i = 0; i <= len; i++) { 8270 vassert(i < 4); 8271 tab[i] = newTempV128(); 8272 assign(tab[i], getQReg128((nn + i) % 32)); 8273 } 8274 IRTemp res = math_TBL_TBX(tab, len, src, oor_values); 8275 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8276 const HChar* Ta = bitQ ==1 ? "16b" : "8b"; 8277 const HChar* nm = isTBX ? "tbx" : "tbl"; 8278 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n", 8279 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); 8280 return True; 8281 } 8282 8283 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8284 return False; 8285 # undef INSN 8286 } 8287 8288 8289 static 8290 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn) 8291 { 8292 /* 31 29 23 21 20 15 14 11 9 4 8293 0 q 001110 size 0 m 0 opcode 10 n d 8294 Decode fields: opcode 8295 */ 8296 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8297 if (INSN(31,31) != 0 8298 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8299 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) { 8300 return False; 8301 } 8302 UInt bitQ = INSN(30,30); 8303 UInt size = INSN(23,22); 8304 UInt mm = INSN(20,16); 8305 UInt opcode = INSN(14,12); 8306 UInt nn = INSN(9,5); 8307 UInt dd = INSN(4,0); 8308 8309 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) { 8310 /* -------- 001 UZP1 std7_std7_std7 -------- */ 8311 /* -------- 101 UZP2 std7_std7_std7 -------- */ 8312 if (bitQ == 0 && size == X11) return False; // implied 1d case 8313 Bool isUZP1 = opcode == BITS3(0,0,1); 8314 IROp op = isUZP1 ? mkVecCATEVENLANES(size) 8315 : mkVecCATODDLANES(size); 8316 IRTemp preL = newTempV128(); 8317 IRTemp preR = newTempV128(); 8318 IRTemp res = newTempV128(); 8319 if (bitQ == 0) { 8320 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm), 8321 getQReg128(nn))); 8322 assign(preR, mkexpr(preL)); 8323 } else { 8324 assign(preL, getQReg128(mm)); 8325 assign(preR, getQReg128(nn)); 8326 } 8327 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8328 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8329 const HChar* nm = isUZP1 ? "uzp1" : "uzp2"; 8330 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8331 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8332 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8333 return True; 8334 } 8335 8336 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) { 8337 /* -------- 010 TRN1 std7_std7_std7 -------- */ 8338 /* -------- 110 TRN2 std7_std7_std7 -------- */ 8339 if (bitQ == 0 && size == X11) return False; // implied 1d case 8340 Bool isTRN1 = opcode == BITS3(0,1,0); 8341 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size) 8342 : mkVecCATODDLANES(size); 8343 IROp op2 = mkVecINTERLEAVEHI(size); 8344 IRTemp srcM = newTempV128(); 8345 IRTemp srcN = newTempV128(); 8346 IRTemp res = newTempV128(); 8347 assign(srcM, getQReg128(mm)); 8348 assign(srcN, getQReg128(nn)); 8349 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)), 8350 binop(op1, mkexpr(srcN), mkexpr(srcN)))); 8351 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8352 const HChar* nm = isTRN1 ? 
"trn1" : "trn2"; 8353 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8354 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8355 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8356 return True; 8357 } 8358 8359 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) { 8360 /* -------- 011 ZIP1 std7_std7_std7 -------- */ 8361 /* -------- 111 ZIP2 std7_std7_std7 -------- */ 8362 if (bitQ == 0 && size == X11) return False; // implied 1d case 8363 Bool isZIP1 = opcode == BITS3(0,1,1); 8364 IROp op = isZIP1 ? mkVecINTERLEAVELO(size) 8365 : mkVecINTERLEAVEHI(size); 8366 IRTemp preL = newTempV128(); 8367 IRTemp preR = newTempV128(); 8368 IRTemp res = newTempV128(); 8369 if (bitQ == 0 && !isZIP1) { 8370 IRTemp z128 = newTempV128(); 8371 assign(z128, mkV128(0x0000)); 8372 // preL = Vm shifted left 32 bits 8373 // preR = Vn shifted left 32 bits 8374 assign(preL, triop(Iop_SliceV128, 8375 getQReg128(mm), mkexpr(z128), mkU8(12))); 8376 assign(preR, triop(Iop_SliceV128, 8377 getQReg128(nn), mkexpr(z128), mkU8(12))); 8378 8379 } else { 8380 assign(preL, getQReg128(mm)); 8381 assign(preR, getQReg128(nn)); 8382 } 8383 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8384 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8385 const HChar* nm = isZIP1 ? "zip1" : "zip2"; 8386 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8387 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8388 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8389 return True; 8390 } 8391 8392 return False; 8393 # undef INSN 8394 } 8395 8396 8397 static 8398 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn) 8399 { 8400 /* 31 28 23 21 16 11 9 4 8401 0 q u 01110 size 11000 opcode 10 n d 8402 Decode fields: u,size,opcode 8403 */ 8404 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8405 if (INSN(31,31) != 0 8406 || INSN(28,24) != BITS5(0,1,1,1,0) 8407 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) { 8408 return False; 8409 } 8410 UInt bitQ = INSN(30,30); 8411 UInt bitU = INSN(29,29); 8412 UInt size = INSN(23,22); 8413 UInt opcode = INSN(16,12); 8414 UInt nn = INSN(9,5); 8415 UInt dd = INSN(4,0); 8416 8417 if (opcode == BITS5(0,0,0,1,1)) { 8418 /* -------- 0,xx,00011 SADDLV -------- */ 8419 /* -------- 1,xx,00011 UADDLV -------- */ 8420 /* size is the narrow size */ 8421 if (size == X11 || (size == X10 && bitQ == 0)) return False; 8422 Bool isU = bitU == 1; 8423 IRTemp src = newTempV128(); 8424 assign(src, getQReg128(nn)); 8425 /* The basic plan is to widen the lower half, and if Q = 1, 8426 the upper half too. Add them together (if Q = 1), and in 8427 either case fold with add at twice the lane width. 8428 */ 8429 IRExpr* widened 8430 = mkexpr(math_WIDEN_LO_OR_HI_LANES( 8431 isU, False/*!fromUpperHalf*/, size, mkexpr(src))); 8432 if (bitQ == 1) { 8433 widened 8434 = binop(mkVecADD(size+1), 8435 widened, 8436 mkexpr(math_WIDEN_LO_OR_HI_LANES( 8437 isU, True/*fromUpperHalf*/, size, mkexpr(src))) 8438 ); 8439 } 8440 /* Now fold. */ 8441 IRTemp tWi = newTempV128(); 8442 assign(tWi, widened); 8443 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1)); 8444 putQReg128(dd, mkexpr(res)); 8445 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8446 const HChar ch = "bhsd"[size]; 8447 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv", 8448 nameQReg128(dd), ch, nameQReg128(nn), arr); 8449 return True; 8450 } 8451 8452 UInt ix = 0; 8453 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; } 8454 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 
3 : 4; } 8455 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; } 8456 /**/ 8457 if (ix != 0) { 8458 /* -------- 0,xx,01010: SMAXV -------- (1) */ 8459 /* -------- 1,xx,01010: UMAXV -------- (2) */ 8460 /* -------- 0,xx,11010: SMINV -------- (3) */ 8461 /* -------- 1,xx,11010: UMINV -------- (4) */ 8462 /* -------- 0,xx,11011: ADDV -------- (5) */ 8463 vassert(ix >= 1 && ix <= 5); 8464 if (size == X11) return False; // 1d,2d cases not allowed 8465 if (size == X10 && bitQ == 0) return False; // 2s case not allowed 8466 const IROp opMAXS[3] 8467 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; 8468 const IROp opMAXU[3] 8469 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; 8470 const IROp opMINS[3] 8471 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; 8472 const IROp opMINU[3] 8473 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 }; 8474 const IROp opADD[3] 8475 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 }; 8476 vassert(size < 3); 8477 IROp op = Iop_INVALID; 8478 const HChar* nm = NULL; 8479 switch (ix) { 8480 case 1: op = opMAXS[size]; nm = "smaxv"; break; 8481 case 2: op = opMAXU[size]; nm = "umaxv"; break; 8482 case 3: op = opMINS[size]; nm = "sminv"; break; 8483 case 4: op = opMINU[size]; nm = "uminv"; break; 8484 case 5: op = opADD[size]; nm = "addv"; break; 8485 default: vassert(0); 8486 } 8487 vassert(op != Iop_INVALID && nm != NULL); 8488 IRTemp tN1 = newTempV128(); 8489 assign(tN1, getQReg128(nn)); 8490 /* If Q == 0, we're just folding lanes in the lower half of 8491 the value. In which case, copy the lower half of the 8492 source into the upper half, so we can then treat it the 8493 same as the full width case. Except for the addition case, 8494 in which we have to zero out the upper half. */ 8495 IRTemp tN2 = newTempV128(); 8496 assign(tN2, bitQ == 0 8497 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1)) 8498 : mk_CatEvenLanes64x2(tN1,tN1)) 8499 : mkexpr(tN1)); 8500 IRTemp res = math_FOLDV(tN2, op); 8501 if (res == IRTemp_INVALID) 8502 return False; /* means math_FOLDV 8503 doesn't handle this case yet */ 8504 putQReg128(dd, mkexpr(res)); 8505 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 }; 8506 IRType laneTy = tys[size]; 8507 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8508 DIP("%s %s, %s.%s\n", nm, 8509 nameQRegLO(dd, laneTy), nameQReg128(nn), arr); 8510 return True; 8511 } 8512 8513 if ((size == X00 || size == X10) 8514 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) { 8515 /* -------- 0,00,01100: FMAXMNV s_4s -------- */ 8516 /* -------- 0,10,01100: FMINMNV s_4s -------- */ 8517 /* -------- 1,00,01111: FMAXV s_4s -------- */ 8518 /* -------- 1,10,01111: FMINV s_4s -------- */ 8519 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 8520 if (bitQ == 0) return False; // Only 4s is allowed 8521 Bool isMIN = (size & 2) == 2; 8522 Bool isNM = opcode == BITS5(0,1,1,0,0); 8523 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2); 8524 IRTemp src = newTempV128(); 8525 assign(src, getQReg128(nn)); 8526 IRTemp res = math_FOLDV(src, opMXX); 8527 putQReg128(dd, mkexpr(res)); 8528 DIP("%s%sv s%u, %u.4s\n", 8529 isMIN ? "fmin" : "fmax", isNM ? 
"nm" : "", dd, nn); 8530 return True; 8531 } 8532 8533 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8534 return False; 8535 # undef INSN 8536 } 8537 8538 8539 static 8540 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn) 8541 { 8542 /* 31 28 20 15 14 10 9 4 8543 0 q op 01110000 imm5 0 imm4 1 n d 8544 Decode fields: q,op,imm4 8545 */ 8546 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8547 if (INSN(31,31) != 0 8548 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0) 8549 || INSN(15,15) != 0 || INSN(10,10) != 1) { 8550 return False; 8551 } 8552 UInt bitQ = INSN(30,30); 8553 UInt bitOP = INSN(29,29); 8554 UInt imm5 = INSN(20,16); 8555 UInt imm4 = INSN(14,11); 8556 UInt nn = INSN(9,5); 8557 UInt dd = INSN(4,0); 8558 8559 /* -------- x,0,0000: DUP (element, vector) -------- */ 8560 /* 31 28 20 15 9 4 8561 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index] 8562 */ 8563 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) { 8564 UInt laneNo = 0; 8565 UInt laneSzLg2 = 0; 8566 HChar laneCh = '?'; 8567 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh, 8568 getQReg128(nn), imm5); 8569 if (res == IRTemp_INVALID) 8570 return False; 8571 if (bitQ == 0 && laneSzLg2 == X11) 8572 return False; /* .1d case */ 8573 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8574 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2); 8575 DIP("dup %s.%s, %s.%c[%u]\n", 8576 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo); 8577 return True; 8578 } 8579 8580 /* -------- x,0,0001: DUP (general, vector) -------- */ 8581 /* 31 28 20 15 9 4 8582 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn 8583 Q=0 writes 64, Q=1 writes 128 8584 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W 8585 xxx10 4H(q=0) or 8H(q=1), R=W 8586 xx100 2S(q=0) or 4S(q=1), R=W 8587 x1000 Invalid(q=0) or 2D(q=1), R=X 8588 x0000 Invalid(q=0) or Invalid(q=1) 8589 Require op=0, imm4=0001 8590 */ 8591 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) { 8592 Bool isQ = bitQ == 1; 8593 IRTemp w0 = newTemp(Ity_I64); 8594 const HChar* arT = "??"; 8595 IRType laneTy = Ity_INVALID; 8596 if (imm5 & 1) { 8597 arT = isQ ? "16b" : "8b"; 8598 laneTy = Ity_I8; 8599 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn)))); 8600 } 8601 else if (imm5 & 2) { 8602 arT = isQ ? "8h" : "4h"; 8603 laneTy = Ity_I16; 8604 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn)))); 8605 } 8606 else if (imm5 & 4) { 8607 arT = isQ ? "4s" : "2s"; 8608 laneTy = Ity_I32; 8609 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn)))); 8610 } 8611 else if ((imm5 & 8) && isQ) { 8612 arT = "2d"; 8613 laneTy = Ity_I64; 8614 assign(w0, getIReg64orZR(nn)); 8615 } 8616 else { 8617 /* invalid; leave laneTy unchanged. */ 8618 } 8619 /* */ 8620 if (laneTy != Ity_INVALID) { 8621 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 8622 putQReg128(dd, binop(Iop_64HLtoV128, 8623 isQ ? 
mkexpr(w1) : mkU64(0), mkexpr(w1))); 8624 DIP("dup %s.%s, %s\n", 8625 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn)); 8626 return True; 8627 } 8628 /* invalid */ 8629 return False; 8630 } 8631 8632 /* -------- 1,0,0011: INS (general) -------- */ 8633 /* 31 28 20 15 9 4 8634 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn 8635 where Ts,ix = case imm5 of xxxx1 -> B, xxxx 8636 xxx10 -> H, xxx 8637 xx100 -> S, xx 8638 x1000 -> D, x 8639 */ 8640 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) { 8641 HChar ts = '?'; 8642 UInt laneNo = 16; 8643 IRExpr* src = NULL; 8644 if (imm5 & 1) { 8645 src = unop(Iop_64to8, getIReg64orZR(nn)); 8646 laneNo = (imm5 >> 1) & 15; 8647 ts = 'b'; 8648 } 8649 else if (imm5 & 2) { 8650 src = unop(Iop_64to16, getIReg64orZR(nn)); 8651 laneNo = (imm5 >> 2) & 7; 8652 ts = 'h'; 8653 } 8654 else if (imm5 & 4) { 8655 src = unop(Iop_64to32, getIReg64orZR(nn)); 8656 laneNo = (imm5 >> 3) & 3; 8657 ts = 's'; 8658 } 8659 else if (imm5 & 8) { 8660 src = getIReg64orZR(nn); 8661 laneNo = (imm5 >> 4) & 1; 8662 ts = 'd'; 8663 } 8664 /* */ 8665 if (src) { 8666 vassert(laneNo < 16); 8667 putQRegLane(dd, laneNo, src); 8668 DIP("ins %s.%c[%u], %s\n", 8669 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn)); 8670 return True; 8671 } 8672 /* invalid */ 8673 return False; 8674 } 8675 8676 /* -------- x,0,0101: SMOV -------- */ 8677 /* -------- x,0,0111: UMOV -------- */ 8678 /* 31 28 20 15 9 4 8679 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index] 8680 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index] 8681 dest is Xd when q==1, Wd when q==0 8682 UMOV: 8683 Ts,index,ops = case q:imm5 of 8684 0:xxxx1 -> B, xxxx, 8Uto64 8685 1:xxxx1 -> invalid 8686 0:xxx10 -> H, xxx, 16Uto64 8687 1:xxx10 -> invalid 8688 0:xx100 -> S, xx, 32Uto64 8689 1:xx100 -> invalid 8690 1:x1000 -> D, x, copy64 8691 other -> invalid 8692 SMOV: 8693 Ts,index,ops = case q:imm5 of 8694 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32) 8695 1:xxxx1 -> B, xxxx, 8Sto64 8696 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32) 8697 1:xxx10 -> H, xxx, 16Sto64 8698 0:xx100 -> invalid 8699 1:xx100 -> S, xx, 32Sto64 8700 1:x1000 -> invalid 8701 other -> invalid 8702 */ 8703 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) { 8704 Bool isU = (imm4 & 2) == 2; 8705 const HChar* arTs = "??"; 8706 UInt laneNo = 16; /* invalid */ 8707 // Setting 'res' to non-NULL determines valid/invalid 8708 IRExpr* res = NULL; 8709 if (!bitQ && (imm5 & 1)) { // 0:xxxx1 8710 laneNo = (imm5 >> 1) & 15; 8711 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 8712 res = isU ? unop(Iop_8Uto64, lane) 8713 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane)); 8714 arTs = "b"; 8715 } 8716 else if (bitQ && (imm5 & 1)) { // 1:xxxx1 8717 laneNo = (imm5 >> 1) & 15; 8718 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 8719 res = isU ? NULL 8720 : unop(Iop_8Sto64, lane); 8721 arTs = "b"; 8722 } 8723 else if (!bitQ && (imm5 & 2)) { // 0:xxx10 8724 laneNo = (imm5 >> 2) & 7; 8725 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 8726 res = isU ? unop(Iop_16Uto64, lane) 8727 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane)); 8728 arTs = "h"; 8729 } 8730 else if (bitQ && (imm5 & 2)) { // 1:xxx10 8731 laneNo = (imm5 >> 2) & 7; 8732 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 8733 res = isU ? NULL 8734 : unop(Iop_16Sto64, lane); 8735 arTs = "h"; 8736 } 8737 else if (!bitQ && (imm5 & 4)) { // 0:xx100 8738 laneNo = (imm5 >> 3) & 3; 8739 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 8740 res = isU ? 
unop(Iop_32Uto64, lane) 8741 : NULL; 8742 arTs = "s"; 8743 } 8744 else if (bitQ && (imm5 & 4)) { // 1:xxx10 8745 laneNo = (imm5 >> 3) & 3; 8746 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 8747 res = isU ? NULL 8748 : unop(Iop_32Sto64, lane); 8749 arTs = "s"; 8750 } 8751 else if (bitQ && (imm5 & 8)) { // 1:x1000 8752 laneNo = (imm5 >> 4) & 1; 8753 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64); 8754 res = isU ? lane 8755 : NULL; 8756 arTs = "d"; 8757 } 8758 /* */ 8759 if (res) { 8760 vassert(laneNo < 16); 8761 putIReg64orZR(dd, res); 8762 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's', 8763 nameIRegOrZR(bitQ == 1, dd), 8764 nameQReg128(nn), arTs, laneNo); 8765 return True; 8766 } 8767 /* invalid */ 8768 return False; 8769 } 8770 8771 /* -------- 1,1,xxxx: INS (element) -------- */ 8772 /* 31 28 20 14 9 4 8773 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2] 8774 where Ts,ix1,ix2 8775 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0] 8776 xxx10 -> H, xxx, imm4[3:1] 8777 xx100 -> S, xx, imm4[3:2] 8778 x1000 -> D, x, imm4[3:3] 8779 */ 8780 if (bitQ == 1 && bitOP == 1) { 8781 HChar ts = '?'; 8782 IRType ity = Ity_INVALID; 8783 UInt ix1 = 16; 8784 UInt ix2 = 16; 8785 if (imm5 & 1) { 8786 ts = 'b'; 8787 ity = Ity_I8; 8788 ix1 = (imm5 >> 1) & 15; 8789 ix2 = (imm4 >> 0) & 15; 8790 } 8791 else if (imm5 & 2) { 8792 ts = 'h'; 8793 ity = Ity_I16; 8794 ix1 = (imm5 >> 2) & 7; 8795 ix2 = (imm4 >> 1) & 7; 8796 } 8797 else if (imm5 & 4) { 8798 ts = 's'; 8799 ity = Ity_I32; 8800 ix1 = (imm5 >> 3) & 3; 8801 ix2 = (imm4 >> 2) & 3; 8802 } 8803 else if (imm5 & 8) { 8804 ts = 'd'; 8805 ity = Ity_I64; 8806 ix1 = (imm5 >> 4) & 1; 8807 ix2 = (imm4 >> 3) & 1; 8808 } 8809 /* */ 8810 if (ity != Ity_INVALID) { 8811 vassert(ix1 < 16); 8812 vassert(ix2 < 16); 8813 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity)); 8814 DIP("ins %s.%c[%u], %s.%c[%u]\n", 8815 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2); 8816 return True; 8817 } 8818 /* invalid */ 8819 return False; 8820 } 8821 8822 return False; 8823 # undef INSN 8824 } 8825 8826 8827 static 8828 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn) 8829 { 8830 /* 31 28 18 15 11 9 4 8831 0q op 01111 00000 abc cmode 01 defgh d 8832 Decode fields: q,op,cmode 8833 Bit 11 is really "o2", but it is always zero. 
8834 */ 8835 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8836 if (INSN(31,31) != 0 8837 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0) 8838 || INSN(11,10) != BITS2(0,1)) { 8839 return False; 8840 } 8841 UInt bitQ = INSN(30,30); 8842 UInt bitOP = INSN(29,29); 8843 UInt cmode = INSN(15,12); 8844 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5); 8845 UInt dd = INSN(4,0); 8846 8847 ULong imm64lo = 0; 8848 UInt op_cmode = (bitOP << 4) | cmode; 8849 Bool ok = False; 8850 Bool isORR = False; 8851 Bool isBIC = False; 8852 Bool isMOV = False; 8853 Bool isMVN = False; 8854 Bool isFMOV = False; 8855 switch (op_cmode) { 8856 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */ 8857 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */ 8858 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */ 8859 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */ 8860 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0): 8861 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0 8862 ok = True; isMOV = True; break; 8863 8864 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */ 8865 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */ 8866 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */ 8867 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */ 8868 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1): 8869 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1 8870 ok = True; isORR = True; break; 8871 8872 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */ 8873 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */ 8874 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0 8875 ok = True; isMOV = True; break; 8876 8877 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */ 8878 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */ 8879 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1 8880 ok = True; isORR = True; break; 8881 8882 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */ 8883 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */ 8884 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x 8885 ok = True; isMOV = True; break; 8886 8887 /* -------- x,0,1110 MOVI 8-bit -------- */ 8888 case BITS5(0,1,1,1,0): 8889 ok = True; isMOV = True; break; 8890 8891 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */ 8892 case BITS5(0,1,1,1,1): // 0:1111 8893 ok = True; isFMOV = True; break; 8894 8895 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */ 8896 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */ 8897 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */ 8898 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */ 8899 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0): 8900 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0 8901 ok = True; isMVN = True; break; 8902 8903 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */ 8904 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */ 8905 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */ 8906 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */ 8907 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1): 8908 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1 8909 ok = True; isBIC = True; break; 8910 8911 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */ 8912 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */ 8913 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0 8914 ok = True; isMVN = True; break; 8915 8916 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- 
*/ 8917 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */ 8918 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1 8919 ok = True; isBIC = True; break; 8920 8921 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */ 8922 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */ 8923 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x 8924 ok = True; isMVN = True; break; 8925 8926 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */ 8927 /* -------- 1,1,1110 MOVI 64-bit vector -------- */ 8928 case BITS5(1,1,1,1,0): 8929 ok = True; isMOV = True; break; 8930 8931 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */ 8932 case BITS5(1,1,1,1,1): // 1:1111 8933 ok = bitQ == 1; isFMOV = True; break; 8934 8935 default: 8936 break; 8937 } 8938 if (ok) { 8939 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0) 8940 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0)); 8941 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh); 8942 } 8943 if (ok) { 8944 if (isORR || isBIC) { 8945 ULong inv 8946 = isORR ? 0ULL : ~0ULL; 8947 IRExpr* immV128 8948 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo)); 8949 IRExpr* res 8950 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128); 8951 const HChar* nm = isORR ? "orr" : "bic"; 8952 if (bitQ == 0) { 8953 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res)); 8954 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo); 8955 } else { 8956 putQReg128(dd, res); 8957 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm, 8958 nameQReg128(dd), imm64lo, imm64lo); 8959 } 8960 } 8961 else if (isMOV || isMVN || isFMOV) { 8962 if (isMVN) imm64lo = ~imm64lo; 8963 ULong imm64hi = bitQ == 0 ? 0 : imm64lo; 8964 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi), 8965 mkU64(imm64lo)); 8966 putQReg128(dd, immV128); 8967 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo); 8968 } 8969 return True; 8970 } 8971 /* else fall through */ 8972 8973 return False; 8974 # undef INSN 8975 } 8976 8977 8978 static 8979 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn) 8980 { 8981 /* 31 28 20 15 14 10 9 4 8982 01 op 11110000 imm5 0 imm4 1 n d 8983 Decode fields: op,imm4 8984 */ 8985 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8986 if (INSN(31,30) != BITS2(0,1) 8987 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0) 8988 || INSN(15,15) != 0 || INSN(10,10) != 1) { 8989 return False; 8990 } 8991 UInt bitOP = INSN(29,29); 8992 UInt imm5 = INSN(20,16); 8993 UInt imm4 = INSN(14,11); 8994 UInt nn = INSN(9,5); 8995 UInt dd = INSN(4,0); 8996 8997 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) { 8998 /* -------- 0,0000 DUP (element, scalar) -------- */ 8999 IRTemp w0 = newTemp(Ity_I64); 9000 const HChar* arTs = "??"; 9001 IRType laneTy = Ity_INVALID; 9002 UInt laneNo = 16; /* invalid */ 9003 if (imm5 & 1) { 9004 arTs = "b"; 9005 laneNo = (imm5 >> 1) & 15; 9006 laneTy = Ity_I8; 9007 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy))); 9008 } 9009 else if (imm5 & 2) { 9010 arTs = "h"; 9011 laneNo = (imm5 >> 2) & 7; 9012 laneTy = Ity_I16; 9013 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy))); 9014 } 9015 else if (imm5 & 4) { 9016 arTs = "s"; 9017 laneNo = (imm5 >> 3) & 3; 9018 laneTy = Ity_I32; 9019 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy))); 9020 } 9021 else if (imm5 & 8) { 9022 arTs = "d"; 9023 laneNo = (imm5 >> 4) & 1; 9024 laneTy = Ity_I64; 9025 assign(w0, getQRegLane(nn, laneNo, laneTy)); 9026 } 9027 else { 9028 /* invalid; leave laneTy unchanged. 
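      (For reference, an illustrative decoding of the valid cases above:
      imm5 = 01010 has bit 1 as its lowest set bit, so it selects the H
      form with laneNo = (imm5 >> 2) & 7 = 2, giving "dup h0, v1.h[2]"
      for dd=0, nn=1.  Any imm5 of the form x0000 matches none of the
      four cases and ends up here.)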
*/ 9029 } 9030 /* */ 9031 if (laneTy != Ity_INVALID) { 9032 vassert(laneNo < 16); 9033 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0))); 9034 DIP("dup %s, %s.%s[%u]\n", 9035 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo); 9036 return True; 9037 } 9038 /* else fall through */ 9039 } 9040 9041 return False; 9042 # undef INSN 9043 } 9044 9045 9046 static 9047 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn) 9048 { 9049 /* 31 28 23 21 16 11 9 4 9050 01 u 11110 sz 11000 opcode 10 n d 9051 Decode fields: u,sz,opcode 9052 */ 9053 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9054 if (INSN(31,30) != BITS2(0,1) 9055 || INSN(28,24) != BITS5(1,1,1,1,0) 9056 || INSN(21,17) != BITS5(1,1,0,0,0) 9057 || INSN(11,10) != BITS2(1,0)) { 9058 return False; 9059 } 9060 UInt bitU = INSN(29,29); 9061 UInt sz = INSN(23,22); 9062 UInt opcode = INSN(16,12); 9063 UInt nn = INSN(9,5); 9064 UInt dd = INSN(4,0); 9065 9066 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) { 9067 /* -------- 0,11,11011 ADDP d_2d -------- */ 9068 IRTemp xy = newTempV128(); 9069 IRTemp xx = newTempV128(); 9070 assign(xy, getQReg128(nn)); 9071 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy))); 9072 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9073 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx)))); 9074 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn)); 9075 return True; 9076 } 9077 9078 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) { 9079 /* -------- 1,00,01101 ADDP s_2s -------- */ 9080 /* -------- 1,01,01101 ADDP d_2d -------- */ 9081 Bool isD = sz == X01; 9082 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2); 9083 IROp opADD = mkVecADDF(isD ? 3 : 2); 9084 IRTemp src = newTempV128(); 9085 IRTemp argL = newTempV128(); 9086 IRTemp argR = newTempV128(); 9087 assign(src, getQReg128(nn)); 9088 assign(argL, unop(opZHI, mkexpr(src))); 9089 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src), 9090 mkU8(isD ? 8 : 4)))); 9091 putQReg128(dd, unop(opZHI, 9092 triop(opADD, mkexpr(mk_get_IR_rounding_mode()), 9093 mkexpr(argL), mkexpr(argR)))); 9094 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn); 9095 return True; 9096 } 9097 9098 if (bitU == 1 9099 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) { 9100 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */ 9101 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */ 9102 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */ 9103 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */ 9104 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 9105 Bool isD = (sz & 1) == 1; 9106 Bool isMIN = (sz & 2) == 2; 9107 Bool isNM = opcode == BITS5(0,1,1,0,0); 9108 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2); 9109 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2); 9110 IRTemp src = newTempV128(); 9111 IRTemp argL = newTempV128(); 9112 IRTemp argR = newTempV128(); 9113 assign(src, getQReg128(nn)); 9114 assign(argL, unop(opZHI, mkexpr(src))); 9115 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src), 9116 mkU8(isD ? 8 : 4)))); 9117 putQReg128(dd, unop(opZHI, 9118 binop(opMXX, mkexpr(argL), mkexpr(argR)))); 9119 HChar c = isD ? 'd' : 's'; 9120 DIP("%s%sp %c%u, v%u.2%c\n", 9121 isMIN ? "fmin" : "fmax", isNM ? 
"nm" : "", c, dd, nn, c); 9122 return True; 9123 } 9124 9125 return False; 9126 # undef INSN 9127 } 9128 9129 9130 static 9131 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn) 9132 { 9133 /* 31 28 22 18 15 10 9 4 9134 01 u 111110 immh immb opcode 1 n d 9135 Decode fields: u,immh,opcode 9136 */ 9137 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9138 if (INSN(31,30) != BITS2(0,1) 9139 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) { 9140 return False; 9141 } 9142 UInt bitU = INSN(29,29); 9143 UInt immh = INSN(22,19); 9144 UInt immb = INSN(18,16); 9145 UInt opcode = INSN(15,11); 9146 UInt nn = INSN(9,5); 9147 UInt dd = INSN(4,0); 9148 UInt immhb = (immh << 3) | immb; 9149 9150 if ((immh & 8) == 8 9151 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) { 9152 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */ 9153 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */ 9154 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */ 9155 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */ 9156 Bool isU = bitU == 1; 9157 Bool isAcc = opcode == BITS5(0,0,0,1,0); 9158 UInt sh = 128 - immhb; 9159 vassert(sh >= 1 && sh <= 64); 9160 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2; 9161 IRExpr* src = getQReg128(nn); 9162 IRTemp shf = newTempV128(); 9163 IRTemp res = newTempV128(); 9164 if (sh == 64 && isU) { 9165 assign(shf, mkV128(0x0000)); 9166 } else { 9167 UInt nudge = 0; 9168 if (sh == 64) { 9169 vassert(!isU); 9170 nudge = 1; 9171 } 9172 assign(shf, binop(op, src, mkU8(sh - nudge))); 9173 } 9174 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf)) 9175 : mkexpr(shf)); 9176 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9177 const HChar* nm = isAcc ? (isU ? "usra" : "ssra") 9178 : (isU ? "ushr" : "sshr"); 9179 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh); 9180 return True; 9181 } 9182 9183 if ((immh & 8) == 8 9184 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) { 9185 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */ 9186 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */ 9187 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */ 9188 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */ 9189 Bool isU = bitU == 1; 9190 Bool isAcc = opcode == BITS5(0,0,1,1,0); 9191 UInt sh = 128 - immhb; 9192 vassert(sh >= 1 && sh <= 64); 9193 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2; 9194 vassert(sh >= 1 && sh <= 64); 9195 IRExpr* src = getQReg128(nn); 9196 IRTemp imm8 = newTemp(Ity_I8); 9197 assign(imm8, mkU8((UChar)(-sh))); 9198 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8)); 9199 IRTemp shf = newTempV128(); 9200 IRTemp res = newTempV128(); 9201 assign(shf, binop(op, src, amt)); 9202 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf)) 9203 : mkexpr(shf)); 9204 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9205 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra") 9206 : (isU ? "urshr" : "srshr"); 9207 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh); 9208 return True; 9209 } 9210 9211 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) { 9212 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */ 9213 UInt sh = 128 - immhb; 9214 vassert(sh >= 1 && sh <= 64); 9215 if (sh == 64) { 9216 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd))); 9217 } else { 9218 /* sh is in range 1 .. 
63 */ 9219 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1)); 9220 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask)); 9221 IRTemp res = newTempV128(); 9222 assign(res, binop(Iop_OrV128, 9223 binop(Iop_AndV128, getQReg128(dd), nmaskV), 9224 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh)))); 9225 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9226 } 9227 DIP("sri d%u, d%u, #%u\n", dd, nn, sh); 9228 return True; 9229 } 9230 9231 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) { 9232 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */ 9233 UInt sh = immhb - 64; 9234 vassert(sh >= 0 && sh < 64); 9235 putQReg128(dd, 9236 unop(Iop_ZeroHI64ofV128, 9237 sh == 0 ? getQReg128(nn) 9238 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh)))); 9239 DIP("shl d%u, d%u, #%u\n", dd, nn, sh); 9240 return True; 9241 } 9242 9243 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) { 9244 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */ 9245 UInt sh = immhb - 64; 9246 vassert(sh >= 0 && sh < 64); 9247 if (sh == 0) { 9248 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn))); 9249 } else { 9250 /* sh is in range 1 .. 63 */ 9251 ULong nmask = (1ULL << sh) - 1; 9252 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask)); 9253 IRTemp res = newTempV128(); 9254 assign(res, binop(Iop_OrV128, 9255 binop(Iop_AndV128, getQReg128(dd), nmaskV), 9256 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh)))); 9257 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9258 } 9259 DIP("sli d%u, d%u, #%u\n", dd, nn, sh); 9260 return True; 9261 } 9262 9263 if (opcode == BITS5(0,1,1,1,0) 9264 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) { 9265 /* -------- 0,01110 SQSHL #imm -------- */ 9266 /* -------- 1,01110 UQSHL #imm -------- */ 9267 /* -------- 1,01100 SQSHLU #imm -------- */ 9268 UInt size = 0; 9269 UInt shift = 0; 9270 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 9271 if (!ok) return False; 9272 vassert(size >= 0 && size <= 3); 9273 /* The shift encoding has opposite sign for the leftwards case. 9274 Adjust shift to compensate. */ 9275 UInt lanebits = 8 << size; 9276 shift = lanebits - shift; 9277 vassert(shift >= 0 && shift < lanebits); 9278 const HChar* nm = NULL; 9279 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl"; 9280 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl"; 9281 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu"; 9282 else vassert(0); 9283 IRTemp qDiff1 = IRTemp_INVALID; 9284 IRTemp qDiff2 = IRTemp_INVALID; 9285 IRTemp res = IRTemp_INVALID; 9286 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn)); 9287 /* This relies on the fact that the zeroed out lanes generate zeroed 9288 result lanes and don't saturate, so there's no point in trimming 9289 the resulting res, qDiff1 or qDiff2 values. 
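      For example (illustrative): in the B form, an input lane of 0x7F
      shifted left by 1 under sqshl would be 0xFE, which saturates back
      to 0x7F; in that case qDiff1 and qDiff2 differ and QCFLAG gets
      set.  The zeroed-out lanes shift 0 by the same amount, which
      neither saturates nor perturbs that comparison.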
*/ 9290 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm); 9291 putQReg128(dd, mkexpr(res)); 9292 updateQCFLAGwithDifference(qDiff1, qDiff2); 9293 const HChar arr = "bhsd"[size]; 9294 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift); 9295 return True; 9296 } 9297 9298 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1) 9299 || (bitU == 1 9300 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) { 9301 /* -------- 0,10010 SQSHRN #imm -------- */ 9302 /* -------- 1,10010 UQSHRN #imm -------- */ 9303 /* -------- 0,10011 SQRSHRN #imm -------- */ 9304 /* -------- 1,10011 UQRSHRN #imm -------- */ 9305 /* -------- 1,10000 SQSHRUN #imm -------- */ 9306 /* -------- 1,10001 SQRSHRUN #imm -------- */ 9307 UInt size = 0; 9308 UInt shift = 0; 9309 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 9310 if (!ok || size == X11) return False; 9311 vassert(size >= X00 && size <= X10); 9312 vassert(shift >= 1 && shift <= (8 << size)); 9313 const HChar* nm = "??"; 9314 IROp op = Iop_INVALID; 9315 /* Decide on the name and the operation. */ 9316 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 9317 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size); 9318 } 9319 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 9320 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size); 9321 } 9322 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) { 9323 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size); 9324 } 9325 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 9326 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size); 9327 } 9328 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) { 9329 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size); 9330 } 9331 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) { 9332 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size); 9333 } 9334 else vassert(0); 9335 /* Compute the result (Q, shifted value) pair. */ 9336 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn)); 9337 IRTemp pair = newTempV128(); 9338 assign(pair, binop(op, mkexpr(src128), mkU8(shift))); 9339 /* Update the result reg */ 9340 IRTemp res64in128 = newTempV128(); 9341 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair))); 9342 putQReg128(dd, mkexpr(res64in128)); 9343 /* Update the Q flag. */ 9344 IRTemp q64q64 = newTempV128(); 9345 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair))); 9346 IRTemp z128 = newTempV128(); 9347 assign(z128, mkV128(0x0000)); 9348 updateQCFLAGwithDifference(q64q64, z128); 9349 /* */ 9350 const HChar arrNarrow = "bhsd"[size]; 9351 const HChar arrWide = "bhsd"[size+1]; 9352 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift); 9353 return True; 9354 } 9355 9356 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) { 9357 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */ 9358 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */ 9359 UInt size = 0; 9360 UInt fbits = 0; 9361 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 9362 /* The following holds because immh is never zero. */ 9363 vassert(ok); 9364 /* The following holds because immh >= 0100. */ 9365 vassert(size == X10 || size == X11); 9366 Bool isD = size == X11; 9367 Bool isU = bitU == 1; 9368 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 9369 Double scale = two_to_the_minus(fbits); 9370 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 9371 : IRExpr_Const(IRConst_F32( (Float)scale )); 9372 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 9373 IROp opCVT = isU ? (isD ? 
Iop_I64UtoF64 : Iop_I32UtoF32) 9374 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 9375 IRType tyF = isD ? Ity_F64 : Ity_F32; 9376 IRType tyI = isD ? Ity_I64 : Ity_I32; 9377 IRTemp src = newTemp(tyI); 9378 IRTemp res = newTemp(tyF); 9379 IRTemp rm = mk_get_IR_rounding_mode(); 9380 assign(src, getQRegLane(nn, 0, tyI)); 9381 assign(res, triop(opMUL, mkexpr(rm), 9382 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE)); 9383 putQRegLane(dd, 0, mkexpr(res)); 9384 if (!isD) { 9385 putQRegLane(dd, 1, mkU32(0)); 9386 } 9387 putQRegLane(dd, 1, mkU64(0)); 9388 const HChar ch = isD ? 'd' : 's'; 9389 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf", 9390 ch, dd, ch, nn, fbits); 9391 return True; 9392 } 9393 9394 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) { 9395 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */ 9396 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */ 9397 UInt size = 0; 9398 UInt fbits = 0; 9399 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 9400 /* The following holds because immh is never zero. */ 9401 vassert(ok); 9402 /* The following holds because immh >= 0100. */ 9403 vassert(size == X10 || size == X11); 9404 Bool isD = size == X11; 9405 Bool isU = bitU == 1; 9406 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 9407 Double scale = two_to_the_plus(fbits); 9408 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 9409 : IRExpr_Const(IRConst_F32( (Float)scale )); 9410 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 9411 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U) 9412 : (isD ? Iop_F64toI64S : Iop_F32toI32S); 9413 IRType tyF = isD ? Ity_F64 : Ity_F32; 9414 IRType tyI = isD ? Ity_I64 : Ity_I32; 9415 IRTemp src = newTemp(tyF); 9416 IRTemp res = newTemp(tyI); 9417 IRTemp rm = newTemp(Ity_I32); 9418 assign(src, getQRegLane(nn, 0, tyF)); 9419 assign(rm, mkU32(Irrm_ZERO)); 9420 assign(res, binop(opCVT, mkexpr(rm), 9421 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE))); 9422 putQRegLane(dd, 0, mkexpr(res)); 9423 if (!isD) { 9424 putQRegLane(dd, 1, mkU32(0)); 9425 } 9426 putQRegLane(dd, 1, mkU64(0)); 9427 const HChar ch = isD ? 'd' : 's'; 9428 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs", 9429 ch, dd, ch, nn, fbits); 9430 return True; 9431 } 9432 9433 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9434 return False; 9435 # undef INSN 9436 } 9437 9438 9439 static 9440 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn) 9441 { 9442 /* 31 29 28 23 21 20 15 11 9 4 9443 01 U 11110 size 1 m opcode 00 n d 9444 Decode fields: u,opcode 9445 */ 9446 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9447 if (INSN(31,30) != BITS2(0,1) 9448 || INSN(28,24) != BITS5(1,1,1,1,0) 9449 || INSN(21,21) != 1 9450 || INSN(11,10) != BITS2(0,0)) { 9451 return False; 9452 } 9453 UInt bitU = INSN(29,29); 9454 UInt size = INSN(23,22); 9455 UInt mm = INSN(20,16); 9456 UInt opcode = INSN(15,12); 9457 UInt nn = INSN(9,5); 9458 UInt dd = INSN(4,0); 9459 vassert(size < 4); 9460 9461 if (bitU == 0 9462 && (opcode == BITS4(1,1,0,1) 9463 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) { 9464 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks) 9465 /* -------- 0,1001 SQDMLAL -------- */ // 1 9466 /* -------- 0,1011 SQDMLSL -------- */ // 2 9467 /* Widens, and size refers to the narrowed lanes. 
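      For the h -> s form (size=X01) the scalar computation is roughly
         sat32( 2 * (Int32)n16 * (Int32)m16 )
      optionally accumulated into (SQDMLAL) or subtracted from (SQDMLSL)
      the existing 32-bit value.  The doubling itself saturates only for
      0x8000 * 0x8000, whose doubled product 0x80000000 is clamped to
      0x7FFFFFFF and sets QCFLAG; the accumulate/subtract step can
      saturate too, which is why sat2q/sat2n are also checked below.
      (Illustrative description, not lifted verbatim from the ARM ARM.)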
*/ 9468 UInt ks = 3; 9469 switch (opcode) { 9470 case BITS4(1,1,0,1): ks = 0; break; 9471 case BITS4(1,0,0,1): ks = 1; break; 9472 case BITS4(1,0,1,1): ks = 2; break; 9473 default: vassert(0); 9474 } 9475 vassert(ks >= 0 && ks <= 2); 9476 if (size == X00 || size == X11) return False; 9477 vassert(size <= 2); 9478 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n; 9479 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 9480 newTempsV128_3(&vecN, &vecM, &vecD); 9481 assign(vecN, getQReg128(nn)); 9482 assign(vecM, getQReg128(mm)); 9483 assign(vecD, getQReg128(dd)); 9484 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 9485 False/*!is2*/, size, "mas"[ks], 9486 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 9487 IROp opZHI = mkVecZEROHIxxOFV128(size+1); 9488 putQReg128(dd, unop(opZHI, mkexpr(res))); 9489 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 9490 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 9491 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 9492 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI); 9493 } 9494 const HChar* nm = ks == 0 ? "sqdmull" 9495 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 9496 const HChar arrNarrow = "bhsd"[size]; 9497 const HChar arrWide = "bhsd"[size+1]; 9498 DIP("%s %c%d, %c%d, %c%d\n", 9499 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm); 9500 return True; 9501 } 9502 9503 return False; 9504 # undef INSN 9505 } 9506 9507 9508 static 9509 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn) 9510 { 9511 /* 31 29 28 23 21 20 15 10 9 4 9512 01 U 11110 size 1 m opcode 1 n d 9513 Decode fields: u,size,opcode 9514 */ 9515 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9516 if (INSN(31,30) != BITS2(0,1) 9517 || INSN(28,24) != BITS5(1,1,1,1,0) 9518 || INSN(21,21) != 1 9519 || INSN(10,10) != 1) { 9520 return False; 9521 } 9522 UInt bitU = INSN(29,29); 9523 UInt size = INSN(23,22); 9524 UInt mm = INSN(20,16); 9525 UInt opcode = INSN(15,11); 9526 UInt nn = INSN(9,5); 9527 UInt dd = INSN(4,0); 9528 vassert(size < 4); 9529 9530 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) { 9531 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */ 9532 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */ 9533 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */ 9534 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */ 9535 Bool isADD = opcode == BITS5(0,0,0,0,1); 9536 Bool isU = bitU == 1; 9537 IROp qop = Iop_INVALID; 9538 IROp nop = Iop_INVALID; 9539 if (isADD) { 9540 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size); 9541 nop = mkVecADD(size); 9542 } else { 9543 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size); 9544 nop = mkVecSUB(size); 9545 } 9546 IRTemp argL = newTempV128(); 9547 IRTemp argR = newTempV128(); 9548 IRTemp qres = newTempV128(); 9549 IRTemp nres = newTempV128(); 9550 assign(argL, getQReg128(nn)); 9551 assign(argR, getQReg128(mm)); 9552 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9553 size, binop(qop, mkexpr(argL), mkexpr(argR))))); 9554 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9555 size, binop(nop, mkexpr(argL), mkexpr(argR))))); 9556 putQReg128(dd, mkexpr(qres)); 9557 updateQCFLAGwithDifference(qres, nres); 9558 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd") 9559 : (isU ? 
"uqsub" : "sqsub"); 9560 const HChar arr = "bhsd"[size]; 9561 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); 9562 return True; 9563 } 9564 9565 if (size == X11 && opcode == BITS5(0,0,1,1,0)) { 9566 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s 9567 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u 9568 Bool isGT = bitU == 0; 9569 IRExpr* argL = getQReg128(nn); 9570 IRExpr* argR = getQReg128(mm); 9571 IRTemp res = newTempV128(); 9572 assign(res, 9573 isGT ? binop(Iop_CmpGT64Sx2, argL, argR) 9574 : binop(Iop_CmpGT64Ux2, argL, argR)); 9575 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9576 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi", 9577 nameQRegLO(dd, Ity_I64), 9578 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9579 return True; 9580 } 9581 9582 if (size == X11 && opcode == BITS5(0,0,1,1,1)) { 9583 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s 9584 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u 9585 Bool isGE = bitU == 0; 9586 IRExpr* argL = getQReg128(nn); 9587 IRExpr* argR = getQReg128(mm); 9588 IRTemp res = newTempV128(); 9589 assign(res, 9590 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)) 9591 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL))); 9592 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9593 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs", 9594 nameQRegLO(dd, Ity_I64), 9595 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9596 return True; 9597 } 9598 9599 if (size == X11 && (opcode == BITS5(0,1,0,0,0) 9600 || opcode == BITS5(0,1,0,1,0))) { 9601 /* -------- 0,xx,01000 SSHL d_d_d -------- */ 9602 /* -------- 0,xx,01010 SRSHL d_d_d -------- */ 9603 /* -------- 1,xx,01000 USHL d_d_d -------- */ 9604 /* -------- 1,xx,01010 URSHL d_d_d -------- */ 9605 Bool isU = bitU == 1; 9606 Bool isR = opcode == BITS5(0,1,0,1,0); 9607 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size)) 9608 : (isU ? mkVecSHU(size) : mkVecSHS(size)); 9609 IRTemp res = newTempV128(); 9610 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 9611 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9612 const HChar* nm = isR ? (isU ? "urshl" : "srshl") 9613 : (isU ? "ushl" : "sshl"); 9614 DIP("%s %s, %s, %s\n", nm, 9615 nameQRegLO(dd, Ity_I64), 9616 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9617 return True; 9618 } 9619 9620 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) { 9621 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */ 9622 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */ 9623 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */ 9624 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */ 9625 Bool isU = bitU == 1; 9626 Bool isR = opcode == BITS5(0,1,0,1,1); 9627 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size)) 9628 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size)); 9629 /* This is a bit tricky. Since we're only interested in the lowest 9630 lane of the result, we zero out all the rest in the operands, so 9631 as to ensure that other lanes don't pollute the returned Q value. 9632 This works because it means, for the lanes we don't care about, we 9633 are shifting zero by zero, which can never saturate. 
*/ 9634 IRTemp res256 = newTemp(Ity_V256); 9635 IRTemp resSH = newTempV128(); 9636 IRTemp resQ = newTempV128(); 9637 IRTemp zero = newTempV128(); 9638 assign( 9639 res256, 9640 binop(op, 9641 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))), 9642 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm))))); 9643 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256))); 9644 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256))); 9645 assign(zero, mkV128(0x0000)); 9646 putQReg128(dd, mkexpr(resSH)); 9647 updateQCFLAGwithDifference(resQ, zero); 9648 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl") 9649 : (isU ? "uqshl" : "sqshl"); 9650 const HChar arr = "bhsd"[size]; 9651 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); 9652 return True; 9653 } 9654 9655 if (size == X11 && opcode == BITS5(1,0,0,0,0)) { 9656 /* -------- 0,11,10000 ADD d_d_d -------- */ 9657 /* -------- 1,11,10000 SUB d_d_d -------- */ 9658 Bool isSUB = bitU == 1; 9659 IRTemp res = newTemp(Ity_I64); 9660 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64, 9661 getQRegLane(nn, 0, Ity_I64), 9662 getQRegLane(mm, 0, Ity_I64))); 9663 putQRegLane(dd, 0, mkexpr(res)); 9664 putQRegLane(dd, 1, mkU64(0)); 9665 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add", 9666 nameQRegLO(dd, Ity_I64), 9667 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9668 return True; 9669 } 9670 9671 if (size == X11 && opcode == BITS5(1,0,0,0,1)) { 9672 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0 9673 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // == 9674 Bool isEQ = bitU == 1; 9675 IRExpr* argL = getQReg128(nn); 9676 IRExpr* argR = getQReg128(mm); 9677 IRTemp res = newTempV128(); 9678 assign(res, 9679 isEQ ? binop(Iop_CmpEQ64x2, argL, argR) 9680 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2, 9681 binop(Iop_AndV128, argL, argR), 9682 mkV128(0x0000)))); 9683 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9684 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst", 9685 nameQRegLO(dd, Ity_I64), 9686 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9687 return True; 9688 } 9689 9690 if (opcode == BITS5(1,0,1,1,0)) { 9691 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */ 9692 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */ 9693 if (size == X00 || size == X11) return False; 9694 Bool isR = bitU == 1; 9695 IRTemp res, sat1q, sat1n, vN, vM; 9696 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 9697 newTempsV128_2(&vN, &vM); 9698 assign(vN, getQReg128(nn)); 9699 assign(vM, getQReg128(mm)); 9700 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 9701 putQReg128(dd, 9702 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)))); 9703 updateQCFLAGwithDifference( 9704 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)), 9705 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n))); 9706 const HChar arr = "bhsd"[size]; 9707 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 9708 DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm); 9709 return True; 9710 } 9711 9712 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) { 9713 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */ 9714 IRType ity = size == X11 ? 
Ity_F64 : Ity_F32; 9715 IRTemp res = newTemp(ity); 9716 assign(res, unop(mkABSF(ity), 9717 triop(mkSUBF(ity), 9718 mkexpr(mk_get_IR_rounding_mode()), 9719 getQRegLO(nn,ity), getQRegLO(mm,ity)))); 9720 putQReg128(dd, mkV128(0x0000)); 9721 putQRegLO(dd, mkexpr(res)); 9722 DIP("fabd %s, %s, %s\n", 9723 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9724 return True; 9725 } 9726 9727 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) { 9728 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */ 9729 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 9730 IRType ity = size == X01 ? Ity_F64 : Ity_F32; 9731 IRTemp res = newTemp(ity); 9732 assign(res, triop(mkMULF(ity), 9733 mkexpr(mk_get_IR_rounding_mode()), 9734 getQRegLO(nn,ity), getQRegLO(mm,ity))); 9735 putQReg128(dd, mkV128(0x0000)); 9736 putQRegLO(dd, mkexpr(res)); 9737 DIP("fmulx %s, %s, %s\n", 9738 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9739 return True; 9740 } 9741 9742 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) { 9743 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */ 9744 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */ 9745 Bool isD = size == X01; 9746 IRType ity = isD ? Ity_F64 : Ity_F32; 9747 Bool isGE = bitU == 1; 9748 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4) 9749 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4); 9750 IRTemp res = newTempV128(); 9751 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd 9752 : binop(opCMP, getQReg128(nn), getQReg128(mm))); 9753 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9754 mkexpr(res)))); 9755 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq", 9756 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9757 return True; 9758 } 9759 9760 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) { 9761 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */ 9762 Bool isD = size == X11; 9763 IRType ity = isD ? Ity_F64 : Ity_F32; 9764 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 9765 IRTemp res = newTempV128(); 9766 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd 9767 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9768 mkexpr(res)))); 9769 DIP("%s %s, %s, %s\n", "fcmgt", 9770 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9771 return True; 9772 } 9773 9774 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) { 9775 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */ 9776 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */ 9777 Bool isD = (size & 1) == 1; 9778 IRType ity = isD ? Ity_F64 : Ity_F32; 9779 Bool isGT = (size & 2) == 2; 9780 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4) 9781 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4); 9782 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 9783 IRTemp res = newTempV128(); 9784 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)), 9785 unop(opABS, getQReg128(nn)))); // swapd 9786 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9787 mkexpr(res)))); 9788 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge", 9789 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9790 return True; 9791 } 9792 9793 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) { 9794 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */ 9795 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */ 9796 Bool isSQRT = (size & 2) == 2; 9797 Bool isD = (size & 1) == 1; 9798 IROp op = isSQRT ? (isD ? 
Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4) 9799 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4); 9800 IRTemp res = newTempV128(); 9801 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 9802 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9803 mkexpr(res)))); 9804 HChar c = isD ? 'd' : 's'; 9805 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps", 9806 c, dd, c, nn, c, mm); 9807 return True; 9808 } 9809 9810 return False; 9811 # undef INSN 9812 } 9813 9814 9815 static 9816 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) 9817 { 9818 /* 31 29 28 23 21 16 11 9 4 9819 01 U 11110 size 10000 opcode 10 n d 9820 Decode fields: u,size,opcode 9821 */ 9822 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9823 if (INSN(31,30) != BITS2(0,1) 9824 || INSN(28,24) != BITS5(1,1,1,1,0) 9825 || INSN(21,17) != BITS5(1,0,0,0,0) 9826 || INSN(11,10) != BITS2(1,0)) { 9827 return False; 9828 } 9829 UInt bitU = INSN(29,29); 9830 UInt size = INSN(23,22); 9831 UInt opcode = INSN(16,12); 9832 UInt nn = INSN(9,5); 9833 UInt dd = INSN(4,0); 9834 vassert(size < 4); 9835 9836 if (opcode == BITS5(0,0,0,1,1)) { 9837 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */ 9838 /* -------- 1,xx,00011: USQADD std4_std4 -------- */ 9839 /* These are a bit tricky (to say the least). See comments on 9840 the vector variants (in dis_AdvSIMD_two_reg_misc) below for 9841 details. */ 9842 Bool isUSQADD = bitU == 1; 9843 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size) 9844 : mkVecQADDEXTUSSATSS(size); 9845 IROp nop = mkVecADD(size); 9846 IRTemp argL = newTempV128(); 9847 IRTemp argR = newTempV128(); 9848 assign(argL, getQReg128(nn)); 9849 assign(argR, getQReg128(dd)); 9850 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9851 size, binop(qop, mkexpr(argL), mkexpr(argR))); 9852 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9853 size, binop(nop, mkexpr(argL), mkexpr(argR))); 9854 putQReg128(dd, mkexpr(qres)); 9855 updateQCFLAGwithDifference(qres, nres); 9856 const HChar arr = "bhsd"[size]; 9857 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn); 9858 return True; 9859 } 9860 9861 if (opcode == BITS5(0,0,1,1,1)) { 9862 /* -------- 0,xx,00111 SQABS std4_std4 -------- */ 9863 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */ 9864 Bool isNEG = bitU == 1; 9865 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID; 9866 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW, 9867 getQReg128(nn), size ); 9868 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW)); 9869 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW)); 9870 putQReg128(dd, mkexpr(qres)); 9871 updateQCFLAGwithDifference(qres, nres); 9872 const HChar arr = "bhsd"[size]; 9873 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn); 9874 return True; 9875 } 9876 9877 if (size == X11 && opcode == BITS5(0,1,0,0,0)) { 9878 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0 9879 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0 9880 Bool isGT = bitU == 0; 9881 IRExpr* argL = getQReg128(nn); 9882 IRExpr* argR = mkV128(0x0000); 9883 IRTemp res = newTempV128(); 9884 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR) 9885 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))); 9886 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9887 DIP("cm%s d%u, d%u, #0\n", isGT ? 
"gt" : "ge", dd, nn); 9888 return True; 9889 } 9890 9891 if (size == X11 && opcode == BITS5(0,1,0,0,1)) { 9892 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0 9893 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0 9894 Bool isEQ = bitU == 0; 9895 IRExpr* argL = getQReg128(nn); 9896 IRExpr* argR = mkV128(0x0000); 9897 IRTemp res = newTempV128(); 9898 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR) 9899 : unop(Iop_NotV128, 9900 binop(Iop_CmpGT64Sx2, argL, argR))); 9901 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9902 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn); 9903 return True; 9904 } 9905 9906 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) { 9907 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0 9908 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9909 binop(Iop_CmpGT64Sx2, mkV128(0x0000), 9910 getQReg128(nn)))); 9911 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn); 9912 return True; 9913 } 9914 9915 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) { 9916 /* -------- 0,11,01011 ABS d_d -------- */ 9917 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9918 unop(Iop_Abs64x2, getQReg128(nn)))); 9919 DIP("abs d%u, d%u\n", dd, nn); 9920 return True; 9921 } 9922 9923 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) { 9924 /* -------- 1,11,01011 NEG d_d -------- */ 9925 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9926 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn)))); 9927 DIP("neg d%u, d%u\n", dd, nn); 9928 return True; 9929 } 9930 9931 UInt ix = 0; /*INVALID*/ 9932 if (size >= X10) { 9933 switch (opcode) { 9934 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break; 9935 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break; 9936 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; 9937 default: break; 9938 } 9939 } 9940 if (ix > 0) { 9941 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */ 9942 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */ 9943 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */ 9944 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */ 9945 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */ 9946 Bool isD = size == X11; 9947 IRType ity = isD ? Ity_F64 : Ity_F32; 9948 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; 9949 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 9950 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 9951 IROp opCmp = Iop_INVALID; 9952 Bool swap = False; 9953 const HChar* nm = "??"; 9954 switch (ix) { 9955 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break; 9956 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break; 9957 case 3: nm = "fcmlt"; opCmp = opCmpLT; break; 9958 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break; 9959 case 5: nm = "fcmle"; opCmp = opCmpLE; break; 9960 default: vassert(0); 9961 } 9962 IRExpr* zero = mkV128(0x0000); 9963 IRTemp res = newTempV128(); 9964 assign(res, swap ? binop(opCmp, zero, getQReg128(nn)) 9965 : binop(opCmp, getQReg128(nn), zero)); 9966 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 
X11 : X10, 9967 mkexpr(res)))); 9968 9969 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity)); 9970 return True; 9971 } 9972 9973 if (opcode == BITS5(1,0,1,0,0) 9974 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) { 9975 /* -------- 0,xx,10100: SQXTN -------- */ 9976 /* -------- 1,xx,10100: UQXTN -------- */ 9977 /* -------- 1,xx,10010: SQXTUN -------- */ 9978 if (size == X11) return False; 9979 vassert(size < 3); 9980 IROp opN = Iop_INVALID; 9981 Bool zWiden = True; 9982 const HChar* nm = "??"; 9983 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) { 9984 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False; 9985 } 9986 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) { 9987 opN = mkVecQNARROWUNUU(size); nm = "uqxtn"; 9988 } 9989 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 9990 opN = mkVecQNARROWUNSU(size); nm = "sqxtun"; 9991 } 9992 else vassert(0); 9993 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9994 size+1, getQReg128(nn)); 9995 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9996 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src)))); 9997 putQReg128(dd, mkexpr(resN)); 9998 /* This widens zero lanes to zero, and compares it against zero, so all 9999 of the non-participating lanes make no contribution to the 10000 Q flag state. */ 10001 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/, 10002 size, mkexpr(resN)); 10003 updateQCFLAGwithDifference(src, resW); 10004 const HChar arrNarrow = "bhsd"[size]; 10005 const HChar arrWide = "bhsd"[size+1]; 10006 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn); 10007 return True; 10008 } 10009 10010 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) { 10011 /* -------- 1,01,10110 FCVTXN s_d -------- */ 10012 /* Using Irrm_NEAREST here isn't right. The docs say "round to 10013 odd" but I don't know what that really means. */ 10014 putQRegLO(dd, 10015 binop(Iop_F64toF32, mkU32(Irrm_NEAREST), 10016 getQRegLO(nn, Ity_F64))); 10017 putQRegLane(dd, 1, mkU32(0)); 10018 putQRegLane(dd, 1, mkU64(0)); 10019 DIP("fcvtxn s%u, d%u\n", dd, nn); 10020 return True; 10021 } 10022 10023 ix = 0; /*INVALID*/ 10024 switch (opcode) { 10025 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break; 10026 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break; 10027 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break; 10028 default: break; 10029 } 10030 if (ix > 0) { 10031 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */ 10032 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */ 10033 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */ 10034 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */ 10035 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */ 10036 /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */ 10037 /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */ 10038 /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */ 10039 /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */ 10040 /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */ 10041 Bool isD = (size & 1) == 1; 10042 IRType tyF = isD ? Ity_F64 : Ity_F32; 10043 IRType tyI = isD ? Ity_I64 : Ity_I32; 10044 IRRoundingMode irrm = 8; /*impossible*/ 10045 HChar ch = '?'; 10046 switch (ix) { 10047 case 1: ch = 'n'; irrm = Irrm_NEAREST; break; 10048 case 2: ch = 'm'; irrm = Irrm_NegINF; break; 10049 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? 
*/ 10050 case 4: ch = 'p'; irrm = Irrm_PosINF; break; 10051 case 5: ch = 'z'; irrm = Irrm_ZERO; break; 10052 default: vassert(0); 10053 } 10054 IROp cvt = Iop_INVALID; 10055 if (bitU == 1) { 10056 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U; 10057 } else { 10058 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S; 10059 } 10060 IRTemp src = newTemp(tyF); 10061 IRTemp res = newTemp(tyI); 10062 assign(src, getQRegLane(nn, 0, tyF)); 10063 assign(res, binop(cvt, mkU32(irrm), mkexpr(src))); 10064 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */ 10065 if (!isD) { 10066 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */ 10067 } 10068 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */ 10069 HChar sOrD = isD ? 'd' : 's'; 10070 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's', 10071 sOrD, dd, sOrD, nn); 10072 return True; 10073 } 10074 10075 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) { 10076 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */ 10077 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */ 10078 Bool isU = bitU == 1; 10079 Bool isD = (size & 1) == 1; 10080 IRType tyI = isD ? Ity_I64 : Ity_I32; 10081 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32) 10082 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 10083 IRTemp rm = mk_get_IR_rounding_mode(); 10084 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI))); 10085 if (!isD) { 10086 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */ 10087 } 10088 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */ 10089 HChar c = isD ? 'd' : 's'; 10090 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn); 10091 return True; 10092 } 10093 10094 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) { 10095 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */ 10096 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */ 10097 Bool isSQRT = bitU == 1; 10098 Bool isD = (size & 1) == 1; 10099 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4) 10100 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4); 10101 IRTemp resV = newTempV128(); 10102 assign(resV, unop(op, getQReg128(nn))); 10103 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 10104 mkexpr(resV)))); 10105 HChar c = isD ? 'd' : 's'; 10106 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn); 10107 return True; 10108 } 10109 10110 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) { 10111 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */ 10112 Bool isD = (size & 1) == 1; 10113 IRType ty = isD ? Ity_F64 : Ity_F32; 10114 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32; 10115 IRTemp res = newTemp(ty); 10116 IRTemp rm = mk_get_IR_rounding_mode(); 10117 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty))); 10118 putQReg128(dd, mkV128(0x0000)); 10119 putQRegLane(dd, 0, mkexpr(res)); 10120 HChar c = isD ? 'd' : 's'; 10121 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn); 10122 return True; 10123 } 10124 10125 return False; 10126 # undef INSN 10127 } 10128 10129 10130 static 10131 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn) 10132 { 10133 /* 31 28 23 21 20 19 15 11 9 4 10134 01 U 11111 size L M m opcode H 0 n d 10135 Decode fields are: u,size,opcode 10136 M is really part of the mm register number. Individual 10137 cases need to inspect L and H though. 
10138 */ 10139 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10140 if (INSN(31,30) != BITS2(0,1) 10141 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) { 10142 return False; 10143 } 10144 UInt bitU = INSN(29,29); 10145 UInt size = INSN(23,22); 10146 UInt bitL = INSN(21,21); 10147 UInt bitM = INSN(20,20); 10148 UInt mmLO4 = INSN(19,16); 10149 UInt opcode = INSN(15,12); 10150 UInt bitH = INSN(11,11); 10151 UInt nn = INSN(9,5); 10152 UInt dd = INSN(4,0); 10153 vassert(size < 4); 10154 vassert(bitH < 2 && bitM < 2 && bitL < 2); 10155 10156 if (bitU == 0 && size >= X10 10157 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) { 10158 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */ 10159 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */ 10160 Bool isD = (size & 1) == 1; 10161 Bool isSUB = opcode == BITS4(0,1,0,1); 10162 UInt index; 10163 if (!isD) index = (bitH << 1) | bitL; 10164 else if (isD && bitL == 0) index = bitH; 10165 else return False; // sz:L == x11 => unallocated encoding 10166 vassert(index < (isD ? 2 : 4)); 10167 IRType ity = isD ? Ity_F64 : Ity_F32; 10168 IRTemp elem = newTemp(ity); 10169 UInt mm = (bitM << 4) | mmLO4; 10170 assign(elem, getQRegLane(mm, index, ity)); 10171 IRTemp dupd = math_DUP_TO_V128(elem, ity); 10172 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 10173 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 10174 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 10175 IRTemp rm = mk_get_IR_rounding_mode(); 10176 IRTemp t1 = newTempV128(); 10177 IRTemp t2 = newTempV128(); 10178 // FIXME: double rounding; use FMA primops instead 10179 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 10180 assign(t2, triop(isSUB ? opSUB : opADD, 10181 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 10182 putQReg128(dd, 10183 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2, 10184 mkexpr(t2)))); 10185 const HChar c = isD ? 'd' : 's'; 10186 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla", 10187 c, dd, c, nn, nameQReg128(mm), c, index); 10188 return True; 10189 } 10190 10191 if (size >= X10 && opcode == BITS4(1,0,0,1)) { 10192 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */ 10193 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */ 10194 Bool isD = (size & 1) == 1; 10195 Bool isMULX = bitU == 1; 10196 UInt index; 10197 if (!isD) index = (bitH << 1) | bitL; 10198 else if (isD && bitL == 0) index = bitH; 10199 else return False; // sz:L == x11 => unallocated encoding 10200 vassert(index < (isD ? 2 : 4)); 10201 IRType ity = isD ? Ity_F64 : Ity_F32; 10202 IRTemp elem = newTemp(ity); 10203 UInt mm = (bitM << 4) | mmLO4; 10204 assign(elem, getQRegLane(mm, index, ity)); 10205 IRTemp dupd = math_DUP_TO_V128(elem, ity); 10206 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 10207 IRTemp rm = mk_get_IR_rounding_mode(); 10208 IRTemp t1 = newTempV128(); 10209 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 10210 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 10211 putQReg128(dd, 10212 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2, 10213 mkexpr(t1)))); 10214 const HChar c = isD ? 'd' : 's'; 10215 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? 
"fmulx" : "fmul", 10216 c, dd, c, nn, nameQReg128(mm), c, index); 10217 return True; 10218 } 10219 10220 if (bitU == 0 10221 && (opcode == BITS4(1,0,1,1) 10222 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) { 10223 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks) 10224 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1 10225 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2 10226 /* Widens, and size refers to the narrowed lanes. */ 10227 UInt ks = 3; 10228 switch (opcode) { 10229 case BITS4(1,0,1,1): ks = 0; break; 10230 case BITS4(0,0,1,1): ks = 1; break; 10231 case BITS4(0,1,1,1): ks = 2; break; 10232 default: vassert(0); 10233 } 10234 vassert(ks >= 0 && ks <= 2); 10235 UInt mm = 32; // invalid 10236 UInt ix = 16; // invalid 10237 switch (size) { 10238 case X00: 10239 return False; // h_b_b[] case is not allowed 10240 case X01: 10241 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 10242 case X10: 10243 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 10244 case X11: 10245 return False; // q_d_d[] case is not allowed 10246 default: 10247 vassert(0); 10248 } 10249 vassert(mm < 32 && ix < 16); 10250 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n; 10251 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 10252 newTempsV128_2(&vecN, &vecD); 10253 assign(vecN, getQReg128(nn)); 10254 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 10255 assign(vecD, getQReg128(dd)); 10256 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 10257 False/*!is2*/, size, "mas"[ks], 10258 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 10259 IROp opZHI = mkVecZEROHIxxOFV128(size+1); 10260 putQReg128(dd, unop(opZHI, mkexpr(res))); 10261 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 10262 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 10263 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 10264 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI); 10265 } 10266 const HChar* nm = ks == 0 ? "sqmull" 10267 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 10268 const HChar arrNarrow = "bhsd"[size]; 10269 const HChar arrWide = "bhsd"[size+1]; 10270 DIP("%s %c%d, %c%d, v%d.%c[%u]\n", 10271 nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix); 10272 return True; 10273 } 10274 10275 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { 10276 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ 10277 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ 10278 UInt mm = 32; // invalid 10279 UInt ix = 16; // invalid 10280 switch (size) { 10281 case X00: 10282 return False; // b case is not allowed 10283 case X01: 10284 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 10285 case X10: 10286 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 10287 case X11: 10288 return False; // q case is not allowed 10289 default: 10290 vassert(0); 10291 } 10292 vassert(mm < 32 && ix < 16); 10293 Bool isR = opcode == BITS4(1,1,0,1); 10294 IRTemp res, sat1q, sat1n, vN, vM; 10295 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 10296 vN = newTempV128(); 10297 assign(vN, getQReg128(nn)); 10298 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 10299 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 10300 IROp opZHI = mkVecZEROHIxxOFV128(size); 10301 putQReg128(dd, unop(opZHI, mkexpr(res))); 10302 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 10303 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 10304 HChar ch = size == X01 ? 
'h' : 's';
10305 DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10306 return True;
10307 }
10308
10309 return False;
10310 # undef INSN
10311 }
10312
10313
10314 static
10315 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10316 {
10317 /* 31 28 22 18 15 10 9 4
10318 0 q u 011110 immh immb opcode 1 n d
10319 Decode fields: u,opcode
10320 */
10321 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10322 if (INSN(31,31) != 0
10323 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10324 return False;
10325 }
10326 UInt bitQ = INSN(30,30);
10327 UInt bitU = INSN(29,29);
10328 UInt immh = INSN(22,19);
10329 UInt immb = INSN(18,16);
10330 UInt opcode = INSN(15,11);
10331 UInt nn = INSN(9,5);
10332 UInt dd = INSN(4,0);
10333
10334 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10335 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10336 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10337 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10338 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10339 /* laneTy, shift = case immh:immb of
10340 0001:xxx -> B, SHR:8-xxx
10341 001x:xxx -> H, SHR:16-xxxx
10342 01xx:xxx -> S, SHR:32-xxxxx
10343 1xxx:xxx -> D, SHR:64-xxxxxx
10344 other -> invalid
10345 */
10346 UInt size = 0;
10347 UInt shift = 0;
10348 Bool isQ = bitQ == 1;
10349 Bool isU = bitU == 1;
10350 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10351 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10352 if (!ok || (bitQ == 0 && size == X11)) return False;
10353 vassert(size >= 0 && size <= 3);
10354 UInt lanebits = 8 << size;
10355 vassert(shift >= 1 && shift <= lanebits);
10356 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10357 IRExpr* src = getQReg128(nn);
10358 IRTemp shf = newTempV128();
10359 IRTemp res = newTempV128();
10360 if (shift == lanebits && isU) {
10361 assign(shf, mkV128(0x0000));
10362 } else {
10363 UInt nudge = 0;
10364 if (shift == lanebits) {
10365 vassert(!isU);
10366 nudge = 1;
10367 }
10368 assign(shf, binop(op, src, mkU8(shift - nudge)));
10369 }
10370 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10371 : mkexpr(shf));
10372 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10373 HChar laneCh = "bhsd"[size];
10374 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10375 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10376 : (isU ? "ushr" : "sshr");
10377 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10378 nameQReg128(dd), nLanes, laneCh,
10379 nameQReg128(nn), nLanes, laneCh, shift);
10380 return True;
10381 }
10382
10383 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10384 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10385 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10386 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10387 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10388 /* laneTy, shift = case immh:immb of
10389 0001:xxx -> B, SHR:8-xxx
10390 001x:xxx -> H, SHR:16-xxxx
10391 01xx:xxx -> S, SHR:32-xxxxx
10392 1xxx:xxx -> D, SHR:64-xxxxxx
10393 other -> invalid
10394 */
10395 UInt size = 0;
10396 UInt shift = 0;
10397 Bool isQ = bitQ == 1;
10398 Bool isU = bitU == 1;
10399 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10400 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10401 if (!ok || (bitQ == 0 && size == X11)) return False;
10402 vassert(size >= 0 && size <= 3);
10403 UInt lanebits = 8 << size;
10404 vassert(shift >= 1 && shift <= lanebits);
10405 IROp op = isU ?
mkVecRSHU(size) : mkVecRSHS(size); 10406 IRExpr* src = getQReg128(nn); 10407 IRTemp imm8 = newTemp(Ity_I8); 10408 assign(imm8, mkU8((UChar)(-shift))); 10409 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8)); 10410 IRTemp shf = newTempV128(); 10411 IRTemp res = newTempV128(); 10412 assign(shf, binop(op, src, amt)); 10413 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf)) 10414 : mkexpr(shf)); 10415 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10416 HChar laneCh = "bhsd"[size]; 10417 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10418 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra") 10419 : (isU ? "urshr" : "srshr"); 10420 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, 10421 nameQReg128(dd), nLanes, laneCh, 10422 nameQReg128(nn), nLanes, laneCh, shift); 10423 return True; 10424 } 10425 10426 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) { 10427 /* -------- 1,01000 SRI std7_std7_#imm -------- */ 10428 /* laneTy, shift = case immh:immb of 10429 0001:xxx -> B, SHR:8-xxx 10430 001x:xxx -> H, SHR:16-xxxx 10431 01xx:xxx -> S, SHR:32-xxxxx 10432 1xxx:xxx -> D, SHR:64-xxxxxx 10433 other -> invalid 10434 */ 10435 UInt size = 0; 10436 UInt shift = 0; 10437 Bool isQ = bitQ == 1; 10438 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10439 if (!ok || (bitQ == 0 && size == X11)) return False; 10440 vassert(size >= 0 && size <= 3); 10441 UInt lanebits = 8 << size; 10442 vassert(shift >= 1 && shift <= lanebits); 10443 IRExpr* src = getQReg128(nn); 10444 IRTemp res = newTempV128(); 10445 if (shift == lanebits) { 10446 assign(res, getQReg128(dd)); 10447 } else { 10448 assign(res, binop(mkVecSHRN(size), src, mkU8(shift))); 10449 IRExpr* nmask = binop(mkVecSHLN(size), 10450 mkV128(0xFFFF), mkU8(lanebits - shift)); 10451 IRTemp tmp = newTempV128(); 10452 assign(tmp, binop(Iop_OrV128, 10453 mkexpr(res), 10454 binop(Iop_AndV128, getQReg128(dd), nmask))); 10455 res = tmp; 10456 } 10457 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10458 HChar laneCh = "bhsd"[size]; 10459 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10460 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri", 10461 nameQReg128(dd), nLanes, laneCh, 10462 nameQReg128(nn), nLanes, laneCh, shift); 10463 return True; 10464 } 10465 10466 if (opcode == BITS5(0,1,0,1,0)) { 10467 /* -------- 0,01010 SHL std7_std7_#imm -------- */ 10468 /* -------- 1,01010 SLI std7_std7_#imm -------- */ 10469 /* laneTy, shift = case immh:immb of 10470 0001:xxx -> B, xxx 10471 001x:xxx -> H, xxxx 10472 01xx:xxx -> S, xxxxx 10473 1xxx:xxx -> D, xxxxxx 10474 other -> invalid 10475 */ 10476 UInt size = 0; 10477 UInt shift = 0; 10478 Bool isSLI = bitU == 1; 10479 Bool isQ = bitQ == 1; 10480 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10481 if (!ok || (bitQ == 0 && size == X11)) return False; 10482 vassert(size >= 0 && size <= 3); 10483 /* The shift encoding has opposite sign for the leftwards case. 10484 Adjust shift to compensate. 
*/ 10485 UInt lanebits = 8 << size; 10486 shift = lanebits - shift; 10487 vassert(shift >= 0 && shift < lanebits); 10488 IROp op = mkVecSHLN(size); 10489 IRExpr* src = getQReg128(nn); 10490 IRTemp res = newTempV128(); 10491 if (shift == 0) { 10492 assign(res, src); 10493 } else { 10494 assign(res, binop(op, src, mkU8(shift))); 10495 if (isSLI) { 10496 IRExpr* nmask = binop(mkVecSHRN(size), 10497 mkV128(0xFFFF), mkU8(lanebits - shift)); 10498 IRTemp tmp = newTempV128(); 10499 assign(tmp, binop(Iop_OrV128, 10500 mkexpr(res), 10501 binop(Iop_AndV128, getQReg128(dd), nmask))); 10502 res = tmp; 10503 } 10504 } 10505 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10506 HChar laneCh = "bhsd"[size]; 10507 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10508 const HChar* nm = isSLI ? "sli" : "shl"; 10509 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, 10510 nameQReg128(dd), nLanes, laneCh, 10511 nameQReg128(nn), nLanes, laneCh, shift); 10512 return True; 10513 } 10514 10515 if (opcode == BITS5(0,1,1,1,0) 10516 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) { 10517 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */ 10518 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */ 10519 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */ 10520 UInt size = 0; 10521 UInt shift = 0; 10522 Bool isQ = bitQ == 1; 10523 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10524 if (!ok || (bitQ == 0 && size == X11)) return False; 10525 vassert(size >= 0 && size <= 3); 10526 /* The shift encoding has opposite sign for the leftwards case. 10527 Adjust shift to compensate. */ 10528 UInt lanebits = 8 << size; 10529 shift = lanebits - shift; 10530 vassert(shift >= 0 && shift < lanebits); 10531 const HChar* nm = NULL; 10532 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl"; 10533 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl"; 10534 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu"; 10535 else vassert(0); 10536 IRTemp qDiff1 = IRTemp_INVALID; 10537 IRTemp qDiff2 = IRTemp_INVALID; 10538 IRTemp res = IRTemp_INVALID; 10539 IRTemp src = newTempV128(); 10540 assign(src, getQReg128(nn)); 10541 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm); 10542 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10543 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2, 10544 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128); 10545 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10546 DIP("%s %s.%s, %s.%s, #%u\n", nm, 10547 nameQReg128(dd), arr, nameQReg128(nn), arr, shift); 10548 return True; 10549 } 10550 10551 if (bitU == 0 10552 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) { 10553 /* -------- 0,10000 SHRN{,2} #imm -------- */ 10554 /* -------- 0,10001 RSHRN{,2} #imm -------- */ 10555 /* Narrows, and size is the narrow size. */ 10556 UInt size = 0; 10557 UInt shift = 0; 10558 Bool is2 = bitQ == 1; 10559 Bool isR = opcode == BITS5(1,0,0,0,1); 10560 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10561 if (!ok || size == X11) return False; 10562 vassert(shift >= 1); 10563 IRTemp t1 = newTempV128(); 10564 IRTemp t2 = newTempV128(); 10565 IRTemp t3 = newTempV128(); 10566 assign(t1, getQReg128(nn)); 10567 assign(t2, isR ? 
binop(mkVecADD(size+1), 10568 mkexpr(t1), 10569 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1)))) 10570 : mkexpr(t1)); 10571 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift))); 10572 IRTemp t4 = math_NARROW_LANES(t3, t3, size); 10573 putLO64andZUorPutHI64(is2, dd, t4); 10574 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10575 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10576 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn", 10577 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift); 10578 return True; 10579 } 10580 10581 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1) 10582 || (bitU == 1 10583 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) { 10584 /* -------- 0,10010 SQSHRN{,2} #imm -------- */ 10585 /* -------- 1,10010 UQSHRN{,2} #imm -------- */ 10586 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */ 10587 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */ 10588 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */ 10589 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */ 10590 UInt size = 0; 10591 UInt shift = 0; 10592 Bool is2 = bitQ == 1; 10593 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10594 if (!ok || size == X11) return False; 10595 vassert(shift >= 1 && shift <= (8 << size)); 10596 const HChar* nm = "??"; 10597 IROp op = Iop_INVALID; 10598 /* Decide on the name and the operation. */ 10599 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 10600 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size); 10601 } 10602 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 10603 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size); 10604 } 10605 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) { 10606 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size); 10607 } 10608 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 10609 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size); 10610 } 10611 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) { 10612 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size); 10613 } 10614 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) { 10615 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size); 10616 } 10617 else vassert(0); 10618 /* Compute the result (Q, shifted value) pair. */ 10619 IRTemp src128 = newTempV128(); 10620 assign(src128, getQReg128(nn)); 10621 IRTemp pair = newTempV128(); 10622 assign(pair, binop(op, mkexpr(src128), mkU8(shift))); 10623 /* Update the result reg */ 10624 IRTemp res64in128 = newTempV128(); 10625 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair))); 10626 putLO64andZUorPutHI64(is2, dd, res64in128); 10627 /* Update the Q flag. 
*/ 10628 IRTemp q64q64 = newTempV128(); 10629 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair))); 10630 IRTemp z128 = newTempV128(); 10631 assign(z128, mkV128(0x0000)); 10632 updateQCFLAGwithDifference(q64q64, z128); 10633 /* */ 10634 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10635 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10636 DIP("%s %s.%s, %s.%s, #%u\n", nm, 10637 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift); 10638 return True; 10639 } 10640 10641 if (opcode == BITS5(1,0,1,0,0)) { 10642 /* -------- 0,10100 SSHLL{,2} #imm -------- */ 10643 /* -------- 1,10100 USHLL{,2} #imm -------- */ 10644 /* 31 28 22 18 15 9 4 10645 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh 10646 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh 10647 where Ta,Tb,sh 10648 = case immh of 1xxx -> invalid 10649 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31) 10650 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15) 10651 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7) 10652 0000 -> AdvSIMD modified immediate (???) 10653 */ 10654 Bool isQ = bitQ == 1; 10655 Bool isU = bitU == 1; 10656 UInt immhb = (immh << 3) | immb; 10657 IRTemp src = newTempV128(); 10658 IRTemp zero = newTempV128(); 10659 IRExpr* res = NULL; 10660 UInt sh = 0; 10661 const HChar* ta = "??"; 10662 const HChar* tb = "??"; 10663 assign(src, getQReg128(nn)); 10664 assign(zero, mkV128(0x0000)); 10665 if (immh & 8) { 10666 /* invalid; don't assign to res */ 10667 } 10668 else if (immh & 4) { 10669 sh = immhb - 32; 10670 vassert(sh < 32); /* so 32-sh is 1..32 */ 10671 ta = "2d"; 10672 tb = isQ ? "4s" : "2s"; 10673 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero) 10674 : mk_InterleaveLO32x4(src, zero); 10675 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh)); 10676 } 10677 else if (immh & 2) { 10678 sh = immhb - 16; 10679 vassert(sh < 16); /* so 16-sh is 1..16 */ 10680 ta = "4s"; 10681 tb = isQ ? "8h" : "4h"; 10682 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero) 10683 : mk_InterleaveLO16x8(src, zero); 10684 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh)); 10685 } 10686 else if (immh & 1) { 10687 sh = immhb - 8; 10688 vassert(sh < 8); /* so 8-sh is 1..8 */ 10689 ta = "8h"; 10690 tb = isQ ? "16b" : "8b"; 10691 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero) 10692 : mk_InterleaveLO8x16(src, zero); 10693 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh)); 10694 } else { 10695 vassert(immh == 0); 10696 /* invalid; don't assign to res */ 10697 } 10698 /* */ 10699 if (res) { 10700 putQReg128(dd, res); 10701 DIP("%cshll%s %s.%s, %s.%s, #%d\n", 10702 isU ? 'u' : 's', isQ ? "2" : "", 10703 nameQReg128(dd), ta, nameQReg128(nn), tb, sh); 10704 return True; 10705 } 10706 return False; 10707 } 10708 10709 if (opcode == BITS5(1,1,1,0,0)) { 10710 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */ 10711 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */ 10712 /* If immh is of the form 00xx, the insn is invalid. */ 10713 if (immh < BITS4(0,1,0,0)) return False; 10714 UInt size = 0; 10715 UInt fbits = 0; 10716 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 10717 /* The following holds because immh is never zero. */ 10718 vassert(ok); 10719 /* The following holds because immh >= 0100. */ 10720 vassert(size == X10 || size == X11); 10721 Bool isD = size == X11; 10722 Bool isU = bitU == 1; 10723 Bool isQ = bitQ == 1; 10724 if (isD && !isQ) return False; /* reject .1d case */ 10725 vassert(fbits >= 1 && fbits <= (isD ? 
64 : 32)); 10726 Double scale = two_to_the_minus(fbits); 10727 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 10728 : IRExpr_Const(IRConst_F32( (Float)scale )); 10729 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 10730 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32) 10731 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 10732 IRType tyF = isD ? Ity_F64 : Ity_F32; 10733 IRType tyI = isD ? Ity_I64 : Ity_I32; 10734 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2); 10735 vassert(nLanes == 2 || nLanes == 4); 10736 for (UInt i = 0; i < nLanes; i++) { 10737 IRTemp src = newTemp(tyI); 10738 IRTemp res = newTemp(tyF); 10739 IRTemp rm = mk_get_IR_rounding_mode(); 10740 assign(src, getQRegLane(nn, i, tyI)); 10741 assign(res, triop(opMUL, mkexpr(rm), 10742 binop(opCVT, mkexpr(rm), mkexpr(src)), 10743 scaleE)); 10744 putQRegLane(dd, i, mkexpr(res)); 10745 } 10746 if (!isQ) { 10747 putQRegLane(dd, 1, mkU64(0)); 10748 } 10749 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10750 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf", 10751 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits); 10752 return True; 10753 } 10754 10755 if (opcode == BITS5(1,1,1,1,1)) { 10756 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */ 10757 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */ 10758 /* If immh is of the form 00xx, the insn is invalid. */ 10759 if (immh < BITS4(0,1,0,0)) return False; 10760 UInt size = 0; 10761 UInt fbits = 0; 10762 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 10763 /* The following holds because immh is never zero. */ 10764 vassert(ok); 10765 /* The following holds because immh >= 0100. */ 10766 vassert(size == X10 || size == X11); 10767 Bool isD = size == X11; 10768 Bool isU = bitU == 1; 10769 Bool isQ = bitQ == 1; 10770 if (isD && !isQ) return False; /* reject .1d case */ 10771 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 10772 Double scale = two_to_the_plus(fbits); 10773 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 10774 : IRExpr_Const(IRConst_F32( (Float)scale )); 10775 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 10776 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U) 10777 : (isD ? Iop_F64toI64S : Iop_F32toI32S); 10778 IRType tyF = isD ? Ity_F64 : Ity_F32; 10779 IRType tyI = isD ? Ity_I64 : Ity_I32; 10780 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2); 10781 vassert(nLanes == 2 || nLanes == 4); 10782 for (UInt i = 0; i < nLanes; i++) { 10783 IRTemp src = newTemp(tyF); 10784 IRTemp res = newTemp(tyI); 10785 IRTemp rm = newTemp(Ity_I32); 10786 assign(src, getQRegLane(nn, i, tyF)); 10787 assign(rm, mkU32(Irrm_ZERO)); 10788 assign(res, binop(opCVT, mkexpr(rm), 10789 triop(opMUL, mkexpr(rm), 10790 mkexpr(src), scaleE))); 10791 putQRegLane(dd, i, mkexpr(res)); 10792 } 10793 if (!isQ) { 10794 putQRegLane(dd, 1, mkU64(0)); 10795 } 10796 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10797 DIP("%s %s.%s, %s.%s, #%u\n", isU ? 
"fcvtzu" : "fcvtzs", 10798 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits); 10799 return True; 10800 } 10801 10802 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10803 return False; 10804 # undef INSN 10805 } 10806 10807 10808 static 10809 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn) 10810 { 10811 /* 31 30 29 28 23 21 20 15 11 9 4 10812 0 Q U 01110 size 1 m opcode 00 n d 10813 Decode fields: u,opcode 10814 */ 10815 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10816 if (INSN(31,31) != 0 10817 || INSN(28,24) != BITS5(0,1,1,1,0) 10818 || INSN(21,21) != 1 10819 || INSN(11,10) != BITS2(0,0)) { 10820 return False; 10821 } 10822 UInt bitQ = INSN(30,30); 10823 UInt bitU = INSN(29,29); 10824 UInt size = INSN(23,22); 10825 UInt mm = INSN(20,16); 10826 UInt opcode = INSN(15,12); 10827 UInt nn = INSN(9,5); 10828 UInt dd = INSN(4,0); 10829 vassert(size < 4); 10830 Bool is2 = bitQ == 1; 10831 10832 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) { 10833 /* -------- 0,0000 SADDL{2} -------- */ 10834 /* -------- 1,0000 UADDL{2} -------- */ 10835 /* -------- 0,0010 SSUBL{2} -------- */ 10836 /* -------- 1,0010 USUBL{2} -------- */ 10837 /* Widens, and size refers to the narrowed lanes. */ 10838 if (size == X11) return False; 10839 vassert(size <= 2); 10840 Bool isU = bitU == 1; 10841 Bool isADD = opcode == BITS4(0,0,0,0); 10842 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn)); 10843 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10844 IRTemp res = newTempV128(); 10845 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10846 mkexpr(argL), mkexpr(argR))); 10847 putQReg128(dd, mkexpr(res)); 10848 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10849 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10850 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl") 10851 : (isU ? "usubl" : "ssubl"); 10852 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10853 nameQReg128(dd), arrWide, 10854 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 10855 return True; 10856 } 10857 10858 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) { 10859 /* -------- 0,0001 SADDW{2} -------- */ 10860 /* -------- 1,0001 UADDW{2} -------- */ 10861 /* -------- 0,0011 SSUBW{2} -------- */ 10862 /* -------- 1,0011 USUBW{2} -------- */ 10863 /* Widens, and size refers to the narrowed lanes. */ 10864 if (size == X11) return False; 10865 vassert(size <= 2); 10866 Bool isU = bitU == 1; 10867 Bool isADD = opcode == BITS4(0,0,0,1); 10868 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10869 IRTemp res = newTempV128(); 10870 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10871 getQReg128(nn), mkexpr(argR))); 10872 putQReg128(dd, mkexpr(res)); 10873 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10874 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10875 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw") 10876 : (isU ? "usubw" : "ssubw"); 10877 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10878 nameQReg128(dd), arrWide, 10879 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow); 10880 return True; 10881 } 10882 10883 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) { 10884 /* -------- 0,0100 ADDHN{2} -------- */ 10885 /* -------- 1,0100 RADDHN{2} -------- */ 10886 /* -------- 0,0110 SUBHN{2} -------- */ 10887 /* -------- 1,0110 RSUBHN{2} -------- */ 10888 /* Narrows, and size refers to the narrowed lanes. 
*/ 10889 if (size == X11) return False; 10890 vassert(size <= 2); 10891 const UInt shift[3] = { 8, 16, 32 }; 10892 Bool isADD = opcode == BITS4(0,1,0,0); 10893 Bool isR = bitU == 1; 10894 /* Combined elements in wide lanes */ 10895 IRTemp wide = newTempV128(); 10896 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10897 getQReg128(nn), getQReg128(mm)); 10898 if (isR) { 10899 wideE = binop(mkVecADD(size+1), 10900 wideE, 10901 mkexpr(math_VEC_DUP_IMM(size+1, 10902 1ULL << (shift[size]-1)))); 10903 } 10904 assign(wide, wideE); 10905 /* Top halves of elements, still in wide lanes */ 10906 IRTemp shrd = newTempV128(); 10907 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size]))); 10908 /* Elements now compacted into lower 64 bits */ 10909 IRTemp new64 = newTempV128(); 10910 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd))); 10911 putLO64andZUorPutHI64(is2, dd, new64); 10912 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10913 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10914 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn") 10915 : (isR ? "rsubhn" : "subhn"); 10916 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10917 nameQReg128(dd), arrNarrow, 10918 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide); 10919 return True; 10920 } 10921 10922 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) { 10923 /* -------- 0,0101 SABAL{2} -------- */ 10924 /* -------- 1,0101 UABAL{2} -------- */ 10925 /* -------- 0,0111 SABDL{2} -------- */ 10926 /* -------- 1,0111 UABDL{2} -------- */ 10927 /* Widens, and size refers to the narrowed lanes. */ 10928 if (size == X11) return False; 10929 vassert(size <= 2); 10930 Bool isU = bitU == 1; 10931 Bool isACC = opcode == BITS4(0,1,0,1); 10932 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn)); 10933 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10934 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR)); 10935 IRTemp res = newTempV128(); 10936 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd)) 10937 : mkexpr(abd)); 10938 putQReg128(dd, mkexpr(res)); 10939 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10940 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10941 const HChar* nm = isACC ? (isU ? "uabal" : "sabal") 10942 : (isU ? "uabdl" : "sabdl"); 10943 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10944 nameQReg128(dd), arrWide, 10945 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 10946 return True; 10947 } 10948 10949 if (opcode == BITS4(1,1,0,0) 10950 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) { 10951 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks) 10952 /* -------- 1,1100 UMULL{2} -------- */ // 0 10953 /* -------- 0,1000 SMLAL{2} -------- */ // 1 10954 /* -------- 1,1000 UMLAL{2} -------- */ // 1 10955 /* -------- 0,1010 SMLSL{2} -------- */ // 2 10956 /* -------- 1,1010 UMLSL{2} -------- */ // 2 10957 /* Widens, and size refers to the narrowed lanes. 
*/ 10958 UInt ks = 3; 10959 switch (opcode) { 10960 case BITS4(1,1,0,0): ks = 0; break; 10961 case BITS4(1,0,0,0): ks = 1; break; 10962 case BITS4(1,0,1,0): ks = 2; break; 10963 default: vassert(0); 10964 } 10965 vassert(ks >= 0 && ks <= 2); 10966 if (size == X11) return False; 10967 vassert(size <= 2); 10968 Bool isU = bitU == 1; 10969 IRTemp vecN = newTempV128(); 10970 IRTemp vecM = newTempV128(); 10971 IRTemp vecD = newTempV128(); 10972 assign(vecN, getQReg128(nn)); 10973 assign(vecM, getQReg128(mm)); 10974 assign(vecD, getQReg128(dd)); 10975 IRTemp res = IRTemp_INVALID; 10976 math_MULL_ACC(&res, is2, isU, size, "mas"[ks], 10977 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 10978 putQReg128(dd, mkexpr(res)); 10979 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10980 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10981 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl"); 10982 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "", 10983 nameQReg128(dd), arrWide, 10984 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 10985 return True; 10986 } 10987 10988 if (bitU == 0 10989 && (opcode == BITS4(1,1,0,1) 10990 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) { 10991 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks) 10992 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1 10993 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2 10994 /* Widens, and size refers to the narrowed lanes. */ 10995 UInt ks = 3; 10996 switch (opcode) { 10997 case BITS4(1,1,0,1): ks = 0; break; 10998 case BITS4(1,0,0,1): ks = 1; break; 10999 case BITS4(1,0,1,1): ks = 2; break; 11000 default: vassert(0); 11001 } 11002 vassert(ks >= 0 && ks <= 2); 11003 if (size == X00 || size == X11) return False; 11004 vassert(size <= 2); 11005 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n; 11006 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 11007 newTempsV128_3(&vecN, &vecM, &vecD); 11008 assign(vecN, getQReg128(nn)); 11009 assign(vecM, getQReg128(mm)); 11010 assign(vecD, getQReg128(dd)); 11011 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 11012 is2, size, "mas"[ks], 11013 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 11014 putQReg128(dd, mkexpr(res)); 11015 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 11016 updateQCFLAGwithDifference(sat1q, sat1n); 11017 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 11018 updateQCFLAGwithDifference(sat2q, sat2n); 11019 } 11020 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11021 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 11022 const HChar* nm = ks == 0 ? "sqdmull" 11023 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 11024 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 11025 nameQReg128(dd), arrWide, 11026 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 11027 return True; 11028 } 11029 11030 if (bitU == 0 && opcode == BITS4(1,1,1,0)) { 11031 /* -------- 0,1110 PMULL{2} -------- */ 11032 /* Widens, and size refers to the narrowed lanes. */ 11033 if (size != X00) return False; 11034 IRTemp res 11035 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8, 11036 getQReg128(nn), getQReg128(mm)); 11037 putQReg128(dd, mkexpr(res)); 11038 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11039 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 11040 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? 
"2" : "", 11041 nameQReg128(dd), arrNarrow, 11042 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide); 11043 return True; 11044 } 11045 11046 return False; 11047 # undef INSN 11048 } 11049 11050 11051 static 11052 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn) 11053 { 11054 /* 31 30 29 28 23 21 20 15 10 9 4 11055 0 Q U 01110 size 1 m opcode 1 n d 11056 Decode fields: u,size,opcode 11057 */ 11058 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 11059 if (INSN(31,31) != 0 11060 || INSN(28,24) != BITS5(0,1,1,1,0) 11061 || INSN(21,21) != 1 11062 || INSN(10,10) != 1) { 11063 return False; 11064 } 11065 UInt bitQ = INSN(30,30); 11066 UInt bitU = INSN(29,29); 11067 UInt size = INSN(23,22); 11068 UInt mm = INSN(20,16); 11069 UInt opcode = INSN(15,11); 11070 UInt nn = INSN(9,5); 11071 UInt dd = INSN(4,0); 11072 vassert(size < 4); 11073 11074 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) { 11075 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */ 11076 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */ 11077 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */ 11078 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */ 11079 if (size == X11) return False; 11080 Bool isADD = opcode == BITS5(0,0,0,0,0); 11081 Bool isU = bitU == 1; 11082 /* Widen both args out, do the math, narrow to final result. */ 11083 IRTemp argL = newTempV128(); 11084 IRTemp argLhi = IRTemp_INVALID; 11085 IRTemp argLlo = IRTemp_INVALID; 11086 IRTemp argR = newTempV128(); 11087 IRTemp argRhi = IRTemp_INVALID; 11088 IRTemp argRlo = IRTemp_INVALID; 11089 IRTemp resHi = newTempV128(); 11090 IRTemp resLo = newTempV128(); 11091 IRTemp res = IRTemp_INVALID; 11092 assign(argL, getQReg128(nn)); 11093 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL)); 11094 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL)); 11095 assign(argR, getQReg128(mm)); 11096 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR)); 11097 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR)); 11098 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1); 11099 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1); 11100 assign(resHi, binop(opSxR, 11101 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)), 11102 mkU8(1))); 11103 assign(resLo, binop(opSxR, 11104 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)), 11105 mkU8(1))); 11106 res = math_NARROW_LANES ( resHi, resLo, size ); 11107 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11108 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd") 11109 : (isU ? "uhsub" : "shsub"); 11110 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11111 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11112 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11113 return True; 11114 } 11115 11116 if (opcode == BITS5(0,0,0,1,0)) { 11117 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */ 11118 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */ 11119 if (bitQ == 0 && size == X11) return False; // implied 1d case 11120 Bool isU = bitU == 1; 11121 IRTemp argL = newTempV128(); 11122 IRTemp argR = newTempV128(); 11123 assign(argL, getQReg128(nn)); 11124 assign(argR, getQReg128(mm)); 11125 IRTemp res = math_RHADD(size, isU, argL, argR); 11126 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11127 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11128 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? 
"urhadd" : "srhadd", 11129 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11130 return True; 11131 } 11132 11133 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) { 11134 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */ 11135 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */ 11136 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */ 11137 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */ 11138 if (bitQ == 0 && size == X11) return False; // implied 1d case 11139 Bool isADD = opcode == BITS5(0,0,0,0,1); 11140 Bool isU = bitU == 1; 11141 IROp qop = Iop_INVALID; 11142 IROp nop = Iop_INVALID; 11143 if (isADD) { 11144 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size); 11145 nop = mkVecADD(size); 11146 } else { 11147 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size); 11148 nop = mkVecSUB(size); 11149 } 11150 IRTemp argL = newTempV128(); 11151 IRTemp argR = newTempV128(); 11152 IRTemp qres = newTempV128(); 11153 IRTemp nres = newTempV128(); 11154 assign(argL, getQReg128(nn)); 11155 assign(argR, getQReg128(mm)); 11156 assign(qres, math_MAYBE_ZERO_HI64_fromE( 11157 bitQ, binop(qop, mkexpr(argL), mkexpr(argR)))); 11158 assign(nres, math_MAYBE_ZERO_HI64_fromE( 11159 bitQ, binop(nop, mkexpr(argL), mkexpr(argR)))); 11160 putQReg128(dd, mkexpr(qres)); 11161 updateQCFLAGwithDifference(qres, nres); 11162 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd") 11163 : (isU ? "uqsub" : "sqsub"); 11164 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11165 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11166 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11167 return True; 11168 } 11169 11170 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) { 11171 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */ 11172 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */ 11173 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */ 11174 /* -------- 0,10,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */ 11175 Bool isORx = (size & 2) == 2; 11176 Bool invert = (size & 1) == 1; 11177 IRTemp res = newTempV128(); 11178 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128, 11179 getQReg128(nn), 11180 invert ? unop(Iop_NotV128, getQReg128(mm)) 11181 : getQReg128(mm))); 11182 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11183 const HChar* names[4] = { "and", "bic", "orr", "orn" }; 11184 const HChar* ar = bitQ == 1 ? 
"16b" : "8b"; 11185 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)], 11186 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar); 11187 return True; 11188 } 11189 11190 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) { 11191 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */ 11192 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */ 11193 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */ 11194 /* -------- 1,10,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */ 11195 IRTemp argD = newTempV128(); 11196 IRTemp argN = newTempV128(); 11197 IRTemp argM = newTempV128(); 11198 assign(argD, getQReg128(dd)); 11199 assign(argN, getQReg128(nn)); 11200 assign(argM, getQReg128(mm)); 11201 const IROp opXOR = Iop_XorV128; 11202 const IROp opAND = Iop_AndV128; 11203 const IROp opNOT = Iop_NotV128; 11204 IRTemp res = newTempV128(); 11205 switch (size) { 11206 case BITS2(0,0): /* EOR */ 11207 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN))); 11208 break; 11209 case BITS2(0,1): /* BSL */ 11210 assign(res, binop(opXOR, mkexpr(argM), 11211 binop(opAND, 11212 binop(opXOR, mkexpr(argM), mkexpr(argN)), 11213 mkexpr(argD)))); 11214 break; 11215 case BITS2(1,0): /* BIT */ 11216 assign(res, binop(opXOR, mkexpr(argD), 11217 binop(opAND, 11218 binop(opXOR, mkexpr(argD), mkexpr(argN)), 11219 mkexpr(argM)))); 11220 break; 11221 case BITS2(1,1): /* BIF */ 11222 assign(res, binop(opXOR, mkexpr(argD), 11223 binop(opAND, 11224 binop(opXOR, mkexpr(argD), mkexpr(argN)), 11225 unop(opNOT, mkexpr(argM))))); 11226 break; 11227 default: 11228 vassert(0); 11229 } 11230 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11231 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" }; 11232 const HChar* arr = bitQ == 1 ? "16b" : "8b"; 11233 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size], 11234 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11235 return True; 11236 } 11237 11238 if (opcode == BITS5(0,0,1,1,0)) { 11239 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s 11240 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u 11241 if (bitQ == 0 && size == X11) return False; // implied 1d case 11242 Bool isGT = bitU == 0; 11243 IRExpr* argL = getQReg128(nn); 11244 IRExpr* argR = getQReg128(mm); 11245 IRTemp res = newTempV128(); 11246 assign(res, 11247 isGT ? binop(mkVecCMPGTS(size), argL, argR) 11248 : binop(mkVecCMPGTU(size), argL, argR)); 11249 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11250 const HChar* nm = isGT ? "cmgt" : "cmhi"; 11251 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11252 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11253 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11254 return True; 11255 } 11256 11257 if (opcode == BITS5(0,0,1,1,1)) { 11258 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s 11259 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u 11260 if (bitQ == 0 && size == X11) return False; // implied 1d case 11261 Bool isGE = bitU == 0; 11262 IRExpr* argL = getQReg128(nn); 11263 IRExpr* argR = getQReg128(mm); 11264 IRTemp res = newTempV128(); 11265 assign(res, 11266 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL)) 11267 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL))); 11268 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11269 const HChar* nm = isGE ? 
"cmge" : "cmhs"; 11270 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11271 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11272 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11273 return True; 11274 } 11275 11276 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) { 11277 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */ 11278 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */ 11279 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */ 11280 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */ 11281 if (bitQ == 0 && size == X11) return False; // implied 1d case 11282 Bool isU = bitU == 1; 11283 Bool isR = opcode == BITS5(0,1,0,1,0); 11284 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size)) 11285 : (isU ? mkVecSHU(size) : mkVecSHS(size)); 11286 IRTemp res = newTempV128(); 11287 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 11288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11289 const HChar* nm = isR ? (isU ? "urshl" : "srshl") 11290 : (isU ? "ushl" : "sshl"); 11291 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11292 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11293 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11294 return True; 11295 } 11296 11297 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) { 11298 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */ 11299 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */ 11300 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */ 11301 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */ 11302 if (bitQ == 0 && size == X11) return False; // implied 1d case 11303 Bool isU = bitU == 1; 11304 Bool isR = opcode == BITS5(0,1,0,1,1); 11305 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size)) 11306 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size)); 11307 /* This is a bit tricky. If we're only interested in the lowest 64 bits 11308 of the result (viz, bitQ == 0), then we must adjust the operands to 11309 ensure that the upper part of the result, that we don't care about, 11310 doesn't pollute the returned Q value. To do this, zero out the upper 11311 operand halves beforehand. This works because it means, for the 11312 lanes we don't care about, we are shifting zero by zero, which can 11313 never saturate. */ 11314 IRTemp res256 = newTemp(Ity_V256); 11315 IRTemp resSH = newTempV128(); 11316 IRTemp resQ = newTempV128(); 11317 IRTemp zero = newTempV128(); 11318 assign(res256, binop(op, 11319 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)), 11320 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm)))); 11321 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256))); 11322 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256))); 11323 assign(zero, mkV128(0x0000)); 11324 putQReg128(dd, mkexpr(resSH)); 11325 updateQCFLAGwithDifference(resQ, zero); 11326 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl") 11327 : (isU ? 
"uqshl" : "sqshl"); 11328 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11329 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11330 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11331 return True; 11332 } 11333 11334 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) { 11335 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */ 11336 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */ 11337 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */ 11338 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */ 11339 if (bitQ == 0 && size == X11) return False; // implied 1d case 11340 Bool isU = bitU == 1; 11341 Bool isMAX = (opcode & 1) == 0; 11342 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size)) 11343 : (isU ? mkVecMINU(size) : mkVecMINS(size)); 11344 IRTemp t = newTempV128(); 11345 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 11346 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t)); 11347 const HChar* nm = isMAX ? (isU ? "umax" : "smax") 11348 : (isU ? "umin" : "smin"); 11349 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11350 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11351 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11352 return True; 11353 } 11354 11355 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) { 11356 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */ 11357 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */ 11358 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */ 11359 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */ 11360 if (size == X11) return False; // 1d/2d cases not allowed 11361 Bool isU = bitU == 1; 11362 Bool isACC = opcode == BITS5(0,1,1,1,1); 11363 vassert(size <= 2); 11364 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm)); 11365 IRTemp t2 = newTempV128(); 11366 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd)) 11367 : mkexpr(t1)); 11368 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11369 const HChar* nm = isACC ? (isU ? "uaba" : "saba") 11370 : (isU ? "uabd" : "sabd"); 11371 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11372 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11373 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11374 return True; 11375 } 11376 11377 if (opcode == BITS5(1,0,0,0,0)) { 11378 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */ 11379 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */ 11380 if (bitQ == 0 && size == X11) return False; // implied 1d case 11381 Bool isSUB = bitU == 1; 11382 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size); 11383 IRTemp t = newTempV128(); 11384 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 11385 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t)); 11386 const HChar* nm = isSUB ? "sub" : "add"; 11387 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11388 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11389 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11390 return True; 11391 } 11392 11393 if (opcode == BITS5(1,0,0,0,1)) { 11394 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0 11395 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // == 11396 if (bitQ == 0 && size == X11) return False; // implied 1d case 11397 Bool isEQ = bitU == 1; 11398 IRExpr* argL = getQReg128(nn); 11399 IRExpr* argR = getQReg128(mm); 11400 IRTemp res = newTempV128(); 11401 assign(res, 11402 isEQ ? 
binop(mkVecCMPEQ(size), argL, argR) 11403 : unop(Iop_NotV128, binop(mkVecCMPEQ(size), 11404 binop(Iop_AndV128, argL, argR), 11405 mkV128(0x0000)))); 11406 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11407 const HChar* nm = isEQ ? "cmeq" : "cmtst"; 11408 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11409 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11410 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11411 return True; 11412 } 11413 11414 if (opcode == BITS5(1,0,0,1,0)) { 11415 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */ 11416 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */ 11417 if (bitQ == 0 && size == X11) return False; // implied 1d case 11418 Bool isMLS = bitU == 1; 11419 IROp opMUL = mkVecMUL(size); 11420 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size); 11421 IRTemp res = newTempV128(); 11422 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) { 11423 assign(res, binop(opADDSUB, 11424 getQReg128(dd), 11425 binop(opMUL, getQReg128(nn), getQReg128(mm)))); 11426 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11427 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11428 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla", 11429 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11430 return True; 11431 } 11432 return False; 11433 } 11434 11435 if (opcode == BITS5(1,0,0,1,1)) { 11436 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */ 11437 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */ 11438 if (bitQ == 0 && size == X11) return False; // implied 1d case 11439 Bool isPMUL = bitU == 1; 11440 const IROp opsPMUL[4] 11441 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 11442 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size); 11443 IRTemp res = newTempV128(); 11444 if (opMUL != Iop_INVALID) { 11445 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm))); 11446 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11447 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11448 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul", 11449 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11450 return True; 11451 } 11452 return False; 11453 } 11454 11455 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) { 11456 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */ 11457 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */ 11458 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */ 11459 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */ 11460 if (size == X11) return False; 11461 Bool isU = bitU == 1; 11462 Bool isMAX = opcode == BITS5(1,0,1,0,0); 11463 IRTemp vN = newTempV128(); 11464 IRTemp vM = newTempV128(); 11465 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size)) 11466 : (isU ? mkVecMINU(size) : mkVecMINS(size)); 11467 assign(vN, getQReg128(nn)); 11468 assign(vM, getQReg128(mm)); 11469 IRTemp res128 = newTempV128(); 11470 assign(res128, 11471 binop(op, 11472 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)), 11473 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN)))); 11474 /* In the half-width case, use CatEL32x4 to extract the half-width 11475 result from the full-width result. */ 11476 IRExpr* res 11477 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128, 11478 binop(Iop_CatEvenLanes32x4, mkexpr(res128), 11479 mkexpr(res128))) 11480 : mkexpr(res128); 11481 putQReg128(dd, res); 11482 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11483 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp") 11484 : (isU ? 
"uminp" : "sminp"); 11485 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11486 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11487 return True; 11488 } 11489 11490 if (opcode == BITS5(1,0,1,1,0)) { 11491 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */ 11492 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */ 11493 if (size == X00 || size == X11) return False; 11494 Bool isR = bitU == 1; 11495 IRTemp res, sat1q, sat1n, vN, vM; 11496 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 11497 newTempsV128_2(&vN, &vM); 11498 assign(vN, getQReg128(nn)); 11499 assign(vM, getQReg128(mm)); 11500 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 11501 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11502 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID; 11503 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 11504 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11505 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 11506 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11507 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11508 return True; 11509 } 11510 11511 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) { 11512 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */ 11513 if (bitQ == 0 && size == X11) return False; // implied 1d case 11514 IRTemp vN = newTempV128(); 11515 IRTemp vM = newTempV128(); 11516 assign(vN, getQReg128(nn)); 11517 assign(vM, getQReg128(mm)); 11518 IRTemp res128 = newTempV128(); 11519 assign(res128, 11520 binop(mkVecADD(size), 11521 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)), 11522 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN)))); 11523 /* In the half-width case, use CatEL32x4 to extract the half-width 11524 result from the full-width result. */ 11525 IRExpr* res 11526 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128, 11527 binop(Iop_CatEvenLanes32x4, mkexpr(res128), 11528 mkexpr(res128))) 11529 : mkexpr(res128); 11530 putQReg128(dd, res); 11531 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11532 DIP("addp %s.%s, %s.%s, %s.%s\n", 11533 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11534 return True; 11535 } 11536 11537 if (bitU == 0 11538 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) { 11539 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11540 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11541 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11542 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11543 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 11544 Bool isD = (size & 1) == 1; 11545 if (bitQ == 0 && isD) return False; // implied 1d case 11546 Bool isMIN = (size & 2) == 2; 11547 Bool isNM = opcode == BITS5(1,1,0,0,0); 11548 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10); 11549 IRTemp res = newTempV128(); 11550 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm))); 11551 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11552 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11553 DIP("%s%s %s.%s, %s.%s, %s.%s\n", 11554 isMIN ? "fmin" : "fmax", isNM ? 
"nm" : "", 11555 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11556 return True; 11557 } 11558 11559 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) { 11560 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11561 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11562 Bool isD = (size & 1) == 1; 11563 Bool isSUB = (size & 2) == 2; 11564 if (bitQ == 0 && isD) return False; // implied 1d case 11565 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 11566 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 11567 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 11568 IRTemp rm = mk_get_IR_rounding_mode(); 11569 IRTemp t1 = newTempV128(); 11570 IRTemp t2 = newTempV128(); 11571 // FIXME: double rounding; use FMA primops instead 11572 assign(t1, triop(opMUL, 11573 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11574 assign(t2, triop(isSUB ? opSUB : opADD, 11575 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 11576 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11577 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11578 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla", 11579 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11580 return True; 11581 } 11582 11583 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) { 11584 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11585 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11586 Bool isD = (size & 1) == 1; 11587 Bool isSUB = (size & 2) == 2; 11588 if (bitQ == 0 && isD) return False; // implied 1d case 11589 const IROp ops[4] 11590 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 }; 11591 IROp op = ops[size]; 11592 IRTemp rm = mk_get_IR_rounding_mode(); 11593 IRTemp t1 = newTempV128(); 11594 IRTemp t2 = newTempV128(); 11595 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11596 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1)); 11597 putQReg128(dd, mkexpr(t2)); 11598 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11599 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd", 11600 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11601 return True; 11602 } 11603 11604 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) { 11605 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11606 Bool isD = (size & 1) == 1; 11607 if (bitQ == 0 && isD) return False; // implied 1d case 11608 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 11609 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 11610 IRTemp rm = mk_get_IR_rounding_mode(); 11611 IRTemp t1 = newTempV128(); 11612 IRTemp t2 = newTempV128(); 11613 // FIXME: use Abd primop instead? 11614 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11615 assign(t2, unop(opABS, mkexpr(t1))); 11616 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11617 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11618 DIP("fabd %s.%s, %s.%s, %s.%s\n", 11619 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11620 return True; 11621 } 11622 11623 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) { 11624 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11625 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11626 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 
11627 Bool isD = (size & 1) == 1; 11628 Bool isMULX = bitU == 0; 11629 if (bitQ == 0 && isD) return False; // implied 1d case 11630 IRTemp rm = mk_get_IR_rounding_mode(); 11631 IRTemp t1 = newTempV128(); 11632 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4, 11633 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11634 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11635 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11636 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul", 11637 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11638 return True; 11639 } 11640 11641 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) { 11642 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11643 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11644 Bool isD = (size & 1) == 1; 11645 if (bitQ == 0 && isD) return False; // implied 1d case 11646 Bool isGE = bitU == 1; 11647 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4) 11648 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4); 11649 IRTemp t1 = newTempV128(); 11650 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd 11651 : binop(opCMP, getQReg128(nn), getQReg128(mm))); 11652 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11653 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11654 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq", 11655 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11656 return True; 11657 } 11658 11659 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) { 11660 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11661 Bool isD = (size & 1) == 1; 11662 if (bitQ == 0 && isD) return False; // implied 1d case 11663 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 11664 IRTemp t1 = newTempV128(); 11665 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd 11666 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11667 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11668 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt", 11669 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11670 return True; 11671 } 11672 11673 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) { 11674 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11675 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11676 Bool isD = (size & 1) == 1; 11677 Bool isGT = (size & 2) == 2; 11678 if (bitQ == 0 && isD) return False; // implied 1d case 11679 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4) 11680 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4); 11681 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 11682 IRTemp t1 = newTempV128(); 11683 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)), 11684 unop(opABS, getQReg128(nn)))); // swapd 11685 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11686 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11687 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? 
"facgt" : "facge", 11688 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11689 return True; 11690 } 11691 11692 if (bitU == 1 11693 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) { 11694 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11695 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11696 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11697 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11698 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 11699 Bool isD = (size & 1) == 1; 11700 if (bitQ == 0 && isD) return False; // implied 1d case 11701 Bool isMIN = (size & 2) == 2; 11702 Bool isNM = opcode == BITS5(1,1,0,0,0); 11703 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2); 11704 IRTemp srcN = newTempV128(); 11705 IRTemp srcM = newTempV128(); 11706 IRTemp preL = IRTemp_INVALID; 11707 IRTemp preR = IRTemp_INVALID; 11708 assign(srcN, getQReg128(nn)); 11709 assign(srcM, getQReg128(mm)); 11710 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, 11711 srcM, srcN, isD, bitQ); 11712 putQReg128( 11713 dd, math_MAYBE_ZERO_HI64_fromE( 11714 bitQ, 11715 binop(opMXX, mkexpr(preL), mkexpr(preR)))); 11716 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11717 DIP("%s%sp %s.%s, %s.%s, %s.%s\n", 11718 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", 11719 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11720 return True; 11721 } 11722 11723 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) { 11724 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11725 Bool isD = size == X01; 11726 if (bitQ == 0 && isD) return False; // implied 1d case 11727 IRTemp srcN = newTempV128(); 11728 IRTemp srcM = newTempV128(); 11729 IRTemp preL = IRTemp_INVALID; 11730 IRTemp preR = IRTemp_INVALID; 11731 assign(srcN, getQReg128(nn)); 11732 assign(srcM, getQReg128(mm)); 11733 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, 11734 srcM, srcN, isD, bitQ); 11735 putQReg128( 11736 dd, math_MAYBE_ZERO_HI64_fromE( 11737 bitQ, 11738 triop(mkVecADDF(isD ? 3 : 2), 11739 mkexpr(mk_get_IR_rounding_mode()), 11740 mkexpr(preL), mkexpr(preR)))); 11741 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11742 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp", 11743 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11744 return True; 11745 } 11746 11747 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) { 11748 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11749 Bool isD = (size & 1) == 1; 11750 if (bitQ == 0 && isD) return False; // implied 1d case 11751 vassert(size <= 1); 11752 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 }; 11753 IROp op = ops[size]; 11754 IRTemp rm = mk_get_IR_rounding_mode(); 11755 IRTemp t1 = newTempV128(); 11756 IRTemp t2 = newTempV128(); 11757 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11758 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1)); 11759 putQReg128(dd, mkexpr(t2)); 11760 const HChar* arr = bitQ == 0 ? "2s" : (isD ? 
"2d" : "4s"); 11761 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv", 11762 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11763 return True; 11764 } 11765 11766 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) { 11767 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11768 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11769 Bool isSQRT = (size & 2) == 2; 11770 Bool isD = (size & 1) == 1; 11771 if (bitQ == 0 && isD) return False; // implied 1d case 11772 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4) 11773 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4); 11774 IRTemp res = newTempV128(); 11775 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 11776 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11777 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11778 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps", 11779 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11780 return True; 11781 } 11782 11783 return False; 11784 # undef INSN 11785 } 11786 11787 11788 static 11789 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) 11790 { 11791 /* 31 30 29 28 23 21 16 11 9 4 11792 0 Q U 01110 size 10000 opcode 10 n d 11793 Decode fields: U,size,opcode 11794 */ 11795 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 11796 if (INSN(31,31) != 0 11797 || INSN(28,24) != BITS5(0,1,1,1,0) 11798 || INSN(21,17) != BITS5(1,0,0,0,0) 11799 || INSN(11,10) != BITS2(1,0)) { 11800 return False; 11801 } 11802 UInt bitQ = INSN(30,30); 11803 UInt bitU = INSN(29,29); 11804 UInt size = INSN(23,22); 11805 UInt opcode = INSN(16,12); 11806 UInt nn = INSN(9,5); 11807 UInt dd = INSN(4,0); 11808 vassert(size < 4); 11809 11810 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) { 11811 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */ 11812 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */ 11813 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */ 11814 const IROp iops[3] = { Iop_Reverse8sIn64_x2, 11815 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 }; 11816 vassert(size <= 2); 11817 IRTemp res = newTempV128(); 11818 assign(res, unop(iops[size], getQReg128(nn))); 11819 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11820 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11821 DIP("%s %s.%s, %s.%s\n", "rev64", 11822 nameQReg128(dd), arr, nameQReg128(nn), arr); 11823 return True; 11824 } 11825 11826 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) { 11827 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */ 11828 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */ 11829 Bool isH = size == X01; 11830 IRTemp res = newTempV128(); 11831 IROp iop = isH ? 
Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4; 11832 assign(res, unop(iop, getQReg128(nn))); 11833 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11834 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11835 DIP("%s %s.%s, %s.%s\n", "rev32", 11836 nameQReg128(dd), arr, nameQReg128(nn), arr); 11837 return True; 11838 } 11839 11840 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) { 11841 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */ 11842 IRTemp res = newTempV128(); 11843 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn))); 11844 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11845 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11846 DIP("%s %s.%s, %s.%s\n", "rev16", 11847 nameQReg128(dd), arr, nameQReg128(nn), arr); 11848 return True; 11849 } 11850 11851 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) { 11852 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */ 11853 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */ 11854 /* -------- 0,xx,00110: SADALP std6_std6 -------- */ 11855 /* -------- 1,xx,00110: UADALP std6_std6 -------- */ 11856 /* Widens, and size refers to the narrow size. */ 11857 if (size == X11) return False; // no 1d or 2d cases 11858 Bool isU = bitU == 1; 11859 Bool isACC = opcode == BITS5(0,0,1,1,0); 11860 IRTemp src = newTempV128(); 11861 IRTemp sum = newTempV128(); 11862 IRTemp res = newTempV128(); 11863 assign(src, getQReg128(nn)); 11864 assign(sum, 11865 binop(mkVecADD(size+1), 11866 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES( 11867 isU, True/*fromOdd*/, size, mkexpr(src))), 11868 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES( 11869 isU, False/*!fromOdd*/, size, mkexpr(src))))); 11870 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd)) 11871 : mkexpr(sum)); 11872 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11873 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11874 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1); 11875 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp") 11876 : (isU ? "uaddlp" : "saddlp"), 11877 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow); 11878 return True; 11879 } 11880 11881 if (opcode == BITS5(0,0,0,1,1)) { 11882 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */ 11883 /* -------- 1,xx,00011: USQADD std7_std7 -------- */ 11884 if (bitQ == 0 && size == X11) return False; // implied 1d case 11885 Bool isUSQADD = bitU == 1; 11886 /* This is switched (in the US vs SU sense) deliberately. 11887 SUQADD corresponds to the ExtUSsatSS variants and 11888 USQADD corresponds to the ExtSUsatUU variants. 11889 See libvex_ir for more details. */ 11890 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size) 11891 : mkVecQADDEXTUSSATSS(size); 11892 IROp nop = mkVecADD(size); 11893 IRTemp argL = newTempV128(); 11894 IRTemp argR = newTempV128(); 11895 IRTemp qres = newTempV128(); 11896 IRTemp nres = newTempV128(); 11897 /* Because the two arguments to the addition are implicitly 11898 extended differently (one signedly, the other unsignedly) it is 11899 important to present them to the primop in the correct order. */ 11900 assign(argL, getQReg128(nn)); 11901 assign(argR, getQReg128(dd)); 11902 assign(qres, math_MAYBE_ZERO_HI64_fromE( 11903 bitQ, binop(qop, mkexpr(argL), mkexpr(argR)))); 11904 assign(nres, math_MAYBE_ZERO_HI64_fromE( 11905 bitQ, binop(nop, mkexpr(argL), mkexpr(argR)))); 11906 putQReg128(dd, mkexpr(qres)); 11907 updateQCFLAGwithDifference(qres, nres); 11908 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11909 DIP("%s %s.%s, %s.%s\n", isUSQADD ? 
"usqadd" : "suqadd", 11910 nameQReg128(dd), arr, nameQReg128(nn), arr); 11911 return True; 11912 } 11913 11914 if (opcode == BITS5(0,0,1,0,0)) { 11915 /* -------- 0,xx,00100: CLS std6_std6 -------- */ 11916 /* -------- 1,xx,00100: CLZ std6_std6 -------- */ 11917 if (size == X11) return False; // no 1d or 2d cases 11918 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 }; 11919 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 }; 11920 Bool isCLZ = bitU == 1; 11921 IRTemp res = newTempV128(); 11922 vassert(size <= 2); 11923 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn))); 11924 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11925 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11926 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls", 11927 nameQReg128(dd), arr, nameQReg128(nn), arr); 11928 return True; 11929 } 11930 11931 if (size == X00 && opcode == BITS5(0,0,1,0,1)) { 11932 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */ 11933 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */ 11934 IRTemp res = newTempV128(); 11935 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn))); 11936 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11937 const HChar* arr = nameArr_Q_SZ(bitQ, 0); 11938 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not", 11939 nameQReg128(dd), arr, nameQReg128(nn), arr); 11940 return True; 11941 } 11942 11943 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) { 11944 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */ 11945 IRTemp res = newTempV128(); 11946 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn))); 11947 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11948 const HChar* arr = nameArr_Q_SZ(bitQ, 0); 11949 DIP("%s %s.%s, %s.%s\n", "rbit", 11950 nameQReg128(dd), arr, nameQReg128(nn), arr); 11951 return True; 11952 } 11953 11954 if (opcode == BITS5(0,0,1,1,1)) { 11955 /* -------- 0,xx,00111 SQABS std7_std7 -------- */ 11956 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */ 11957 if (bitQ == 0 && size == X11) return False; // implied 1d case 11958 Bool isNEG = bitU == 1; 11959 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID; 11960 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW, 11961 getQReg128(nn), size ); 11962 IRTemp qres = newTempV128(), nres = newTempV128(); 11963 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW)); 11964 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW)); 11965 putQReg128(dd, mkexpr(qres)); 11966 updateQCFLAGwithDifference(qres, nres); 11967 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11968 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs", 11969 nameQReg128(dd), arr, nameQReg128(nn), arr); 11970 return True; 11971 } 11972 11973 if (opcode == BITS5(0,1,0,0,0)) { 11974 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0 11975 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0 11976 if (bitQ == 0 && size == X11) return False; // implied 1d case 11977 Bool isGT = bitU == 0; 11978 IRExpr* argL = getQReg128(nn); 11979 IRExpr* argR = mkV128(0x0000); 11980 IRTemp res = newTempV128(); 11981 IROp opGTS = mkVecCMPGTS(size); 11982 assign(res, isGT ? binop(opGTS, argL, argR) 11983 : unop(Iop_NotV128, binop(opGTS, argR, argL))); 11984 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11985 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11986 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? 
"gt" : "ge", 11987 nameQReg128(dd), arr, nameQReg128(nn), arr); 11988 return True; 11989 } 11990 11991 if (opcode == BITS5(0,1,0,0,1)) { 11992 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0 11993 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0 11994 if (bitQ == 0 && size == X11) return False; // implied 1d case 11995 Bool isEQ = bitU == 0; 11996 IRExpr* argL = getQReg128(nn); 11997 IRExpr* argR = mkV128(0x0000); 11998 IRTemp res = newTempV128(); 11999 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR) 12000 : unop(Iop_NotV128, 12001 binop(mkVecCMPGTS(size), argL, argR))); 12002 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12003 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12004 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le", 12005 nameQReg128(dd), arr, nameQReg128(nn), arr); 12006 return True; 12007 } 12008 12009 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) { 12010 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0 12011 if (bitQ == 0 && size == X11) return False; // implied 1d case 12012 IRExpr* argL = getQReg128(nn); 12013 IRExpr* argR = mkV128(0x0000); 12014 IRTemp res = newTempV128(); 12015 assign(res, binop(mkVecCMPGTS(size), argR, argL)); 12016 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12017 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12018 DIP("cm%s %s.%s, %s.%s, #0\n", "lt", 12019 nameQReg128(dd), arr, nameQReg128(nn), arr); 12020 return True; 12021 } 12022 12023 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) { 12024 /* -------- 0,xx,01011: ABS std7_std7 -------- */ 12025 if (bitQ == 0 && size == X11) return False; // implied 1d case 12026 IRTemp res = newTempV128(); 12027 assign(res, unop(mkVecABS(size), getQReg128(nn))); 12028 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12029 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12030 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr); 12031 return True; 12032 } 12033 12034 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) { 12035 /* -------- 1,xx,01011: NEG std7_std7 -------- */ 12036 if (bitQ == 0 && size == X11) return False; // implied 1d case 12037 IRTemp res = newTempV128(); 12038 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn))); 12039 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12040 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12041 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr); 12042 return True; 12043 } 12044 12045 UInt ix = 0; /*INVALID*/ 12046 if (size >= X10) { 12047 switch (opcode) { 12048 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break; 12049 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break; 12050 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; 12051 default: break; 12052 } 12053 } 12054 if (ix > 0) { 12055 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */ 12056 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */ 12057 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */ 12058 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */ 12059 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */ 12060 if (bitQ == 0 && size == X11) return False; // implied 1d case 12061 Bool isD = size == X11; 12062 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; 12063 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 12064 IROp opCmpLT = isD ? 
Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 12065 IROp opCmp = Iop_INVALID; 12066 Bool swap = False; 12067 const HChar* nm = "??"; 12068 switch (ix) { 12069 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break; 12070 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break; 12071 case 3: nm = "fcmlt"; opCmp = opCmpLT; break; 12072 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break; 12073 case 5: nm = "fcmle"; opCmp = opCmpLE; break; 12074 default: vassert(0); 12075 } 12076 IRExpr* zero = mkV128(0x0000); 12077 IRTemp res = newTempV128(); 12078 assign(res, swap ? binop(opCmp, zero, getQReg128(nn)) 12079 : binop(opCmp, getQReg128(nn), zero)); 12080 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12081 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12082 DIP("%s %s.%s, %s.%s, #0.0\n", nm, 12083 nameQReg128(dd), arr, nameQReg128(nn), arr); 12084 return True; 12085 } 12086 12087 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) { 12088 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */ 12089 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */ 12090 if (bitQ == 0 && size == X11) return False; // implied 1d case 12091 Bool isFNEG = bitU == 1; 12092 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2) 12093 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2); 12094 IRTemp res = newTempV128(); 12095 assign(res, unop(op, getQReg128(nn))); 12096 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12097 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12098 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs", 12099 nameQReg128(dd), arr, nameQReg128(nn), arr); 12100 return True; 12101 } 12102 12103 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 12104 /* -------- 0,xx,10010: XTN{,2} -------- */ 12105 if (size == X11) return False; 12106 vassert(size < 3); 12107 Bool is2 = bitQ == 1; 12108 IROp opN = mkVecNARROWUN(size); 12109 IRTemp resN = newTempV128(); 12110 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn)))); 12111 putLO64andZUorPutHI64(is2, dd, resN); 12112 const HChar* nm = "xtn"; 12113 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12114 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12115 DIP("%s%s %s.%s, %s.%s\n", is2 ? 
"2" : "", nm, 12116 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12117 return True; 12118 } 12119 12120 if (opcode == BITS5(1,0,1,0,0) 12121 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) { 12122 /* -------- 0,xx,10100: SQXTN{,2} -------- */ 12123 /* -------- 1,xx,10100: UQXTN{,2} -------- */ 12124 /* -------- 1,xx,10010: SQXTUN{,2} -------- */ 12125 if (size == X11) return False; 12126 vassert(size < 3); 12127 Bool is2 = bitQ == 1; 12128 IROp opN = Iop_INVALID; 12129 Bool zWiden = True; 12130 const HChar* nm = "??"; 12131 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) { 12132 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False; 12133 } 12134 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) { 12135 opN = mkVecQNARROWUNUU(size); nm = "uqxtn"; 12136 } 12137 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 12138 opN = mkVecQNARROWUNSU(size); nm = "sqxtun"; 12139 } 12140 else vassert(0); 12141 IRTemp src = newTempV128(); 12142 assign(src, getQReg128(nn)); 12143 IRTemp resN = newTempV128(); 12144 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src)))); 12145 putLO64andZUorPutHI64(is2, dd, resN); 12146 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/, 12147 size, mkexpr(resN)); 12148 updateQCFLAGwithDifference(src, resW); 12149 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12150 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12151 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm, 12152 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12153 return True; 12154 } 12155 12156 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 12157 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */ 12158 /* Widens, and size is the narrow size. */ 12159 if (size == X11) return False; 12160 Bool is2 = bitQ == 1; 12161 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size); 12162 IROp opSHL = mkVecSHLN(size+1); 12163 IRTemp src = newTempV128(); 12164 IRTemp res = newTempV128(); 12165 assign(src, getQReg128(nn)); 12166 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)), 12167 mkU8(8 << size))); 12168 putQReg128(dd, mkexpr(res)); 12169 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12170 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12171 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "", 12172 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size); 12173 return True; 12174 } 12175 12176 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) { 12177 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */ 12178 UInt nLanes = size == X00 ? 4 : 2; 12179 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64; 12180 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32; 12181 IRTemp rm = mk_get_IR_rounding_mode(); 12182 IRTemp src[nLanes]; 12183 for (UInt i = 0; i < nLanes; i++) { 12184 src[i] = newTemp(srcTy); 12185 assign(src[i], getQRegLane(nn, i, srcTy)); 12186 } 12187 for (UInt i = 0; i < nLanes; i++) { 12188 putQRegLane(dd, nLanes * bitQ + i, 12189 binop(opCvt, mkexpr(rm), mkexpr(src[i]))); 12190 } 12191 if (bitQ == 0) { 12192 putQRegLane(dd, 1, mkU64(0)); 12193 } 12194 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12195 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12196 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12197 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12198 return True; 12199 } 12200 12201 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) { 12202 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */ 12203 /* Using Irrm_NEAREST here isn't right. 
The docs say "round to 12204 odd" but I don't know what that really means. */ 12205 IRType srcTy = Ity_F64; 12206 IROp opCvt = Iop_F64toF32; 12207 IRTemp src[2]; 12208 for (UInt i = 0; i < 2; i++) { 12209 src[i] = newTemp(srcTy); 12210 assign(src[i], getQRegLane(nn, i, srcTy)); 12211 } 12212 for (UInt i = 0; i < 2; i++) { 12213 putQRegLane(dd, 2 * bitQ + i, 12214 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i]))); 12215 } 12216 if (bitQ == 0) { 12217 putQRegLane(dd, 1, mkU64(0)); 12218 } 12219 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12220 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12221 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12222 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12223 return True; 12224 } 12225 12226 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) { 12227 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */ 12228 UInt nLanes = size == X00 ? 4 : 2; 12229 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32; 12230 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64; 12231 IRTemp src[nLanes]; 12232 for (UInt i = 0; i < nLanes; i++) { 12233 src[i] = newTemp(srcTy); 12234 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy)); 12235 } 12236 for (UInt i = 0; i < nLanes; i++) { 12237 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i]))); 12238 } 12239 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12240 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12241 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12242 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow); 12243 return True; 12244 } 12245 12246 ix = 0; 12247 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) { 12248 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0)); 12249 // = 1 + bitU[0]:size[1]:opcode[0] 12250 vassert(ix >= 1 && ix <= 8); 12251 if (ix == 7) ix = 0; 12252 } 12253 if (ix > 0) { 12254 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */ 12255 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */ 12256 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */ 12257 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */ 12258 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */ 12259 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */ 12260 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */ 12261 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */ 12262 /* rm plan: 12263 FRINTN: tieeven -- !! FIXME KLUDGED !! 12264 FRINTM: -inf 12265 FRINTP: +inf 12266 FRINTZ: zero 12267 FRINTA: tieaway -- !! FIXME KLUDGED !! 12268 FRINTX: per FPCR + "exact = TRUE" 12269 FRINTI: per FPCR 12270 */ 12271 Bool isD = (size & 1) == 1; 12272 if (bitQ == 0 && isD) return False; // implied 1d case 12273 12274 IRTemp irrmRM = mk_get_IR_rounding_mode(); 12275 12276 UChar ch = '?'; 12277 IRTemp irrm = newTemp(Ity_I32); 12278 switch (ix) { 12279 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break; 12280 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break; 12281 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break; 12282 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break; 12283 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 12284 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break; 12285 // I am unsure about the following, due to the "integral exact" 12286 // description in the manual. What does it mean? 
(frintx, that is)
12287             case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12288             case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12289             default: vassert(0);
12290          }
12291
12292          IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12293          if (isD) {
12294             for (UInt i = 0; i < 2; i++) {
12295                putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12296                                         getQRegLane(nn, i, Ity_F64)));
12297             }
12298          } else {
12299             UInt n = bitQ==1 ? 4 : 2;
12300             for (UInt i = 0; i < n; i++) {
12301                putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12302                                         getQRegLane(nn, i, Ity_F32)));
12303             }
12304             if (bitQ == 0)
12305                putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12306          }
12307          const HChar* arr = nameArr_Q_SZ(bitQ, size);
12308          DIP("frint%c %s.%s, %s.%s\n", ch,
12309              nameQReg128(dd), arr, nameQReg128(nn), arr);
12310          return True;
12311       }
12312
12313       ix = 0; /*INVALID*/
12314       switch (opcode) {
12315          case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12316          case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12317          case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12318          default: break;
12319       }
12320       if (ix > 0) {
12321          /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12322          /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12323          /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12324          /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12325          /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12326          /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12327          /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12328          /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12329          /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12330          /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12331          Bool isD = (size & 1) == 1;
12332          if (bitQ == 0 && isD) return False; // implied 1d case
12333
12334          IRRoundingMode irrm = 8; /*impossible*/
12335          HChar ch = '?';
12336          switch (ix) {
12337             case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12338             case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12339             case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12340             case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12341             case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12342             default: vassert(0);
12343          }
12344          IROp cvt = Iop_INVALID;
12345          if (bitU == 1) {
12346             cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12347          } else {
12348             cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12349          }
12350          if (isD) {
12351             for (UInt i = 0; i < 2; i++) {
12352                putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12353                                         getQRegLane(nn, i, Ity_F64)));
12354             }
12355          } else {
12356             UInt n = bitQ==1 ? 4 : 2;
12357             for (UInt i = 0; i < n; i++) {
12358                putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12359                                         getQRegLane(nn, i, Ity_F32)));
12360             }
12361             if (bitQ == 0)
12362                putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12363          }
12364          const HChar* arr = nameArr_Q_SZ(bitQ, size);
12365          DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12366              nameQReg128(dd), arr, nameQReg128(nn), arr);
12367          return True;
12368       }
12369
12370       if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12371          /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12372          /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12373          Bool isREC = bitU == 0;
12374          IROp op = isREC ?
Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4; 12375 IRTemp res = newTempV128(); 12376 assign(res, unop(op, getQReg128(nn))); 12377 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12378 const HChar* nm = isREC ? "urecpe" : "ursqrte"; 12379 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12380 DIP("%s %s.%s, %s.%s\n", nm, 12381 nameQReg128(dd), arr, nameQReg128(nn), arr); 12382 return True; 12383 } 12384 12385 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) { 12386 /* -------- 0,0x,11101: SCVTF -------- */ 12387 /* -------- 1,0x,11101: UCVTF -------- */ 12388 /* 31 28 22 21 15 9 4 12389 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn 12390 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn 12391 with laneage: 12392 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D 12393 */ 12394 Bool isQ = bitQ == 1; 12395 Bool isU = bitU == 1; 12396 Bool isF64 = (size & 1) == 1; 12397 if (isQ || !isF64) { 12398 IRType tyF = Ity_INVALID, tyI = Ity_INVALID; 12399 UInt nLanes = 0; 12400 Bool zeroHI = False; 12401 const HChar* arrSpec = NULL; 12402 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec, 12403 isQ, isF64 ); 12404 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32) 12405 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32); 12406 IRTemp rm = mk_get_IR_rounding_mode(); 12407 UInt i; 12408 vassert(ok); /* the 'if' above should ensure this */ 12409 for (i = 0; i < nLanes; i++) { 12410 putQRegLane(dd, i, 12411 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI))); 12412 } 12413 if (zeroHI) { 12414 putQRegLane(dd, 1, mkU64(0)); 12415 } 12416 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's', 12417 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 12418 return True; 12419 } 12420 /* else fall through */ 12421 } 12422 12423 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) { 12424 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */ 12425 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */ 12426 Bool isSQRT = bitU == 1; 12427 Bool isD = (size & 1) == 1; 12428 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4) 12429 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4); 12430 if (bitQ == 0 && isD) return False; // implied 1d case 12431 IRTemp resV = newTempV128(); 12432 assign(resV, unop(op, getQReg128(nn))); 12433 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV)); 12434 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12435 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe", 12436 nameQReg128(dd), arr, nameQReg128(nn), arr); 12437 return True; 12438 } 12439 12440 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) { 12441 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */ 12442 Bool isD = (size & 1) == 1; 12443 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4; 12444 if (bitQ == 0 && isD) return False; // implied 1d case 12445 IRTemp resV = newTempV128(); 12446 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()), 12447 getQReg128(nn))); 12448 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV)); 12449 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12450 DIP("%s %s.%s, %s.%s\n", "fsqrt", 12451 nameQReg128(dd), arr, nameQReg128(nn), arr); 12452 return True; 12453 } 12454 12455 return False; 12456 # undef INSN 12457 } 12458 12459 12460 static 12461 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn) 12462 { 12463 /* 31 28 23 21 20 19 15 11 9 4 12464 0 Q U 01111 size L M m opcode H 0 n d 12465 Decode fields are: u,size,opcode 12466 M is really part of the mm register number. 
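      (For the S and D lane sizes the register number is M:m, giving access
      to v0..v31; for the H lane size only m is used and M instead
      contributes to the element index -- see the per-size switches below.)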
Individual 12467 cases need to inspect L and H though. 12468 */ 12469 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12470 if (INSN(31,31) != 0 12471 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) { 12472 return False; 12473 } 12474 UInt bitQ = INSN(30,30); 12475 UInt bitU = INSN(29,29); 12476 UInt size = INSN(23,22); 12477 UInt bitL = INSN(21,21); 12478 UInt bitM = INSN(20,20); 12479 UInt mmLO4 = INSN(19,16); 12480 UInt opcode = INSN(15,12); 12481 UInt bitH = INSN(11,11); 12482 UInt nn = INSN(9,5); 12483 UInt dd = INSN(4,0); 12484 vassert(size < 4); 12485 vassert(bitH < 2 && bitM < 2 && bitL < 2); 12486 12487 if (bitU == 0 && size >= X10 12488 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) { 12489 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12490 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12491 if (bitQ == 0 && size == X11) return False; // implied 1d case 12492 Bool isD = (size & 1) == 1; 12493 Bool isSUB = opcode == BITS4(0,1,0,1); 12494 UInt index; 12495 if (!isD) index = (bitH << 1) | bitL; 12496 else if (isD && bitL == 0) index = bitH; 12497 else return False; // sz:L == x11 => unallocated encoding 12498 vassert(index < (isD ? 2 : 4)); 12499 IRType ity = isD ? Ity_F64 : Ity_F32; 12500 IRTemp elem = newTemp(ity); 12501 UInt mm = (bitM << 4) | mmLO4; 12502 assign(elem, getQRegLane(mm, index, ity)); 12503 IRTemp dupd = math_DUP_TO_V128(elem, ity); 12504 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 12505 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 12506 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 12507 IRTemp rm = mk_get_IR_rounding_mode(); 12508 IRTemp t1 = newTempV128(); 12509 IRTemp t2 = newTempV128(); 12510 // FIXME: double rounding; use FMA primops instead 12511 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 12512 assign(t2, triop(isSUB ? opSUB : opADD, 12513 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 12514 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 12515 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 12516 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla", 12517 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), 12518 isD ? 'd' : 's', index); 12519 return True; 12520 } 12521 12522 if (size >= X10 && opcode == BITS4(1,0,0,1)) { 12523 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12524 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12525 if (bitQ == 0 && size == X11) return False; // implied 1d case 12526 Bool isD = (size & 1) == 1; 12527 Bool isMULX = bitU == 1; 12528 UInt index; 12529 if (!isD) index = (bitH << 1) | bitL; 12530 else if (isD && bitL == 0) index = bitH; 12531 else return False; // sz:L == x11 => unallocated encoding 12532 vassert(index < (isD ? 2 : 4)); 12533 IRType ity = isD ? Ity_F64 : Ity_F32; 12534 IRTemp elem = newTemp(ity); 12535 UInt mm = (bitM << 4) | mmLO4; 12536 assign(elem, getQRegLane(mm, index, ity)); 12537 IRTemp dupd = math_DUP_TO_V128(elem, ity); 12538 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 12539 IRTemp res = newTempV128(); 12540 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4, 12541 mkexpr(mk_get_IR_rounding_mode()), 12542 getQReg128(nn), mkexpr(dupd))); 12543 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12544 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 12545 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", 12546 isMULX ? 
"fmulx" : "fmul", nameQReg128(dd), arr, 12547 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index); 12548 return True; 12549 } 12550 12551 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0))) 12552 || (bitU == 0 && opcode == BITS4(1,0,0,0))) { 12553 /* -------- 1,xx,0000 MLA s/h variants only -------- */ 12554 /* -------- 1,xx,0100 MLS s/h variants only -------- */ 12555 /* -------- 0,xx,1000 MUL s/h variants only -------- */ 12556 Bool isMLA = opcode == BITS4(0,0,0,0); 12557 Bool isMLS = opcode == BITS4(0,1,0,0); 12558 UInt mm = 32; // invalid 12559 UInt ix = 16; // invalid 12560 switch (size) { 12561 case X00: 12562 return False; // b case is not allowed 12563 case X01: 12564 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12565 case X10: 12566 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12567 case X11: 12568 return False; // d case is not allowed 12569 default: 12570 vassert(0); 12571 } 12572 vassert(mm < 32 && ix < 16); 12573 IROp opMUL = mkVecMUL(size); 12574 IROp opADD = mkVecADD(size); 12575 IROp opSUB = mkVecSUB(size); 12576 HChar ch = size == X01 ? 'h' : 's'; 12577 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12578 IRTemp vecD = newTempV128(); 12579 IRTemp vecN = newTempV128(); 12580 IRTemp res = newTempV128(); 12581 assign(vecD, getQReg128(dd)); 12582 assign(vecN, getQReg128(nn)); 12583 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM)); 12584 if (isMLA || isMLS) { 12585 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod)); 12586 } else { 12587 assign(res, prod); 12588 } 12589 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12590 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12591 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla" 12592 : (isMLS ? "mls" : "mul"), 12593 nameQReg128(dd), arr, 12594 nameQReg128(nn), arr, nameQReg128(dd), ch, ix); 12595 return True; 12596 } 12597 12598 if (opcode == BITS4(1,0,1,0) 12599 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) { 12600 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks) 12601 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0 12602 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1 12603 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1 12604 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2 12605 /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2 12606 /* Widens, and size refers to the narrowed lanes. 
*/ 12607 UInt ks = 3; 12608 switch (opcode) { 12609 case BITS4(1,0,1,0): ks = 0; break; 12610 case BITS4(0,0,1,0): ks = 1; break; 12611 case BITS4(0,1,1,0): ks = 2; break; 12612 default: vassert(0); 12613 } 12614 vassert(ks >= 0 && ks <= 2); 12615 Bool isU = bitU == 1; 12616 Bool is2 = bitQ == 1; 12617 UInt mm = 32; // invalid 12618 UInt ix = 16; // invalid 12619 switch (size) { 12620 case X00: 12621 return False; // h_b_b[] case is not allowed 12622 case X01: 12623 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12624 case X10: 12625 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12626 case X11: 12627 return False; // q_d_d[] case is not allowed 12628 default: 12629 vassert(0); 12630 } 12631 vassert(mm < 32 && ix < 16); 12632 IRTemp vecN = newTempV128(); 12633 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12634 IRTemp vecD = newTempV128(); 12635 assign(vecN, getQReg128(nn)); 12636 assign(vecD, getQReg128(dd)); 12637 IRTemp res = IRTemp_INVALID; 12638 math_MULL_ACC(&res, is2, isU, size, "mas"[ks], 12639 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 12640 putQReg128(dd, mkexpr(res)); 12641 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl"); 12642 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12643 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12644 HChar ch = size == X01 ? 'h' : 's'; 12645 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n", 12646 isU ? 'u' : 's', nm, is2 ? "2" : "", 12647 nameQReg128(dd), arrWide, 12648 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix); 12649 return True; 12650 } 12651 12652 if (bitU == 0 12653 && (opcode == BITS4(1,0,1,1) 12654 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) { 12655 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks) 12656 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1 12657 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2 12658 /* Widens, and size refers to the narrowed lanes. */ 12659 UInt ks = 3; 12660 switch (opcode) { 12661 case BITS4(1,0,1,1): ks = 0; break; 12662 case BITS4(0,0,1,1): ks = 1; break; 12663 case BITS4(0,1,1,1): ks = 2; break; 12664 default: vassert(0); 12665 } 12666 vassert(ks >= 0 && ks <= 2); 12667 Bool is2 = bitQ == 1; 12668 UInt mm = 32; // invalid 12669 UInt ix = 16; // invalid 12670 switch (size) { 12671 case X00: 12672 return False; // h_b_b[] case is not allowed 12673 case X01: 12674 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12675 case X10: 12676 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12677 case X11: 12678 return False; // q_d_d[] case is not allowed 12679 default: 12680 vassert(0); 12681 } 12682 vassert(mm < 32 && ix < 16); 12683 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n; 12684 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 12685 newTempsV128_2(&vecN, &vecD); 12686 assign(vecN, getQReg128(nn)); 12687 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12688 assign(vecD, getQReg128(dd)); 12689 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 12690 is2, size, "mas"[ks], 12691 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 12692 putQReg128(dd, mkexpr(res)); 12693 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 12694 updateQCFLAGwithDifference(sat1q, sat1n); 12695 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 12696 updateQCFLAGwithDifference(sat2q, sat2n); 12697 } 12698 const HChar* nm = ks == 0 ? "sqdmull" 12699 : (ks == 1 ? 
"sqdmlal" : "sqdmlsl"); 12700 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12701 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12702 HChar ch = size == X01 ? 'h' : 's'; 12703 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n", 12704 nm, is2 ? "2" : "", 12705 nameQReg128(dd), arrWide, 12706 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix); 12707 return True; 12708 } 12709 12710 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { 12711 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ 12712 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ 12713 UInt mm = 32; // invalid 12714 UInt ix = 16; // invalid 12715 switch (size) { 12716 case X00: 12717 return False; // b case is not allowed 12718 case X01: 12719 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12720 case X10: 12721 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12722 case X11: 12723 return False; // q case is not allowed 12724 default: 12725 vassert(0); 12726 } 12727 vassert(mm < 32 && ix < 16); 12728 Bool isR = opcode == BITS4(1,1,0,1); 12729 IRTemp res, sat1q, sat1n, vN, vM; 12730 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 12731 vN = newTempV128(); 12732 assign(vN, getQReg128(nn)); 12733 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12734 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 12735 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12736 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID; 12737 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 12738 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 12739 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12740 HChar ch = size == X01 ? 'h' : 's'; 12741 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm, 12742 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix); 12743 return True; 12744 } 12745 12746 return False; 12747 # undef INSN 12748 } 12749 12750 12751 static 12752 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn) 12753 { 12754 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12755 return False; 12756 # undef INSN 12757 } 12758 12759 12760 static 12761 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn) 12762 { 12763 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12764 return False; 12765 # undef INSN 12766 } 12767 12768 12769 static 12770 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn) 12771 { 12772 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12773 return False; 12774 # undef INSN 12775 } 12776 12777 12778 static 12779 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn) 12780 { 12781 /* 31 28 23 21 20 15 13 9 4 12782 000 11110 ty 1 m op 1000 n opcode2 12783 The first 3 bits are really "M 0 S", but M and S are always zero. 
12784 Decode fields are: ty,op,opcode2 12785 */ 12786 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12787 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 12788 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) { 12789 return False; 12790 } 12791 UInt ty = INSN(23,22); 12792 UInt mm = INSN(20,16); 12793 UInt op = INSN(15,14); 12794 UInt nn = INSN(9,5); 12795 UInt opcode2 = INSN(4,0); 12796 vassert(ty < 4); 12797 12798 if (ty <= X01 && op == X00 12799 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) { 12800 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */ 12801 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */ 12802 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */ 12803 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */ 12804 /* 31 23 20 15 9 4 12805 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm 12806 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0 12807 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm 12808 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0 12809 12810 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm 12811 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0 12812 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm 12813 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0 12814 12815 FCMPE generates Invalid Operation exn if either arg is any kind 12816 of NaN. FCMP generates Invalid Operation exn if either arg is a 12817 signalling NaN. We ignore this detail here and produce the same 12818 IR for both. 12819 */ 12820 Bool isD = (ty & 1) == 1; 12821 Bool isCMPE = (opcode2 & 16) == 16; 12822 Bool cmpZero = (opcode2 & 8) == 8; 12823 IRType ity = isD ? Ity_F64 : Ity_F32; 12824 Bool valid = True; 12825 if (cmpZero && mm != 0) valid = False; 12826 if (valid) { 12827 IRTemp argL = newTemp(ity); 12828 IRTemp argR = newTemp(ity); 12829 IRTemp irRes = newTemp(Ity_I32); 12830 assign(argL, getQRegLO(nn, ity)); 12831 assign(argR, 12832 cmpZero 12833 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0))) 12834 : getQRegLO(mm, ity)); 12835 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 12836 mkexpr(argL), mkexpr(argR))); 12837 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes); 12838 IRTemp nzcv_28x0 = newTemp(Ity_I64); 12839 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28))); 12840 setFlags_COPY(nzcv_28x0); 12841 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity), 12842 cmpZero ? "#0.0" : nameQRegLO(mm, ity)); 12843 return True; 12844 } 12845 return False; 12846 } 12847 12848 return False; 12849 # undef INSN 12850 } 12851 12852 12853 static 12854 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn) 12855 { 12856 /* 31 28 23 21 20 15 11 9 4 3 12857 000 11110 ty 1 m cond 01 n op nzcv 12858 The first 3 bits are really "M 0 S", but M and S are always zero. 12859 Decode fields are: ty,op 12860 */ 12861 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12862 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 12863 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) { 12864 return False; 12865 } 12866 UInt ty = INSN(23,22); 12867 UInt mm = INSN(20,16); 12868 UInt cond = INSN(15,12); 12869 UInt nn = INSN(9,5); 12870 UInt op = INSN(4,4); 12871 UInt nzcv = INSN(3,0); 12872 vassert(ty < 4 && op <= 1); 12873 12874 if (ty <= BITS2(0,1)) { 12875 /* -------- 00,0 FCCMP s_s -------- */ 12876 /* -------- 00,1 FCCMPE s_s -------- */ 12877 /* -------- 01,0 FCCMP d_d -------- */ 12878 /* -------- 01,1 FCCMPE d_d -------- */ 12879 12880 /* FCCMPE generates Invalid Operation exn if either arg is any kind 12881 of NaN. 
FCCMP generates Invalid Operation exn if either arg is a 12882 signalling NaN. We ignore this detail here and produce the same 12883 IR for both. 12884 */ 12885 Bool isD = (ty & 1) == 1; 12886 Bool isCMPE = op == 1; 12887 IRType ity = isD ? Ity_F64 : Ity_F32; 12888 IRTemp argL = newTemp(ity); 12889 IRTemp argR = newTemp(ity); 12890 IRTemp irRes = newTemp(Ity_I32); 12891 assign(argL, getQRegLO(nn, ity)); 12892 assign(argR, getQRegLO(mm, ity)); 12893 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 12894 mkexpr(argL), mkexpr(argR))); 12895 IRTemp condT = newTemp(Ity_I1); 12896 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 12897 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes); 12898 12899 IRTemp nzcvT_28x0 = newTemp(Ity_I64); 12900 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28))); 12901 12902 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28); 12903 12904 IRTemp nzcv_28x0 = newTemp(Ity_I64); 12905 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT), 12906 mkexpr(nzcvT_28x0), nzcvF_28x0)); 12907 setFlags_COPY(nzcv_28x0); 12908 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "", 12909 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond)); 12910 return True; 12911 } 12912 12913 return False; 12914 # undef INSN 12915 } 12916 12917 12918 static 12919 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn) 12920 { 12921 /* 31 23 21 20 15 11 9 5 12922 000 11110 ty 1 m cond 11 n d 12923 The first 3 bits are really "M 0 S", but M and S are always zero. 12924 Decode fields: ty 12925 */ 12926 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12927 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1 12928 || INSN(11,10) != BITS2(1,1)) { 12929 return False; 12930 } 12931 UInt ty = INSN(23,22); 12932 UInt mm = INSN(20,16); 12933 UInt cond = INSN(15,12); 12934 UInt nn = INSN(9,5); 12935 UInt dd = INSN(4,0); 12936 if (ty <= X01) { 12937 /* -------- 00: FCSEL s_s -------- */ 12938 /* -------- 00: FCSEL d_d -------- */ 12939 IRType ity = ty == X01 ? Ity_F64 : Ity_F32; 12940 IRTemp srcT = newTemp(ity); 12941 IRTemp srcF = newTemp(ity); 12942 IRTemp res = newTemp(ity); 12943 assign(srcT, getQRegLO(nn, ity)); 12944 assign(srcF, getQRegLO(mm, ity)); 12945 assign(res, IRExpr_ITE( 12946 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 12947 mkexpr(srcT), mkexpr(srcF))); 12948 putQReg128(dd, mkV128(0x0000)); 12949 putQRegLO(dd, mkexpr(res)); 12950 DIP("fcsel %s, %s, %s, %s\n", 12951 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity), 12952 nameCC(cond)); 12953 return True; 12954 } 12955 return False; 12956 # undef INSN 12957 } 12958 12959 12960 static 12961 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn) 12962 { 12963 /* 31 28 23 21 20 14 9 4 12964 000 11110 ty 1 opcode 10000 n d 12965 The first 3 bits are really "M 0 S", but M and S are always zero. 
12966    Decode fields: ty,opcode
12967 */
12968 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12969    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12970        || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12971       return False;
12972    }
12973    UInt ty = INSN(23,22);
12974    UInt opcode = INSN(20,15);
12975    UInt nn = INSN(9,5);
12976    UInt dd = INSN(4,0);
12977
12978    if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12979       /* -------- 0x,000000: FMOV d_d, s_s -------- */
12980       /* -------- 0x,000001: FABS d_d, s_s -------- */
12981       /* -------- 0x,000010: FNEG d_d, s_s -------- */
12982       /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12983       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12984       IRTemp src = newTemp(ity);
12985       IRTemp res = newTemp(ity);
12986       const HChar* nm = "??";
12987       assign(src, getQRegLO(nn, ity));
12988       switch (opcode) {
12989          case BITS6(0,0,0,0,0,0):
12990             nm = "fmov"; assign(res, mkexpr(src)); break;
12991          case BITS6(0,0,0,0,0,1):
12992             nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12993          case BITS6(0,0,0,0,1,0):
12994             nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12995          case BITS6(0,0,0,0,1,1):
12996             nm = "fsqrt";
12997             assign(res, binop(mkSQRTF(ity),
12998                               mkexpr(mk_get_IR_rounding_mode()),
12999                               mkexpr(src))); break;
13000          default:
13001             vassert(0);
13002       }
13003       putQReg128(dd, mkV128(0x0000));
13004       putQRegLO(dd, mkexpr(res));
13005       DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13006       return True;
13007    }
13008
13009    if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13010                        || opcode == BITS6(0,0,0,1,0,1)))
13011        || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13012                          || opcode == BITS6(0,0,0,1,0,1)))
13013        || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13014                          || opcode == BITS6(0,0,0,1,0,0)))) {
13015       /* -------- 11,000100: FCVT s_h -------- */
13016       /* -------- 11,000101: FCVT d_h -------- */
13017       /* -------- 00,000111: FCVT h_s -------- */
13018       /* -------- 00,000101: FCVT d_s -------- */
13019       /* -------- 01,000111: FCVT h_d -------- */
13020       /* -------- 01,000100: FCVT s_d -------- */
13021       /* 31 23 21 16 14 9 4
13022          000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13023          --------- 11 ----- 01 --------- FCVT Dd, Hn
13024          --------- 00 ----- 11 --------- FCVT Hd, Sn
13025          --------- 00 ----- 01 --------- FCVT Dd, Sn
13026          --------- 01 ----- 11 --------- FCVT Hd, Dn
13027          --------- 01 ----- 00 --------- FCVT Sd, Dn
13028          Rounding, when dst is smaller than src, is per the FPCR.
13029       */
13030       UInt b2322 = ty;
13031       UInt b1615 = opcode & BITS2(1,1);
13032       switch ((b2322 << 2) | b1615) {
13033          case BITS4(0,0,0,1): // S -> D
13034          case BITS4(1,1,0,1): { // H -> D
13035             Bool srcIsH = b2322 == BITS2(1,1);
13036             IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13037             IRTemp res = newTemp(Ity_F64);
13038             assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13039                              getQRegLO(nn, srcTy)));
13040             putQReg128(dd, mkV128(0x0000));
13041             putQRegLO(dd, mkexpr(res));
13042             DIP("fcvt %s, %s\n",
13043                 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13044             return True;
13045          }
13046          case BITS4(0,1,0,0): // D -> S
13047          case BITS4(0,1,1,1): { // D -> H
13048             Bool dstIsH = b1615 == BITS2(1,1);
13049             IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13050             IRTemp res = newTemp(dstTy);
13051             assign(res, binop(dstIsH ?
Iop_F64toF16 : Iop_F64toF32, 13052 mkexpr(mk_get_IR_rounding_mode()), 13053 getQRegLO(nn, Ity_F64))); 13054 putQReg128(dd, mkV128(0x0000)); 13055 putQRegLO(dd, mkexpr(res)); 13056 DIP("fcvt %s, %s\n", 13057 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64)); 13058 return True; 13059 } 13060 case BITS4(0,0,1,1): // S -> H 13061 case BITS4(1,1,0,0): { // H -> S 13062 Bool toH = b1615 == BITS2(1,1); 13063 IRType srcTy = toH ? Ity_F32 : Ity_F16; 13064 IRType dstTy = toH ? Ity_F16 : Ity_F32; 13065 IRTemp res = newTemp(dstTy); 13066 if (toH) { 13067 assign(res, binop(Iop_F32toF16, 13068 mkexpr(mk_get_IR_rounding_mode()), 13069 getQRegLO(nn, srcTy))); 13070 13071 } else { 13072 assign(res, unop(Iop_F16toF32, 13073 getQRegLO(nn, srcTy))); 13074 } 13075 putQReg128(dd, mkV128(0x0000)); 13076 putQRegLO(dd, mkexpr(res)); 13077 DIP("fcvt %s, %s\n", 13078 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy)); 13079 return True; 13080 } 13081 default: 13082 break; 13083 } 13084 /* else unhandled */ 13085 return False; 13086 } 13087 13088 if (ty <= X01 13089 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1) 13090 && opcode != BITS6(0,0,1,1,0,1)) { 13091 /* -------- 0x,001000 FRINTN d_d, s_s -------- */ 13092 /* -------- 0x,001001 FRINTP d_d, s_s -------- */ 13093 /* -------- 0x,001010 FRINTM d_d, s_s -------- */ 13094 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */ 13095 /* -------- 0x,001100 FRINTA d_d, s_s -------- */ 13096 /* -------- 0x,001110 FRINTX d_d, s_s -------- */ 13097 /* -------- 0x,001111 FRINTI d_d, s_s -------- */ 13098 /* 31 23 21 17 14 9 4 13099 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR) 13100 rm 13101 x==0 => S-registers, x==1 => D-registers 13102 rm (17:15) encodings: 13103 111 per FPCR (FRINTI) 13104 001 +inf (FRINTP) 13105 010 -inf (FRINTM) 13106 011 zero (FRINTZ) 13107 000 tieeven (FRINTN) -- !! FIXME KLUDGED !! 13108 100 tieaway (FRINTA) -- !! FIXME KLUDGED !! 13109 110 per FPCR + "exact = TRUE" (FRINTX) 13110 101 unallocated 13111 */ 13112 Bool isD = (ty & 1) == 1; 13113 UInt rm = opcode & BITS6(0,0,0,1,1,1); 13114 IRType ity = isD ? Ity_F64 : Ity_F32; 13115 IRExpr* irrmE = NULL; 13116 UChar ch = '?'; 13117 switch (rm) { 13118 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; 13119 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; 13120 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; 13121 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 13122 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break; 13123 // I am unsure about the following, due to the "integral exact" 13124 // description in the manual. What does it mean? (frintx, that is) 13125 case BITS3(1,1,0): 13126 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; 13127 case BITS3(1,1,1): 13128 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; 13129 // The following is a kludge. There's no Irrm_ value to represent 13130 // this ("to nearest, with ties to even") 13131 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break; 13132 default: break; 13133 } 13134 if (irrmE) { 13135 IRTemp src = newTemp(ity); 13136 IRTemp dst = newTemp(ity); 13137 assign(src, getQRegLO(nn, ity)); 13138 assign(dst, binop(isD ? 
      Bool isD      = (ty & 1) == 1;
      UInt rm       = opcode & BITS6(0,0,0,1,1,1);
      IRType ity    = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      UChar ch      = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case BITS3(1,1,0):
            ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         case BITS3(1,1,1):
            ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
         // The following is a kludge.  There's no Irrm_ value to represent
         // this ("to nearest, with ties to even")
         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty, opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      /* ------- 0x,0100: FMAX d_d, s_s ------- */
      /* ------- 0x,0101: FMIN d_d, s_s ------- */
      /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
      /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul";   iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv";   iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd";   iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub";   iop = mkSUBF(ity); break;
         case BITS4(0,1,0,0): nm = "fmax";   iop = mkVecMAXF(ty+2); break;
         case BITS4(0,1,0,1): nm = "fmin";   iop = mkVecMINF(ty+2); break;
         case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
         case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
         default: vassert(0);
      }
      if (opcode <= BITS4(0,0,1,1)) {
         // This is really not good code.  TODO: avoid width-changing
         IRTemp res = newTemp(ity);
         assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
         putQReg128(dd, mkV128(0));
         putQRegLO(dd, mkexpr(res));
      } else {
         putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
                             binop(iop, getQReg128(nn), getQReg128(mm))));
      }
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp  res  = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31            22   20 15 14 9 4   ix
         000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                  -----SPEC------    ----IMPL----
         fmadd       a +    n * m    a + n * m
         fmsub       a + (-n) * m    a - n * m
         fnmadd   (-a) + (-n) * m    -(a + n * m)
         fnmsub   (-a) +    n * m    -(a - n * m)
      */
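      /* Worked example (added, purely illustrative): taking a == 1, n == 2,
         m == 3, the SPEC and IMPL columns above agree, eg
            fnmadd: (-1) + (-2)*3 == -7 == -(1 + 2*3)
            fnmsub: (-1) +   2*3  ==  5 == -(1 - 2*3)
         modulo rounding; see the note at the top of this file about double
         rounding in the multiply-add family. */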
      Bool    isD   = (ty & 1) == 1;
      UInt    ix    = (bitO1 << 1) | bitO0;
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ity);
      IROp    opSUB = mkSUBF(ity);
      IROp    opMUL = mkMULF(ity);
      IROp    opNEG = mkNEGF(ity);
      IRTemp  res   = newTemp(ity);
      IRExpr* eA    = getQRegLO(aa, ity);
      IRExpr* eN    = getQRegLO(nn, ity);
      IRExpr* eM    = getQRegLO(mm, ity);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
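   /* Added, illustrative: VFPExpandImm (used below) expands imm8 into a
      full FP constant of the requested width; for example imm8 == 0x70 is
      believed to expand to 1.0 and imm8 == 0x00 to 2.0, for both the S and
      D forms. */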
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool  isD = (ty & 1) == 1;
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15    9 4
      sf  0  0 11110 type 0  rmode opcode scale n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 0) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt sc    = INSN(15,10); // scale
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (ty <= X01 && rm == X11
       && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      /* -------- (ix) sf ty rm opc -------- */
      /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
      /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
      /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
      /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */

      /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
      /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
      /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
      /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
      IRTemp irrm = newTemp(Ity_I32);
      assign(irrm, mkU32(Irrm_ZERO));

      IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
      IRExpr* res = binop(ops[ix], mkexpr(irrm),
                          triop(opMUL, mkexpr(irrm), src, scaleE));
      putIRegOrZR(isI64, dd, res);

      DIP("fcvtz%c %s, %s, #%d\n",
          isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
          nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
      return True;
   }
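
   /* Worked example (added, purely illustrative) for the scaled conversions
      handled above and below: FCVTZS Wd, Sn, #4 with Sn == 1.75 multiplies
      by 2^4 and truncates toward zero, giving 28; conversely SCVTF Sd, Wn,
      #4 with Wn == 28 multiplies by 2^-4, giving 1.75. */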

   /* ------ sf,ty,rm,opc ------ */
   /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
   /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
   /* (ix) sf S 28    ty   rm opc 15    9 4
      0    0  0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
      1    0  0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
      2    1  0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
      3    1  0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits

      4    0  0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
      5    0  0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
      6    1  0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
      7    1  0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR,
      scaled per |scale|.
   */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;

      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01
       && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
           || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
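      /* Added note: the 'a' (FCVTA*) cases above are mapped to Irrm_NEAREST,
         which rounds ties to even, whereas the architected behaviour rounds
         ties away from zero; exact halfway values may therefore convert
         differently.  This mirrors the FRINTA kludge flagged elsewhere in
         this file. */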
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
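
   /* Added note: in the {S,U}CVTF cases below (and in the #fbits variant in
      dis_AdvSIMD_fp_to_from_fixedp_conv), the I32->F64 conversions are exact,
      so the corresponding IROps take no rounding mode (they are unops); the
      other combinations can round and so take an IRRoundingMode. */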

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf S 28    ty   rm op  15     9 4
      0    0  0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf S       ty   rm op  15     9 4
       (1) 0  0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
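   /* Added note: these FMOV (general) forms move raw bits between the
      integer and SIMD/FP register files; no conversion or rounding is
      involved.  Cases (3) and (6) access only the upper 64 bits of the
      vector register; in particular case (3) leaves the lower half
      unchanged. */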
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of |guest_PC_curr_instr|,
   which will have been set before the call here.
   Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop we're
            // injecting here can change.  In which case the translation has to
            // be redone.  For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }
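
   /* Added, illustrative example of the routing above: a load such as
      LDR Xt, [Xn, #imm] has insn[28:25] == 1100 (the x1x0 pattern) and is
      therefore handed to dis_ARM64_load_store. */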

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                          ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                   guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/