/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2015 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also, the "unordered" FP comparison is implemented as a normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least significant
     mantissa bit is sometimes incorrect.  Fix: use the IR multiply-add
     IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest, with no
     special handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  No idea what "round to odd" means; this
     implementation just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/

/* Translates ARM64 code to IR.
*/ 82 83 #include "libvex_basictypes.h" 84 #include "libvex_ir.h" 85 #include "libvex.h" 86 #include "libvex_guest_arm64.h" 87 88 #include "main_util.h" 89 #include "main_globals.h" 90 #include "guest_generic_bb_to_IR.h" 91 #include "guest_arm64_defs.h" 92 93 94 /*------------------------------------------------------------*/ 95 /*--- Globals ---*/ 96 /*------------------------------------------------------------*/ 97 98 /* These are set at the start of the translation of a instruction, so 99 that we don't have to pass them around endlessly. CONST means does 100 not change during translation of the instruction. 101 */ 102 103 /* CONST: what is the host's endianness? We need to know this in 104 order to do sub-register accesses to the SIMD/FP registers 105 correctly. */ 106 static VexEndness host_endness; 107 108 /* CONST: The guest address for the instruction currently being 109 translated. */ 110 static Addr64 guest_PC_curr_instr; 111 112 /* MOD: The IRSB* into which we're generating code. */ 113 static IRSB* irsb; 114 115 116 /*------------------------------------------------------------*/ 117 /*--- Debugging output ---*/ 118 /*------------------------------------------------------------*/ 119 120 #define DIP(format, args...) \ 121 if (vex_traceflags & VEX_TRACE_FE) \ 122 vex_printf(format, ## args) 123 124 #define DIS(buf, format, args...) \ 125 if (vex_traceflags & VEX_TRACE_FE) \ 126 vex_sprintf(buf, format, ## args) 127 128 129 /*------------------------------------------------------------*/ 130 /*--- Helper bits and pieces for deconstructing the ---*/ 131 /*--- arm insn stream. ---*/ 132 /*------------------------------------------------------------*/ 133 134 /* Do a little-endian load of a 32-bit word, regardless of the 135 endianness of the underlying host. */ 136 static inline UInt getUIntLittleEndianly ( const UChar* p ) 137 { 138 UInt w = 0; 139 w = (w << 8) | p[3]; 140 w = (w << 8) | p[2]; 141 w = (w << 8) | p[1]; 142 w = (w << 8) | p[0]; 143 return w; 144 } 145 146 /* Sign extend a N-bit value up to 64 bits, by copying 147 bit N-1 into all higher positions. */ 148 static ULong sx_to_64 ( ULong x, UInt n ) 149 { 150 vassert(n > 1 && n < 64); 151 Long r = (Long)x; 152 r = (r << (64-n)) >> (64-n); 153 return (ULong)r; 154 } 155 156 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the 157 //ZZ endianness of the underlying host. 
*/ 158 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p ) 159 //ZZ { 160 //ZZ UShort w = 0; 161 //ZZ w = (w << 8) | p[1]; 162 //ZZ w = (w << 8) | p[0]; 163 //ZZ return w; 164 //ZZ } 165 //ZZ 166 //ZZ static UInt ROR32 ( UInt x, UInt sh ) { 167 //ZZ vassert(sh >= 0 && sh < 32); 168 //ZZ if (sh == 0) 169 //ZZ return x; 170 //ZZ else 171 //ZZ return (x << (32-sh)) | (x >> sh); 172 //ZZ } 173 //ZZ 174 //ZZ static Int popcount32 ( UInt x ) 175 //ZZ { 176 //ZZ Int res = 0, i; 177 //ZZ for (i = 0; i < 32; i++) { 178 //ZZ res += (x & 1); 179 //ZZ x >>= 1; 180 //ZZ } 181 //ZZ return res; 182 //ZZ } 183 //ZZ 184 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b ) 185 //ZZ { 186 //ZZ UInt mask = 1 << ix; 187 //ZZ x &= ~mask; 188 //ZZ x |= ((b << ix) & mask); 189 //ZZ return x; 190 //ZZ } 191 192 #define BITS2(_b1,_b0) \ 193 (((_b1) << 1) | (_b0)) 194 195 #define BITS3(_b2,_b1,_b0) \ 196 (((_b2) << 2) | ((_b1) << 1) | (_b0)) 197 198 #define BITS4(_b3,_b2,_b1,_b0) \ 199 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) 200 201 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 202 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ 203 | BITS4((_b3),(_b2),(_b1),(_b0))) 204 205 #define BITS5(_b4,_b3,_b2,_b1,_b0) \ 206 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) 207 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ 208 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 209 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 210 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 211 212 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 213 (((_b8) << 8) \ 214 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 215 216 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 217 (((_b9) << 9) | ((_b8) << 8) \ 218 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 219 220 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 221 (((_b10) << 10) \ 222 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 223 224 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 225 (((_b11) << 11) \ 226 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 227 228 #define X00 BITS2(0,0) 229 #define X01 BITS2(0,1) 230 #define X10 BITS2(1,0) 231 #define X11 BITS2(1,1) 232 233 // produces _uint[_bMax:_bMin] 234 #define SLICE_UInt(_uint,_bMax,_bMin) \ 235 (( ((UInt)(_uint)) >> (_bMin)) \ 236 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) 237 238 239 /*------------------------------------------------------------*/ 240 /*--- Helper bits and pieces for creating IR fragments. 
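
   Added illustrative note (not part of the original): the small constructors
   below are composed into statements with assign() and stmt().  For example,
   a hypothetical "x10 := x10 + 1" update would be built roughly as

      IRTemp t = newTemp(Ity_I64);
      assign(t, binop(Iop_Add64, getIReg64orZR(10), mkU64(1)));
      putIReg64orZR(10, mkexpr(t));

   where the getIReg64orZR/putIReg64orZR accessors are defined later in
   this file.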
---*/ 241 /*------------------------------------------------------------*/ 242 243 static IRExpr* mkV128 ( UShort w ) 244 { 245 return IRExpr_Const(IRConst_V128(w)); 246 } 247 248 static IRExpr* mkU64 ( ULong i ) 249 { 250 return IRExpr_Const(IRConst_U64(i)); 251 } 252 253 static IRExpr* mkU32 ( UInt i ) 254 { 255 return IRExpr_Const(IRConst_U32(i)); 256 } 257 258 static IRExpr* mkU16 ( UInt i ) 259 { 260 vassert(i < 65536); 261 return IRExpr_Const(IRConst_U16(i)); 262 } 263 264 static IRExpr* mkU8 ( UInt i ) 265 { 266 vassert(i < 256); 267 return IRExpr_Const(IRConst_U8( (UChar)i )); 268 } 269 270 static IRExpr* mkexpr ( IRTemp tmp ) 271 { 272 return IRExpr_RdTmp(tmp); 273 } 274 275 static IRExpr* unop ( IROp op, IRExpr* a ) 276 { 277 return IRExpr_Unop(op, a); 278 } 279 280 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 281 { 282 return IRExpr_Binop(op, a1, a2); 283 } 284 285 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 286 { 287 return IRExpr_Triop(op, a1, a2, a3); 288 } 289 290 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 291 { 292 return IRExpr_Load(Iend_LE, ty, addr); 293 } 294 295 /* Add a statement to the list held by "irbb". */ 296 static void stmt ( IRStmt* st ) 297 { 298 addStmtToIRSB( irsb, st ); 299 } 300 301 static void assign ( IRTemp dst, IRExpr* e ) 302 { 303 stmt( IRStmt_WrTmp(dst, e) ); 304 } 305 306 static void storeLE ( IRExpr* addr, IRExpr* data ) 307 { 308 stmt( IRStmt_Store(Iend_LE, addr, data) ); 309 } 310 311 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT ) 312 //ZZ { 313 //ZZ if (guardT == IRTemp_INVALID) { 314 //ZZ /* unconditional */ 315 //ZZ storeLE(addr, data); 316 //ZZ } else { 317 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data, 318 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); 319 //ZZ } 320 //ZZ } 321 //ZZ 322 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt, 323 //ZZ IRExpr* addr, IRExpr* alt, 324 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 325 //ZZ { 326 //ZZ if (guardT == IRTemp_INVALID) { 327 //ZZ /* unconditional */ 328 //ZZ IRExpr* loaded = NULL; 329 //ZZ switch (cvt) { 330 //ZZ case ILGop_Ident32: 331 //ZZ loaded = loadLE(Ity_I32, addr); break; 332 //ZZ case ILGop_8Uto32: 333 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break; 334 //ZZ case ILGop_8Sto32: 335 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break; 336 //ZZ case ILGop_16Uto32: 337 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break; 338 //ZZ case ILGop_16Sto32: 339 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break; 340 //ZZ default: 341 //ZZ vassert(0); 342 //ZZ } 343 //ZZ vassert(loaded != NULL); 344 //ZZ assign(dst, loaded); 345 //ZZ } else { 346 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the 347 //ZZ loaded data before putting the data in 'dst'. If the load 348 //ZZ does not take place, 'alt' is placed directly in 'dst'. */ 349 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt, 350 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) ); 351 //ZZ } 352 //ZZ } 353 354 /* Generate a new temporary of the given type. */ 355 static IRTemp newTemp ( IRType ty ) 356 { 357 vassert(isPlausibleIRType(ty)); 358 return newIRTemp( irsb->tyenv, ty ); 359 } 360 361 /* This is used in many places, so the brevity is an advantage. */ 362 static IRTemp newTempV128(void) 363 { 364 return newTemp(Ity_V128); 365 } 366 367 /* Initialise V128 temporaries en masse. 
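
   Added usage sketch (illustrative only): callers own the IRTemp variables
   and must initialise them to IRTemp_INVALID before calling one of the
   newTempsV128_* helpers below; that is what the vasserts check.  A typical
   call site in a decode routine looks roughly like

      IRTemp argL = IRTemp_INVALID, argR = IRTemp_INVALID;
      newTempsV128_2(&argL, &argR);
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));

   where 'nn' and 'mm' are hypothetical register numbers taken from the
   instruction being decoded, and getQReg128 is defined later in this file.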
*/ 368 static 369 void newTempsV128_2(IRTemp* t1, IRTemp* t2) 370 { 371 vassert(t1 && *t1 == IRTemp_INVALID); 372 vassert(t2 && *t2 == IRTemp_INVALID); 373 *t1 = newTempV128(); 374 *t2 = newTempV128(); 375 } 376 377 static 378 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3) 379 { 380 vassert(t1 && *t1 == IRTemp_INVALID); 381 vassert(t2 && *t2 == IRTemp_INVALID); 382 vassert(t3 && *t3 == IRTemp_INVALID); 383 *t1 = newTempV128(); 384 *t2 = newTempV128(); 385 *t3 = newTempV128(); 386 } 387 388 static 389 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4) 390 { 391 vassert(t1 && *t1 == IRTemp_INVALID); 392 vassert(t2 && *t2 == IRTemp_INVALID); 393 vassert(t3 && *t3 == IRTemp_INVALID); 394 vassert(t4 && *t4 == IRTemp_INVALID); 395 *t1 = newTempV128(); 396 *t2 = newTempV128(); 397 *t3 = newTempV128(); 398 *t4 = newTempV128(); 399 } 400 401 static 402 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3, 403 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7) 404 { 405 vassert(t1 && *t1 == IRTemp_INVALID); 406 vassert(t2 && *t2 == IRTemp_INVALID); 407 vassert(t3 && *t3 == IRTemp_INVALID); 408 vassert(t4 && *t4 == IRTemp_INVALID); 409 vassert(t5 && *t5 == IRTemp_INVALID); 410 vassert(t6 && *t6 == IRTemp_INVALID); 411 vassert(t7 && *t7 == IRTemp_INVALID); 412 *t1 = newTempV128(); 413 *t2 = newTempV128(); 414 *t3 = newTempV128(); 415 *t4 = newTempV128(); 416 *t5 = newTempV128(); 417 *t6 = newTempV128(); 418 *t7 = newTempV128(); 419 } 420 421 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type 422 //ZZ IRRoundingMode. */ 423 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 424 //ZZ { 425 //ZZ return mkU32(Irrm_NEAREST); 426 //ZZ } 427 //ZZ 428 //ZZ /* Generate an expression for SRC rotated right by ROT. */ 429 //ZZ static IRExpr* genROR32( IRTemp src, Int rot ) 430 //ZZ { 431 //ZZ vassert(rot >= 0 && rot < 32); 432 //ZZ if (rot == 0) 433 //ZZ return mkexpr(src); 434 //ZZ return 435 //ZZ binop(Iop_Or32, 436 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)), 437 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot))); 438 //ZZ } 439 //ZZ 440 //ZZ static IRExpr* mkU128 ( ULong i ) 441 //ZZ { 442 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i)); 443 //ZZ } 444 //ZZ 445 //ZZ /* Generate a 4-aligned version of the given expression if 446 //ZZ the given condition is true. Else return it unchanged. */ 447 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b ) 448 //ZZ { 449 //ZZ if (b) 450 //ZZ return binop(Iop_And32, e, mkU32(~3)); 451 //ZZ else 452 //ZZ return e; 453 //ZZ } 454 455 /* Other IR construction helpers. 
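
   Added illustrative note: the selectors below pick an integer IROp by
   IRType, so one code path can emit IR for both the 32- and 64-bit forms
   of an instruction.  A hedged sketch, assuming 'ty' is Ity_I32 or Ity_I64
   and 'argL'/'argR' are temps of that type:

      IRTemp res = newTemp(ty);
      assign(res, binop(mkADD(ty), mkexpr(argL),
                        binop(mkAND(ty), mkexpr(argR), mkU(ty, 0xFF))));

   mkU(ty, imm), defined further down, makes a constant of the matching
   width.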
*/ 456 static IROp mkAND ( IRType ty ) { 457 switch (ty) { 458 case Ity_I32: return Iop_And32; 459 case Ity_I64: return Iop_And64; 460 default: vpanic("mkAND"); 461 } 462 } 463 464 static IROp mkOR ( IRType ty ) { 465 switch (ty) { 466 case Ity_I32: return Iop_Or32; 467 case Ity_I64: return Iop_Or64; 468 default: vpanic("mkOR"); 469 } 470 } 471 472 static IROp mkXOR ( IRType ty ) { 473 switch (ty) { 474 case Ity_I32: return Iop_Xor32; 475 case Ity_I64: return Iop_Xor64; 476 default: vpanic("mkXOR"); 477 } 478 } 479 480 static IROp mkSHL ( IRType ty ) { 481 switch (ty) { 482 case Ity_I32: return Iop_Shl32; 483 case Ity_I64: return Iop_Shl64; 484 default: vpanic("mkSHL"); 485 } 486 } 487 488 static IROp mkSHR ( IRType ty ) { 489 switch (ty) { 490 case Ity_I32: return Iop_Shr32; 491 case Ity_I64: return Iop_Shr64; 492 default: vpanic("mkSHR"); 493 } 494 } 495 496 static IROp mkSAR ( IRType ty ) { 497 switch (ty) { 498 case Ity_I32: return Iop_Sar32; 499 case Ity_I64: return Iop_Sar64; 500 default: vpanic("mkSAR"); 501 } 502 } 503 504 static IROp mkNOT ( IRType ty ) { 505 switch (ty) { 506 case Ity_I32: return Iop_Not32; 507 case Ity_I64: return Iop_Not64; 508 default: vpanic("mkNOT"); 509 } 510 } 511 512 static IROp mkADD ( IRType ty ) { 513 switch (ty) { 514 case Ity_I32: return Iop_Add32; 515 case Ity_I64: return Iop_Add64; 516 default: vpanic("mkADD"); 517 } 518 } 519 520 static IROp mkSUB ( IRType ty ) { 521 switch (ty) { 522 case Ity_I32: return Iop_Sub32; 523 case Ity_I64: return Iop_Sub64; 524 default: vpanic("mkSUB"); 525 } 526 } 527 528 static IROp mkADDF ( IRType ty ) { 529 switch (ty) { 530 case Ity_F32: return Iop_AddF32; 531 case Ity_F64: return Iop_AddF64; 532 default: vpanic("mkADDF"); 533 } 534 } 535 536 static IROp mkSUBF ( IRType ty ) { 537 switch (ty) { 538 case Ity_F32: return Iop_SubF32; 539 case Ity_F64: return Iop_SubF64; 540 default: vpanic("mkSUBF"); 541 } 542 } 543 544 static IROp mkMULF ( IRType ty ) { 545 switch (ty) { 546 case Ity_F32: return Iop_MulF32; 547 case Ity_F64: return Iop_MulF64; 548 default: vpanic("mkMULF"); 549 } 550 } 551 552 static IROp mkDIVF ( IRType ty ) { 553 switch (ty) { 554 case Ity_F32: return Iop_DivF32; 555 case Ity_F64: return Iop_DivF64; 556 default: vpanic("mkMULF"); 557 } 558 } 559 560 static IROp mkNEGF ( IRType ty ) { 561 switch (ty) { 562 case Ity_F32: return Iop_NegF32; 563 case Ity_F64: return Iop_NegF64; 564 default: vpanic("mkNEGF"); 565 } 566 } 567 568 static IROp mkABSF ( IRType ty ) { 569 switch (ty) { 570 case Ity_F32: return Iop_AbsF32; 571 case Ity_F64: return Iop_AbsF64; 572 default: vpanic("mkNEGF"); 573 } 574 } 575 576 static IROp mkSQRTF ( IRType ty ) { 577 switch (ty) { 578 case Ity_F32: return Iop_SqrtF32; 579 case Ity_F64: return Iop_SqrtF64; 580 default: vpanic("mkNEGF"); 581 } 582 } 583 584 static IROp mkVecADD ( UInt size ) { 585 const IROp ops[4] 586 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; 587 vassert(size < 4); 588 return ops[size]; 589 } 590 591 static IROp mkVecQADDU ( UInt size ) { 592 const IROp ops[4] 593 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 }; 594 vassert(size < 4); 595 return ops[size]; 596 } 597 598 static IROp mkVecQADDS ( UInt size ) { 599 const IROp ops[4] 600 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 }; 601 vassert(size < 4); 602 return ops[size]; 603 } 604 605 static IROp mkVecQADDEXTSUSATUU ( UInt size ) { 606 const IROp ops[4] 607 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8, 608 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 
}; 609 vassert(size < 4); 610 return ops[size]; 611 } 612 613 static IROp mkVecQADDEXTUSSATSS ( UInt size ) { 614 const IROp ops[4] 615 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8, 616 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 }; 617 vassert(size < 4); 618 return ops[size]; 619 } 620 621 static IROp mkVecSUB ( UInt size ) { 622 const IROp ops[4] 623 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 624 vassert(size < 4); 625 return ops[size]; 626 } 627 628 static IROp mkVecQSUBU ( UInt size ) { 629 const IROp ops[4] 630 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 }; 631 vassert(size < 4); 632 return ops[size]; 633 } 634 635 static IROp mkVecQSUBS ( UInt size ) { 636 const IROp ops[4] 637 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 }; 638 vassert(size < 4); 639 return ops[size]; 640 } 641 642 static IROp mkVecSARN ( UInt size ) { 643 const IROp ops[4] 644 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 }; 645 vassert(size < 4); 646 return ops[size]; 647 } 648 649 static IROp mkVecSHRN ( UInt size ) { 650 const IROp ops[4] 651 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 }; 652 vassert(size < 4); 653 return ops[size]; 654 } 655 656 static IROp mkVecSHLN ( UInt size ) { 657 const IROp ops[4] 658 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 }; 659 vassert(size < 4); 660 return ops[size]; 661 } 662 663 static IROp mkVecCATEVENLANES ( UInt size ) { 664 const IROp ops[4] 665 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, 666 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 }; 667 vassert(size < 4); 668 return ops[size]; 669 } 670 671 static IROp mkVecCATODDLANES ( UInt size ) { 672 const IROp ops[4] 673 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, 674 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 }; 675 vassert(size < 4); 676 return ops[size]; 677 } 678 679 static IROp mkVecINTERLEAVELO ( UInt size ) { 680 const IROp ops[4] 681 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 682 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 }; 683 vassert(size < 4); 684 return ops[size]; 685 } 686 687 static IROp mkVecINTERLEAVEHI ( UInt size ) { 688 const IROp ops[4] 689 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 690 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 }; 691 vassert(size < 4); 692 return ops[size]; 693 } 694 695 static IROp mkVecMAXU ( UInt size ) { 696 const IROp ops[4] 697 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; 698 vassert(size < 4); 699 return ops[size]; 700 } 701 702 static IROp mkVecMAXS ( UInt size ) { 703 const IROp ops[4] 704 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; 705 vassert(size < 4); 706 return ops[size]; 707 } 708 709 static IROp mkVecMINU ( UInt size ) { 710 const IROp ops[4] 711 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; 712 vassert(size < 4); 713 return ops[size]; 714 } 715 716 static IROp mkVecMINS ( UInt size ) { 717 const IROp ops[4] 718 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; 719 vassert(size < 4); 720 return ops[size]; 721 } 722 723 static IROp mkVecMUL ( UInt size ) { 724 const IROp ops[4] 725 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; 726 vassert(size < 3); 727 return ops[size]; 728 } 729 730 static IROp mkVecMULLU ( UInt sizeNarrow ) { 731 const IROp ops[4] 732 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID }; 733 vassert(sizeNarrow < 3); 734 return ops[sizeNarrow]; 735 } 736 737 static IROp mkVecMULLS ( UInt sizeNarrow ) { 738 const IROp ops[4] 739 = { Iop_Mull8Sx8, 
Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID }; 740 vassert(sizeNarrow < 3); 741 return ops[sizeNarrow]; 742 } 743 744 static IROp mkVecQDMULLS ( UInt sizeNarrow ) { 745 const IROp ops[4] 746 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID }; 747 vassert(sizeNarrow < 3); 748 return ops[sizeNarrow]; 749 } 750 751 static IROp mkVecCMPEQ ( UInt size ) { 752 const IROp ops[4] 753 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 }; 754 vassert(size < 4); 755 return ops[size]; 756 } 757 758 static IROp mkVecCMPGTU ( UInt size ) { 759 const IROp ops[4] 760 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 }; 761 vassert(size < 4); 762 return ops[size]; 763 } 764 765 static IROp mkVecCMPGTS ( UInt size ) { 766 const IROp ops[4] 767 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 }; 768 vassert(size < 4); 769 return ops[size]; 770 } 771 772 static IROp mkVecABS ( UInt size ) { 773 const IROp ops[4] 774 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 }; 775 vassert(size < 4); 776 return ops[size]; 777 } 778 779 static IROp mkVecZEROHIxxOFV128 ( UInt size ) { 780 const IROp ops[4] 781 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128, 782 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 }; 783 vassert(size < 4); 784 return ops[size]; 785 } 786 787 static IRExpr* mkU ( IRType ty, ULong imm ) { 788 switch (ty) { 789 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL)); 790 case Ity_I64: return mkU64(imm); 791 default: vpanic("mkU"); 792 } 793 } 794 795 static IROp mkVecQDMULHIS ( UInt size ) { 796 const IROp ops[4] 797 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID }; 798 vassert(size < 4); 799 return ops[size]; 800 } 801 802 static IROp mkVecQRDMULHIS ( UInt size ) { 803 const IROp ops[4] 804 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID }; 805 vassert(size < 4); 806 return ops[size]; 807 } 808 809 static IROp mkVecQANDUQSH ( UInt size ) { 810 const IROp ops[4] 811 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8, 812 Iop_QandUQsh32x4, Iop_QandUQsh64x2 }; 813 vassert(size < 4); 814 return ops[size]; 815 } 816 817 static IROp mkVecQANDSQSH ( UInt size ) { 818 const IROp ops[4] 819 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8, 820 Iop_QandSQsh32x4, Iop_QandSQsh64x2 }; 821 vassert(size < 4); 822 return ops[size]; 823 } 824 825 static IROp mkVecQANDUQRSH ( UInt size ) { 826 const IROp ops[4] 827 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8, 828 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 }; 829 vassert(size < 4); 830 return ops[size]; 831 } 832 833 static IROp mkVecQANDSQRSH ( UInt size ) { 834 const IROp ops[4] 835 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8, 836 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 }; 837 vassert(size < 4); 838 return ops[size]; 839 } 840 841 static IROp mkVecSHU ( UInt size ) { 842 const IROp ops[4] 843 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 }; 844 vassert(size < 4); 845 return ops[size]; 846 } 847 848 static IROp mkVecSHS ( UInt size ) { 849 const IROp ops[4] 850 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 }; 851 vassert(size < 4); 852 return ops[size]; 853 } 854 855 static IROp mkVecRSHU ( UInt size ) { 856 const IROp ops[4] 857 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 }; 858 vassert(size < 4); 859 return ops[size]; 860 } 861 862 static IROp mkVecRSHS ( UInt size ) { 863 const IROp ops[4] 864 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 }; 865 vassert(size < 4); 866 return ops[size]; 867 } 868 869 static IROp mkVecNARROWUN ( UInt sizeNarrow ) { 870 const IROp 
ops[4] 871 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, 872 Iop_NarrowUn64to32x2, Iop_INVALID }; 873 vassert(sizeNarrow < 4); 874 return ops[sizeNarrow]; 875 } 876 877 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) { 878 const IROp ops[4] 879 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, 880 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID }; 881 vassert(sizeNarrow < 4); 882 return ops[sizeNarrow]; 883 } 884 885 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) { 886 const IROp ops[4] 887 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, 888 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID }; 889 vassert(sizeNarrow < 4); 890 return ops[sizeNarrow]; 891 } 892 893 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) { 894 const IROp ops[4] 895 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, 896 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID }; 897 vassert(sizeNarrow < 4); 898 return ops[sizeNarrow]; 899 } 900 901 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) { 902 const IROp ops[4] 903 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4, 904 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID }; 905 vassert(sizeNarrow < 4); 906 return ops[sizeNarrow]; 907 } 908 909 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) { 910 const IROp ops[4] 911 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4, 912 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID }; 913 vassert(sizeNarrow < 4); 914 return ops[sizeNarrow]; 915 } 916 917 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) { 918 const IROp ops[4] 919 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4, 920 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID }; 921 vassert(sizeNarrow < 4); 922 return ops[sizeNarrow]; 923 } 924 925 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) { 926 const IROp ops[4] 927 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4, 928 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID }; 929 vassert(sizeNarrow < 4); 930 return ops[sizeNarrow]; 931 } 932 933 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) { 934 const IROp ops[4] 935 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4, 936 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID }; 937 vassert(sizeNarrow < 4); 938 return ops[sizeNarrow]; 939 } 940 941 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) { 942 const IROp ops[4] 943 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4, 944 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID }; 945 vassert(sizeNarrow < 4); 946 return ops[sizeNarrow]; 947 } 948 949 static IROp mkVecQSHLNSATUU ( UInt size ) { 950 const IROp ops[4] 951 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8, 952 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 }; 953 vassert(size < 4); 954 return ops[size]; 955 } 956 957 static IROp mkVecQSHLNSATSS ( UInt size ) { 958 const IROp ops[4] 959 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8, 960 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 }; 961 vassert(size < 4); 962 return ops[size]; 963 } 964 965 static IROp mkVecQSHLNSATSU ( UInt size ) { 966 const IROp ops[4] 967 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8, 968 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 }; 969 vassert(size < 4); 970 return ops[size]; 971 } 972 973 static IROp mkVecADDF ( UInt size ) { 974 const IROp ops[4] 975 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 }; 976 vassert(size < 4); 977 return ops[size]; 978 } 979 980 static IROp mkVecMAXF ( UInt size ) { 981 const IROp ops[4] 982 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 }; 983 vassert(size < 4); 984 return ops[size]; 985 } 986 987 
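/* Added commentary (not part of the original code): the mkVec* selectors
   above and below take a 'size' in 0..3 meaning 8-, 16-, 32- or 64-bit
   lanes respectively, matching the ARM64 SIMD "size" field, and entries
   holding Iop_INVALID mark lane sizes the operation does not support
   (hence the tighter asserts such as size < 3 on mkVecMUL).  A hedged
   usage sketch, where 'size', 'vD', 'vN' and 'vM' stand for values a
   decode routine would already have extracted:

      IRTemp res = newTempV128();
      assign(res, binop(mkVecADD(size), getQReg128(vN), getQReg128(vM)));
      putQReg128(vD, mkexpr(res));

   getQReg128 and putQReg128 are defined later in this file. */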
static IROp mkVecMINF ( UInt size ) { 988 const IROp ops[4] 989 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 }; 990 vassert(size < 4); 991 return ops[size]; 992 } 993 994 /* Generate IR to create 'arg rotated right by imm', for sane values 995 of 'ty' and 'imm'. */ 996 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm ) 997 { 998 UInt w = 0; 999 if (ty == Ity_I64) { 1000 w = 64; 1001 } else { 1002 vassert(ty == Ity_I32); 1003 w = 32; 1004 } 1005 vassert(w != 0); 1006 vassert(imm < w); 1007 if (imm == 0) { 1008 return arg; 1009 } 1010 IRTemp res = newTemp(ty); 1011 assign(res, binop(mkOR(ty), 1012 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)), 1013 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) )); 1014 return res; 1015 } 1016 1017 /* Generate IR to set the returned temp to either all-zeroes or 1018 all ones, as a copy of arg<imm>. */ 1019 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm ) 1020 { 1021 UInt w = 0; 1022 if (ty == Ity_I64) { 1023 w = 64; 1024 } else { 1025 vassert(ty == Ity_I32); 1026 w = 32; 1027 } 1028 vassert(w != 0); 1029 vassert(imm < w); 1030 IRTemp res = newTemp(ty); 1031 assign(res, binop(mkSAR(ty), 1032 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)), 1033 mkU8(w - 1))); 1034 return res; 1035 } 1036 1037 /* U-widen 8/16/32/64 bit int expr to 64. */ 1038 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e ) 1039 { 1040 switch (srcTy) { 1041 case Ity_I64: return e; 1042 case Ity_I32: return unop(Iop_32Uto64, e); 1043 case Ity_I16: return unop(Iop_16Uto64, e); 1044 case Ity_I8: return unop(Iop_8Uto64, e); 1045 default: vpanic("widenUto64(arm64)"); 1046 } 1047 } 1048 1049 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some 1050 of these combinations make sense. */ 1051 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e ) 1052 { 1053 switch (dstTy) { 1054 case Ity_I64: return e; 1055 case Ity_I32: return unop(Iop_64to32, e); 1056 case Ity_I16: return unop(Iop_64to16, e); 1057 case Ity_I8: return unop(Iop_64to8, e); 1058 default: vpanic("narrowFrom64(arm64)"); 1059 } 1060 } 1061 1062 1063 /*------------------------------------------------------------*/ 1064 /*--- Helpers for accessing guest registers. 
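
   Added note (illustrative only): the guest state lives in
   VexGuestARM64State, and every access goes through a byte offset into
   that struct, so reading x0 as a 64-bit value is

      IRExpr_Get(OFFB_X0, Ity_I64)

   and writing it is

      stmt(IRStmt_Put(OFFB_X0, e));

   The OFFB_* macros and the wrapper functions that follow package these
   offsets so the rest of the decoder never manipulates them directly.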
---*/ 1065 /*------------------------------------------------------------*/ 1066 1067 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0) 1068 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1) 1069 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2) 1070 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3) 1071 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4) 1072 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5) 1073 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6) 1074 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7) 1075 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8) 1076 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9) 1077 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10) 1078 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11) 1079 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12) 1080 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13) 1081 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14) 1082 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15) 1083 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16) 1084 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17) 1085 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18) 1086 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19) 1087 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20) 1088 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21) 1089 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22) 1090 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23) 1091 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24) 1092 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25) 1093 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26) 1094 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27) 1095 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28) 1096 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29) 1097 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30) 1098 1099 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP) 1100 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC) 1101 1102 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP) 1103 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1) 1104 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2) 1105 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP) 1106 1107 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0) 1108 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR) 1109 1110 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0) 1111 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1) 1112 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2) 1113 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3) 1114 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4) 1115 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5) 1116 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6) 1117 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7) 1118 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8) 1119 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9) 1120 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10) 1121 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11) 1122 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12) 1123 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13) 1124 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14) 1125 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15) 1126 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16) 1127 #define OFFB_Q17 
offsetof(VexGuestARM64State,guest_Q17) 1128 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18) 1129 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19) 1130 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20) 1131 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21) 1132 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22) 1133 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23) 1134 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24) 1135 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25) 1136 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26) 1137 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27) 1138 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28) 1139 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29) 1140 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30) 1141 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31) 1142 1143 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR) 1144 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG) 1145 1146 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) 1147 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) 1148 1149 1150 /* ---------------- Integer registers ---------------- */ 1151 1152 static Int offsetIReg64 ( UInt iregNo ) 1153 { 1154 /* Do we care about endianness here? We do if sub-parts of integer 1155 registers are accessed. */ 1156 switch (iregNo) { 1157 case 0: return OFFB_X0; 1158 case 1: return OFFB_X1; 1159 case 2: return OFFB_X2; 1160 case 3: return OFFB_X3; 1161 case 4: return OFFB_X4; 1162 case 5: return OFFB_X5; 1163 case 6: return OFFB_X6; 1164 case 7: return OFFB_X7; 1165 case 8: return OFFB_X8; 1166 case 9: return OFFB_X9; 1167 case 10: return OFFB_X10; 1168 case 11: return OFFB_X11; 1169 case 12: return OFFB_X12; 1170 case 13: return OFFB_X13; 1171 case 14: return OFFB_X14; 1172 case 15: return OFFB_X15; 1173 case 16: return OFFB_X16; 1174 case 17: return OFFB_X17; 1175 case 18: return OFFB_X18; 1176 case 19: return OFFB_X19; 1177 case 20: return OFFB_X20; 1178 case 21: return OFFB_X21; 1179 case 22: return OFFB_X22; 1180 case 23: return OFFB_X23; 1181 case 24: return OFFB_X24; 1182 case 25: return OFFB_X25; 1183 case 26: return OFFB_X26; 1184 case 27: return OFFB_X27; 1185 case 28: return OFFB_X28; 1186 case 29: return OFFB_X29; 1187 case 30: return OFFB_X30; 1188 /* but not 31 */ 1189 default: vassert(0); 1190 } 1191 } 1192 1193 static Int offsetIReg64orSP ( UInt iregNo ) 1194 { 1195 return iregNo == 31 ? 
OFFB_XSP : offsetIReg64(iregNo); 1196 } 1197 1198 static const HChar* nameIReg64orZR ( UInt iregNo ) 1199 { 1200 vassert(iregNo < 32); 1201 static const HChar* names[32] 1202 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 1203 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 1204 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 1205 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" }; 1206 return names[iregNo]; 1207 } 1208 1209 static const HChar* nameIReg64orSP ( UInt iregNo ) 1210 { 1211 if (iregNo == 31) { 1212 return "sp"; 1213 } 1214 vassert(iregNo < 31); 1215 return nameIReg64orZR(iregNo); 1216 } 1217 1218 static IRExpr* getIReg64orSP ( UInt iregNo ) 1219 { 1220 vassert(iregNo < 32); 1221 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1222 } 1223 1224 static IRExpr* getIReg64orZR ( UInt iregNo ) 1225 { 1226 if (iregNo == 31) { 1227 return mkU64(0); 1228 } 1229 vassert(iregNo < 31); 1230 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1231 } 1232 1233 static void putIReg64orSP ( UInt iregNo, IRExpr* e ) 1234 { 1235 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1236 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1237 } 1238 1239 static void putIReg64orZR ( UInt iregNo, IRExpr* e ) 1240 { 1241 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1242 if (iregNo == 31) { 1243 return; 1244 } 1245 vassert(iregNo < 31); 1246 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1247 } 1248 1249 static const HChar* nameIReg32orZR ( UInt iregNo ) 1250 { 1251 vassert(iregNo < 32); 1252 static const HChar* names[32] 1253 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 1254 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 1255 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 1256 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" }; 1257 return names[iregNo]; 1258 } 1259 1260 static const HChar* nameIReg32orSP ( UInt iregNo ) 1261 { 1262 if (iregNo == 31) { 1263 return "wsp"; 1264 } 1265 vassert(iregNo < 31); 1266 return nameIReg32orZR(iregNo); 1267 } 1268 1269 static IRExpr* getIReg32orSP ( UInt iregNo ) 1270 { 1271 vassert(iregNo < 32); 1272 return unop(Iop_64to32, 1273 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1274 } 1275 1276 static IRExpr* getIReg32orZR ( UInt iregNo ) 1277 { 1278 if (iregNo == 31) { 1279 return mkU32(0); 1280 } 1281 vassert(iregNo < 31); 1282 return unop(Iop_64to32, 1283 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1284 } 1285 1286 static void putIReg32orSP ( UInt iregNo, IRExpr* e ) 1287 { 1288 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1290 } 1291 1292 static void putIReg32orZR ( UInt iregNo, IRExpr* e ) 1293 { 1294 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1295 if (iregNo == 31) { 1296 return; 1297 } 1298 vassert(iregNo < 31); 1299 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1300 } 1301 1302 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo ) 1303 { 1304 vassert(is64 == True || is64 == False); 1305 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo); 1306 } 1307 1308 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo ) 1309 { 1310 vassert(is64 == True || is64 == False); 1311 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo); 1312 } 1313 1314 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo ) 1315 { 1316 vassert(is64 == True || is64 == False); 1317 return is64 ? 
getIReg64orZR(iregNo) : getIReg32orZR(iregNo); 1318 } 1319 1320 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e ) 1321 { 1322 vassert(is64 == True || is64 == False); 1323 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e); 1324 } 1325 1326 static void putPC ( IRExpr* e ) 1327 { 1328 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1329 stmt( IRStmt_Put(OFFB_PC, e) ); 1330 } 1331 1332 1333 /* ---------------- Vector (Q) registers ---------------- */ 1334 1335 static Int offsetQReg128 ( UInt qregNo ) 1336 { 1337 /* We don't care about endianness at this point. It only becomes 1338 relevant when dealing with sections of these registers.*/ 1339 switch (qregNo) { 1340 case 0: return OFFB_Q0; 1341 case 1: return OFFB_Q1; 1342 case 2: return OFFB_Q2; 1343 case 3: return OFFB_Q3; 1344 case 4: return OFFB_Q4; 1345 case 5: return OFFB_Q5; 1346 case 6: return OFFB_Q6; 1347 case 7: return OFFB_Q7; 1348 case 8: return OFFB_Q8; 1349 case 9: return OFFB_Q9; 1350 case 10: return OFFB_Q10; 1351 case 11: return OFFB_Q11; 1352 case 12: return OFFB_Q12; 1353 case 13: return OFFB_Q13; 1354 case 14: return OFFB_Q14; 1355 case 15: return OFFB_Q15; 1356 case 16: return OFFB_Q16; 1357 case 17: return OFFB_Q17; 1358 case 18: return OFFB_Q18; 1359 case 19: return OFFB_Q19; 1360 case 20: return OFFB_Q20; 1361 case 21: return OFFB_Q21; 1362 case 22: return OFFB_Q22; 1363 case 23: return OFFB_Q23; 1364 case 24: return OFFB_Q24; 1365 case 25: return OFFB_Q25; 1366 case 26: return OFFB_Q26; 1367 case 27: return OFFB_Q27; 1368 case 28: return OFFB_Q28; 1369 case 29: return OFFB_Q29; 1370 case 30: return OFFB_Q30; 1371 case 31: return OFFB_Q31; 1372 default: vassert(0); 1373 } 1374 } 1375 1376 /* Write to a complete Qreg. */ 1377 static void putQReg128 ( UInt qregNo, IRExpr* e ) 1378 { 1379 vassert(qregNo < 32); 1380 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); 1381 stmt( IRStmt_Put(offsetQReg128(qregNo), e) ); 1382 } 1383 1384 /* Read a complete Qreg. */ 1385 static IRExpr* getQReg128 ( UInt qregNo ) 1386 { 1387 vassert(qregNo < 32); 1388 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128); 1389 } 1390 1391 /* Produce the IR type for some sub-part of a vector. For 32- and 64- 1392 bit sub-parts we can choose either integer or float types, and 1393 choose float on the basis that that is the common use case and so 1394 will give least interference with Put-to-Get forwarding later 1395 on. */ 1396 static IRType preferredVectorSubTypeFromSize ( UInt szB ) 1397 { 1398 switch (szB) { 1399 case 1: return Ity_I8; 1400 case 2: return Ity_I16; 1401 case 4: return Ity_I32; //Ity_F32; 1402 case 8: return Ity_F64; 1403 case 16: return Ity_V128; 1404 default: vassert(0); 1405 } 1406 } 1407 1408 /* Find the offset of the laneNo'th lane of type laneTy in the given 1409 Qreg. Since the host is little-endian, the least significant lane 1410 has the lowest offset. */ 1411 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo ) 1412 { 1413 vassert(host_endness == VexEndnessLE); 1414 Int base = offsetQReg128(qregNo); 1415 /* Since the host is little-endian, the least significant lane 1416 will be at the lowest address. */ 1417 /* Restrict this to known types, so as to avoid silently accepting 1418 stupid types. 
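
      Added worked example: with the little-endian host asserted above,
      lane 1 of type Ity_I64 in q7 is at offsetQReg128(7) + 1*8, i.e. the
      upper eight bytes of that register, while lane 0 of any type starts
      at offsetQReg128(7) itself; the checks below reject any lane that
      would run past the 16-byte register.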
*/ 1419 UInt laneSzB = 0; 1420 switch (laneTy) { 1421 case Ity_I8: laneSzB = 1; break; 1422 case Ity_F16: case Ity_I16: laneSzB = 2; break; 1423 case Ity_F32: case Ity_I32: laneSzB = 4; break; 1424 case Ity_F64: case Ity_I64: laneSzB = 8; break; 1425 case Ity_V128: laneSzB = 16; break; 1426 default: break; 1427 } 1428 vassert(laneSzB > 0); 1429 UInt minOff = laneNo * laneSzB; 1430 UInt maxOff = minOff + laneSzB - 1; 1431 vassert(maxOff < 16); 1432 return base + minOff; 1433 } 1434 1435 /* Put to the least significant lane of a Qreg. */ 1436 static void putQRegLO ( UInt qregNo, IRExpr* e ) 1437 { 1438 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1439 Int off = offsetQRegLane(qregNo, ty, 0); 1440 switch (ty) { 1441 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: 1442 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128: 1443 break; 1444 default: 1445 vassert(0); // Other cases are probably invalid 1446 } 1447 stmt(IRStmt_Put(off, e)); 1448 } 1449 1450 /* Get from the least significant lane of a Qreg. */ 1451 static IRExpr* getQRegLO ( UInt qregNo, IRType ty ) 1452 { 1453 Int off = offsetQRegLane(qregNo, ty, 0); 1454 switch (ty) { 1455 case Ity_I8: 1456 case Ity_F16: case Ity_I16: 1457 case Ity_I32: case Ity_I64: 1458 case Ity_F32: case Ity_F64: case Ity_V128: 1459 break; 1460 default: 1461 vassert(0); // Other cases are ATC 1462 } 1463 return IRExpr_Get(off, ty); 1464 } 1465 1466 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy ) 1467 { 1468 static const HChar* namesQ[32] 1469 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 1470 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", 1471 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", 1472 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" }; 1473 static const HChar* namesD[32] 1474 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", 1475 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", 1476 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", 1477 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" }; 1478 static const HChar* namesS[32] 1479 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", 1480 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", 1481 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", 1482 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" }; 1483 static const HChar* namesH[32] 1484 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", 1485 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", 1486 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", 1487 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" }; 1488 static const HChar* namesB[32] 1489 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", 1490 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", 1491 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", 1492 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" }; 1493 vassert(qregNo < 32); 1494 switch (sizeofIRType(laneTy)) { 1495 case 1: return namesB[qregNo]; 1496 case 2: return namesH[qregNo]; 1497 case 4: return namesS[qregNo]; 1498 case 8: return namesD[qregNo]; 1499 case 16: return namesQ[qregNo]; 1500 default: vassert(0); 1501 } 1502 /*NOTREACHED*/ 1503 } 1504 1505 static const HChar* nameQReg128 ( UInt qregNo ) 1506 { 1507 return nameQRegLO(qregNo, Ity_V128); 1508 } 1509 1510 /* Find the offset of the most significant half (8 bytes) of the given 1511 Qreg. This requires knowing the endianness of the host. 
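
   Added usage sketch (illustrative only): the accessors below give the
   64-bit view of the upper half of a Q register.  For example, copying the
   high 64 bits of q1 into the high 64 bits of q0, leaving q0's low half
   untouched, is just

      putQRegHI64(0, getQRegHI64(1));

   On the little-endian hosts supported here this is lane 1 of Ity_I64,
   which is how offsetQRegHI64() below computes the offset.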
*/ 1512 static Int offsetQRegHI64 ( UInt qregNo ) 1513 { 1514 return offsetQRegLane(qregNo, Ity_I64, 1); 1515 } 1516 1517 static IRExpr* getQRegHI64 ( UInt qregNo ) 1518 { 1519 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64); 1520 } 1521 1522 static void putQRegHI64 ( UInt qregNo, IRExpr* e ) 1523 { 1524 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1525 Int off = offsetQRegHI64(qregNo); 1526 switch (ty) { 1527 case Ity_I64: case Ity_F64: 1528 break; 1529 default: 1530 vassert(0); // Other cases are plain wrong 1531 } 1532 stmt(IRStmt_Put(off, e)); 1533 } 1534 1535 /* Put to a specified lane of a Qreg. */ 1536 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e ) 1537 { 1538 IRType laneTy = typeOfIRExpr(irsb->tyenv, e); 1539 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1540 switch (laneTy) { 1541 case Ity_F64: case Ity_I64: 1542 case Ity_I32: case Ity_F32: 1543 case Ity_I16: case Ity_F16: 1544 case Ity_I8: 1545 break; 1546 default: 1547 vassert(0); // Other cases are ATC 1548 } 1549 stmt(IRStmt_Put(off, e)); 1550 } 1551 1552 /* Get from a specified lane of a Qreg. */ 1553 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy ) 1554 { 1555 Int off = offsetQRegLane(qregNo, laneTy, laneNo); 1556 switch (laneTy) { 1557 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 1558 case Ity_F64: case Ity_F32: case Ity_F16: 1559 break; 1560 default: 1561 vassert(0); // Other cases are ATC 1562 } 1563 return IRExpr_Get(off, laneTy); 1564 } 1565 1566 1567 //ZZ /* ---------------- Misc registers ---------------- */ 1568 //ZZ 1569 //ZZ static void putMiscReg32 ( UInt gsoffset, 1570 //ZZ IRExpr* e, /* :: Ity_I32 */ 1571 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */) 1572 //ZZ { 1573 //ZZ switch (gsoffset) { 1574 //ZZ case OFFB_FPSCR: break; 1575 //ZZ case OFFB_QFLAG32: break; 1576 //ZZ case OFFB_GEFLAG0: break; 1577 //ZZ case OFFB_GEFLAG1: break; 1578 //ZZ case OFFB_GEFLAG2: break; 1579 //ZZ case OFFB_GEFLAG3: break; 1580 //ZZ default: vassert(0); /* awaiting more cases */ 1581 //ZZ } 1582 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1583 //ZZ 1584 //ZZ if (guardT == IRTemp_INVALID) { 1585 //ZZ /* unconditional write */ 1586 //ZZ stmt(IRStmt_Put(gsoffset, e)); 1587 //ZZ } else { 1588 //ZZ stmt(IRStmt_Put( 1589 //ZZ gsoffset, 1590 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)), 1591 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) ) 1592 //ZZ )); 1593 //ZZ } 1594 //ZZ } 1595 //ZZ 1596 //ZZ static IRTemp get_ITSTATE ( void ) 1597 //ZZ { 1598 //ZZ ASSERT_IS_THUMB; 1599 //ZZ IRTemp t = newTemp(Ity_I32); 1600 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32)); 1601 //ZZ return t; 1602 //ZZ } 1603 //ZZ 1604 //ZZ static void put_ITSTATE ( IRTemp t ) 1605 //ZZ { 1606 //ZZ ASSERT_IS_THUMB; 1607 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) ); 1608 //ZZ } 1609 //ZZ 1610 //ZZ static IRTemp get_QFLAG32 ( void ) 1611 //ZZ { 1612 //ZZ IRTemp t = newTemp(Ity_I32); 1613 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32)); 1614 //ZZ return t; 1615 //ZZ } 1616 //ZZ 1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT ) 1618 //ZZ { 1619 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT ); 1620 //ZZ } 1621 //ZZ 1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program 1623 //ZZ Status Register) to indicate that overflow or saturation occurred. 1624 //ZZ Nb: t must be zero to denote no saturation, and any nonzero 1625 //ZZ value to indicate saturation. 
*/ 1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) 1627 //ZZ { 1628 //ZZ IRTemp old = get_QFLAG32(); 1629 //ZZ IRTemp nyu = newTemp(Ity_I32); 1630 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); 1631 //ZZ put_QFLAG32(nyu, condT); 1632 //ZZ } 1633 1634 1635 /* ---------------- FPCR stuff ---------------- */ 1636 1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and 1638 convert them to IR format. Bind the final result to the 1639 returned temp. */ 1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void ) 1641 { 1642 /* The ARMvfp encoding for rounding mode bits is: 1643 00 to nearest 1644 01 to +infinity 1645 10 to -infinity 1646 11 to zero 1647 We need to convert that to the IR encoding: 1648 00 to nearest (the default) 1649 10 to +infinity 1650 01 to -infinity 1651 11 to zero 1652 Which can be done by swapping bits 0 and 1. 1653 The rmode bits are at 23:22 in FPSCR. 1654 */ 1655 IRTemp armEncd = newTemp(Ity_I32); 1656 IRTemp swapped = newTemp(Ity_I32); 1657 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that 1658 we don't zero out bits 24 and above, since the assignment to 1659 'swapped' will mask them out anyway. */ 1660 assign(armEncd, 1661 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22))); 1662 /* Now swap them. */ 1663 assign(swapped, 1664 binop(Iop_Or32, 1665 binop(Iop_And32, 1666 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)), 1667 mkU32(2)), 1668 binop(Iop_And32, 1669 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)), 1670 mkU32(1)) 1671 )); 1672 return swapped; 1673 } 1674 1675 1676 /*------------------------------------------------------------*/ 1677 /*--- Helpers for flag handling and conditional insns ---*/ 1678 /*------------------------------------------------------------*/ 1679 1680 static const HChar* nameARM64Condcode ( ARM64Condcode cond ) 1681 { 1682 switch (cond) { 1683 case ARM64CondEQ: return "eq"; 1684 case ARM64CondNE: return "ne"; 1685 case ARM64CondCS: return "cs"; // or 'hs' 1686 case ARM64CondCC: return "cc"; // or 'lo' 1687 case ARM64CondMI: return "mi"; 1688 case ARM64CondPL: return "pl"; 1689 case ARM64CondVS: return "vs"; 1690 case ARM64CondVC: return "vc"; 1691 case ARM64CondHI: return "hi"; 1692 case ARM64CondLS: return "ls"; 1693 case ARM64CondGE: return "ge"; 1694 case ARM64CondLT: return "lt"; 1695 case ARM64CondGT: return "gt"; 1696 case ARM64CondLE: return "le"; 1697 case ARM64CondAL: return "al"; 1698 case ARM64CondNV: return "nv"; 1699 default: vpanic("name_ARM64Condcode"); 1700 } 1701 } 1702 1703 /* and a handy shorthand for it */ 1704 static const HChar* nameCC ( ARM64Condcode cond ) { 1705 return nameARM64Condcode(cond); 1706 } 1707 1708 1709 /* Build IR to calculate some particular condition from stored 1710 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1711 Ity_I64, suitable for narrowing. Although the return type is 1712 Ity_I64, the returned value is either 0 or 1. 'cond' must be 1713 :: Ity_I64 and must denote the condition to compute in 1714 bits 7:4, and be zero everywhere else. 1715 */ 1716 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond ) 1717 { 1718 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64); 1719 /* And 'cond' had better produce a value in which only bits 7:4 are 1720 nonzero. However, obviously we can't assert for that. */ 1721 1722 /* So what we're constructing for the first argument is 1723 "(cond << 4) | stored-operation". 1724 However, as per comments above, 'cond' must be supplied 1725 pre-shifted to this function. 
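
      Added worked example: to evaluate, say, the EQ condition (encoding 0),
      a caller passes mkU64(ARM64CondEQ << 4); the Iop_Or64 below then packs
      that together with the stored CC_OP value, so the run-time helper
      receives both the condition number and the thunk operation in its
      first argument.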
1726 1727 This pairing scheme requires that the ARM64_CC_OP_ values all fit 1728 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest 1729 8 bits of the first argument. */ 1730 IRExpr** args 1731 = mkIRExprVec_4( 1732 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond), 1733 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1734 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1735 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) 1736 ); 1737 IRExpr* call 1738 = mkIRExprCCall( 1739 Ity_I64, 1740 0/*regparm*/, 1741 "arm64g_calculate_condition", &arm64g_calculate_condition, 1742 args 1743 ); 1744 1745 /* Exclude the requested condition, OP and NDEP from definedness 1746 checking. We're only interested in DEP1 and DEP2. */ 1747 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1748 return call; 1749 } 1750 1751 1752 /* Build IR to calculate some particular condition from stored 1753 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type 1754 Ity_I64, suitable for narrowing. Although the return type is 1755 Ity_I64, the returned value is either 0 or 1. 1756 */ 1757 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond ) 1758 { 1759 /* First arg is "(cond << 4) | condition". This requires that the 1760 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a 1761 (COND, OP) pair in the lowest 8 bits of the first argument. */ 1762 vassert(cond >= 0 && cond <= 15); 1763 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) ); 1764 } 1765 1766 1767 /* Build IR to calculate just the carry flag from stored 1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1769 Ity_I64. */ 1770 static IRExpr* mk_arm64g_calculate_flag_c ( void ) 1771 { 1772 IRExpr** args 1773 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1774 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1775 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1776 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1777 IRExpr* call 1778 = mkIRExprCCall( 1779 Ity_I64, 1780 0/*regparm*/, 1781 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c, 1782 args 1783 ); 1784 /* Exclude OP and NDEP from definedness checking. We're only 1785 interested in DEP1 and DEP2. */ 1786 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1787 return call; 1788 } 1789 1790 1791 //ZZ /* Build IR to calculate just the overflow flag from stored 1792 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1793 //ZZ Ity_I32. */ 1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void ) 1795 //ZZ { 1796 //ZZ IRExpr** args 1797 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 1798 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1799 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1800 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 1801 //ZZ IRExpr* call 1802 //ZZ = mkIRExprCCall( 1803 //ZZ Ity_I32, 1804 //ZZ 0/*regparm*/, 1805 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v, 1806 //ZZ args 1807 //ZZ ); 1808 //ZZ /* Exclude OP and NDEP from definedness checking. We're only 1809 //ZZ interested in DEP1 and DEP2. */ 1810 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1811 //ZZ return call; 1812 //ZZ } 1813 1814 1815 /* Build IR to calculate N Z C V in bits 31:28 of the 1816 returned word. 
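   Added usage sketch (illustrative only): an MRS-of-NZCV style read could
   be generated as

      putIReg64orZR(dd, mk_arm64g_calculate_flags_nzcv());

   where 'dd' is a hypothetical destination register number; setFlags_COPY()
   further down is the corresponding write-side helper.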
*/ 1817 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void ) 1818 { 1819 IRExpr** args 1820 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1821 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1822 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1823 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1824 IRExpr* call 1825 = mkIRExprCCall( 1826 Ity_I64, 1827 0/*regparm*/, 1828 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv, 1829 args 1830 ); 1831 /* Exclude OP and NDEP from definedness checking. We're only 1832 interested in DEP1 and DEP2. */ 1833 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1834 return call; 1835 } 1836 1837 1838 /* Build IR to set the flags thunk, in the most general case. */ 1839 static 1840 void setFlags_D1_D2_ND ( UInt cc_op, 1841 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep ) 1842 { 1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64)); 1844 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64)); 1845 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64)); 1846 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER); 1847 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) )); 1848 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); 1849 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); 1850 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); 1851 } 1852 1853 /* Build IR to set the flags thunk after ADD or SUB. */ 1854 static 1855 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR ) 1856 { 1857 IRTemp argL64 = IRTemp_INVALID; 1858 IRTemp argR64 = IRTemp_INVALID; 1859 IRTemp z64 = newTemp(Ity_I64); 1860 if (is64) { 1861 argL64 = argL; 1862 argR64 = argR; 1863 } else { 1864 argL64 = newTemp(Ity_I64); 1865 argR64 = newTemp(Ity_I64); 1866 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1867 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1868 } 1869 assign(z64, mkU64(0)); 1870 UInt cc_op = ARM64G_CC_OP_NUMBER; 1871 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; } 1872 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; } 1873 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; } 1874 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; } 1875 else { vassert(0); } 1876 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64); 1877 } 1878 1879 /* Build IR to set the flags thunk after ADC or SBC. */ 1880 static 1881 void setFlags_ADC_SBC ( Bool is64, Bool isSBC, 1882 IRTemp argL, IRTemp argR, IRTemp oldC ) 1883 { 1884 IRTemp argL64 = IRTemp_INVALID; 1885 IRTemp argR64 = IRTemp_INVALID; 1886 IRTemp oldC64 = IRTemp_INVALID; 1887 if (is64) { 1888 argL64 = argL; 1889 argR64 = argR; 1890 oldC64 = oldC; 1891 } else { 1892 argL64 = newTemp(Ity_I64); 1893 argR64 = newTemp(Ity_I64); 1894 oldC64 = newTemp(Ity_I64); 1895 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1896 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1897 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC))); 1898 } 1899 UInt cc_op = ARM64G_CC_OP_NUMBER; 1900 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; } 1901 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; } 1902 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; } 1903 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; } 1904 else { vassert(0); } 1905 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64); 1906 } 1907 1908 /* Build IR to set the flags thunk after ADD or SUB, if the given 1909 condition evaluates to True at run time. If not, the flags are set 1910 to the specified NZCV value. 
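   (Added note: this is the shape needed by conditional-compare style
   instructions, where a failing condition must deposit the literal NZCV
   value instead of the comparison result.  A hedged sketch of a call,
   with is64, isSUB, cond, argL, argR and nzcv all being values the
   decoder already holds:

      setFlags_ADD_SUB_conditionally(is64, isSUB, cond, argL, argR, nzcv);

   which generates the CC_OP/CC_DEP1/CC_DEP2 ITEs described just below.)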
*/ 1911 static 1912 void setFlags_ADD_SUB_conditionally ( 1913 Bool is64, Bool isSUB, 1914 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv 1915 ) 1916 { 1917 /* Generate IR as follows: 1918 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY) 1919 CC_DEP1 = ITE(cond, argL64, nzcv << 28) 1920 CC_DEP2 = ITE(cond, argR64, 0) 1921 CC_NDEP = 0 1922 */ 1923 1924 IRTemp z64 = newTemp(Ity_I64); 1925 assign(z64, mkU64(0)); 1926 1927 /* Establish the operation and operands for the True case. */ 1928 IRTemp t_dep1 = IRTemp_INVALID; 1929 IRTemp t_dep2 = IRTemp_INVALID; 1930 UInt t_op = ARM64G_CC_OP_NUMBER; 1931 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; } 1932 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; } 1933 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; } 1934 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; } 1935 else { vassert(0); } 1936 /* */ 1937 if (is64) { 1938 t_dep1 = argL; 1939 t_dep2 = argR; 1940 } else { 1941 t_dep1 = newTemp(Ity_I64); 1942 t_dep2 = newTemp(Ity_I64); 1943 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL))); 1944 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR))); 1945 } 1946 1947 /* Establish the operation and operands for the False case. */ 1948 IRTemp f_dep1 = newTemp(Ity_I64); 1949 IRTemp f_dep2 = z64; 1950 UInt f_op = ARM64G_CC_OP_COPY; 1951 assign(f_dep1, mkU64(nzcv << 28)); 1952 1953 /* Final thunk values */ 1954 IRTemp dep1 = newTemp(Ity_I64); 1955 IRTemp dep2 = newTemp(Ity_I64); 1956 IRTemp op = newTemp(Ity_I64); 1957 1958 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op))); 1959 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1))); 1960 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2))); 1961 1962 /* finally .. */ 1963 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) )); 1964 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) )); 1965 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) )); 1966 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) )); 1967 } 1968 1969 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */ 1970 static 1971 void setFlags_LOGIC ( Bool is64, IRTemp res ) 1972 { 1973 IRTemp res64 = IRTemp_INVALID; 1974 IRTemp z64 = newTemp(Ity_I64); 1975 UInt cc_op = ARM64G_CC_OP_NUMBER; 1976 if (is64) { 1977 res64 = res; 1978 cc_op = ARM64G_CC_OP_LOGIC64; 1979 } else { 1980 res64 = newTemp(Ity_I64); 1981 assign(res64, unop(Iop_32Uto64, mkexpr(res))); 1982 cc_op = ARM64G_CC_OP_LOGIC32; 1983 } 1984 assign(z64, mkU64(0)); 1985 setFlags_D1_D2_ND(cc_op, res64, z64, z64); 1986 } 1987 1988 /* Build IR to set the flags thunk to a given NZCV value. NZCV is 1989 located in bits 31:28 of the supplied value. 
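   For example, a supplied value of 0x60000000 sets Z and C and clears N and V.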
*/ 1990 static 1991 void setFlags_COPY ( IRTemp nzcv_28x0 ) 1992 { 1993 IRTemp z64 = newTemp(Ity_I64); 1994 assign(z64, mkU64(0)); 1995 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64); 1996 } 1997 1998 1999 //ZZ /* Minor variant of the above that sets NDEP to zero (if it 2000 //ZZ sets it at all) */ 2001 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, 2002 //ZZ IRTemp t_dep2, 2003 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2004 //ZZ { 2005 //ZZ IRTemp z32 = newTemp(Ity_I32); 2006 //ZZ assign( z32, mkU32(0) ); 2007 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); 2008 //ZZ } 2009 //ZZ 2010 //ZZ 2011 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it 2012 //ZZ sets it at all) */ 2013 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, 2014 //ZZ IRTemp t_ndep, 2015 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2016 //ZZ { 2017 //ZZ IRTemp z32 = newTemp(Ity_I32); 2018 //ZZ assign( z32, mkU32(0) ); 2019 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); 2020 //ZZ } 2021 //ZZ 2022 //ZZ 2023 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it 2024 //ZZ sets them at all) */ 2025 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, 2026 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 2027 //ZZ { 2028 //ZZ IRTemp z32 = newTemp(Ity_I32); 2029 //ZZ assign( z32, mkU32(0) ); 2030 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT ); 2031 //ZZ } 2032 2033 2034 /*------------------------------------------------------------*/ 2035 /*--- Misc math helpers ---*/ 2036 /*------------------------------------------------------------*/ 2037 2038 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */ 2039 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh ) 2040 { 2041 IRTemp maskT = newTemp(Ity_I64); 2042 IRTemp res = newTemp(Ity_I64); 2043 vassert(sh >= 1 && sh <= 63); 2044 assign(maskT, mkU64(mask)); 2045 assign( res, 2046 binop(Iop_Or64, 2047 binop(Iop_Shr64, 2048 binop(Iop_And64,mkexpr(x),mkexpr(maskT)), 2049 mkU8(sh)), 2050 binop(Iop_And64, 2051 binop(Iop_Shl64,mkexpr(x),mkU8(sh)), 2052 mkexpr(maskT)) 2053 ) 2054 ); 2055 return res; 2056 } 2057 2058 /* Generates byte swaps within 32-bit lanes. */ 2059 static IRTemp math_UINTSWAP64 ( IRTemp src ) 2060 { 2061 IRTemp res; 2062 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2063 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 2064 return res; 2065 } 2066 2067 /* Generates byte swaps within 16-bit lanes. */ 2068 static IRTemp math_USHORTSWAP64 ( IRTemp src ) 2069 { 2070 IRTemp res; 2071 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2072 return res; 2073 } 2074 2075 /* Generates a 64-bit byte swap. */ 2076 static IRTemp math_BYTESWAP64 ( IRTemp src ) 2077 { 2078 IRTemp res; 2079 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8); 2080 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16); 2081 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32); 2082 return res; 2083 } 2084 2085 /* Generates a 64-bit bit swap. */ 2086 static IRTemp math_BITSWAP64 ( IRTemp src ) 2087 { 2088 IRTemp res; 2089 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1); 2090 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2); 2091 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4); 2092 return math_BYTESWAP64(res); 2093 } 2094 2095 /* Duplicates the bits at the bottom of the given word to fill the 2096 whole word. src :: Ity_I64 is assumed to have zeroes everywhere 2097 except for the bottom bits. 
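   For example, an Ity_I8 source holding 0xAB yields 0xABABABABABABABAB, and an
   Ity_I16 source holding 0x1234 yields 0x1234123412341234.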
*/ 2098 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy ) 2099 { 2100 if (srcTy == Ity_I8) { 2101 IRTemp t16 = newTemp(Ity_I64); 2102 assign(t16, binop(Iop_Or64, mkexpr(src), 2103 binop(Iop_Shl64, mkexpr(src), mkU8(8)))); 2104 IRTemp t32 = newTemp(Ity_I64); 2105 assign(t32, binop(Iop_Or64, mkexpr(t16), 2106 binop(Iop_Shl64, mkexpr(t16), mkU8(16)))); 2107 IRTemp t64 = newTemp(Ity_I64); 2108 assign(t64, binop(Iop_Or64, mkexpr(t32), 2109 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 2110 return t64; 2111 } 2112 if (srcTy == Ity_I16) { 2113 IRTemp t32 = newTemp(Ity_I64); 2114 assign(t32, binop(Iop_Or64, mkexpr(src), 2115 binop(Iop_Shl64, mkexpr(src), mkU8(16)))); 2116 IRTemp t64 = newTemp(Ity_I64); 2117 assign(t64, binop(Iop_Or64, mkexpr(t32), 2118 binop(Iop_Shl64, mkexpr(t32), mkU8(32)))); 2119 return t64; 2120 } 2121 if (srcTy == Ity_I32) { 2122 IRTemp t64 = newTemp(Ity_I64); 2123 assign(t64, binop(Iop_Or64, mkexpr(src), 2124 binop(Iop_Shl64, mkexpr(src), mkU8(32)))); 2125 return t64; 2126 } 2127 if (srcTy == Ity_I64) { 2128 return src; 2129 } 2130 vassert(0); 2131 } 2132 2133 2134 /* Duplicates the src element exactly so as to fill a V128 value. */ 2135 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy ) 2136 { 2137 IRTemp res = newTempV128(); 2138 if (srcTy == Ity_F64) { 2139 IRTemp i64 = newTemp(Ity_I64); 2140 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src))); 2141 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64))); 2142 return res; 2143 } 2144 if (srcTy == Ity_F32) { 2145 IRTemp i64a = newTemp(Ity_I64); 2146 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src)))); 2147 IRTemp i64b = newTemp(Ity_I64); 2148 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)), 2149 mkexpr(i64a))); 2150 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b))); 2151 return res; 2152 } 2153 if (srcTy == Ity_I64) { 2154 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src))); 2155 return res; 2156 } 2157 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) { 2158 IRTemp t1 = newTemp(Ity_I64); 2159 assign(t1, widenUto64(srcTy, mkexpr(src))); 2160 IRTemp t2 = math_DUP_TO_64(t1, srcTy); 2161 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2))); 2162 return res; 2163 } 2164 vassert(0); 2165 } 2166 2167 2168 /* |fullWidth| is a full V128 width result. Depending on bitQ, 2169 zero out the upper half. */ 2170 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth ) 2171 { 2172 if (bitQ == 1) return mkexpr(fullWidth); 2173 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth)); 2174 vassert(0); 2175 } 2176 2177 /* The same, but from an expression instead. */ 2178 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth ) 2179 { 2180 IRTemp fullWidthT = newTempV128(); 2181 assign(fullWidthT, fullWidth); 2182 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT); 2183 } 2184 2185 2186 /*------------------------------------------------------------*/ 2187 /*--- FP comparison helpers ---*/ 2188 /*------------------------------------------------------------*/ 2189 2190 /* irRes :: Ity_I32 holds a floating point comparison result encoded 2191 as an IRCmpF64Result. Generate code to convert it to an 2192 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value. 2193 Assign a new temp to hold that value, and return the temp. 
*/ 2194 static 2195 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 ) 2196 { 2197 IRTemp ix = newTemp(Ity_I64); 2198 IRTemp termL = newTemp(Ity_I64); 2199 IRTemp termR = newTemp(Ity_I64); 2200 IRTemp nzcv = newTemp(Ity_I64); 2201 IRTemp irRes = newTemp(Ity_I64); 2202 2203 /* This is where the fun starts. We have to convert 'irRes' from 2204 an IR-convention return result (IRCmpF64Result) to an 2205 ARM-encoded (N,Z,C,V) group. The final result is in the bottom 2206 4 bits of 'nzcv'. */ 2207 /* Map compare result from IR to ARM(nzcv) */ 2208 /* 2209 FP cmp result | IR | ARM(nzcv) 2210 -------------------------------- 2211 UN 0x45 0011 2212 LT 0x01 1000 2213 GT 0x00 0010 2214 EQ 0x40 0110 2215 */ 2216 /* Now since you're probably wondering WTF .. 2217 2218 ix fishes the useful bits out of the IR value, bits 6 and 0, and 2219 places them side by side, giving a number which is 0, 1, 2 or 3. 2220 2221 termL is a sequence cooked up by GNU superopt. It converts ix 2222 into an almost correct value NZCV value (incredibly), except 2223 for the case of UN, where it produces 0100 instead of the 2224 required 0011. 2225 2226 termR is therefore a correction term, also computed from ix. It 2227 is 1 in the UN case and 0 for LT, GT and UN. Hence, to get 2228 the final correct value, we subtract termR from termL. 2229 2230 Don't take my word for it. There's a test program at the bottom 2231 of guest_arm_toIR.c, to try this out with. 2232 */ 2233 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32))); 2234 2235 assign( 2236 ix, 2237 binop(Iop_Or64, 2238 binop(Iop_And64, 2239 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)), 2240 mkU64(3)), 2241 binop(Iop_And64, mkexpr(irRes), mkU64(1)))); 2242 2243 assign( 2244 termL, 2245 binop(Iop_Add64, 2246 binop(Iop_Shr64, 2247 binop(Iop_Sub64, 2248 binop(Iop_Shl64, 2249 binop(Iop_Xor64, mkexpr(ix), mkU64(1)), 2250 mkU8(62)), 2251 mkU64(1)), 2252 mkU8(61)), 2253 mkU64(1))); 2254 2255 assign( 2256 termR, 2257 binop(Iop_And64, 2258 binop(Iop_And64, 2259 mkexpr(ix), 2260 binop(Iop_Shr64, mkexpr(ix), mkU8(1))), 2261 mkU64(1))); 2262 2263 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR))); 2264 return nzcv; 2265 } 2266 2267 2268 /*------------------------------------------------------------*/ 2269 /*--- Data processing (immediate) ---*/ 2270 /*------------------------------------------------------------*/ 2271 2272 /* Helper functions for supporting "DecodeBitMasks" */ 2273 2274 static ULong dbm_ROR ( Int width, ULong x, Int rot ) 2275 { 2276 vassert(width > 0 && width <= 64); 2277 vassert(rot >= 0 && rot < width); 2278 if (rot == 0) return x; 2279 ULong res = x >> rot; 2280 res |= (x << (width - rot)); 2281 if (width < 64) 2282 res &= ((1ULL << width) - 1); 2283 return res; 2284 } 2285 2286 static ULong dbm_RepTo64( Int esize, ULong x ) 2287 { 2288 switch (esize) { 2289 case 64: 2290 return x; 2291 case 32: 2292 x &= 0xFFFFFFFF; x |= (x << 32); 2293 return x; 2294 case 16: 2295 x &= 0xFFFF; x |= (x << 16); x |= (x << 32); 2296 return x; 2297 case 8: 2298 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32); 2299 return x; 2300 case 4: 2301 x &= 0xF; x |= (x << 4); x |= (x << 8); 2302 x |= (x << 16); x |= (x << 32); 2303 return x; 2304 case 2: 2305 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8); 2306 x |= (x << 16); x |= (x << 32); 2307 return x; 2308 default: 2309 break; 2310 } 2311 vpanic("dbm_RepTo64"); 2312 /*NOTREACHED*/ 2313 return 0; 2314 } 2315 2316 static Int dbm_highestSetBit ( ULong x ) 2317 { 2318 Int i; 2319 for (i = 63; i >= 0; i--) { 2320 
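      /* Bits above position i have already been tested and found clear,
         so the first hit is the highest set bit. */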
if (x & (1ULL << i)) 2321 return i; 2322 } 2323 vassert(x == 0); 2324 return -1; 2325 } 2326 2327 static 2328 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask, 2329 ULong immN, ULong imms, ULong immr, Bool immediate, 2330 UInt M /*32 or 64*/) 2331 { 2332 vassert(immN < (1ULL << 1)); 2333 vassert(imms < (1ULL << 6)); 2334 vassert(immr < (1ULL << 6)); 2335 vassert(immediate == False || immediate == True); 2336 vassert(M == 32 || M == 64); 2337 2338 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) ); 2339 if (len < 1) { /* printf("fail1\n"); */ return False; } 2340 vassert(len <= 6); 2341 vassert(M >= (1 << len)); 2342 2343 vassert(len >= 1 && len <= 6); 2344 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len); 2345 (1 << len) - 1; 2346 vassert(levels >= 1 && levels <= 63); 2347 2348 if (immediate && ((imms & levels) == levels)) { 2349 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */ 2350 return False; 2351 } 2352 2353 ULong S = imms & levels; 2354 ULong R = immr & levels; 2355 Int diff = S - R; 2356 diff &= 63; 2357 Int esize = 1 << len; 2358 vassert(2 <= esize && esize <= 64); 2359 2360 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the 2361 same below with d. S can be 63 in which case we have an out of 2362 range and hence undefined shift. */ 2363 vassert(S >= 0 && S <= 63); 2364 vassert(esize >= (S+1)); 2365 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1) 2366 //(1ULL << (S+1)) - 1; 2367 ((1ULL << S) - 1) + (1ULL << S); 2368 2369 Int d = // diff<len-1:0> 2370 diff & ((1 << len)-1); 2371 vassert(esize >= (d+1)); 2372 vassert(d >= 0 && d <= 63); 2373 2374 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1) 2375 //(1ULL << (d+1)) - 1; 2376 ((1ULL << d) - 1) + (1ULL << d); 2377 2378 if (esize != 64) vassert(elem_s < (1ULL << esize)); 2379 if (esize != 64) vassert(elem_d < (1ULL << esize)); 2380 2381 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R)); 2382 if (tmask) *tmask = dbm_RepTo64(esize, elem_d); 2383 2384 return True; 2385 } 2386 2387 2388 static 2389 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres, 2390 UInt insn) 2391 { 2392 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 2393 2394 /* insn[28:23] 2395 10000x PC-rel addressing 2396 10001x Add/subtract (immediate) 2397 100100 Logical (immediate) 2398 100101 Move Wide (immediate) 2399 100110 Bitfield 2400 100111 Extract 2401 */ 2402 2403 /* ------------------ ADD/SUB{,S} imm12 ------------------ */ 2404 if (INSN(28,24) == BITS5(1,0,0,0,1)) { 2405 Bool is64 = INSN(31,31) == 1; 2406 Bool isSub = INSN(30,30) == 1; 2407 Bool setCC = INSN(29,29) == 1; 2408 UInt sh = INSN(23,22); 2409 UInt uimm12 = INSN(21,10); 2410 UInt nn = INSN(9,5); 2411 UInt dd = INSN(4,0); 2412 const HChar* nm = isSub ? "sub" : "add"; 2413 if (sh >= 2) { 2414 /* Invalid; fall through */ 2415 } else { 2416 vassert(sh <= 1); 2417 uimm12 <<= (12 * sh); 2418 if (is64) { 2419 IRTemp argL = newTemp(Ity_I64); 2420 IRTemp argR = newTemp(Ity_I64); 2421 IRTemp res = newTemp(Ity_I64); 2422 assign(argL, getIReg64orSP(nn)); 2423 assign(argR, mkU64(uimm12)); 2424 assign(res, binop(isSub ? 
Iop_Sub64 : Iop_Add64, 2425 mkexpr(argL), mkexpr(argR))); 2426 if (setCC) { 2427 putIReg64orZR(dd, mkexpr(res)); 2428 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 2429 DIP("%ss %s, %s, 0x%x\n", 2430 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12); 2431 } else { 2432 putIReg64orSP(dd, mkexpr(res)); 2433 DIP("%s %s, %s, 0x%x\n", 2434 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12); 2435 } 2436 } else { 2437 IRTemp argL = newTemp(Ity_I32); 2438 IRTemp argR = newTemp(Ity_I32); 2439 IRTemp res = newTemp(Ity_I32); 2440 assign(argL, getIReg32orSP(nn)); 2441 assign(argR, mkU32(uimm12)); 2442 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32, 2443 mkexpr(argL), mkexpr(argR))); 2444 if (setCC) { 2445 putIReg32orZR(dd, mkexpr(res)); 2446 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR); 2447 DIP("%ss %s, %s, 0x%x\n", 2448 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12); 2449 } else { 2450 putIReg32orSP(dd, mkexpr(res)); 2451 DIP("%s %s, %s, 0x%x\n", 2452 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12); 2453 } 2454 } 2455 return True; 2456 } 2457 } 2458 2459 /* -------------------- ADR/ADRP -------------------- */ 2460 if (INSN(28,24) == BITS5(1,0,0,0,0)) { 2461 UInt bP = INSN(31,31); 2462 UInt immLo = INSN(30,29); 2463 UInt immHi = INSN(23,5); 2464 UInt rD = INSN(4,0); 2465 ULong uimm = (immHi << 2) | immLo; 2466 ULong simm = sx_to_64(uimm, 21); 2467 ULong val; 2468 if (bP) { 2469 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12); 2470 } else { 2471 val = guest_PC_curr_instr + simm; 2472 } 2473 putIReg64orZR(rD, mkU64(val)); 2474 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val); 2475 return True; 2476 } 2477 2478 /* -------------------- LOGIC(imm) -------------------- */ 2479 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) { 2480 /* 31 30 28 22 21 15 9 4 2481 sf op 100100 N immr imms Rn Rd 2482 op=00: AND Rd|SP, Rn, #imm 2483 op=01: ORR Rd|SP, Rn, #imm 2484 op=10: EOR Rd|SP, Rn, #imm 2485 op=11: ANDS Rd|ZR, Rn, #imm 2486 */ 2487 Bool is64 = INSN(31,31) == 1; 2488 UInt op = INSN(30,29); 2489 UInt N = INSN(22,22); 2490 UInt immR = INSN(21,16); 2491 UInt immS = INSN(15,10); 2492 UInt nn = INSN(9,5); 2493 UInt dd = INSN(4,0); 2494 ULong imm = 0; 2495 Bool ok; 2496 if (N == 1 && !is64) 2497 goto after_logic_imm; /* not allowed; fall through */ 2498 ok = dbm_DecodeBitMasks(&imm, NULL, 2499 N, immS, immR, True, is64 ? 
64 : 32); 2500 if (!ok) 2501 goto after_logic_imm; 2502 2503 const HChar* names[4] = { "and", "orr", "eor", "ands" }; 2504 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 }; 2505 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 }; 2506 2507 vassert(op < 4); 2508 if (is64) { 2509 IRExpr* argL = getIReg64orZR(nn); 2510 IRExpr* argR = mkU64(imm); 2511 IRTemp res = newTemp(Ity_I64); 2512 assign(res, binop(ops64[op], argL, argR)); 2513 if (op < 3) { 2514 putIReg64orSP(dd, mkexpr(res)); 2515 DIP("%s %s, %s, 0x%llx\n", names[op], 2516 nameIReg64orSP(dd), nameIReg64orZR(nn), imm); 2517 } else { 2518 putIReg64orZR(dd, mkexpr(res)); 2519 setFlags_LOGIC(True/*is64*/, res); 2520 DIP("%s %s, %s, 0x%llx\n", names[op], 2521 nameIReg64orZR(dd), nameIReg64orZR(nn), imm); 2522 } 2523 } else { 2524 IRExpr* argL = getIReg32orZR(nn); 2525 IRExpr* argR = mkU32((UInt)imm); 2526 IRTemp res = newTemp(Ity_I32); 2527 assign(res, binop(ops32[op], argL, argR)); 2528 if (op < 3) { 2529 putIReg32orSP(dd, mkexpr(res)); 2530 DIP("%s %s, %s, 0x%x\n", names[op], 2531 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm); 2532 } else { 2533 putIReg32orZR(dd, mkexpr(res)); 2534 setFlags_LOGIC(False/*!is64*/, res); 2535 DIP("%s %s, %s, 0x%x\n", names[op], 2536 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); 2537 } 2538 } 2539 return True; 2540 } 2541 after_logic_imm: 2542 2543 /* -------------------- MOV{Z,N,K} -------------------- */ 2544 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { 2545 /* 31 30 28 22 20 4 2546 | | | | | | 2547 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) 2548 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) 2549 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) 2550 */ 2551 Bool is64 = INSN(31,31) == 1; 2552 UInt subopc = INSN(30,29); 2553 UInt hw = INSN(22,21); 2554 UInt imm16 = INSN(20,5); 2555 UInt dd = INSN(4,0); 2556 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { 2557 /* invalid; fall through */ 2558 } else { 2559 ULong imm64 = ((ULong)imm16) << (16 * hw); 2560 if (!is64) 2561 vassert(imm64 < 0x100000000ULL); 2562 switch (subopc) { 2563 case BITS2(1,0): // MOVZ 2564 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2565 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2566 break; 2567 case BITS2(0,0): // MOVN 2568 imm64 = ~imm64; 2569 if (!is64) 2570 imm64 &= 0xFFFFFFFFULL; 2571 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2572 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2573 break; 2574 case BITS2(1,1): // MOVK 2575 /* This is more complex. We are inserting a slice into 2576 the destination register, so we need to have the old 2577 value of it. 
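               The new value is
               (old & ~(0xFFFF << (16*hw))) | (imm16 << (16*hw)).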
*/ 2578 if (is64) { 2579 IRTemp old = newTemp(Ity_I64); 2580 assign(old, getIReg64orZR(dd)); 2581 ULong mask = 0xFFFFULL << (16 * hw); 2582 IRExpr* res 2583 = binop(Iop_Or64, 2584 binop(Iop_And64, mkexpr(old), mkU64(~mask)), 2585 mkU64(imm64)); 2586 putIReg64orZR(dd, res); 2587 DIP("movk %s, 0x%x, lsl %u\n", 2588 nameIReg64orZR(dd), imm16, 16*hw); 2589 } else { 2590 IRTemp old = newTemp(Ity_I32); 2591 assign(old, getIReg32orZR(dd)); 2592 vassert(hw <= 1); 2593 UInt mask = 0xFFFF << (16 * hw); 2594 IRExpr* res 2595 = binop(Iop_Or32, 2596 binop(Iop_And32, mkexpr(old), mkU32(~mask)), 2597 mkU32((UInt)imm64)); 2598 putIReg32orZR(dd, res); 2599 DIP("movk %s, 0x%x, lsl %u\n", 2600 nameIReg32orZR(dd), imm16, 16*hw); 2601 } 2602 break; 2603 default: 2604 vassert(0); 2605 } 2606 return True; 2607 } 2608 } 2609 2610 /* -------------------- {U,S,}BFM -------------------- */ 2611 /* 30 28 22 21 15 9 4 2612 2613 sf 10 100110 N immr imms nn dd 2614 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2615 UBFM Xd, Xn, #immr, #imms when sf=1, N=1 2616 2617 sf 00 100110 N immr imms nn dd 2618 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2619 SBFM Xd, Xn, #immr, #imms when sf=1, N=1 2620 2621 sf 01 100110 N immr imms nn dd 2622 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2623 BFM Xd, Xn, #immr, #imms when sf=1, N=1 2624 */ 2625 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { 2626 UInt sf = INSN(31,31); 2627 UInt opc = INSN(30,29); 2628 UInt N = INSN(22,22); 2629 UInt immR = INSN(21,16); 2630 UInt immS = INSN(15,10); 2631 UInt nn = INSN(9,5); 2632 UInt dd = INSN(4,0); 2633 Bool inZero = False; 2634 Bool extend = False; 2635 const HChar* nm = "???"; 2636 /* skip invalid combinations */ 2637 switch (opc) { 2638 case BITS2(0,0): 2639 inZero = True; extend = True; nm = "sbfm"; break; 2640 case BITS2(0,1): 2641 inZero = False; extend = False; nm = "bfm"; break; 2642 case BITS2(1,0): 2643 inZero = True; extend = False; nm = "ubfm"; break; 2644 case BITS2(1,1): 2645 goto after_bfm; /* invalid */ 2646 default: 2647 vassert(0); 2648 } 2649 if (sf == 1 && N != 1) goto after_bfm; 2650 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 2651 || ((immS >> 5) & 1) != 0)) goto after_bfm; 2652 ULong wmask = 0, tmask = 0; 2653 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 2654 N, immS, immR, False, sf == 1 ? 64 : 32); 2655 if (!ok) goto after_bfm; /* hmmm */ 2656 2657 Bool is64 = sf == 1; 2658 IRType ty = is64 ? Ity_I64 : Ity_I32; 2659 2660 IRTemp dst = newTemp(ty); 2661 IRTemp src = newTemp(ty); 2662 IRTemp bot = newTemp(ty); 2663 IRTemp top = newTemp(ty); 2664 IRTemp res = newTemp(ty); 2665 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); 2666 assign(src, getIRegOrZR(is64, nn)); 2667 /* perform bitfield move on low bits */ 2668 assign(bot, binop(mkOR(ty), 2669 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), 2670 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), 2671 mkU(ty, wmask)))); 2672 /* determine extension bits (sign, zero or dest register) */ 2673 assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); 2674 /* combine extension bits and result bits */ 2675 assign(res, binop(mkOR(ty), 2676 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), 2677 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); 2678 putIRegOrZR(is64, dd, mkexpr(res)); 2679 DIP("%s %s, %s, immR=%u, immS=%u\n", 2680 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); 2681 return True; 2682 } 2683 after_bfm: 2684 2685 /* ---------------------- EXTR ---------------------- */ 2686 /* 30 28 22 20 15 9 4 2687 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 2688 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 2689 */ 2690 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { 2691 Bool is64 = INSN(31,31) == 1; 2692 UInt mm = INSN(20,16); 2693 UInt imm6 = INSN(15,10); 2694 UInt nn = INSN(9,5); 2695 UInt dd = INSN(4,0); 2696 Bool valid = True; 2697 if (INSN(31,31) != INSN(22,22)) 2698 valid = False; 2699 if (!is64 && imm6 >= 32) 2700 valid = False; 2701 if (!valid) goto after_extr; 2702 IRType ty = is64 ? Ity_I64 : Ity_I32; 2703 IRTemp srcHi = newTemp(ty); 2704 IRTemp srcLo = newTemp(ty); 2705 IRTemp res = newTemp(ty); 2706 assign(srcHi, getIRegOrZR(is64, nn)); 2707 assign(srcLo, getIRegOrZR(is64, mm)); 2708 if (imm6 == 0) { 2709 assign(res, mkexpr(srcLo)); 2710 } else { 2711 UInt szBits = 8 * sizeofIRType(ty); 2712 vassert(imm6 > 0 && imm6 < szBits); 2713 assign(res, binop(mkOR(ty), 2714 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)), 2715 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6)))); 2716 } 2717 putIRegOrZR(is64, dd, mkexpr(res)); 2718 DIP("extr %s, %s, %s, #%u\n", 2719 nameIRegOrZR(is64,dd), 2720 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6); 2721 return True; 2722 } 2723 after_extr: 2724 2725 vex_printf("ARM64 front end: data_processing_immediate\n"); 2726 return False; 2727 # undef INSN 2728 } 2729 2730 2731 /*------------------------------------------------------------*/ 2732 /*--- Data processing (register) instructions ---*/ 2733 /*------------------------------------------------------------*/ 2734 2735 static const HChar* nameSH ( UInt sh ) { 2736 switch (sh) { 2737 case 0: return "lsl"; 2738 case 1: return "lsr"; 2739 case 2: return "asr"; 2740 case 3: return "ror"; 2741 default: vassert(0); 2742 } 2743 } 2744 2745 /* Generate IR to get a register value, possibly shifted by an 2746 immediate. Returns either a 32- or 64-bit temporary holding the 2747 result. After the shift, the value can optionally be NOT-ed 2748 too. 2749 2750 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be 2751 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR 2752 isn't allowed, but it's the job of the caller to check that. 2753 */ 2754 static IRTemp getShiftedIRegOrZR ( Bool is64, 2755 UInt sh_how, UInt sh_amt, UInt regNo, 2756 Bool invert ) 2757 { 2758 vassert(sh_how < 4); 2759 vassert(sh_amt < (is64 ? 64 : 32)); 2760 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2761 IRTemp t0 = newTemp(ty); 2762 assign(t0, getIRegOrZR(is64, regNo)); 2763 IRTemp t1 = newTemp(ty); 2764 switch (sh_how) { 2765 case BITS2(0,0): 2766 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt))); 2767 break; 2768 case BITS2(0,1): 2769 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt))); 2770 break; 2771 case BITS2(1,0): 2772 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt))); 2773 break; 2774 case BITS2(1,1): 2775 assign(t1, mkexpr(mathROR(ty, t0, sh_amt))); 2776 break; 2777 default: 2778 vassert(0); 2779 } 2780 if (invert) { 2781 IRTemp t2 = newTemp(ty); 2782 assign(t2, unop(mkNOT(ty), mkexpr(t1))); 2783 return t2; 2784 } else { 2785 return t1; 2786 } 2787 } 2788 2789 2790 static 2791 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres, 2792 UInt insn) 2793 { 2794 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 2795 2796 /* ------------------- ADD/SUB(reg) ------------------- */ 2797 /* x==0 => 32 bit op x==1 => 64 bit op 2798 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED) 2799 2800 31 30 29 28 23 21 20 15 9 4 2801 | | | | | | | | | | 2802 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6) 2803 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6) 2804 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6) 2805 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6) 2806 */ 2807 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) { 2808 UInt bX = INSN(31,31); 2809 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */ 2810 UInt bS = INSN(29, 29); /* set flags? */ 2811 UInt sh = INSN(23,22); 2812 UInt rM = INSN(20,16); 2813 UInt imm6 = INSN(15,10); 2814 UInt rN = INSN(9,5); 2815 UInt rD = INSN(4,0); 2816 Bool isSUB = bOP == 1; 2817 Bool is64 = bX == 1; 2818 IRType ty = is64 ? Ity_I64 : Ity_I32; 2819 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) { 2820 /* invalid; fall through */ 2821 } else { 2822 IRTemp argL = newTemp(ty); 2823 assign(argL, getIRegOrZR(is64, rN)); 2824 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False); 2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2826 IRTemp res = newTemp(ty); 2827 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2828 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2829 if (bS) { 2830 setFlags_ADD_SUB(is64, isSUB, argL, argR); 2831 } 2832 DIP("%s%s %s, %s, %s, %s #%u\n", 2833 bOP ? "sub" : "add", bS ? "s" : "", 2834 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2835 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2836 return True; 2837 } 2838 } 2839 2840 /* ------------------- ADC/SBC(reg) ------------------- */ 2841 /* x==0 => 32 bit op x==1 => 64 bit op 2842 2843 31 30 29 28 23 21 20 15 9 4 2844 | | | | | | | | | | 2845 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm 2846 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm 2847 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm 2848 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm 2849 */ 2850 2851 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) { 2852 UInt bX = INSN(31,31); 2853 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */ 2854 UInt bS = INSN(29,29); /* set flags */ 2855 UInt rM = INSN(20,16); 2856 UInt rN = INSN(9,5); 2857 UInt rD = INSN(4,0); 2858 2859 Bool isSUB = bOP == 1; 2860 Bool is64 = bX == 1; 2861 IRType ty = is64 ? Ity_I64 : Ity_I32; 2862 2863 IRTemp oldC = newTemp(ty); 2864 assign(oldC, 2865 is64 ? 
mk_arm64g_calculate_flag_c() 2866 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) ); 2867 2868 IRTemp argL = newTemp(ty); 2869 assign(argL, getIRegOrZR(is64, rN)); 2870 IRTemp argR = newTemp(ty); 2871 assign(argR, getIRegOrZR(is64, rM)); 2872 2873 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2874 IRTemp res = newTemp(ty); 2875 if (isSUB) { 2876 IRExpr* one = is64 ? mkU64(1) : mkU32(1); 2877 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32; 2878 assign(res, 2879 binop(op, 2880 binop(op, mkexpr(argL), mkexpr(argR)), 2881 binop(xorOp, mkexpr(oldC), one))); 2882 } else { 2883 assign(res, 2884 binop(op, 2885 binop(op, mkexpr(argL), mkexpr(argR)), 2886 mkexpr(oldC))); 2887 } 2888 2889 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2890 2891 if (bS) { 2892 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC); 2893 } 2894 2895 DIP("%s%s %s, %s, %s\n", 2896 bOP ? "sbc" : "adc", bS ? "s" : "", 2897 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2898 nameIRegOrZR(is64, rM)); 2899 return True; 2900 } 2901 2902 /* -------------------- LOGIC(reg) -------------------- */ 2903 /* x==0 => 32 bit op x==1 => 64 bit op 2904 N==0 => inv? is no-op (no inversion) 2905 N==1 => inv? is NOT 2906 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR 2907 2908 31 30 28 23 21 20 15 9 4 2909 | | | | | | | | | 2910 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6)) 2911 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6)) 2912 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6)) 2913 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6)) 2914 With N=1, the names are: BIC ORN EON BICS 2915 */ 2916 if (INSN(28,24) == BITS5(0,1,0,1,0)) { 2917 UInt bX = INSN(31,31); 2918 UInt sh = INSN(23,22); 2919 UInt bN = INSN(21,21); 2920 UInt rM = INSN(20,16); 2921 UInt imm6 = INSN(15,10); 2922 UInt rN = INSN(9,5); 2923 UInt rD = INSN(4,0); 2924 Bool is64 = bX == 1; 2925 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2926 if (!is64 && imm6 > 31) { 2927 /* invalid; fall though */ 2928 } else { 2929 IRTemp argL = newTemp(ty); 2930 assign(argL, getIRegOrZR(is64, rN)); 2931 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1); 2932 IROp op = Iop_INVALID; 2933 switch (INSN(30,29)) { 2934 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break; 2935 case BITS2(0,1): op = mkOR(ty); break; 2936 case BITS2(1,0): op = mkXOR(ty); break; 2937 default: vassert(0); 2938 } 2939 IRTemp res = newTemp(ty); 2940 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2941 if (INSN(30,29) == BITS2(1,1)) { 2942 setFlags_LOGIC(is64, res); 2943 } 2944 putIRegOrZR(is64, rD, mkexpr(res)); 2945 2946 static const HChar* names_op[8] 2947 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" }; 2948 vassert(((bN << 2) | INSN(30,29)) < 8); 2949 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)]; 2950 /* Special-case the printing of "MOV" */ 2951 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) { 2952 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD), 2953 nameIRegOrZR(is64, rM)); 2954 } else { 2955 DIP("%s %s, %s, %s, %s #%u\n", nm_op, 2956 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2957 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2958 } 2959 return True; 2960 } 2961 } 2962 2963 /* -------------------- {U,S}MULH -------------------- */ 2964 /* 31 23 22 20 15 9 4 2965 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm 2966 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm 2967 */ 2968 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) 2969 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) { 2970 Bool isU = INSN(23,23) == 1; 2971 UInt mm = INSN(20,16); 2972 UInt nn = INSN(9,5); 2973 UInt dd = INSN(4,0); 2974 putIReg64orZR(dd, unop(Iop_128HIto64, 2975 binop(isU ? Iop_MullU64 : Iop_MullS64, 2976 getIReg64orZR(nn), getIReg64orZR(mm)))); 2977 DIP("%cmulh %s, %s, %s\n", 2978 isU ? 'u' : 's', 2979 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm)); 2980 return True; 2981 } 2982 2983 /* -------------------- M{ADD,SUB} -------------------- */ 2984 /* 31 30 20 15 14 9 4 2985 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n 2986 sf 00 11011 000 m 1 a n r MADD Rd,Rn,Rm,Ra d = a-m*n 2987 */ 2988 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) { 2989 Bool is64 = INSN(31,31) == 1; 2990 UInt mm = INSN(20,16); 2991 Bool isAdd = INSN(15,15) == 0; 2992 UInt aa = INSN(14,10); 2993 UInt nn = INSN(9,5); 2994 UInt dd = INSN(4,0); 2995 if (is64) { 2996 putIReg64orZR( 2997 dd, 2998 binop(isAdd ? Iop_Add64 : Iop_Sub64, 2999 getIReg64orZR(aa), 3000 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn)))); 3001 } else { 3002 putIReg32orZR( 3003 dd, 3004 binop(isAdd ? Iop_Add32 : Iop_Sub32, 3005 getIReg32orZR(aa), 3006 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn)))); 3007 } 3008 DIP("%s %s, %s, %s, %s\n", 3009 isAdd ? 
"madd" : "msub", 3010 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3011 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa)); 3012 return True; 3013 } 3014 3015 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */ 3016 /* 31 30 28 20 15 11 9 4 3017 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm 3018 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm 3019 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm 3020 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm 3021 In all cases, the operation is: Rd = if cond then Rn else OP(Rm) 3022 */ 3023 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) { 3024 Bool is64 = INSN(31,31) == 1; 3025 UInt b30 = INSN(30,30); 3026 UInt mm = INSN(20,16); 3027 UInt cond = INSN(15,12); 3028 UInt b10 = INSN(10,10); 3029 UInt nn = INSN(9,5); 3030 UInt dd = INSN(4,0); 3031 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */ 3032 IRType ty = is64 ? Ity_I64 : Ity_I32; 3033 IRExpr* argL = getIRegOrZR(is64, nn); 3034 IRExpr* argR = getIRegOrZR(is64, mm); 3035 switch (op) { 3036 case BITS2(0,0): 3037 break; 3038 case BITS2(0,1): 3039 argR = binop(mkADD(ty), argR, mkU(ty,1)); 3040 break; 3041 case BITS2(1,0): 3042 argR = unop(mkNOT(ty), argR); 3043 break; 3044 case BITS2(1,1): 3045 argR = binop(mkSUB(ty), mkU(ty,0), argR); 3046 break; 3047 default: 3048 vassert(0); 3049 } 3050 putIRegOrZR( 3051 is64, dd, 3052 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 3053 argL, argR) 3054 ); 3055 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" }; 3056 DIP("%s %s, %s, %s, %s\n", op_nm[op], 3057 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3058 nameIRegOrZR(is64, mm), nameCC(cond)); 3059 return True; 3060 } 3061 3062 /* -------------- ADD/SUB(extended reg) -------------- */ 3063 /* 28 20 15 12 9 4 3064 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld 3065 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld 3066 3067 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld 3068 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld 3069 3070 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld 3071 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld 3072 3073 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld 3074 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld 3075 3076 The 'm' operand is extended per opt, thusly: 3077 3078 000 Xm & 0xFF UXTB 3079 001 Xm & 0xFFFF UXTH 3080 010 Xm & (2^32)-1 UXTW 3081 011 Xm UXTX 3082 3083 100 Xm sx from bit 7 SXTB 3084 101 Xm sx from bit 15 SXTH 3085 110 Xm sx from bit 31 SXTW 3086 111 Xm SXTX 3087 3088 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity 3089 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX 3090 are the identity operation on Wm. 3091 3092 After extension, the value is shifted left by imm3 bits, which 3093 may only be in the range 0 .. 4 inclusive. 3094 */ 3095 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) { 3096 Bool is64 = INSN(31,31) == 1; 3097 Bool isSub = INSN(30,30) == 1; 3098 Bool setCC = INSN(29,29) == 1; 3099 UInt mm = INSN(20,16); 3100 UInt opt = INSN(15,13); 3101 UInt imm3 = INSN(12,10); 3102 UInt nn = INSN(9,5); 3103 UInt dd = INSN(4,0); 3104 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx", 3105 "sxtb", "sxth", "sxtw", "sxtx" }; 3106 /* Do almost the same thing in the 32- and 64-bit cases. 
*/ 3107 IRTemp xN = newTemp(Ity_I64); 3108 IRTemp xM = newTemp(Ity_I64); 3109 assign(xN, getIReg64orSP(nn)); 3110 assign(xM, getIReg64orZR(mm)); 3111 IRExpr* xMw = mkexpr(xM); /* "xM widened" */ 3112 Int shSX = 0; 3113 /* widen Xm .. */ 3114 switch (opt) { 3115 case BITS3(0,0,0): // UXTB 3116 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break; 3117 case BITS3(0,0,1): // UXTH 3118 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break; 3119 case BITS3(0,1,0): // UXTW -- noop for the 32bit case 3120 if (is64) { 3121 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw)); 3122 } 3123 break; 3124 case BITS3(0,1,1): // UXTX -- always a noop 3125 break; 3126 case BITS3(1,0,0): // SXTB 3127 shSX = 56; goto sxTo64; 3128 case BITS3(1,0,1): // SXTH 3129 shSX = 48; goto sxTo64; 3130 case BITS3(1,1,0): // SXTW -- noop for the 32bit case 3131 if (is64) { 3132 shSX = 32; goto sxTo64; 3133 } 3134 break; 3135 case BITS3(1,1,1): // SXTX -- always a noop 3136 break; 3137 sxTo64: 3138 vassert(shSX >= 32); 3139 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)), 3140 mkU8(shSX)); 3141 break; 3142 default: 3143 vassert(0); 3144 } 3145 /* and now shift */ 3146 IRTemp argL = xN; 3147 IRTemp argR = newTemp(Ity_I64); 3148 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3))); 3149 IRTemp res = newTemp(Ity_I64); 3150 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64, 3151 mkexpr(argL), mkexpr(argR))); 3152 if (is64) { 3153 if (setCC) { 3154 putIReg64orZR(dd, mkexpr(res)); 3155 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 3156 } else { 3157 putIReg64orSP(dd, mkexpr(res)); 3158 } 3159 } else { 3160 if (setCC) { 3161 IRTemp argL32 = newTemp(Ity_I32); 3162 IRTemp argR32 = newTemp(Ity_I32); 3163 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res))); 3164 assign(argL32, unop(Iop_64to32, mkexpr(argL))); 3165 assign(argR32, unop(Iop_64to32, mkexpr(argR))); 3166 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32); 3167 } else { 3168 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res))); 3169 } 3170 } 3171 DIP("%s%s %s, %s, %s %s lsl %u\n", 3172 isSub ? "sub" : "add", setCC ? "s" : "", 3173 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd), 3174 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm), 3175 nameExt[opt], imm3); 3176 return True; 3177 } 3178 3179 /* ---------------- CCMP/CCMN(imm) ---------------- */ 3180 /* Bizarrely, these appear in the "data processing register" 3181 category, even though they are operations against an 3182 immediate. */ 3183 /* 31 29 20 15 11 9 3 3184 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond 3185 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond 3186 3187 Operation is: 3188 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv 3189 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv 3190 */ 3191 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3192 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) { 3193 Bool is64 = INSN(31,31) == 1; 3194 Bool isSUB = INSN(30,30) == 1; 3195 UInt imm5 = INSN(20,16); 3196 UInt cond = INSN(15,12); 3197 UInt nn = INSN(9,5); 3198 UInt nzcv = INSN(3,0); 3199 3200 IRTemp condT = newTemp(Ity_I1); 3201 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3202 3203 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 3204 IRTemp argL = newTemp(ty); 3205 IRTemp argR = newTemp(ty); 3206 3207 if (is64) { 3208 assign(argL, getIReg64orZR(nn)); 3209 assign(argR, mkU64(imm5)); 3210 } else { 3211 assign(argL, getIReg32orZR(nn)); 3212 assign(argR, mkU32(imm5)); 3213 } 3214 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3215 3216 DIP("ccm%c %s, #%u, #%u, %s\n", 3217 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 3218 imm5, nzcv, nameCC(cond)); 3219 return True; 3220 } 3221 3222 /* ---------------- CCMP/CCMN(reg) ---------------- */ 3223 /* 31 29 20 15 11 9 3 3224 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond 3225 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond 3226 Operation is: 3227 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv 3228 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv 3229 */ 3230 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3231 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) { 3232 Bool is64 = INSN(31,31) == 1; 3233 Bool isSUB = INSN(30,30) == 1; 3234 UInt mm = INSN(20,16); 3235 UInt cond = INSN(15,12); 3236 UInt nn = INSN(9,5); 3237 UInt nzcv = INSN(3,0); 3238 3239 IRTemp condT = newTemp(Ity_I1); 3240 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3241 3242 IRType ty = is64 ? Ity_I64 : Ity_I32; 3243 IRTemp argL = newTemp(ty); 3244 IRTemp argR = newTemp(ty); 3245 3246 if (is64) { 3247 assign(argL, getIReg64orZR(nn)); 3248 assign(argR, getIReg64orZR(mm)); 3249 } else { 3250 assign(argL, getIReg32orZR(nn)); 3251 assign(argR, getIReg32orZR(mm)); 3252 } 3253 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3254 3255 DIP("ccm%c %s, %s, #%u, %s\n", 3256 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 3257 nameIRegOrZR(is64, mm), nzcv, nameCC(cond)); 3258 return True; 3259 } 3260 3261 3262 /* -------------- REV/REV16/REV32/RBIT -------------- */ 3263 /* 31 30 28 20 15 11 9 4 3264 3265 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn 3266 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn 3267 3268 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn 3269 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn 3270 3271 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn 3272 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn 3273 3274 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn 3275 */ 3276 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3277 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) { 3278 UInt b31 = INSN(31,31); 3279 UInt opc = INSN(11,10); 3280 3281 UInt ix = 0; 3282 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1; 3283 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2; 3284 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3; 3285 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4; 3286 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5; 3287 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6; 3288 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7; 3289 if (ix >= 1 && ix <= 7) { 3290 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7; 3291 UInt nn = INSN(9,5); 3292 UInt dd = INSN(4,0); 3293 IRTemp src = newTemp(Ity_I64); 3294 IRTemp dst = IRTemp_INVALID; 3295 IRTemp (*math)(IRTemp) = NULL; 3296 switch (ix) { 3297 case 1: case 2: math = math_BYTESWAP64; break; 3298 case 3: case 4: math = math_BITSWAP64; break; 3299 case 5: case 6: math = math_USHORTSWAP64; break; 3300 case 7: math = math_UINTSWAP64; break; 3301 default: vassert(0); 3302 } 3303 const HChar* names[7] 3304 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" }; 3305 const HChar* nm = names[ix-1]; 3306 vassert(math); 3307 if (ix == 6) { 3308 
/* This has to be special cased, since the logic below doesn't 3309 handle it correctly. */ 3310 assign(src, getIReg64orZR(nn)); 3311 dst = math(src); 3312 putIReg64orZR(dd, 3313 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst)))); 3314 } else if (is64) { 3315 assign(src, getIReg64orZR(nn)); 3316 dst = math(src); 3317 putIReg64orZR(dd, mkexpr(dst)); 3318 } else { 3319 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32))); 3320 dst = math(src); 3321 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3322 } 3323 DIP("%s %s, %s\n", nm, 3324 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn)); 3325 return True; 3326 } 3327 /* else fall through */ 3328 } 3329 3330 /* -------------------- CLZ/CLS -------------------- */ 3331 /* 30 28 24 20 15 9 4 3332 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn 3333 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn 3334 */ 3335 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3336 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) { 3337 Bool is64 = INSN(31,31) == 1; 3338 Bool isCLS = INSN(10,10) == 1; 3339 UInt nn = INSN(9,5); 3340 UInt dd = INSN(4,0); 3341 IRTemp src = newTemp(Ity_I64); 3342 IRTemp srcZ = newTemp(Ity_I64); 3343 IRTemp dst = newTemp(Ity_I64); 3344 /* Get the argument, widened out to 64 bit */ 3345 if (is64) { 3346 assign(src, getIReg64orZR(nn)); 3347 } else { 3348 assign(src, binop(Iop_Shl64, 3349 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32))); 3350 } 3351 /* If this is CLS, mash the arg around accordingly */ 3352 if (isCLS) { 3353 IRExpr* one = mkU8(1); 3354 assign(srcZ, 3355 binop(Iop_Xor64, 3356 binop(Iop_Shl64, mkexpr(src), one), 3357 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one))); 3358 } else { 3359 assign(srcZ, mkexpr(src)); 3360 } 3361 /* And compute CLZ. */ 3362 if (is64) { 3363 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3364 mkU64(isCLS ? 63 : 64), 3365 unop(Iop_Clz64, mkexpr(srcZ)))); 3366 putIReg64orZR(dd, mkexpr(dst)); 3367 } else { 3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3369 mkU64(isCLS ? 31 : 32), 3370 unop(Iop_Clz64, mkexpr(srcZ)))); 3371 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3372 } 3373 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z', 3374 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn)); 3375 return True; 3376 } 3377 3378 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */ 3379 /* 30 28 20 15 11 9 4 3380 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm 3381 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm 3382 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm 3383 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm 3384 */ 3385 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3386 && INSN(15,12) == BITS4(0,0,1,0)) { 3387 Bool is64 = INSN(31,31) == 1; 3388 UInt mm = INSN(20,16); 3389 UInt op = INSN(11,10); 3390 UInt nn = INSN(9,5); 3391 UInt dd = INSN(4,0); 3392 IRType ty = is64 ? Ity_I64 : Ity_I32; 3393 IRTemp srcL = newTemp(ty); 3394 IRTemp srcR = newTemp(Ity_I64); 3395 IRTemp res = newTemp(ty); 3396 IROp iop = Iop_INVALID; 3397 assign(srcL, getIRegOrZR(is64, nn)); 3398 assign(srcR, binop(Iop_And64, getIReg64orZR(mm), 3399 mkU64(is64 ? 
63 : 31))); 3400 if (op < 3) { 3401 // LSLV, LSRV, ASRV 3402 switch (op) { 3403 case BITS2(0,0): iop = mkSHL(ty); break; 3404 case BITS2(0,1): iop = mkSHR(ty); break; 3405 case BITS2(1,0): iop = mkSAR(ty); break; 3406 default: vassert(0); 3407 } 3408 assign(res, binop(iop, mkexpr(srcL), 3409 unop(Iop_64to8, mkexpr(srcR)))); 3410 } else { 3411 // RORV 3412 IROp opSHL = mkSHL(ty); 3413 IROp opSHR = mkSHR(ty); 3414 IROp opOR = mkOR(ty); 3415 IRExpr* width = mkU64(is64 ? 64: 32); 3416 assign( 3417 res, 3418 IRExpr_ITE( 3419 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)), 3420 mkexpr(srcL), 3421 binop(opOR, 3422 binop(opSHL, 3423 mkexpr(srcL), 3424 unop(Iop_64to8, binop(Iop_Sub64, width, 3425 mkexpr(srcR)))), 3426 binop(opSHR, 3427 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR)))) 3428 )); 3429 } 3430 putIRegOrZR(is64, dd, mkexpr(res)); 3431 vassert(op < 4); 3432 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" }; 3433 DIP("%s %s, %s, %s\n", 3434 names[op], nameIRegOrZR(is64,dd), 3435 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm)); 3436 return True; 3437 } 3438 3439 /* -------------------- SDIV/UDIV -------------------- */ 3440 /* 30 28 20 15 10 9 4 3441 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm 3442 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm 3443 */ 3444 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3445 && INSN(15,11) == BITS5(0,0,0,0,1)) { 3446 Bool is64 = INSN(31,31) == 1; 3447 UInt mm = INSN(20,16); 3448 Bool isS = INSN(10,10) == 1; 3449 UInt nn = INSN(9,5); 3450 UInt dd = INSN(4,0); 3451 if (isS) { 3452 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32, 3453 getIRegOrZR(is64, nn), 3454 getIRegOrZR(is64, mm))); 3455 } else { 3456 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32, 3457 getIRegOrZR(is64, nn), 3458 getIRegOrZR(is64, mm))); 3459 } 3460 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u', 3461 nameIRegOrZR(is64, dd), 3462 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm)); 3463 return True; 3464 } 3465 3466 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */ 3467 /* 31 23 20 15 14 9 4 3468 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa 3469 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa 3470 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa 3471 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa 3472 with operation 3473 Xd = Xa +/- (Wn *u/s Wm) 3474 */ 3475 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) { 3476 Bool isU = INSN(23,23) == 1; 3477 UInt mm = INSN(20,16); 3478 Bool isAdd = INSN(15,15) == 0; 3479 UInt aa = INSN(14,10); 3480 UInt nn = INSN(9,5); 3481 UInt dd = INSN(4,0); 3482 IRTemp wN = newTemp(Ity_I32); 3483 IRTemp wM = newTemp(Ity_I32); 3484 IRTemp xA = newTemp(Ity_I64); 3485 IRTemp muld = newTemp(Ity_I64); 3486 IRTemp res = newTemp(Ity_I64); 3487 assign(wN, getIReg32orZR(nn)); 3488 assign(wM, getIReg32orZR(mm)); 3489 assign(xA, getIReg64orZR(aa)); 3490 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32, 3491 mkexpr(wN), mkexpr(wM))); 3492 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64, 3493 mkexpr(xA), mkexpr(muld))); 3494 putIReg64orZR(dd, mkexpr(res)); 3495 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? 
"add" : "sub", 3496 nameIReg64orZR(dd), nameIReg32orZR(nn), 3497 nameIReg32orZR(mm), nameIReg64orZR(aa)); 3498 return True; 3499 } 3500 vex_printf("ARM64 front end: data_processing_register\n"); 3501 return False; 3502 # undef INSN 3503 } 3504 3505 3506 /*------------------------------------------------------------*/ 3507 /*--- Math helpers for vector interleave/deinterleave ---*/ 3508 /*------------------------------------------------------------*/ 3509 3510 #define EX(_tmp) \ 3511 mkexpr(_tmp) 3512 #define SL(_hi128,_lo128,_nbytes) \ 3513 ( (_nbytes) == 0 \ 3514 ? (_lo128) \ 3515 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) ) 3516 #define ROR(_v128,_nbytes) \ 3517 SL((_v128),(_v128),(_nbytes)) 3518 #define ROL(_v128,_nbytes) \ 3519 SL((_v128),(_v128),16-(_nbytes)) 3520 #define SHR(_v128,_nbytes) \ 3521 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes))) 3522 #define SHL(_v128,_nbytes) \ 3523 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes))) 3524 #define ILO64x2(_argL,_argR) \ 3525 binop(Iop_InterleaveLO64x2,(_argL),(_argR)) 3526 #define IHI64x2(_argL,_argR) \ 3527 binop(Iop_InterleaveHI64x2,(_argL),(_argR)) 3528 #define ILO32x4(_argL,_argR) \ 3529 binop(Iop_InterleaveLO32x4,(_argL),(_argR)) 3530 #define IHI32x4(_argL,_argR) \ 3531 binop(Iop_InterleaveHI32x4,(_argL),(_argR)) 3532 #define ILO16x8(_argL,_argR) \ 3533 binop(Iop_InterleaveLO16x8,(_argL),(_argR)) 3534 #define IHI16x8(_argL,_argR) \ 3535 binop(Iop_InterleaveHI16x8,(_argL),(_argR)) 3536 #define ILO8x16(_argL,_argR) \ 3537 binop(Iop_InterleaveLO8x16,(_argL),(_argR)) 3538 #define IHI8x16(_argL,_argR) \ 3539 binop(Iop_InterleaveHI8x16,(_argL),(_argR)) 3540 #define CEV32x4(_argL,_argR) \ 3541 binop(Iop_CatEvenLanes32x4,(_argL),(_argR)) 3542 #define COD32x4(_argL,_argR) \ 3543 binop(Iop_CatOddLanes32x4,(_argL),(_argR)) 3544 #define COD16x8(_argL,_argR) \ 3545 binop(Iop_CatOddLanes16x8,(_argL),(_argR)) 3546 #define COD8x16(_argL,_argR) \ 3547 binop(Iop_CatOddLanes8x16,(_argL),(_argR)) 3548 #define CEV8x16(_argL,_argR) \ 3549 binop(Iop_CatEvenLanes8x16,(_argL),(_argR)) 3550 #define AND(_arg1,_arg2) \ 3551 binop(Iop_AndV128,(_arg1),(_arg2)) 3552 #define OR2(_arg1,_arg2) \ 3553 binop(Iop_OrV128,(_arg1),(_arg2)) 3554 #define OR3(_arg1,_arg2,_arg3) \ 3555 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3))) 3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \ 3557 binop(Iop_OrV128, \ 3558 binop(Iop_OrV128,(_arg1),(_arg2)), \ 3559 binop(Iop_OrV128,(_arg3),(_arg4))) 3560 3561 3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */ 3563 static 3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0, 3565 UInt laneSzBlg2, IRTemp u0 ) 3566 { 3567 assign(*i0, mkexpr(u0)); 3568 } 3569 3570 3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */ 3572 static 3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1, 3574 UInt laneSzBlg2, IRTemp u0, IRTemp u1 ) 3575 { 3576 /* This is pretty easy, since we have primitives directly to 3577 hand. 
*/ 3578 if (laneSzBlg2 == 3) { 3579 // 64x2 3580 // u1 == B1 B0, u0 == A1 A0 3581 // i1 == B1 A1, i0 == B0 A0 3582 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0))); 3583 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0))); 3584 return; 3585 } 3586 if (laneSzBlg2 == 2) { 3587 // 32x4 3588 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0, 3589 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0 3590 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0))); 3591 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0))); 3592 return; 3593 } 3594 if (laneSzBlg2 == 1) { 3595 // 16x8 3596 // u1 == B{7..0}, u0 == A{7..0} 3597 // i0 == B3 A3 B2 A2 B1 A1 B0 A0 3598 // i1 == B7 A7 B6 A6 B5 A5 B4 A4 3599 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0))); 3600 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0))); 3601 return; 3602 } 3603 if (laneSzBlg2 == 0) { 3604 // 8x16 3605 // u1 == B{f..0}, u0 == A{f..0} 3606 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0 3607 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8 3608 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0))); 3609 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0))); 3610 return; 3611 } 3612 /*NOTREACHED*/ 3613 vassert(0); 3614 } 3615 3616 3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */ 3618 static 3619 void math_INTERLEAVE3_128( 3620 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, 3621 UInt laneSzBlg2, 3622 IRTemp u0, IRTemp u1, IRTemp u2 ) 3623 { 3624 if (laneSzBlg2 == 3) { 3625 // 64x2 3626 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0 3627 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0, 3628 assign(*i2, IHI64x2( EX(u2), EX(u1) )); 3629 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) )); 3630 assign(*i0, ILO64x2( EX(u1), EX(u0) )); 3631 return; 3632 } 3633 3634 if (laneSzBlg2 == 2) { 3635 // 32x4 3636 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0 3637 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0 3638 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0 3639 IRTemp p0 = newTempV128(); 3640 IRTemp p1 = newTempV128(); 3641 IRTemp p2 = newTempV128(); 3642 IRTemp c1100 = newTempV128(); 3643 IRTemp c0011 = newTempV128(); 3644 IRTemp c0110 = newTempV128(); 3645 assign(c1100, mkV128(0xFF00)); 3646 assign(c0011, mkV128(0x00FF)); 3647 assign(c0110, mkV128(0x0FF0)); 3648 // First interleave them at 64x2 granularity, 3649 // generating partial ("p") values. 
3650 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2); 3651 // And more shuffling around for the final answer 3652 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ), 3653 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) )); 3654 assign(*i1, OR3( SHL(EX(p2),12), 3655 AND(EX(p1),EX(c0110)), 3656 SHR(EX(p0),12) )); 3657 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ), 3658 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) )); 3659 return; 3660 } 3661 3662 if (laneSzBlg2 == 1) { 3663 // 16x8 3664 // u2 == C7 C6 C5 C4 C3 C2 C1 C0 3665 // u1 == B7 B6 B5 B4 B3 B2 B1 B0 3666 // u0 == A7 A6 A5 A4 A3 A2 A1 A0 3667 // 3668 // p2 == C7 C6 B7 B6 A7 A6 C5 C4 3669 // p1 == B5 B4 A5 A4 C3 C2 B3 B2 3670 // p0 == A3 A2 C1 C0 B1 B0 A1 A0 3671 // 3672 // i2 == C7 B7 A7 C6 B6 A6 C5 B5 3673 // i1 == A5 C4 B4 A4 C4 B3 A3 C2 3674 // i0 == B2 A2 C1 B1 A1 C0 B0 A0 3675 IRTemp p0 = newTempV128(); 3676 IRTemp p1 = newTempV128(); 3677 IRTemp p2 = newTempV128(); 3678 IRTemp c1000 = newTempV128(); 3679 IRTemp c0100 = newTempV128(); 3680 IRTemp c0010 = newTempV128(); 3681 IRTemp c0001 = newTempV128(); 3682 assign(c1000, mkV128(0xF000)); 3683 assign(c0100, mkV128(0x0F00)); 3684 assign(c0010, mkV128(0x00F0)); 3685 assign(c0001, mkV128(0x000F)); 3686 // First interleave them at 32x4 granularity, 3687 // generating partial ("p") values. 3688 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2); 3689 // And more shuffling around for the final answer 3690 assign(*i2, 3691 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ), 3692 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ), 3693 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ), 3694 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) ) 3695 )); 3696 assign(*i1, 3697 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ), 3698 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ), 3699 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ), 3700 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) ) 3701 )); 3702 assign(*i0, 3703 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ), 3704 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ), 3705 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ), 3706 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) ) 3707 )); 3708 return; 3709 } 3710 3711 if (laneSzBlg2 == 0) { 3712 // 8x16. It doesn't seem worth the hassle of first doing a 3713 // 16x8 interleave, so just generate all 24 partial results 3714 // directly :-( 3715 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0 3716 // i2 == Cf Bf Af Ce .. Bb Ab Ca 3717 // i1 == Ba Aa C9 B9 .. A6 C5 B5 3718 // i0 == A5 C4 B4 A4 .. 
C0 B0 A0 3719 3720 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128(); 3721 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128(); 3722 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128(); 3723 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128(); 3724 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128(); 3725 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128(); 3726 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128(); 3727 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128(); 3728 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128(); 3729 3730 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector 3731 // of the form 14 bytes junk : CC[0xF] : BB[0xA] 3732 // 3733 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \ 3734 IRTemp t_##_tempName = newTempV128(); \ 3735 assign(t_##_tempName, \ 3736 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \ 3737 ROR(EX(_srcVec2),(_srcShift2)) ) ) 3738 3739 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively 3740 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0; 3741 3742 // The slicing and reassembly are done as interleavedly as possible, 3743 // so as to minimise the demand for registers in the back end, which 3744 // was observed to be a problem in testing. 3745 3746 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14] 3747 XXXX(AfCe, AA, 0xf, CC, 0xe); 3748 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe))); 3749 3750 XXXX(BeAe, BB, 0xe, AA, 0xe); 3751 XXXX(CdBd, CC, 0xd, BB, 0xd); 3752 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd))); 3753 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98))); 3754 3755 XXXX(AdCc, AA, 0xd, CC, 0xc); 3756 XXXX(BcAc, BB, 0xc, AA, 0xc); 3757 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc))); 3758 3759 XXXX(CbBb, CC, 0xb, BB, 0xb); 3760 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0] 3761 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa))); 3762 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210))); 3763 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64))); 3764 3765 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14] 3766 XXXX(C9B9, CC, 0x9, BB, 0x9); 3767 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9))); 3768 3769 XXXX(A9C8, AA, 0x9, CC, 0x8); 3770 XXXX(B8A8, BB, 0x8, AA, 0x8); 3771 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8))); 3772 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98))); 3773 3774 XXXX(C7B7, CC, 0x7, BB, 0x7); 3775 XXXX(A7C6, AA, 0x7, CC, 0x6); 3776 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6))); 3777 3778 XXXX(B6A6, BB, 0x6, AA, 0x6); 3779 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0] 3780 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5))); 3781 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210))); 3782 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64))); 3783 3784 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14] 3785 XXXX(B4A4, BB, 0x4, AA, 0x4); 3786 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4))); 3787 3788 XXXX(C3B3, CC, 0x3, BB, 0x3); 3789 XXXX(A3C2, AA, 0x3, CC, 0x2); 3790 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2))); 3791 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98))); 3792 3793 XXXX(B2A2, BB, 0x2, AA, 0x2); 3794 XXXX(C1B1, CC, 0x1, BB, 0x1); 3795 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1))); 3796 3797 XXXX(A1C0, AA, 0x1, CC, 0x0); 3798 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0] 3799 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0))); 3800 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210))); 3801 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64))); 3802 3803 # undef XXXX 3804 return; 3805 } 3806 3807 /*NOTREACHED*/ 3808 vassert(0); 3809 } 
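/* Illustrative note (not generated code): for the 32x4 case above, if
   u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0 and u2 == C3 C2 C1 C0, then the
   three outputs, read from the low lane of i0 upwards and then through
   i1 and i2, hold
      A0 B0 C0 A1 B1 C1 A2 B2 C2 A3 B3 C3
   which is the element order that ST3 writes to memory. */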
3810 3811 3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */ 3813 static 3814 void math_INTERLEAVE4_128( 3815 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3, 3816 UInt laneSzBlg2, 3817 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 ) 3818 { 3819 if (laneSzBlg2 == 3) { 3820 // 64x2 3821 assign(*i0, ILO64x2(EX(u1), EX(u0))); 3822 assign(*i1, ILO64x2(EX(u3), EX(u2))); 3823 assign(*i2, IHI64x2(EX(u1), EX(u0))); 3824 assign(*i3, IHI64x2(EX(u3), EX(u2))); 3825 return; 3826 } 3827 if (laneSzBlg2 == 2) { 3828 // 32x4 3829 // First, interleave at the 64-bit lane size. 3830 IRTemp p0 = newTempV128(); 3831 IRTemp p1 = newTempV128(); 3832 IRTemp p2 = newTempV128(); 3833 IRTemp p3 = newTempV128(); 3834 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3); 3835 // And interleave (cat) at the 32 bit size. 3836 assign(*i0, CEV32x4(EX(p1), EX(p0))); 3837 assign(*i1, COD32x4(EX(p1), EX(p0))); 3838 assign(*i2, CEV32x4(EX(p3), EX(p2))); 3839 assign(*i3, COD32x4(EX(p3), EX(p2))); 3840 return; 3841 } 3842 if (laneSzBlg2 == 1) { 3843 // 16x8 3844 // First, interleave at the 32-bit lane size. 3845 IRTemp p0 = newTempV128(); 3846 IRTemp p1 = newTempV128(); 3847 IRTemp p2 = newTempV128(); 3848 IRTemp p3 = newTempV128(); 3849 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3); 3850 // And rearrange within each vector, to get the right 16 bit lanes. 3851 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2))); 3852 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2))); 3853 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2))); 3854 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2))); 3855 return; 3856 } 3857 if (laneSzBlg2 == 0) { 3858 // 8x16 3859 // First, interleave at the 16-bit lane size. 3860 IRTemp p0 = newTempV128(); 3861 IRTemp p1 = newTempV128(); 3862 IRTemp p2 = newTempV128(); 3863 IRTemp p3 = newTempV128(); 3864 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3); 3865 // And rearrange within each vector, to get the right 8 bit lanes. 3866 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0)))); 3867 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1)))); 3868 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2)))); 3869 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3)))); 3870 return; 3871 } 3872 /*NOTREACHED*/ 3873 vassert(0); 3874 } 3875 3876 3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */ 3878 static 3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0, 3880 UInt laneSzBlg2, IRTemp i0 ) 3881 { 3882 assign(*u0, mkexpr(i0)); 3883 } 3884 3885 3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */ 3887 static 3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1, 3889 UInt laneSzBlg2, IRTemp i0, IRTemp i1 ) 3890 { 3891 /* This is pretty easy, since we have primitives directly to 3892 hand. 
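      It is simply the inverse of math_INTERLEAVE2_128.  For example
      (illustrative), with 32-bit lanes:
         u0 = CatEvenLanes32x4(i1, i0)   == A3 A2 A1 A0
         u1 = CatOddLanes32x4(i1, i0)    == B3 B2 B1 B0
      since the even-numbered lanes of the concatenation i1:i0 are the
      A elements and the odd-numbered lanes are the B elements.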
*/ 3893 if (laneSzBlg2 == 3) { 3894 // 64x2 3895 // i1 == B1 A1, i0 == B0 A0 3896 // u1 == B1 B0, u0 == A1 A0 3897 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0))); 3898 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0))); 3899 return; 3900 } 3901 if (laneSzBlg2 == 2) { 3902 // 32x4 3903 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0 3904 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0, 3905 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0))); 3906 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0))); 3907 return; 3908 } 3909 if (laneSzBlg2 == 1) { 3910 // 16x8 3911 // i0 == B3 A3 B2 A2 B1 A1 B0 A0 3912 // i1 == B7 A7 B6 A6 B5 A5 B4 A4 3913 // u1 == B{7..0}, u0 == A{7..0} 3914 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0))); 3915 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0))); 3916 return; 3917 } 3918 if (laneSzBlg2 == 0) { 3919 // 8x16 3920 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0 3921 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8 3922 // u1 == B{f..0}, u0 == A{f..0} 3923 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0))); 3924 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0))); 3925 return; 3926 } 3927 /*NOTREACHED*/ 3928 vassert(0); 3929 } 3930 3931 3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */ 3933 static 3934 void math_DEINTERLEAVE3_128( 3935 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 3936 UInt laneSzBlg2, 3937 IRTemp i0, IRTemp i1, IRTemp i2 ) 3938 { 3939 if (laneSzBlg2 == 3) { 3940 // 64x2 3941 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0, 3942 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0 3943 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) )); 3944 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) )); 3945 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) )); 3946 return; 3947 } 3948 3949 if (laneSzBlg2 == 2) { 3950 // 32x4 3951 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0 3952 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0 3953 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0 3954 IRTemp t_a1c0b0a0 = newTempV128(); 3955 IRTemp t_a2c1b1a1 = newTempV128(); 3956 IRTemp t_a3c2b2a2 = newTempV128(); 3957 IRTemp t_a0c3b3a3 = newTempV128(); 3958 IRTemp p0 = newTempV128(); 3959 IRTemp p1 = newTempV128(); 3960 IRTemp p2 = newTempV128(); 3961 // Compute some intermediate values. 3962 assign(t_a1c0b0a0, EX(i0)); 3963 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4)); 3964 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4)); 3965 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4)); 3966 // First deinterleave into lane-pairs 3967 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0))); 3968 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)), 3969 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0)))); 3970 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4))); 3971 // Then deinterleave at 64x2 granularity. 
3972 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2); 3973 return; 3974 } 3975 3976 if (laneSzBlg2 == 1) { 3977 // 16x8 3978 // u2 == C7 C6 C5 C4 C3 C2 C1 C0 3979 // u1 == B7 B6 B5 B4 B3 B2 B1 B0 3980 // u0 == A7 A6 A5 A4 A3 A2 A1 A0 3981 // 3982 // i2 == C7 B7 A7 C6 B6 A6 C5 B5 3983 // i1 == A5 C4 B4 A4 C4 B3 A3 C2 3984 // i0 == B2 A2 C1 B1 A1 C0 B0 A0 3985 // 3986 // p2 == C7 C6 B7 B6 A7 A6 C5 C4 3987 // p1 == B5 B4 A5 A4 C3 C2 B3 B2 3988 // p0 == A3 A2 C1 C0 B1 B0 A1 A0 3989 3990 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111; 3991 s0 = s1 = s2 = s3 3992 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID; 3993 newTempsV128_4(&s0, &s1, &s2, &s3); 3994 newTempsV128_4(&t0, &t1, &t2, &t3); 3995 newTempsV128_4(&p0, &p1, &p2, &c00111111); 3996 3997 // s0 == b2a2 c1b1a1 c0b0a0 3998 // s1 == b4a4 c3b3c3 c2b2a2 3999 // s2 == b6a6 c5b5a5 c4b4a4 4000 // s3 == b0a0 c7b7a7 c6b6a6 4001 assign(s0, EX(i0)); 4002 assign(s1, SL(EX(i1),EX(i0),6*2)); 4003 assign(s2, SL(EX(i2),EX(i1),4*2)); 4004 assign(s3, SL(EX(i0),EX(i2),2*2)); 4005 4006 // t0 == 0 0 c1c0 b1b0 a1a0 4007 // t1 == 0 0 c3c2 b3b2 a3a2 4008 // t2 == 0 0 c5c4 b5b4 a5a4 4009 // t3 == 0 0 c7c6 b7b6 a7a6 4010 assign(c00111111, mkV128(0x0FFF)); 4011 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111))); 4012 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111))); 4013 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111))); 4014 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111))); 4015 4016 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2))); 4017 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2))); 4018 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2))); 4019 4020 // Then deinterleave at 32x4 granularity. 4021 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2); 4022 return; 4023 } 4024 4025 if (laneSzBlg2 == 0) { 4026 // 8x16. This is the same scheme as for 16x8, with twice the 4027 // number of intermediate values. 
4028 // 4029 // u2 == C{f..0} 4030 // u1 == B{f..0} 4031 // u0 == A{f..0} 4032 // 4033 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a} 4034 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5} 4035 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4036 // 4037 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba} 4038 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54} 4039 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10} 4040 // 4041 IRTemp s0, s1, s2, s3, s4, s5, s6, s7, 4042 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK; 4043 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 4044 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK 4045 = IRTemp_INVALID; 4046 newTempsV128_4(&s0, &s1, &s2, &s3); 4047 newTempsV128_4(&s4, &s5, &s6, &s7); 4048 newTempsV128_4(&t0, &t1, &t2, &t3); 4049 newTempsV128_4(&t4, &t5, &t6, &t7); 4050 newTempsV128_4(&p0, &p1, &p2, &cMASK); 4051 4052 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4053 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2} 4054 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4} 4055 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6} 4056 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8} 4057 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a} 4058 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c} 4059 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e} 4060 assign(s0, SL(EX(i1),EX(i0), 0)); 4061 assign(s1, SL(EX(i1),EX(i0), 6)); 4062 assign(s2, SL(EX(i1),EX(i0),12)); 4063 assign(s3, SL(EX(i2),EX(i1), 2)); 4064 assign(s4, SL(EX(i2),EX(i1), 8)); 4065 assign(s5, SL(EX(i2),EX(i1),14)); 4066 assign(s6, SL(EX(i0),EX(i2), 4)); 4067 assign(s7, SL(EX(i0),EX(i2),10)); 4068 4069 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0 4070 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2 4071 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4 4072 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6 4073 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8 4074 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa 4075 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac 4076 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae 4077 assign(cMASK, mkV128(0x003F)); 4078 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK))); 4079 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK))); 4080 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK))); 4081 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK))); 4082 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK))); 4083 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK))); 4084 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK))); 4085 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK))); 4086 4087 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) )); 4088 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8), 4089 SHL(EX(t3),2), SHR(EX(t2),4) )); 4090 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) )); 4091 4092 // Then deinterleave at 16x8 granularity. 4093 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2); 4094 return; 4095 } 4096 4097 /*NOTREACHED*/ 4098 vassert(0); 4099 } 4100 4101 4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. 
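   As with the 2- and 3-register cases above, i0..i3 hold the data in
   memory order and u0..u3 receive the deinterleaved per-register
   elements.  For 64-bit lanes, InterleaveLO/HI64x2 selections are
   enough; the smaller lane sizes pre-shuffle and then recurse towards
   that case.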
*/ 4103 static 4104 void math_DEINTERLEAVE4_128( 4105 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3, 4106 UInt laneSzBlg2, 4107 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 ) 4108 { 4109 if (laneSzBlg2 == 3) { 4110 // 64x2 4111 assign(*u0, ILO64x2(EX(i2), EX(i0))); 4112 assign(*u1, IHI64x2(EX(i2), EX(i0))); 4113 assign(*u2, ILO64x2(EX(i3), EX(i1))); 4114 assign(*u3, IHI64x2(EX(i3), EX(i1))); 4115 return; 4116 } 4117 if (laneSzBlg2 == 2) { 4118 // 32x4 4119 IRTemp p0 = newTempV128(); 4120 IRTemp p2 = newTempV128(); 4121 IRTemp p1 = newTempV128(); 4122 IRTemp p3 = newTempV128(); 4123 assign(p0, ILO32x4(EX(i1), EX(i0))); 4124 assign(p1, IHI32x4(EX(i1), EX(i0))); 4125 assign(p2, ILO32x4(EX(i3), EX(i2))); 4126 assign(p3, IHI32x4(EX(i3), EX(i2))); 4127 // And now do what we did for the 64-bit case. 4128 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3); 4129 return; 4130 } 4131 if (laneSzBlg2 == 1) { 4132 // 16x8 4133 // Deinterleave into 32-bit chunks, then do as the 32-bit case. 4134 IRTemp p0 = newTempV128(); 4135 IRTemp p1 = newTempV128(); 4136 IRTemp p2 = newTempV128(); 4137 IRTemp p3 = newTempV128(); 4138 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8))); 4139 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8))); 4140 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8))); 4141 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8))); 4142 // From here on is like the 32 bit case. 4143 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3); 4144 return; 4145 } 4146 if (laneSzBlg2 == 0) { 4147 // 8x16 4148 // Deinterleave into 16-bit chunks, then do as the 16-bit case. 4149 IRTemp p0 = newTempV128(); 4150 IRTemp p1 = newTempV128(); 4151 IRTemp p2 = newTempV128(); 4152 IRTemp p3 = newTempV128(); 4153 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)), 4154 ILO8x16(EX(i0),ROL(EX(i0),4)) )); 4155 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)), 4156 ILO8x16(EX(i1),ROL(EX(i1),4)) )); 4157 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)), 4158 ILO8x16(EX(i2),ROL(EX(i2),4)) )); 4159 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)), 4160 ILO8x16(EX(i3),ROL(EX(i3),4)) )); 4161 // From here on is like the 16 bit case. 4162 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3); 4163 return; 4164 } 4165 /*NOTREACHED*/ 4166 vassert(0); 4167 } 4168 4169 4170 /* Wrappers that use the full-width (de)interleavers to do half-width 4171 (de)interleaving. The scheme is to clone each input lane in the 4172 lower half of each incoming value, do a full width (de)interleave 4173 at the next lane size up, and remove every other lane of the the 4174 result. The returned values may have any old junk in the upper 4175 64 bits -- the caller must ignore that. */ 4176 4177 /* Helper function -- get doubling and narrowing operations. */ 4178 static 4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler, 4180 /*OUT*/IROp* halver, 4181 UInt laneSzBlg2 ) 4182 { 4183 switch (laneSzBlg2) { 4184 case 2: 4185 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4; 4186 break; 4187 case 1: 4188 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8; 4189 break; 4190 case 0: 4191 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16; 4192 break; 4193 default: 4194 vassert(0); 4195 } 4196 } 4197 4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */ 4199 static 4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0, 4201 UInt laneSzBlg2, IRTemp u0 ) 4202 { 4203 assign(*i0, mkexpr(u0)); 4204 } 4205 4206 4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. 
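   A sketch of the doubling trick described above, for 16-bit lanes:
   doubling u0 == a3 a2 a1 a0 with InterleaveLO16x8(u0,u0) gives
   a3 a3 a2 a2 a1 a1 a0 a0.  Interleaving that with the doubled u1 at
   the 32x4 size and then applying CatEvenLanes16x8 to the result and
   itself leaves b1 a1 b0 a0 in the low 64 bits of i0, which is what
   ST2 needs.  The upper 64 bits of each result are junk and are
   ignored by the caller.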
*/ 4208 static 4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1, 4210 UInt laneSzBlg2, IRTemp u0, IRTemp u1 ) 4211 { 4212 if (laneSzBlg2 == 3) { 4213 // 1x64, degenerate case 4214 assign(*i0, EX(u0)); 4215 assign(*i1, EX(u1)); 4216 return; 4217 } 4218 4219 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4220 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4221 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4222 4223 IRTemp du0 = newTempV128(); 4224 IRTemp du1 = newTempV128(); 4225 assign(du0, binop(doubler, EX(u0), EX(u0))); 4226 assign(du1, binop(doubler, EX(u1), EX(u1))); 4227 IRTemp di0 = newTempV128(); 4228 IRTemp di1 = newTempV128(); 4229 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1); 4230 assign(*i0, binop(halver, EX(di0), EX(di0))); 4231 assign(*i1, binop(halver, EX(di1), EX(di1))); 4232 } 4233 4234 4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */ 4236 static 4237 void math_INTERLEAVE3_64( 4238 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, 4239 UInt laneSzBlg2, 4240 IRTemp u0, IRTemp u1, IRTemp u2 ) 4241 { 4242 if (laneSzBlg2 == 3) { 4243 // 1x64, degenerate case 4244 assign(*i0, EX(u0)); 4245 assign(*i1, EX(u1)); 4246 assign(*i2, EX(u2)); 4247 return; 4248 } 4249 4250 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4251 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4252 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4253 4254 IRTemp du0 = newTempV128(); 4255 IRTemp du1 = newTempV128(); 4256 IRTemp du2 = newTempV128(); 4257 assign(du0, binop(doubler, EX(u0), EX(u0))); 4258 assign(du1, binop(doubler, EX(u1), EX(u1))); 4259 assign(du2, binop(doubler, EX(u2), EX(u2))); 4260 IRTemp di0 = newTempV128(); 4261 IRTemp di1 = newTempV128(); 4262 IRTemp di2 = newTempV128(); 4263 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2); 4264 assign(*i0, binop(halver, EX(di0), EX(di0))); 4265 assign(*i1, binop(halver, EX(di1), EX(di1))); 4266 assign(*i2, binop(halver, EX(di2), EX(di2))); 4267 } 4268 4269 4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */ 4271 static 4272 void math_INTERLEAVE4_64( 4273 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3, 4274 UInt laneSzBlg2, 4275 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 ) 4276 { 4277 if (laneSzBlg2 == 3) { 4278 // 1x64, degenerate case 4279 assign(*i0, EX(u0)); 4280 assign(*i1, EX(u1)); 4281 assign(*i2, EX(u2)); 4282 assign(*i3, EX(u3)); 4283 return; 4284 } 4285 4286 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4287 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4288 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4289 4290 IRTemp du0 = newTempV128(); 4291 IRTemp du1 = newTempV128(); 4292 IRTemp du2 = newTempV128(); 4293 IRTemp du3 = newTempV128(); 4294 assign(du0, binop(doubler, EX(u0), EX(u0))); 4295 assign(du1, binop(doubler, EX(u1), EX(u1))); 4296 assign(du2, binop(doubler, EX(u2), EX(u2))); 4297 assign(du3, binop(doubler, EX(u3), EX(u3))); 4298 IRTemp di0 = newTempV128(); 4299 IRTemp di1 = newTempV128(); 4300 IRTemp di2 = newTempV128(); 4301 IRTemp di3 = newTempV128(); 4302 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3, 4303 laneSzBlg2 + 1, du0, du1, du2, du3); 4304 assign(*i0, binop(halver, EX(di0), EX(di0))); 4305 assign(*i1, binop(halver, EX(di1), EX(di1))); 4306 assign(*i2, binop(halver, EX(di2), EX(di2))); 4307 assign(*i3, binop(halver, EX(di3), EX(di3))); 4308 } 4309 4310 4311 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. 
*/ 4312 static 4313 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0, 4314 UInt laneSzBlg2, IRTemp i0 ) 4315 { 4316 assign(*u0, mkexpr(i0)); 4317 } 4318 4319 4320 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */ 4321 static 4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1, 4323 UInt laneSzBlg2, IRTemp i0, IRTemp i1 ) 4324 { 4325 if (laneSzBlg2 == 3) { 4326 // 1x64, degenerate case 4327 assign(*u0, EX(i0)); 4328 assign(*u1, EX(i1)); 4329 return; 4330 } 4331 4332 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4333 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4334 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4335 4336 IRTemp di0 = newTempV128(); 4337 IRTemp di1 = newTempV128(); 4338 assign(di0, binop(doubler, EX(i0), EX(i0))); 4339 assign(di1, binop(doubler, EX(i1), EX(i1))); 4340 4341 IRTemp du0 = newTempV128(); 4342 IRTemp du1 = newTempV128(); 4343 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1); 4344 assign(*u0, binop(halver, EX(du0), EX(du0))); 4345 assign(*u1, binop(halver, EX(du1), EX(du1))); 4346 } 4347 4348 4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */ 4350 static 4351 void math_DEINTERLEAVE3_64( 4352 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 4353 UInt laneSzBlg2, 4354 IRTemp i0, IRTemp i1, IRTemp i2 ) 4355 { 4356 if (laneSzBlg2 == 3) { 4357 // 1x64, degenerate case 4358 assign(*u0, EX(i0)); 4359 assign(*u1, EX(i1)); 4360 assign(*u2, EX(i2)); 4361 return; 4362 } 4363 4364 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4365 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4366 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4367 4368 IRTemp di0 = newTempV128(); 4369 IRTemp di1 = newTempV128(); 4370 IRTemp di2 = newTempV128(); 4371 assign(di0, binop(doubler, EX(i0), EX(i0))); 4372 assign(di1, binop(doubler, EX(i1), EX(i1))); 4373 assign(di2, binop(doubler, EX(i2), EX(i2))); 4374 IRTemp du0 = newTempV128(); 4375 IRTemp du1 = newTempV128(); 4376 IRTemp du2 = newTempV128(); 4377 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2); 4378 assign(*u0, binop(halver, EX(du0), EX(du0))); 4379 assign(*u1, binop(halver, EX(du1), EX(du1))); 4380 assign(*u2, binop(halver, EX(du2), EX(du2))); 4381 } 4382 4383 4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. 
*/ 4385 static 4386 void math_DEINTERLEAVE4_64( 4387 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3, 4388 UInt laneSzBlg2, 4389 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 ) 4390 { 4391 if (laneSzBlg2 == 3) { 4392 // 1x64, degenerate case 4393 assign(*u0, EX(i0)); 4394 assign(*u1, EX(i1)); 4395 assign(*u2, EX(i2)); 4396 assign(*u3, EX(i3)); 4397 return; 4398 } 4399 4400 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4401 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4402 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4403 4404 IRTemp di0 = newTempV128(); 4405 IRTemp di1 = newTempV128(); 4406 IRTemp di2 = newTempV128(); 4407 IRTemp di3 = newTempV128(); 4408 assign(di0, binop(doubler, EX(i0), EX(i0))); 4409 assign(di1, binop(doubler, EX(i1), EX(i1))); 4410 assign(di2, binop(doubler, EX(i2), EX(i2))); 4411 assign(di3, binop(doubler, EX(i3), EX(i3))); 4412 IRTemp du0 = newTempV128(); 4413 IRTemp du1 = newTempV128(); 4414 IRTemp du2 = newTempV128(); 4415 IRTemp du3 = newTempV128(); 4416 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3, 4417 laneSzBlg2 + 1, di0, di1, di2, di3); 4418 assign(*u0, binop(halver, EX(du0), EX(du0))); 4419 assign(*u1, binop(halver, EX(du1), EX(du1))); 4420 assign(*u2, binop(halver, EX(du2), EX(du2))); 4421 assign(*u3, binop(halver, EX(du3), EX(du3))); 4422 } 4423 4424 4425 #undef EX 4426 #undef SL 4427 #undef ROR 4428 #undef ROL 4429 #undef SHR 4430 #undef SHL 4431 #undef ILO64x2 4432 #undef IHI64x2 4433 #undef ILO32x4 4434 #undef IHI32x4 4435 #undef ILO16x8 4436 #undef IHI16x8 4437 #undef ILO16x8 4438 #undef IHI16x8 4439 #undef CEV32x4 4440 #undef COD32x4 4441 #undef COD16x8 4442 #undef COD8x16 4443 #undef CEV8x16 4444 #undef AND 4445 #undef OR2 4446 #undef OR3 4447 #undef OR4 4448 4449 4450 /*------------------------------------------------------------*/ 4451 /*--- Load and Store instructions ---*/ 4452 /*------------------------------------------------------------*/ 4453 4454 /* Generate the EA for a "reg + reg" style amode. This is done from 4455 parts of the insn, but for sanity checking sake it takes the whole 4456 insn. This appears to depend on insn[15:12], with opt=insn[15:13] 4457 and S=insn[12]: 4458 4459 The possible forms, along with their opt:S values, are: 4460 011:0 Xn|SP + Xm 4461 111:0 Xn|SP + Xm 4462 011:1 Xn|SP + Xm * transfer_szB 4463 111:1 Xn|SP + Xm * transfer_szB 4464 010:0 Xn|SP + 32Uto64(Wm) 4465 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB 4466 110:0 Xn|SP + 32Sto64(Wm) 4467 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB 4468 4469 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of 4470 the transfer size is insn[23,31,30]. For integer loads/stores, 4471 insn[23] is zero, hence szLg2 can be at most 3 in such cases. 4472 4473 If the decoding fails, it returns IRTemp_INVALID. 4474 4475 isInt is True iff this is decoding is for transfers to/from integer 4476 registers. If False it is for transfers to/from vector registers. 4477 */ 4478 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt ) 4479 { 4480 UInt optS = SLICE_UInt(insn, 15, 12); 4481 UInt mm = SLICE_UInt(insn, 20, 16); 4482 UInt nn = SLICE_UInt(insn, 9, 5); 4483 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2)) 4484 | SLICE_UInt(insn, 31, 30); // Log2 of the size 4485 4486 buf[0] = 0; 4487 4488 /* Sanity checks, that this really is a load/store insn. 
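      As an illustration, "ldr x0, [x1, x2, lsl #3]" arrives here with
      opt:S == 011:1 and szLg2 == 3, and the code below computes its
      EA as X1 + (X2 << 3).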
*/ 4489 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0)) 4490 goto fail; 4491 4492 if (isInt 4493 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/ 4494 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/ 4495 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/ 4496 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/ 4497 goto fail; 4498 4499 if (!isInt 4500 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/ 4501 goto fail; 4502 4503 /* Throw out non-verified but possibly valid cases. */ 4504 switch (szLg2) { 4505 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec 4506 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec 4507 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec 4508 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec 4509 case BITS3(1,0,0): // can only ever be valid for the vector case 4510 if (isInt) goto fail; else break; 4511 case BITS3(1,0,1): // these sizes are never valid 4512 case BITS3(1,1,0): 4513 case BITS3(1,1,1): goto fail; 4514 4515 default: vassert(0); 4516 } 4517 4518 IRExpr* rhs = NULL; 4519 switch (optS) { 4520 case BITS4(1,1,1,0): goto fail; //ATC 4521 case BITS4(0,1,1,0): 4522 rhs = getIReg64orZR(mm); 4523 vex_sprintf(buf, "[%s, %s]", 4524 nameIReg64orZR(nn), nameIReg64orZR(mm)); 4525 break; 4526 case BITS4(1,1,1,1): goto fail; //ATC 4527 case BITS4(0,1,1,1): 4528 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2)); 4529 vex_sprintf(buf, "[%s, %s lsl %u]", 4530 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2); 4531 break; 4532 case BITS4(0,1,0,0): 4533 rhs = unop(Iop_32Uto64, getIReg32orZR(mm)); 4534 vex_sprintf(buf, "[%s, %s uxtx]", 4535 nameIReg64orZR(nn), nameIReg32orZR(mm)); 4536 break; 4537 case BITS4(0,1,0,1): 4538 rhs = binop(Iop_Shl64, 4539 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2)); 4540 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]", 4541 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 4542 break; 4543 case BITS4(1,1,0,0): 4544 rhs = unop(Iop_32Sto64, getIReg32orZR(mm)); 4545 vex_sprintf(buf, "[%s, %s sxtx]", 4546 nameIReg64orZR(nn), nameIReg32orZR(mm)); 4547 break; 4548 case BITS4(1,1,0,1): 4549 rhs = binop(Iop_Shl64, 4550 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2)); 4551 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]", 4552 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2); 4553 break; 4554 default: 4555 /* The rest appear to be genuinely invalid */ 4556 goto fail; 4557 } 4558 4559 vassert(rhs); 4560 IRTemp res = newTemp(Ity_I64); 4561 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs)); 4562 return res; 4563 4564 fail: 4565 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS); 4566 return IRTemp_INVALID; 4567 } 4568 4569 4570 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest 4571 bits of DATAE :: Ity_I64. */ 4572 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE ) 4573 { 4574 IRExpr* addrE = mkexpr(addr); 4575 switch (szB) { 4576 case 8: 4577 storeLE(addrE, dataE); 4578 break; 4579 case 4: 4580 storeLE(addrE, unop(Iop_64to32, dataE)); 4581 break; 4582 case 2: 4583 storeLE(addrE, unop(Iop_64to16, dataE)); 4584 break; 4585 case 1: 4586 storeLE(addrE, unop(Iop_64to8, dataE)); 4587 break; 4588 default: 4589 vassert(0); 4590 } 4591 } 4592 4593 4594 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR, 4595 placing the result in an Ity_I64 temporary. 
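   For example, szB == 2 produces (roughly) 16Uto64(LDle:I16(addr)).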
*/ 4596 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr ) 4597 { 4598 IRTemp res = newTemp(Ity_I64); 4599 IRExpr* addrE = mkexpr(addr); 4600 switch (szB) { 4601 case 8: 4602 assign(res, loadLE(Ity_I64,addrE)); 4603 break; 4604 case 4: 4605 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE))); 4606 break; 4607 case 2: 4608 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE))); 4609 break; 4610 case 1: 4611 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE))); 4612 break; 4613 default: 4614 vassert(0); 4615 } 4616 return res; 4617 } 4618 4619 4620 /* Generate a "standard 7" name, from bitQ and size. But also 4621 allow ".1d" since that's occasionally useful. */ 4622 static 4623 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size ) 4624 { 4625 vassert(bitQ <= 1 && size <= 3); 4626 const HChar* nms[8] 4627 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" }; 4628 UInt ix = (bitQ << 2) | size; 4629 vassert(ix < 8); 4630 return nms[ix]; 4631 } 4632 4633 4634 static 4635 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn) 4636 { 4637 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 4638 4639 /* ------------ LDR,STR (immediate, uimm12) ----------- */ 4640 /* uimm12 is scaled by the transfer size 4641 4642 31 29 26 21 9 4 4643 | | | | | | 4644 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8] 4645 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8] 4646 4647 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4] 4648 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4] 4649 4650 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2] 4651 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2] 4652 4653 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1] 4654 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1] 4655 */ 4656 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) { 4657 UInt szLg2 = INSN(31,30); 4658 UInt szB = 1 << szLg2; 4659 Bool isLD = INSN(22,22) == 1; 4660 UInt offs = INSN(21,10) * szB; 4661 UInt nn = INSN(9,5); 4662 UInt tt = INSN(4,0); 4663 IRTemp ta = newTemp(Ity_I64); 4664 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs))); 4665 if (nn == 31) { /* FIXME generate stack alignment check */ } 4666 vassert(szLg2 < 4); 4667 if (isLD) { 4668 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta))); 4669 } else { 4670 gen_narrowing_store(szB, ta, getIReg64orZR(tt)); 4671 } 4672 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" }; 4673 const HChar* st_name[4] = { "strb", "strh", "str", "str" }; 4674 DIP("%s %s, [%s, #%u]\n", 4675 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt), 4676 nameIReg64orSP(nn), offs); 4677 return True; 4678 } 4679 4680 /* ------------ LDUR,STUR (immediate, simm9) ----------- */ 4681 /* 4682 31 29 26 20 11 9 4 4683 | | | | | | | 4684 (at-Rn-then-Rn=EA) | | | 4685 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9 4686 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9 4687 4688 (at-EA-then-Rn=EA) 4689 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]! 4690 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]! 4691 4692 (at-EA) 4693 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9] 4694 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9] 4695 4696 simm9 is unscaled. 4697 4698 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the 4699 load case this is because would create two competing values for 4700 Rt. In the store case the reason is unclear, but the spec 4701 disallows it anyway. 4702 4703 Stores are narrowing, loads are unsigned widening. 
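      For example, "ldurb w0, [x1]" loads one byte and zero-extends it
      into all 64 bits of X0.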
sz encodes 4704 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8. 4705 */ 4706 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1)) 4707 == BITS9(1,1,1, 0,0,0,0,0, 0)) { 4708 UInt szLg2 = INSN(31,30); 4709 UInt szB = 1 << szLg2; 4710 Bool isLoad = INSN(22,22) == 1; 4711 UInt imm9 = INSN(20,12); 4712 UInt nn = INSN(9,5); 4713 UInt tt = INSN(4,0); 4714 Bool wBack = INSN(10,10) == 1; 4715 UInt how = INSN(11,10); 4716 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) { 4717 /* undecodable; fall through */ 4718 } else { 4719 if (nn == 31) { /* FIXME generate stack alignment check */ } 4720 4721 // Compute the transfer address TA and the writeback address WA. 4722 IRTemp tRN = newTemp(Ity_I64); 4723 assign(tRN, getIReg64orSP(nn)); 4724 IRTemp tEA = newTemp(Ity_I64); 4725 Long simm9 = (Long)sx_to_64(imm9, 9); 4726 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 4727 4728 IRTemp tTA = newTemp(Ity_I64); 4729 IRTemp tWA = newTemp(Ity_I64); 4730 switch (how) { 4731 case BITS2(0,1): 4732 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4733 case BITS2(1,1): 4734 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4735 case BITS2(0,0): 4736 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4737 default: 4738 vassert(0); /* NOTREACHED */ 4739 } 4740 4741 /* Normally rN would be updated after the transfer. However, in 4742 the special case typifed by 4743 str x30, [sp,#-16]! 4744 it is necessary to update SP before the transfer, (1) 4745 because Memcheck will otherwise complain about a write 4746 below the stack pointer, and (2) because the segfault 4747 stack extension mechanism will otherwise extend the stack 4748 only down to SP before the instruction, which might not be 4749 far enough, if the -16 bit takes the actual access 4750 address to the next page. 4751 */ 4752 Bool earlyWBack 4753 = wBack && simm9 < 0 && szB == 8 4754 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn; 4755 4756 if (wBack && earlyWBack) 4757 putIReg64orSP(nn, mkexpr(tEA)); 4758 4759 if (isLoad) { 4760 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA))); 4761 } else { 4762 gen_narrowing_store(szB, tTA, getIReg64orZR(tt)); 4763 } 4764 4765 if (wBack && !earlyWBack) 4766 putIReg64orSP(nn, mkexpr(tEA)); 4767 4768 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" }; 4769 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" }; 4770 const HChar* fmt_str = NULL; 4771 switch (how) { 4772 case BITS2(0,1): 4773 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4774 break; 4775 case BITS2(1,1): 4776 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4777 break; 4778 case BITS2(0,0): 4779 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n"; 4780 break; 4781 default: 4782 vassert(0); 4783 } 4784 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2], 4785 nameIRegOrZR(szB == 8, tt), 4786 nameIReg64orSP(nn), simm9); 4787 return True; 4788 } 4789 } 4790 4791 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */ 4792 /* L==1 => mm==LD 4793 L==0 => mm==ST 4794 x==0 => 32 bit transfers, and zero extended loads 4795 x==1 => 64 bit transfers 4796 simm7 is scaled by the (single-register) transfer size 4797 4798 (at-Rn-then-Rn=EA) 4799 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm 4800 4801 (at-EA-then-Rn=EA) 4802 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]! 
4803 4804 (at-EA) 4805 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm] 4806 */ 4807 4808 UInt insn_30_23 = INSN(30,23); 4809 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1) 4810 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1) 4811 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) { 4812 UInt bL = INSN(22,22); 4813 UInt bX = INSN(31,31); 4814 UInt bWBack = INSN(23,23); 4815 UInt rT1 = INSN(4,0); 4816 UInt rN = INSN(9,5); 4817 UInt rT2 = INSN(14,10); 4818 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 4819 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31) 4820 || (bL && rT1 == rT2)) { 4821 /* undecodable; fall through */ 4822 } else { 4823 if (rN == 31) { /* FIXME generate stack alignment check */ } 4824 4825 // Compute the transfer address TA and the writeback address WA. 4826 IRTemp tRN = newTemp(Ity_I64); 4827 assign(tRN, getIReg64orSP(rN)); 4828 IRTemp tEA = newTemp(Ity_I64); 4829 simm7 = (bX ? 8 : 4) * simm7; 4830 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 4831 4832 IRTemp tTA = newTemp(Ity_I64); 4833 IRTemp tWA = newTemp(Ity_I64); 4834 switch (INSN(24,23)) { 4835 case BITS2(0,1): 4836 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4837 case BITS2(1,1): 4838 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4839 case BITS2(1,0): 4840 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4841 default: 4842 vassert(0); /* NOTREACHED */ 4843 } 4844 4845 /* Normally rN would be updated after the transfer. However, in 4846 the special case typifed by 4847 stp x29, x30, [sp,#-112]! 4848 it is necessary to update SP before the transfer, (1) 4849 because Memcheck will otherwise complain about a write 4850 below the stack pointer, and (2) because the segfault 4851 stack extension mechanism will otherwise extend the stack 4852 only down to SP before the instruction, which might not be 4853 far enough, if the -112 bit takes the actual access 4854 address to the next page. 4855 */ 4856 Bool earlyWBack 4857 = bWBack && simm7 < 0 4858 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0; 4859 4860 if (bWBack && earlyWBack) 4861 putIReg64orSP(rN, mkexpr(tEA)); 4862 4863 /**/ if (bL == 1 && bX == 1) { 4864 // 64 bit load 4865 putIReg64orZR(rT1, loadLE(Ity_I64, 4866 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4867 putIReg64orZR(rT2, loadLE(Ity_I64, 4868 binop(Iop_Add64,mkexpr(tTA),mkU64(8)))); 4869 } else if (bL == 1 && bX == 0) { 4870 // 32 bit load 4871 putIReg32orZR(rT1, loadLE(Ity_I32, 4872 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4873 putIReg32orZR(rT2, loadLE(Ity_I32, 4874 binop(Iop_Add64,mkexpr(tTA),mkU64(4)))); 4875 } else if (bL == 0 && bX == 1) { 4876 // 64 bit store 4877 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4878 getIReg64orZR(rT1)); 4879 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)), 4880 getIReg64orZR(rT2)); 4881 } else { 4882 vassert(bL == 0 && bX == 0); 4883 // 32 bit store 4884 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4885 getIReg32orZR(rT1)); 4886 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)), 4887 getIReg32orZR(rT2)); 4888 } 4889 4890 if (bWBack && !earlyWBack) 4891 putIReg64orSP(rN, mkexpr(tEA)); 4892 4893 const HChar* fmt_str = NULL; 4894 switch (INSN(24,23)) { 4895 case BITS2(0,1): 4896 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4897 break; 4898 case BITS2(1,1): 4899 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4900 break; 4901 case BITS2(1,0): 4902 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 4903 break; 4904 default: 4905 vassert(0); 4906 } 4907 DIP(fmt_str, bL == 0 ? 
"st" : "ld", 4908 nameIRegOrZR(bX == 1, rT1), 4909 nameIRegOrZR(bX == 1, rT2), 4910 nameIReg64orSP(rN), simm7); 4911 return True; 4912 } 4913 } 4914 4915 /* ---------------- LDR (literal, int reg) ---------------- */ 4916 /* 31 29 23 4 4917 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] 4918 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] 4919 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] 4920 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] 4921 Just handles the first two cases for now. 4922 */ 4923 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { 4924 UInt imm19 = INSN(23,5); 4925 UInt rT = INSN(4,0); 4926 UInt bX = INSN(30,30); 4927 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 4928 if (bX) { 4929 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); 4930 } else { 4931 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); 4932 } 4933 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); 4934 return True; 4935 } 4936 4937 /* -------------- {LD,ST}R (integer register) --------------- */ 4938 /* 31 29 20 15 12 11 9 4 4939 | | | | | | | | 4940 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}] 4941 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}] 4942 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}] 4943 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}] 4944 4945 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}] 4946 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}] 4947 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}] 4948 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}] 4949 */ 4950 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) 4951 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 4952 HChar dis_buf[64]; 4953 UInt szLg2 = INSN(31,30); 4954 Bool isLD = INSN(22,22) == 1; 4955 UInt tt = INSN(4,0); 4956 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 4957 if (ea != IRTemp_INVALID) { 4958 switch (szLg2) { 4959 case 3: /* 64 bit */ 4960 if (isLD) { 4961 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); 4962 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); 4963 } else { 4964 storeLE(mkexpr(ea), getIReg64orZR(tt)); 4965 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); 4966 } 4967 break; 4968 case 2: /* 32 bit */ 4969 if (isLD) { 4970 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); 4971 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); 4972 } else { 4973 storeLE(mkexpr(ea), getIReg32orZR(tt)); 4974 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); 4975 } 4976 break; 4977 case 1: /* 16 bit */ 4978 if (isLD) { 4979 putIReg64orZR(tt, unop(Iop_16Uto64, 4980 loadLE(Ity_I16, mkexpr(ea)))); 4981 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4982 } else { 4983 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); 4984 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4985 } 4986 break; 4987 case 0: /* 8 bit */ 4988 if (isLD) { 4989 putIReg64orZR(tt, unop(Iop_8Uto64, 4990 loadLE(Ity_I8, mkexpr(ea)))); 4991 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); 4992 } else { 4993 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); 4994 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); 4995 } 4996 break; 4997 default: 4998 vassert(0); 4999 } 5000 return True; 5001 } 5002 } 5003 5004 /* -------------- LDRS{B,H,W} (uimm12) -------------- */ 5005 /* 31 29 26 23 21 9 4 5006 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] 5007 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] 5008 00 111 001 1x imm12 n 
t LDRSB Rt, [Xn|SP, #pimm12 * 1] 5009 where 5010 Rt is Wt when x==1, Xt when x==0 5011 */ 5012 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) { 5013 /* Further checks on bits 31:30 and 22 */ 5014 Bool valid = False; 5015 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5016 case BITS3(1,0,0): 5017 case BITS3(0,1,0): case BITS3(0,1,1): 5018 case BITS3(0,0,0): case BITS3(0,0,1): 5019 valid = True; 5020 break; 5021 } 5022 if (valid) { 5023 UInt szLg2 = INSN(31,30); 5024 UInt bitX = INSN(22,22); 5025 UInt imm12 = INSN(21,10); 5026 UInt nn = INSN(9,5); 5027 UInt tt = INSN(4,0); 5028 UInt szB = 1 << szLg2; 5029 IRExpr* ea = binop(Iop_Add64, 5030 getIReg64orSP(nn), mkU64(imm12 * szB)); 5031 switch (szB) { 5032 case 4: 5033 vassert(bitX == 0); 5034 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea))); 5035 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt), 5036 nameIReg64orSP(nn), imm12 * szB); 5037 break; 5038 case 2: 5039 if (bitX == 1) { 5040 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea))); 5041 } else { 5042 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea))); 5043 } 5044 DIP("ldrsh %s, [%s, #%u]\n", 5045 nameIRegOrZR(bitX == 0, tt), 5046 nameIReg64orSP(nn), imm12 * szB); 5047 break; 5048 case 1: 5049 if (bitX == 1) { 5050 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea))); 5051 } else { 5052 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea))); 5053 } 5054 DIP("ldrsb %s, [%s, #%u]\n", 5055 nameIRegOrZR(bitX == 0, tt), 5056 nameIReg64orSP(nn), imm12 * szB); 5057 break; 5058 default: 5059 vassert(0); 5060 } 5061 return True; 5062 } 5063 /* else fall through */ 5064 } 5065 5066 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */ 5067 /* (at-Rn-then-Rn=EA) 5068 31 29 23 21 20 11 9 4 5069 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9 5070 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9 5071 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9 5072 5073 (at-EA-then-Rn=EA) 5074 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]! 5075 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]! 5076 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]! 5077 where 5078 Rt is Wt when x==1, Xt when x==0 5079 transfer-at-Rn when [11]==0, at EA when [11]==1 5080 */ 5081 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5082 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5083 /* Further checks on bits 31:30 and 22 */ 5084 Bool valid = False; 5085 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5086 case BITS3(1,0,0): // LDRSW Xt 5087 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt 5088 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt 5089 valid = True; 5090 break; 5091 } 5092 if (valid) { 5093 UInt szLg2 = INSN(31,30); 5094 UInt imm9 = INSN(20,12); 5095 Bool atRN = INSN(11,11) == 0; 5096 UInt nn = INSN(9,5); 5097 UInt tt = INSN(4,0); 5098 IRTemp tRN = newTemp(Ity_I64); 5099 IRTemp tEA = newTemp(Ity_I64); 5100 IRTemp tTA = IRTemp_INVALID; 5101 ULong simm9 = sx_to_64(imm9, 9); 5102 Bool is64 = INSN(22,22) == 0; 5103 assign(tRN, getIReg64orSP(nn)); 5104 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5105 tTA = atRN ? tRN : tEA; 5106 HChar ch = '?'; 5107 /* There are 5 cases: 5108 byte load, SX to 64 5109 byte load, SX to 32, ZX to 64 5110 halfword load, SX to 64 5111 halfword load, SX to 32, ZX to 64 5112 word load, SX to 64 5113 The ifs below handle them in the listed order. 
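      For example, the "byte load, SX to 32, ZX to 64" case becomes
      (roughly) putIReg32orZR(tt, 8Sto32(LDle:I8(ta))), the zero
      extension to 64 bits being implicit in the 32-bit register
      write.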
5114 */ 5115 if (szLg2 == 0) { 5116 ch = 'b'; 5117 if (is64) { 5118 putIReg64orZR(tt, unop(Iop_8Sto64, 5119 loadLE(Ity_I8, mkexpr(tTA)))); 5120 } else { 5121 putIReg32orZR(tt, unop(Iop_8Sto32, 5122 loadLE(Ity_I8, mkexpr(tTA)))); 5123 } 5124 } 5125 else if (szLg2 == 1) { 5126 ch = 'h'; 5127 if (is64) { 5128 putIReg64orZR(tt, unop(Iop_16Sto64, 5129 loadLE(Ity_I16, mkexpr(tTA)))); 5130 } else { 5131 putIReg32orZR(tt, unop(Iop_16Sto32, 5132 loadLE(Ity_I16, mkexpr(tTA)))); 5133 } 5134 } 5135 else if (szLg2 == 2 && is64) { 5136 ch = 'w'; 5137 putIReg64orZR(tt, unop(Iop_32Sto64, 5138 loadLE(Ity_I32, mkexpr(tTA)))); 5139 } 5140 else { 5141 vassert(0); 5142 } 5143 putIReg64orSP(nn, mkexpr(tEA)); 5144 DIP(atRN ? "ldrs%c %s, [%s], #%llu\n" : "ldrs%c %s, [%s, #%llu]!", 5145 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 5146 return True; 5147 } 5148 /* else fall through */ 5149 } 5150 5151 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */ 5152 /* 31 29 23 21 20 11 9 4 5153 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9] 5154 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9] 5155 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9] 5156 where 5157 Rt is Wt when x==1, Xt when x==0 5158 */ 5159 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5160 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5161 /* Further checks on bits 31:30 and 22 */ 5162 Bool valid = False; 5163 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5164 case BITS3(1,0,0): // LDURSW Xt 5165 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt 5166 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt 5167 valid = True; 5168 break; 5169 } 5170 if (valid) { 5171 UInt szLg2 = INSN(31,30); 5172 UInt imm9 = INSN(20,12); 5173 UInt nn = INSN(9,5); 5174 UInt tt = INSN(4,0); 5175 IRTemp tRN = newTemp(Ity_I64); 5176 IRTemp tEA = newTemp(Ity_I64); 5177 ULong simm9 = sx_to_64(imm9, 9); 5178 Bool is64 = INSN(22,22) == 0; 5179 assign(tRN, getIReg64orSP(nn)); 5180 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5181 HChar ch = '?'; 5182 /* There are 5 cases: 5183 byte load, SX to 64 5184 byte load, SX to 32, ZX to 64 5185 halfword load, SX to 64 5186 halfword load, SX to 32, ZX to 64 5187 word load, SX to 64 5188 The ifs below handle them in the listed order. 
5189 */ 5190 if (szLg2 == 0) { 5191 ch = 'b'; 5192 if (is64) { 5193 putIReg64orZR(tt, unop(Iop_8Sto64, 5194 loadLE(Ity_I8, mkexpr(tEA)))); 5195 } else { 5196 putIReg32orZR(tt, unop(Iop_8Sto32, 5197 loadLE(Ity_I8, mkexpr(tEA)))); 5198 } 5199 } 5200 else if (szLg2 == 1) { 5201 ch = 'h'; 5202 if (is64) { 5203 putIReg64orZR(tt, unop(Iop_16Sto64, 5204 loadLE(Ity_I16, mkexpr(tEA)))); 5205 } else { 5206 putIReg32orZR(tt, unop(Iop_16Sto32, 5207 loadLE(Ity_I16, mkexpr(tEA)))); 5208 } 5209 } 5210 else if (szLg2 == 2 && is64) { 5211 ch = 'w'; 5212 putIReg64orZR(tt, unop(Iop_32Sto64, 5213 loadLE(Ity_I32, mkexpr(tEA)))); 5214 } 5215 else { 5216 vassert(0); 5217 } 5218 DIP("ldurs%c %s, [%s, #%lld]", 5219 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9); 5220 return True; 5221 } 5222 /* else fall through */ 5223 } 5224 5225 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */ 5226 /* L==1 => mm==LD 5227 L==0 => mm==ST 5228 sz==00 => 32 bit (S) transfers 5229 sz==01 => 64 bit (D) transfers 5230 sz==10 => 128 bit (Q) transfers 5231 sz==11 isn't allowed 5232 simm7 is scaled by the (single-register) transfer size 5233 5234 31 29 26 22 21 14 9 4 5235 5236 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm] 5237 (at-EA, with nontemporal hint) 5238 5239 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm 5240 (at-Rn-then-Rn=EA) 5241 5242 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm] 5243 (at-EA) 5244 5245 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]! 5246 (at-EA-then-Rn=EA) 5247 */ 5248 if (INSN(29,25) == BITS5(1,0,1,1,0)) { 5249 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units 5250 Bool isLD = INSN(22,22) == 1; 5251 Bool wBack = INSN(23,23) == 1; 5252 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 5253 UInt tt2 = INSN(14,10); 5254 UInt nn = INSN(9,5); 5255 UInt tt1 = INSN(4,0); 5256 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) { 5257 /* undecodable; fall through */ 5258 } else { 5259 if (nn == 31) { /* FIXME generate stack alignment check */ } 5260 5261 // Compute the transfer address TA and the writeback address WA. 5262 UInt szB = 4 << szSlg2; /* szB is the per-register size */ 5263 IRTemp tRN = newTemp(Ity_I64); 5264 assign(tRN, getIReg64orSP(nn)); 5265 IRTemp tEA = newTemp(Ity_I64); 5266 simm7 = szB * simm7; 5267 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 5268 5269 IRTemp tTA = newTemp(Ity_I64); 5270 IRTemp tWA = newTemp(Ity_I64); 5271 switch (INSN(24,23)) { 5272 case BITS2(0,1): 5273 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 5274 case BITS2(1,1): 5275 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 5276 case BITS2(1,0): 5277 case BITS2(0,0): 5278 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 5279 default: 5280 vassert(0); /* NOTREACHED */ 5281 } 5282 5283 IRType ty = Ity_INVALID; 5284 switch (szB) { 5285 case 4: ty = Ity_F32; break; 5286 case 8: ty = Ity_F64; break; 5287 case 16: ty = Ity_V128; break; 5288 default: vassert(0); 5289 } 5290 5291 /* Normally rN would be updated after the transfer. However, in 5292 the special cases typifed by 5293 stp q0, q1, [sp,#-512]! 5294 stp d0, d1, [sp,#-512]! 5295 stp s0, s1, [sp,#-512]! 
5296 it is necessary to update SP before the transfer, (1) 5297 because Memcheck will otherwise complain about a write 5298 below the stack pointer, and (2) because the segfault 5299 stack extension mechanism will otherwise extend the stack 5300 only down to SP before the instruction, which might not be 5301 far enough, if the -512 bit takes the actual access 5302 address to the next page. 5303 */ 5304 Bool earlyWBack 5305 = wBack && simm7 < 0 5306 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD; 5307 5308 if (wBack && earlyWBack) 5309 putIReg64orSP(nn, mkexpr(tEA)); 5310 5311 if (isLD) { 5312 if (szB < 16) { 5313 putQReg128(tt1, mkV128(0x0000)); 5314 } 5315 putQRegLO(tt1, 5316 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0)))); 5317 if (szB < 16) { 5318 putQReg128(tt2, mkV128(0x0000)); 5319 } 5320 putQRegLO(tt2, 5321 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB)))); 5322 } else { 5323 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)), 5324 getQRegLO(tt1, ty)); 5325 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)), 5326 getQRegLO(tt2, ty)); 5327 } 5328 5329 if (wBack && !earlyWBack) 5330 putIReg64orSP(nn, mkexpr(tEA)); 5331 5332 const HChar* fmt_str = NULL; 5333 switch (INSN(24,23)) { 5334 case BITS2(0,1): 5335 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 5336 break; 5337 case BITS2(1,1): 5338 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 5339 break; 5340 case BITS2(1,0): 5341 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 5342 break; 5343 case BITS2(0,0): 5344 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n"; 5345 break; 5346 default: 5347 vassert(0); 5348 } 5349 DIP(fmt_str, isLD ? "ld" : "st", 5350 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty), 5351 nameIReg64orSP(nn), simm7); 5352 return True; 5353 } 5354 } 5355 5356 /* -------------- {LD,ST}R (vector register) --------------- */ 5357 /* 31 29 23 20 15 12 11 9 4 5358 | | | | | | | | | 5359 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}] 5360 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}] 5361 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}] 5362 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}] 5363 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}] 5364 5365 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}] 5366 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}] 5367 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}] 5368 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}] 5369 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}] 5370 */ 5371 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5372 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5373 HChar dis_buf[64]; 5374 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5375 Bool isLD = INSN(22,22) == 1; 5376 UInt tt = INSN(4,0); 5377 if (szLg2 > 4) goto after_LDR_STR_vector_register; 5378 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/); 5379 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register; 5380 switch (szLg2) { 5381 case 0: /* 8 bit */ 5382 if (isLD) { 5383 putQReg128(tt, mkV128(0x0000)); 5384 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea))); 5385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5386 } else { 5387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8)); 5388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5389 } 5390 break; 5391 case 1: 5392 if (isLD) { 5393 putQReg128(tt, mkV128(0x0000)); 5394 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea))); 5395 DIP("ldr %s, %s\n", nameQRegLO(tt, 
Ity_I16), dis_buf); 5396 } else { 5397 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16)); 5398 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 5399 } 5400 break; 5401 case 2: /* 32 bit */ 5402 if (isLD) { 5403 putQReg128(tt, mkV128(0x0000)); 5404 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea))); 5405 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5406 } else { 5407 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32)); 5408 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5409 } 5410 break; 5411 case 3: /* 64 bit */ 5412 if (isLD) { 5413 putQReg128(tt, mkV128(0x0000)); 5414 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea))); 5415 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5416 } else { 5417 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64)); 5418 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5419 } 5420 break; 5421 case 4: 5422 if (isLD) { 5423 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea))); 5424 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf); 5425 } else { 5426 storeLE(mkexpr(ea), getQReg128(tt)); 5427 DIP("str %s, %s\n", nameQReg128(tt), dis_buf); 5428 } 5429 break; 5430 default: 5431 vassert(0); 5432 } 5433 return True; 5434 } 5435 after_LDR_STR_vector_register: 5436 5437 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */ 5438 /* 31 29 22 20 15 12 11 9 4 5439 | | | | | | | | | 5440 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}] 5441 5442 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}] 5443 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}] 5444 5445 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}] 5446 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}] 5447 */ 5448 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5449 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5450 HChar dis_buf[64]; 5451 UInt szLg2 = INSN(31,30); 5452 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64 5453 UInt tt = INSN(4,0); 5454 if (szLg2 == 3) goto after_LDRS_integer_register; 5455 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 5456 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register; 5457 /* Enumerate the 5 variants explicitly. */ 5458 if (szLg2 == 2/*32 bit*/ && sxTo64) { 5459 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea)))); 5460 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf); 5461 return True; 5462 } 5463 else 5464 if (szLg2 == 1/*16 bit*/) { 5465 if (sxTo64) { 5466 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea)))); 5467 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf); 5468 } else { 5469 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea)))); 5470 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf); 5471 } 5472 return True; 5473 } 5474 else 5475 if (szLg2 == 0/*8 bit*/) { 5476 if (sxTo64) { 5477 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea)))); 5478 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf); 5479 } else { 5480 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea)))); 5481 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf); 5482 } 5483 return True; 5484 } 5485 /* else it's an invalid combination */ 5486 } 5487 after_LDRS_integer_register: 5488 5489 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */ 5490 /* This is the Unsigned offset variant only. The Post-Index and 5491 Pre-Index variants are below. 
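(Illustrative worked example: the 12-bit immediate is unsigned and scaled by the transfer size, so LDR Qt, [Xn|SP, #32] would be encoded with imm12 == 2, because the Q transfer size is 16 bytes and 2 * 16 == 32.)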
5492 5493 31 29 23 21 9 4 5494 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1] 5495 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2] 5496 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4] 5497 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8] 5498 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16] 5499 5500 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1] 5501 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2] 5502 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4] 5503 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8] 5504 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16] 5505 */ 5506 if (INSN(29,24) == BITS6(1,1,1,1,0,1) 5507 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) { 5508 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5509 Bool isLD = INSN(22,22) == 1; 5510 UInt pimm12 = INSN(21,10) << szLg2; 5511 UInt nn = INSN(9,5); 5512 UInt tt = INSN(4,0); 5513 IRTemp tEA = newTemp(Ity_I64); 5514 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5515 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12))); 5516 if (isLD) { 5517 if (szLg2 < 4) { 5518 putQReg128(tt, mkV128(0x0000)); 5519 } 5520 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5521 } else { 5522 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5523 } 5524 DIP("%s %s, [%s, #%u]\n", 5525 isLD ? "ldr" : "str", 5526 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12); 5527 return True; 5528 } 5529 5530 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */ 5531 /* These are the Post-Index and Pre-Index variants. 5532 5533 31 29 23 20 11 9 4 5534 (at-Rn-then-Rn=EA) 5535 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm 5536 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm 5537 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm 5538 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm 5539 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm 5540 5541 (at-EA-then-Rn=EA) 5542 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]! 5543 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]! 5544 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]! 5545 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]! 5546 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]! 5547 5548 Stores are the same except with bit 22 set to 0. 5549 */ 5550 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5551 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5552 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5553 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5554 Bool isLD = INSN(22,22) == 1; 5555 UInt imm9 = INSN(20,12); 5556 Bool atRN = INSN(11,11) == 0; 5557 UInt nn = INSN(9,5); 5558 UInt tt = INSN(4,0); 5559 IRTemp tRN = newTemp(Ity_I64); 5560 IRTemp tEA = newTemp(Ity_I64); 5561 IRTemp tTA = IRTemp_INVALID; 5562 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5563 ULong simm9 = sx_to_64(imm9, 9); 5564 assign(tRN, getIReg64orSP(nn)); 5565 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5566 tTA = atRN ? tRN : tEA; 5567 if (isLD) { 5568 if (szLg2 < 4) { 5569 putQReg128(tt, mkV128(0x0000)); 5570 } 5571 putQRegLO(tt, loadLE(ty, mkexpr(tTA))); 5572 } else { 5573 storeLE(mkexpr(tTA), getQRegLO(tt, ty)); 5574 } 5575 putIReg64orSP(nn, mkexpr(tEA)); 5576 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n", 5577 isLD ? 
"ldr" : "str", 5578 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 5579 return True; 5580 } 5581 5582 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ 5583 /* 31 29 23 20 11 9 4 5584 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] 5585 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] 5586 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] 5587 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] 5588 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] 5589 5590 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] 5591 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] 5592 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] 5593 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] 5594 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] 5595 */ 5596 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5597 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5598 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5599 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5600 Bool isLD = INSN(22,22) == 1; 5601 UInt imm9 = INSN(20,12); 5602 UInt nn = INSN(9,5); 5603 UInt tt = INSN(4,0); 5604 ULong simm9 = sx_to_64(imm9, 9); 5605 IRTemp tEA = newTemp(Ity_I64); 5606 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5607 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); 5608 if (isLD) { 5609 if (szLg2 < 4) { 5610 putQReg128(tt, mkV128(0x0000)); 5611 } 5612 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5613 } else { 5614 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5615 } 5616 DIP("%s %s, [%s, #%lld]\n", 5617 isLD ? "ldur" : "stur", 5618 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 5619 return True; 5620 } 5621 5622 /* ---------------- LDR (literal, SIMD&FP) ---------------- */ 5623 /* 31 29 23 4 5624 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] 5625 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] 5626 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] 5627 */ 5628 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { 5629 UInt szB = 4 << INSN(31,30); 5630 UInt imm19 = INSN(23,5); 5631 UInt tt = INSN(4,0); 5632 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 5633 IRType ty = preferredVectorSubTypeFromSize(szB); 5634 putQReg128(tt, mkV128(0x0000)); 5635 putQRegLO(tt, loadLE(ty, mkU64(ea))); 5636 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); 5637 return True; 5638 } 5639 5640 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */ 5641 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */ 5642 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */ 5643 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */ 5644 /* 31 29 26 22 21 20 15 11 9 4 5645 5646 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP] 5647 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step 5648 5649 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP] 5650 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step 5651 5652 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP] 5653 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step 5654 5655 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP] 5656 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step 5657 5658 T = defined by Q and sz in the normal way 5659 step = if m == 11111 then transfer-size else Xm 5660 xx = case L of 1 -> LD ; 0 -> ST 5661 */ 5662 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5663 && INSN(21,21) == 0) { 5664 Bool bitQ = INSN(30,30); 5665 Bool isPX = INSN(23,23) == 1; 5666 Bool isLD = INSN(22,22) 
== 1; 5667 UInt mm = INSN(20,16); 5668 UInt opc = INSN(15,12); 5669 UInt sz = INSN(11,10); 5670 UInt nn = INSN(9,5); 5671 UInt tt = INSN(4,0); 5672 Bool isQ = bitQ == 1; 5673 Bool is1d = sz == BITS2(1,1) && !isQ; 5674 UInt nRegs = 0; 5675 switch (opc) { 5676 case BITS4(0,0,0,0): nRegs = 4; break; 5677 case BITS4(0,1,0,0): nRegs = 3; break; 5678 case BITS4(1,0,0,0): nRegs = 2; break; 5679 case BITS4(0,1,1,1): nRegs = 1; break; 5680 default: break; 5681 } 5682 5683 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5684 If we see it, set nRegs to 0 so as to cause the next conditional 5685 to fail. */ 5686 if (!isPX && mm != 0) 5687 nRegs = 0; 5688 5689 if (nRegs == 1 /* .1d is allowed */ 5690 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) { 5691 5692 UInt xferSzB = (isQ ? 16 : 8) * nRegs; 5693 5694 /* Generate the transfer address (TA) and if necessary the 5695 writeback address (WB) */ 5696 IRTemp tTA = newTemp(Ity_I64); 5697 assign(tTA, getIReg64orSP(nn)); 5698 if (nn == 31) { /* FIXME generate stack alignment check */ } 5699 IRTemp tWB = IRTemp_INVALID; 5700 if (isPX) { 5701 tWB = newTemp(Ity_I64); 5702 assign(tWB, binop(Iop_Add64, 5703 mkexpr(tTA), 5704 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 5705 : getIReg64orZR(mm))); 5706 } 5707 5708 /* -- BEGIN generate the transfers -- */ 5709 5710 IRTemp u0, u1, u2, u3, i0, i1, i2, i3; 5711 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID; 5712 switch (nRegs) { 5713 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */ 5714 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */ 5715 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */ 5716 case 1: u0 = newTempV128(); i0 = newTempV128(); break; 5717 default: vassert(0); 5718 } 5719 5720 /* -- Multiple 128 or 64 bit stores -- */ 5721 if (!isLD) { 5722 switch (nRegs) { 5723 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5724 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5725 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */ 5726 case 1: assign(u0, getQReg128((tt+0) % 32)); break; 5727 default: vassert(0); 5728 } 5729 switch (nRegs) { 5730 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64) 5731 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3); 5732 break; 5733 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64) 5734 (&i0, &i1, &i2, sz, u0, u1, u2); 5735 break; 5736 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64) 5737 (&i0, &i1, sz, u0, u1); 5738 break; 5739 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64) 5740 (&i0, sz, u0); 5741 break; 5742 default: vassert(0); 5743 } 5744 # define MAYBE_NARROW_TO_64(_expr) \ 5745 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5746 UInt step = isQ ? 16 : 8; 5747 switch (nRegs) { 5748 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5749 MAYBE_NARROW_TO_64(mkexpr(i3)) ); 5750 /* fallthru */ 5751 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5752 MAYBE_NARROW_TO_64(mkexpr(i2)) ); 5753 /* fallthru */ 5754 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5755 MAYBE_NARROW_TO_64(mkexpr(i1)) ); 5756 /* fallthru */ 5757 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5758 MAYBE_NARROW_TO_64(mkexpr(i0)) ); 5759 break; 5760 default: vassert(0); 5761 } 5762 # undef MAYBE_NARROW_TO_64 5763 } 5764 5765 /* -- Multiple 128 or 64 bit loads -- */ 5766 else /* isLD */ { 5767 UInt step = isQ ? 16 : 8; 5768 IRType loadTy = isQ ? 
Ity_V128 : Ity_I64; 5769 # define MAYBE_WIDEN_FROM_64(_expr) \ 5770 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr))) 5771 switch (nRegs) { 5772 case 4: 5773 assign(i3, MAYBE_WIDEN_FROM_64( 5774 loadLE(loadTy, 5775 binop(Iop_Add64, mkexpr(tTA), 5776 mkU64(3 * step))))); 5777 /* fallthru */ 5778 case 3: 5779 assign(i2, MAYBE_WIDEN_FROM_64( 5780 loadLE(loadTy, 5781 binop(Iop_Add64, mkexpr(tTA), 5782 mkU64(2 * step))))); 5783 /* fallthru */ 5784 case 2: 5785 assign(i1, MAYBE_WIDEN_FROM_64( 5786 loadLE(loadTy, 5787 binop(Iop_Add64, mkexpr(tTA), 5788 mkU64(1 * step))))); 5789 /* fallthru */ 5790 case 1: 5791 assign(i0, MAYBE_WIDEN_FROM_64( 5792 loadLE(loadTy, 5793 binop(Iop_Add64, mkexpr(tTA), 5794 mkU64(0 * step))))); 5795 break; 5796 default: 5797 vassert(0); 5798 } 5799 # undef MAYBE_WIDEN_FROM_64 5800 switch (nRegs) { 5801 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64) 5802 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3); 5803 break; 5804 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64) 5805 (&u0, &u1, &u2, sz, i0, i1, i2); 5806 break; 5807 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64) 5808 (&u0, &u1, sz, i0, i1); 5809 break; 5810 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64) 5811 (&u0, sz, i0); 5812 break; 5813 default: vassert(0); 5814 } 5815 switch (nRegs) { 5816 case 4: putQReg128( (tt+3) % 32, 5817 math_MAYBE_ZERO_HI64(bitQ, u3)); 5818 /* fallthru */ 5819 case 3: putQReg128( (tt+2) % 32, 5820 math_MAYBE_ZERO_HI64(bitQ, u2)); 5821 /* fallthru */ 5822 case 2: putQReg128( (tt+1) % 32, 5823 math_MAYBE_ZERO_HI64(bitQ, u1)); 5824 /* fallthru */ 5825 case 1: putQReg128( (tt+0) % 32, 5826 math_MAYBE_ZERO_HI64(bitQ, u0)); 5827 break; 5828 default: vassert(0); 5829 } 5830 } 5831 5832 /* -- END generate the transfers -- */ 5833 5834 /* Do the writeback, if necessary */ 5835 if (isPX) { 5836 putIReg64orSP(nn, mkexpr(tWB)); 5837 } 5838 5839 HChar pxStr[20]; 5840 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 5841 if (isPX) { 5842 if (mm == BITS5(1,1,1,1,1)) 5843 vex_sprintf(pxStr, ", #%u", xferSzB); 5844 else 5845 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 5846 } 5847 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 5848 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n", 5849 isLD ? 
"ld" : "st", nRegs, 5850 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 5851 pxStr); 5852 5853 return True; 5854 } 5855 /* else fall through */ 5856 } 5857 5858 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */ 5859 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */ 5860 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */ 5861 /* 31 29 26 22 21 20 15 11 9 4 5862 5863 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP] 5864 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step 5865 5866 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP] 5867 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step 5868 5869 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP] 5870 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step 5871 5872 T = defined by Q and sz in the normal way 5873 step = if m == 11111 then transfer-size else Xm 5874 xx = case L of 1 -> LD ; 0 -> ST 5875 */ 5876 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5877 && INSN(21,21) == 0) { 5878 Bool bitQ = INSN(30,30); 5879 Bool isPX = INSN(23,23) == 1; 5880 Bool isLD = INSN(22,22) == 1; 5881 UInt mm = INSN(20,16); 5882 UInt opc = INSN(15,12); 5883 UInt sz = INSN(11,10); 5884 UInt nn = INSN(9,5); 5885 UInt tt = INSN(4,0); 5886 Bool isQ = bitQ == 1; 5887 UInt nRegs = 0; 5888 switch (opc) { 5889 case BITS4(0,0,1,0): nRegs = 4; break; 5890 case BITS4(0,1,1,0): nRegs = 3; break; 5891 case BITS4(1,0,1,0): nRegs = 2; break; 5892 default: break; 5893 } 5894 5895 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5896 If we see it, set nRegs to 0 so as to cause the next conditional 5897 to fail. */ 5898 if (!isPX && mm != 0) 5899 nRegs = 0; 5900 5901 if (nRegs >= 2 && nRegs <= 4) { 5902 5903 UInt xferSzB = (isQ ? 16 : 8) * nRegs; 5904 5905 /* Generate the transfer address (TA) and if necessary the 5906 writeback address (WB) */ 5907 IRTemp tTA = newTemp(Ity_I64); 5908 assign(tTA, getIReg64orSP(nn)); 5909 if (nn == 31) { /* FIXME generate stack alignment check */ } 5910 IRTemp tWB = IRTemp_INVALID; 5911 if (isPX) { 5912 tWB = newTemp(Ity_I64); 5913 assign(tWB, binop(Iop_Add64, 5914 mkexpr(tTA), 5915 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 5916 : getIReg64orZR(mm))); 5917 } 5918 5919 /* -- BEGIN generate the transfers -- */ 5920 5921 IRTemp u0, u1, u2, u3; 5922 u0 = u1 = u2 = u3 = IRTemp_INVALID; 5923 switch (nRegs) { 5924 case 4: u3 = newTempV128(); /* fallthru */ 5925 case 3: u2 = newTempV128(); /* fallthru */ 5926 case 2: u1 = newTempV128(); 5927 u0 = newTempV128(); break; 5928 default: vassert(0); 5929 } 5930 5931 /* -- Multiple 128 or 64 bit stores -- */ 5932 if (!isLD) { 5933 switch (nRegs) { 5934 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5935 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5936 case 2: assign(u1, getQReg128((tt+1) % 32)); 5937 assign(u0, getQReg128((tt+0) % 32)); break; 5938 default: vassert(0); 5939 } 5940 # define MAYBE_NARROW_TO_64(_expr) \ 5941 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5942 UInt step = isQ ? 
16 : 8; 5943 switch (nRegs) { 5944 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5945 MAYBE_NARROW_TO_64(mkexpr(u3)) ); 5946 /* fallthru */ 5947 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5948 MAYBE_NARROW_TO_64(mkexpr(u2)) ); 5949 /* fallthru */ 5950 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5951 MAYBE_NARROW_TO_64(mkexpr(u1)) ); 5952 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5953 MAYBE_NARROW_TO_64(mkexpr(u0)) ); 5954 break; 5955 default: vassert(0); 5956 } 5957 # undef MAYBE_NARROW_TO_64 5958 } 5959 5960 /* -- Multiple 128 or 64 bit loads -- */ 5961 else /* isLD */ { 5962 UInt step = isQ ? 16 : 8; 5963 IRType loadTy = isQ ? Ity_V128 : Ity_I64; 5964 # define MAYBE_WIDEN_FROM_64(_expr) \ 5965 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr))) 5966 switch (nRegs) { 5967 case 4: 5968 assign(u3, MAYBE_WIDEN_FROM_64( 5969 loadLE(loadTy, 5970 binop(Iop_Add64, mkexpr(tTA), 5971 mkU64(3 * step))))); 5972 /* fallthru */ 5973 case 3: 5974 assign(u2, MAYBE_WIDEN_FROM_64( 5975 loadLE(loadTy, 5976 binop(Iop_Add64, mkexpr(tTA), 5977 mkU64(2 * step))))); 5978 /* fallthru */ 5979 case 2: 5980 assign(u1, MAYBE_WIDEN_FROM_64( 5981 loadLE(loadTy, 5982 binop(Iop_Add64, mkexpr(tTA), 5983 mkU64(1 * step))))); 5984 assign(u0, MAYBE_WIDEN_FROM_64( 5985 loadLE(loadTy, 5986 binop(Iop_Add64, mkexpr(tTA), 5987 mkU64(0 * step))))); 5988 break; 5989 default: 5990 vassert(0); 5991 } 5992 # undef MAYBE_WIDEN_FROM_64 5993 switch (nRegs) { 5994 case 4: putQReg128( (tt+3) % 32, 5995 math_MAYBE_ZERO_HI64(bitQ, u3)); 5996 /* fallthru */ 5997 case 3: putQReg128( (tt+2) % 32, 5998 math_MAYBE_ZERO_HI64(bitQ, u2)); 5999 /* fallthru */ 6000 case 2: putQReg128( (tt+1) % 32, 6001 math_MAYBE_ZERO_HI64(bitQ, u1)); 6002 putQReg128( (tt+0) % 32, 6003 math_MAYBE_ZERO_HI64(bitQ, u0)); 6004 break; 6005 default: vassert(0); 6006 } 6007 } 6008 6009 /* -- END generate the transfers -- */ 6010 6011 /* Do the writeback, if necessary */ 6012 if (isPX) { 6013 putIReg64orSP(nn, mkexpr(tWB)); 6014 } 6015 6016 HChar pxStr[20]; 6017 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6018 if (isPX) { 6019 if (mm == BITS5(1,1,1,1,1)) 6020 vex_sprintf(pxStr, ", #%u", xferSzB); 6021 else 6022 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6023 } 6024 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6025 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n", 6026 isLD ? 
"ld" : "st", 6027 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6028 pxStr); 6029 6030 return True; 6031 } 6032 /* else fall through */ 6033 } 6034 6035 /* ---------- LD1R (single structure, replicate) ---------- */ 6036 /* ---------- LD2R (single structure, replicate) ---------- */ 6037 /* ---------- LD3R (single structure, replicate) ---------- */ 6038 /* ---------- LD4R (single structure, replicate) ---------- */ 6039 /* 31 29 22 20 15 11 9 4 6040 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP] 6041 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step 6042 6043 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP] 6044 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step 6045 6046 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP] 6047 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step 6048 6049 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP] 6050 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step 6051 6052 step = if m == 11111 then transfer-size else Xm 6053 */ 6054 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1) 6055 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1) 6056 && INSN(12,12) == 0) { 6057 UInt bitQ = INSN(30,30); 6058 Bool isPX = INSN(23,23) == 1; 6059 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6060 UInt mm = INSN(20,16); 6061 UInt sz = INSN(11,10); 6062 UInt nn = INSN(9,5); 6063 UInt tt = INSN(4,0); 6064 6065 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */ 6066 if (isPX || mm == 0) { 6067 6068 IRType ty = integerIRTypeOfSize(1 << sz); 6069 6070 UInt laneSzB = 1 << sz; 6071 UInt xferSzB = laneSzB * nRegs; 6072 6073 /* Generate the transfer address (TA) and if necessary the 6074 writeback address (WB) */ 6075 IRTemp tTA = newTemp(Ity_I64); 6076 assign(tTA, getIReg64orSP(nn)); 6077 if (nn == 31) { /* FIXME generate stack alignment check */ } 6078 IRTemp tWB = IRTemp_INVALID; 6079 if (isPX) { 6080 tWB = newTemp(Ity_I64); 6081 assign(tWB, binop(Iop_Add64, 6082 mkexpr(tTA), 6083 mm == BITS5(1,1,1,1,1) ? 
mkU64(xferSzB) 6084 : getIReg64orZR(mm))); 6085 } 6086 6087 /* Do the writeback, if necessary */ 6088 if (isPX) { 6089 putIReg64orSP(nn, mkexpr(tWB)); 6090 } 6091 6092 IRTemp e0, e1, e2, e3, v0, v1, v2, v3; 6093 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID; 6094 switch (nRegs) { 6095 case 4: 6096 e3 = newTemp(ty); 6097 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6098 mkU64(3 * laneSzB)))); 6099 v3 = math_DUP_TO_V128(e3, ty); 6100 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3)); 6101 /* fallthrough */ 6102 case 3: 6103 e2 = newTemp(ty); 6104 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6105 mkU64(2 * laneSzB)))); 6106 v2 = math_DUP_TO_V128(e2, ty); 6107 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2)); 6108 /* fallthrough */ 6109 case 2: 6110 e1 = newTemp(ty); 6111 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6112 mkU64(1 * laneSzB)))); 6113 v1 = math_DUP_TO_V128(e1, ty); 6114 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1)); 6115 /* fallthrough */ 6116 case 1: 6117 e0 = newTemp(ty); 6118 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6119 mkU64(0 * laneSzB)))); 6120 v0 = math_DUP_TO_V128(e0, ty); 6121 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0)); 6122 break; 6123 default: 6124 vassert(0); 6125 } 6126 6127 HChar pxStr[20]; 6128 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6129 if (isPX) { 6130 if (mm == BITS5(1,1,1,1,1)) 6131 vex_sprintf(pxStr, ", #%u", xferSzB); 6132 else 6133 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6134 } 6135 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6136 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n", 6137 nRegs, 6138 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6139 pxStr); 6140 6141 return True; 6142 } 6143 /* else fall through */ 6144 } 6145 6146 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */ 6147 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */ 6148 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */ 6149 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */ 6150 /* 31 29 22 21 20 15 11 9 4 6151 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP] 6152 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step 6153 6154 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP] 6155 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step 6156 6157 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP] 6158 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step 6159 6160 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP] 6161 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step 6162 6163 step = if m == 11111 then transfer-size else Xm 6164 op = case L of 1 -> LD ; 0 -> ST 6165 6166 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb 6167 01:b:b:b0 -> 2, bbb 6168 10:b:b:00 -> 4, bb 6169 10:b:0:01 -> 8, b 6170 */ 6171 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) { 6172 UInt bitQ = INSN(30,30); 6173 Bool isPX = INSN(23,23) == 1; 6174 Bool isLD = INSN(22,22) == 1; 6175 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6176 UInt mm = INSN(20,16); 6177 UInt xx = INSN(15,14); 6178 UInt bitS = INSN(12,12); 6179 UInt sz = INSN(11,10); 6180 UInt nn = INSN(9,5); 6181 UInt tt = INSN(4,0); 6182 6183 Bool valid = True; 6184 6185 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 
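That is, the no-writeback (insn[23] == 0) forms require the Rm field in bits 20:16 to be 00000. As a worked example of the lane selection below: for a .s lane with index 3, xx == 10, q == 1, S == 1 and sz == 00, so xx_q_S_sz == 0x2C, giving laneSzB == 4 and ix == (0x2C >> 2) & 3 == 3.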
*/ 6186 if (!isPX && mm != 0) 6187 valid = False; 6188 6189 UInt laneSzB = 0; /* invalid */ 6190 UInt ix = 16; /* invalid */ 6191 6192 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz; 6193 switch (xx_q_S_sz) { 6194 case 0x00: case 0x01: case 0x02: case 0x03: 6195 case 0x04: case 0x05: case 0x06: case 0x07: 6196 case 0x08: case 0x09: case 0x0A: case 0x0B: 6197 case 0x0C: case 0x0D: case 0x0E: case 0x0F: 6198 laneSzB = 1; ix = xx_q_S_sz & 0xF; 6199 break; 6200 case 0x10: case 0x12: case 0x14: case 0x16: 6201 case 0x18: case 0x1A: case 0x1C: case 0x1E: 6202 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7; 6203 break; 6204 case 0x20: case 0x24: case 0x28: case 0x2C: 6205 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3; 6206 break; 6207 case 0x21: case 0x29: 6208 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1; 6209 break; 6210 default: 6211 break; 6212 } 6213 6214 if (valid && laneSzB != 0) { 6215 6216 IRType ty = integerIRTypeOfSize(laneSzB); 6217 UInt xferSzB = laneSzB * nRegs; 6218 6219 /* Generate the transfer address (TA) and if necessary the 6220 writeback address (WB) */ 6221 IRTemp tTA = newTemp(Ity_I64); 6222 assign(tTA, getIReg64orSP(nn)); 6223 if (nn == 31) { /* FIXME generate stack alignment check */ } 6224 IRTemp tWB = IRTemp_INVALID; 6225 if (isPX) { 6226 tWB = newTemp(Ity_I64); 6227 assign(tWB, binop(Iop_Add64, 6228 mkexpr(tTA), 6229 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 6230 : getIReg64orZR(mm))); 6231 } 6232 6233 /* Do the writeback, if necessary */ 6234 if (isPX) { 6235 putIReg64orSP(nn, mkexpr(tWB)); 6236 } 6237 6238 switch (nRegs) { 6239 case 4: { 6240 IRExpr* addr 6241 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB)); 6242 if (isLD) { 6243 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr)); 6244 } else { 6245 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty)); 6246 } 6247 /* fallthrough */ 6248 } 6249 case 3: { 6250 IRExpr* addr 6251 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB)); 6252 if (isLD) { 6253 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr)); 6254 } else { 6255 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty)); 6256 } 6257 /* fallthrough */ 6258 } 6259 case 2: { 6260 IRExpr* addr 6261 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB)); 6262 if (isLD) { 6263 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr)); 6264 } else { 6265 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty)); 6266 } 6267 /* fallthrough */ 6268 } 6269 case 1: { 6270 IRExpr* addr 6271 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB)); 6272 if (isLD) { 6273 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr)); 6274 } else { 6275 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty)); 6276 } 6277 break; 6278 } 6279 default: 6280 vassert(0); 6281 } 6282 6283 HChar pxStr[20]; 6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6285 if (isPX) { 6286 if (mm == BITS5(1,1,1,1,1)) 6287 vex_sprintf(pxStr, ", #%u", xferSzB); 6288 else 6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6290 } 6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6292 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n", 6293 isLD ? 
"ld" : "st", nRegs, 6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, 6295 ix, nameIReg64orSP(nn), pxStr); 6296 6297 return True; 6298 } 6299 /* else fall through */ 6300 } 6301 6302 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ 6303 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ 6304 /* 31 29 23 20 14 9 4 6305 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] 6306 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] 6307 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] 6308 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] 6309 */ 6310 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) 6311 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) 6312 && INSN(14,10) == BITS5(1,1,1,1,1)) { 6313 UInt szBlg2 = INSN(31,30); 6314 Bool isLD = INSN(22,22) == 1; 6315 Bool isAcqOrRel = INSN(15,15) == 1; 6316 UInt ss = INSN(20,16); 6317 UInt nn = INSN(9,5); 6318 UInt tt = INSN(4,0); 6319 6320 vassert(szBlg2 < 4); 6321 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6322 IRType ty = integerIRTypeOfSize(szB); 6323 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6324 6325 IRTemp ea = newTemp(Ity_I64); 6326 assign(ea, getIReg64orSP(nn)); 6327 /* FIXME generate check that ea is szB-aligned */ 6328 6329 if (isLD && ss == BITS5(1,1,1,1,1)) { 6330 IRTemp res = newTemp(ty); 6331 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); 6332 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6333 if (isAcqOrRel) { 6334 stmt(IRStmt_MBE(Imbe_Fence)); 6335 } 6336 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 6337 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6338 return True; 6339 } 6340 if (!isLD) { 6341 if (isAcqOrRel) { 6342 stmt(IRStmt_MBE(Imbe_Fence)); 6343 } 6344 IRTemp res = newTemp(Ity_I1); 6345 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6346 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); 6347 /* IR semantics: res is 1 if store succeeds, 0 if it fails. 6348 Need to set rS to 1 on failure, 0 on success. */ 6349 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), 6350 mkU64(1))); 6351 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? 
"a" : "", suffix[szBlg2], 6352 nameIRegOrZR(False, ss), 6353 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6354 return True; 6355 } 6356 /* else fall through */ 6357 } 6358 6359 /* ------------------ LDA{R,RH,RB} ------------------ */ 6360 /* ------------------ STL{R,RH,RB} ------------------ */ 6361 /* 31 29 23 20 14 9 4 6362 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP] 6363 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP] 6364 */ 6365 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) 6366 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { 6367 UInt szBlg2 = INSN(31,30); 6368 Bool isLD = INSN(22,22) == 1; 6369 UInt nn = INSN(9,5); 6370 UInt tt = INSN(4,0); 6371 6372 vassert(szBlg2 < 4); 6373 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6374 IRType ty = integerIRTypeOfSize(szB); 6375 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6376 6377 IRTemp ea = newTemp(Ity_I64); 6378 assign(ea, getIReg64orSP(nn)); 6379 /* FIXME generate check that ea is szB-aligned */ 6380 6381 if (isLD) { 6382 IRTemp res = newTemp(ty); 6383 assign(res, loadLE(ty, mkexpr(ea))); 6384 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6385 stmt(IRStmt_MBE(Imbe_Fence)); 6386 DIP("lda%s %s, [%s]\n", suffix[szBlg2], 6387 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6388 } else { 6389 stmt(IRStmt_MBE(Imbe_Fence)); 6390 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6391 storeLE(mkexpr(ea), data); 6392 DIP("stl%s %s, [%s]\n", suffix[szBlg2], 6393 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6394 } 6395 return True; 6396 } 6397 6398 /* ------------------ PRFM (immediate) ------------------ */ 6399 /* 31 21 9 4 6400 11 111 00110 imm12 n t PRFM pfrop=Rt, [Xn|SP, #pimm] 6401 */ 6402 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) { 6403 UInt imm12 = INSN(21,10); 6404 UInt nn = INSN(9,5); 6405 UInt tt = INSN(4,0); 6406 /* Generating any IR here is pointless, except for documentation 6407 purposes, as it will get optimised away later. */ 6408 IRTemp ea = newTemp(Ity_I64); 6409 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8))); 6410 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8); 6411 return True; 6412 } 6413 6414 /* ------------------ PRFM (register) ------------------ */ 6415 /* 31 29 22 20 15 12 11 9 4 6416 11 1110001 01 Rm opt S 10 Rn Rt PRFM pfrop=Rt, [Xn|SP, R<m>{ext/sh}] 6417 */ 6418 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1) 6419 && INSN(11,10) == BITS2(1,0)) { 6420 HChar dis_buf[64]; 6421 UInt tt = INSN(4,0); 6422 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 6423 if (ea != IRTemp_INVALID) { 6424 /* No actual code to generate. 
*/ 6425 DIP("prfm prfop=%u, %s\n", tt, dis_buf); 6426 return True; 6427 } 6428 } 6429 6430 vex_printf("ARM64 front end: load_store\n"); 6431 return False; 6432 # undef INSN 6433 } 6434 6435 6436 /*------------------------------------------------------------*/ 6437 /*--- Control flow and misc instructions ---*/ 6438 /*------------------------------------------------------------*/ 6439 6440 static 6441 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn, 6442 const VexArchInfo* archinfo) 6443 { 6444 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 6445 6446 /* ---------------------- B cond ----------------------- */ 6447 /* 31 24 4 3 6448 0101010 0 imm19 0 cond */ 6449 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) { 6450 UInt cond = INSN(3,0); 6451 ULong uimm64 = INSN(23,5) << 2; 6452 Long simm64 = (Long)sx_to_64(uimm64, 21); 6453 vassert(dres->whatNext == Dis_Continue); 6454 vassert(dres->len == 4); 6455 vassert(dres->continueAt == 0); 6456 vassert(dres->jk_StopHere == Ijk_INVALID); 6457 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 6458 Ijk_Boring, 6459 IRConst_U64(guest_PC_curr_instr + simm64), 6460 OFFB_PC) ); 6461 putPC(mkU64(guest_PC_curr_instr + 4)); 6462 dres->whatNext = Dis_StopHere; 6463 dres->jk_StopHere = Ijk_Boring; 6464 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64); 6465 return True; 6466 } 6467 6468 /* -------------------- B{L} uncond -------------------- */ 6469 if (INSN(30,26) == BITS5(0,0,1,0,1)) { 6470 /* 000101 imm26 B (PC + sxTo64(imm26 << 2)) 6471 100101 imm26 B (PC + sxTo64(imm26 << 2)) 6472 */ 6473 UInt bLink = INSN(31,31); 6474 ULong uimm64 = INSN(25,0) << 2; 6475 Long simm64 = (Long)sx_to_64(uimm64, 28); 6476 if (bLink) { 6477 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 6478 } 6479 putPC(mkU64(guest_PC_curr_instr + simm64)); 6480 dres->whatNext = Dis_StopHere; 6481 dres->jk_StopHere = Ijk_Call; 6482 DIP("b%s 0x%llx\n", bLink == 1 ? 
"l" : "", 6483 guest_PC_curr_instr + simm64); 6484 return True; 6485 } 6486 6487 /* --------------------- B{L} reg --------------------- */ 6488 /* 31 24 22 20 15 9 4 6489 1101011 00 10 11111 000000 nn 00000 RET Rn 6490 1101011 00 01 11111 000000 nn 00000 CALL Rn 6491 1101011 00 00 11111 000000 nn 00000 JMP Rn 6492 */ 6493 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0) 6494 && INSN(20,16) == BITS5(1,1,1,1,1) 6495 && INSN(15,10) == BITS6(0,0,0,0,0,0) 6496 && INSN(4,0) == BITS5(0,0,0,0,0)) { 6497 UInt branch_type = INSN(22,21); 6498 UInt nn = INSN(9,5); 6499 if (branch_type == BITS2(1,0) /* RET */) { 6500 putPC(getIReg64orZR(nn)); 6501 dres->whatNext = Dis_StopHere; 6502 dres->jk_StopHere = Ijk_Ret; 6503 DIP("ret %s\n", nameIReg64orZR(nn)); 6504 return True; 6505 } 6506 if (branch_type == BITS2(0,1) /* CALL */) { 6507 IRTemp dst = newTemp(Ity_I64); 6508 assign(dst, getIReg64orZR(nn)); 6509 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4)); 6510 putPC(mkexpr(dst)); 6511 dres->whatNext = Dis_StopHere; 6512 dres->jk_StopHere = Ijk_Call; 6513 DIP("blr %s\n", nameIReg64orZR(nn)); 6514 return True; 6515 } 6516 if (branch_type == BITS2(0,0) /* JMP */) { 6517 putPC(getIReg64orZR(nn)); 6518 dres->whatNext = Dis_StopHere; 6519 dres->jk_StopHere = Ijk_Boring; 6520 DIP("jmp %s\n", nameIReg64orZR(nn)); 6521 return True; 6522 } 6523 } 6524 6525 /* -------------------- CB{N}Z -------------------- */ 6526 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 6527 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2)) 6528 */ 6529 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) { 6530 Bool is64 = INSN(31,31) == 1; 6531 Bool bIfZ = INSN(24,24) == 0; 6532 ULong uimm64 = INSN(23,5) << 2; 6533 UInt rT = INSN(4,0); 6534 Long simm64 = (Long)sx_to_64(uimm64, 21); 6535 IRExpr* cond = NULL; 6536 if (is64) { 6537 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 6538 getIReg64orZR(rT), mkU64(0)); 6539 } else { 6540 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32, 6541 getIReg32orZR(rT), mkU32(0)); 6542 } 6543 stmt( IRStmt_Exit(cond, 6544 Ijk_Boring, 6545 IRConst_U64(guest_PC_curr_instr + simm64), 6546 OFFB_PC) ); 6547 putPC(mkU64(guest_PC_curr_instr + 4)); 6548 dres->whatNext = Dis_StopHere; 6549 dres->jk_StopHere = Ijk_Boring; 6550 DIP("cb%sz %s, 0x%llx\n", 6551 bIfZ ? "" : "n", nameIRegOrZR(is64, rT), 6552 guest_PC_curr_instr + simm64); 6553 return True; 6554 } 6555 6556 /* -------------------- TB{N}Z -------------------- */ 6557 /* 31 30 24 23 18 5 4 6558 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 6559 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2)) 6560 */ 6561 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) { 6562 UInt b5 = INSN(31,31); 6563 Bool bIfZ = INSN(24,24) == 0; 6564 UInt b40 = INSN(23,19); 6565 UInt imm14 = INSN(18,5); 6566 UInt tt = INSN(4,0); 6567 UInt bitNo = (b5 << 5) | b40; 6568 ULong uimm64 = imm14 << 2; 6569 Long simm64 = sx_to_64(uimm64, 16); 6570 IRExpr* cond 6571 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64, 6572 binop(Iop_And64, 6573 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)), 6574 mkU64(1)), 6575 mkU64(0)); 6576 stmt( IRStmt_Exit(cond, 6577 Ijk_Boring, 6578 IRConst_U64(guest_PC_curr_instr + simm64), 6579 OFFB_PC) ); 6580 putPC(mkU64(guest_PC_curr_instr + 4)); 6581 dres->whatNext = Dis_StopHere; 6582 dres->jk_StopHere = Ijk_Boring; 6583 DIP("tb%sz %s, #%u, 0x%llx\n", 6584 bIfZ ? 
"" : "n", nameIReg64orZR(tt), bitNo, 6585 guest_PC_curr_instr + simm64); 6586 return True; 6587 } 6588 6589 /* -------------------- SVC -------------------- */ 6590 /* 11010100 000 imm16 000 01 6591 Don't bother with anything except the imm16==0 case. 6592 */ 6593 if (INSN(31,0) == 0xD4000001) { 6594 putPC(mkU64(guest_PC_curr_instr + 4)); 6595 dres->whatNext = Dis_StopHere; 6596 dres->jk_StopHere = Ijk_Sys_syscall; 6597 DIP("svc #0\n"); 6598 return True; 6599 } 6600 6601 /* ------------------ M{SR,RS} ------------------ */ 6602 /* ---- Cases for TPIDR_EL0 ---- 6603 0xD51BD0 010 Rt MSR tpidr_el0, rT 6604 0xD53BD0 010 Rt MRS rT, tpidr_el0 6605 */ 6606 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/ 6607 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) { 6608 Bool toSys = INSN(21,21) == 0; 6609 UInt tt = INSN(4,0); 6610 if (toSys) { 6611 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) ); 6612 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt)); 6613 } else { 6614 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 )); 6615 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt)); 6616 } 6617 return True; 6618 } 6619 /* ---- Cases for FPCR ---- 6620 0xD51B44 000 Rt MSR fpcr, rT 6621 0xD53B44 000 Rt MSR rT, fpcr 6622 */ 6623 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/ 6624 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) { 6625 Bool toSys = INSN(21,21) == 0; 6626 UInt tt = INSN(4,0); 6627 if (toSys) { 6628 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) ); 6629 DIP("msr fpcr, %s\n", nameIReg64orZR(tt)); 6630 } else { 6631 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32)); 6632 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt)); 6633 } 6634 return True; 6635 } 6636 /* ---- Cases for FPSR ---- 6637 0xD51B44 001 Rt MSR fpsr, rT 6638 0xD53B44 001 Rt MSR rT, fpsr 6639 The only part of this we model is FPSR.QC. All other bits 6640 are ignored when writing to it and RAZ when reading from it. 6641 */ 6642 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/ 6643 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) { 6644 Bool toSys = INSN(21,21) == 0; 6645 UInt tt = INSN(4,0); 6646 if (toSys) { 6647 /* Just deal with FPSR.QC. Make up a V128 value which is 6648 zero if Xt[27] is zero and any other value if Xt[27] is 6649 nonzero. */ 6650 IRTemp qc64 = newTemp(Ity_I64); 6651 assign(qc64, binop(Iop_And64, 6652 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)), 6653 mkU64(1))); 6654 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64)); 6655 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) ); 6656 DIP("msr fpsr, %s\n", nameIReg64orZR(tt)); 6657 } else { 6658 /* Generate a value which is all zeroes except for bit 27, 6659 which must be zero if QCFLAG is all zeroes and one otherwise. */ 6660 IRTemp qcV128 = newTempV128(); 6661 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 )); 6662 IRTemp qc64 = newTemp(Ity_I64); 6663 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)), 6664 unop(Iop_V128to64, mkexpr(qcV128)))); 6665 IRExpr* res = binop(Iop_Shl64, 6666 unop(Iop_1Uto64, 6667 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))), 6668 mkU8(27)); 6669 putIReg64orZR(tt, res); 6670 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt)); 6671 } 6672 return True; 6673 } 6674 /* ---- Cases for NZCV ---- 6675 D51B42 000 Rt MSR nzcv, rT 6676 D53B42 000 Rt MRS rT, nzcv 6677 The only parts of NZCV that actually exist are bits 31:28, which 6678 are the N Z C and V bits themselves. Hence the flags thunk provides 6679 all the state we need. 
6680 */
6681 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6682 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6683 Bool toSys = INSN(21,21) == 0;
6684 UInt tt = INSN(4,0);
6685 if (toSys) {
6686 IRTemp t = newTemp(Ity_I64);
6687 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6688 setFlags_COPY(t);
6689 DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
6690 } else {
6691 IRTemp res = newTemp(Ity_I64);
6692 assign(res, mk_arm64g_calculate_flags_nzcv());
6693 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6694 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6695 }
6696 return True;
6697 }
6698 /* ---- Cases for DCZID_EL0 ----
6699 Don't support arbitrary reads and writes to this register. Just
6700 return the value 16, which indicates that the DC ZVA instruction
6701 is not permitted, so we don't have to emulate it.
6702 D5 3B 00 111 Rt MRS rT, dczid_el0
6703 */
6704 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6705 UInt tt = INSN(4,0);
6706 putIReg64orZR(tt, mkU64(1<<4));
6707 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6708 return True;
6709 }
6710 /* ---- Cases for CTR_EL0 ----
6711 We just handle reads, and make up a value from the D and I line
6712 sizes in the VexArchInfo we are given, and patch in the following
6713 fields that the Foundation model gives ("natively"):
6714 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6715 D5 3B 00 001 Rt MRS rT, ctr_el0
6716 */
6717 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6718 UInt tt = INSN(4,0);
6719 /* Need to generate a value from dMinLine_lg2_szB and
6720 iMinLine_lg2_szB. The value in the register is in 32-bit
6721 units, so need to subtract 2 from the values in the
6722 VexArchInfo. We can assume that the values here are valid --
6723 disInstr_ARM64 checks them -- so there's no need to deal with
6724 out-of-range cases. */
6725 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6726 && archinfo->arm64_dMinLine_lg2_szB <= 17
6727 && archinfo->arm64_iMinLine_lg2_szB >= 2
6728 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6729 UInt val
6730 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6731 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6732 putIReg64orZR(tt, mkU64(val));
6733 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6734 return True;
6735 }
6736 /* ---- Cases for CNTVCT_EL0 ----
6737 This is the generic timer's virtual count register, i.e. a
6738 timestamp counter. Support reads of it only by passing through to the host.
6739 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6740 */
6741 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6742 UInt tt = INSN(4,0);
6743 IRTemp val = newTemp(Ity_I64);
6744 IRExpr** args = mkIRExprVec_0();
6745 IRDirty* d = unsafeIRDirty_1_N (
6746 val,
6747 0/*regparms*/,
6748 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6749 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6750 args
6751 );
6752 /* execute the dirty call, dumping the result in val. */
6753 stmt( IRStmt_Dirty(d) );
6754 putIReg64orZR(tt, mkexpr(val));
6755 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6756 return True;
6757 }
6758
6759 /* ------------------ IC_IVAU ------------------ */
6760 /* D5 0B 75 001 Rt ic ivau, rT
6761 */
6762 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6763 /* We will always be provided with a valid iMinLine value. */
6764 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6765 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6766 /* Round the requested address, in rT, down to the start of the
6767 containing block.
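For example, if iMinLine_lg2_szB == 6 then lineszB == 64, the mask is ~63, and a requested address of 0x10047 rounds down to 0x10040.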
*/ 6768 UInt tt = INSN(4,0); 6769 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB; 6770 IRTemp addr = newTemp(Ity_I64); 6771 assign( addr, binop( Iop_And64, 6772 getIReg64orZR(tt), 6773 mkU64(~(lineszB - 1))) ); 6774 /* Set the invalidation range, request exit-and-invalidate, with 6775 continuation at the next instruction. */ 6776 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); 6777 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); 6778 /* be paranoid ... */ 6779 stmt( IRStmt_MBE(Imbe_Fence) ); 6780 putPC(mkU64( guest_PC_curr_instr + 4 )); 6781 dres->whatNext = Dis_StopHere; 6782 dres->jk_StopHere = Ijk_InvalICache; 6783 DIP("ic ivau, %s\n", nameIReg64orZR(tt)); 6784 return True; 6785 } 6786 6787 /* ------------------ DC_CVAU ------------------ */ 6788 /* D5 0B 7B 001 Rt dc cvau, rT 6789 */ 6790 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) { 6791 /* Exactly the same scheme as for IC IVAU, except we observe the 6792 dMinLine size, and request an Ijk_FlushDCache instead of 6793 Ijk_InvalICache. */ 6794 /* We will always be provided with a valid dMinLine value. */ 6795 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2 6796 && archinfo->arm64_dMinLine_lg2_szB <= 17); 6797 /* Round the requested address, in rT, down to the start of the 6798 containing block. */ 6799 UInt tt = INSN(4,0); 6800 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB; 6801 IRTemp addr = newTemp(Ity_I64); 6802 assign( addr, binop( Iop_And64, 6803 getIReg64orZR(tt), 6804 mkU64(~(lineszB - 1))) ); 6805 /* Set the flush range, request exit-and-flush, with 6806 continuation at the next instruction. */ 6807 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr))); 6808 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB))); 6809 /* be paranoid ... */ 6810 stmt( IRStmt_MBE(Imbe_Fence) ); 6811 putPC(mkU64( guest_PC_curr_instr + 4 )); 6812 dres->whatNext = Dis_StopHere; 6813 dres->jk_StopHere = Ijk_FlushDCache; 6814 DIP("dc cvau, %s\n", nameIReg64orZR(tt)); 6815 return True; 6816 } 6817 6818 /* ------------------ ISB, DMB, DSB ------------------ */ 6819 /* 31 21 11 7 6 4 6820 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt 6821 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt 6822 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt 6823 */ 6824 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0) 6825 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1) 6826 && INSN(7,7) == 1 6827 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) { 6828 UInt opc = INSN(6,5); 6829 UInt CRm = INSN(11,8); 6830 vassert(opc <= 2 && CRm <= 15); 6831 stmt(IRStmt_MBE(Imbe_Fence)); 6832 const HChar* opNames[3] 6833 = { "dsb", "dmb", "isb" }; 6834 const HChar* howNames[16] 6835 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh", 6836 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" }; 6837 DIP("%s %s\n", opNames[opc], howNames[CRm]); 6838 return True; 6839 } 6840 6841 /* -------------------- NOP -------------------- */ 6842 if (INSN(31,0) == 0xD503201F) { 6843 DIP("nop\n"); 6844 return True; 6845 } 6846 6847 /* -------------------- BRK -------------------- */ 6848 /* 31 23 20 4 6849 1101 0100 001 imm16 00000 BRK #imm16 6850 */ 6851 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0) 6852 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) { 6853 UInt imm16 = INSN(20,5); 6854 /* Request SIGTRAP and then restart of this insn. 
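Hence the PC is set to guest_PC_curr_instr + 0 rather than + 4: the guest PC is left pointing at the BRK itself, so the SIGTRAP is reported at the breakpoint address and the instruction is retried if the handler returns.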
*/ 6855 putPC(mkU64(guest_PC_curr_instr + 0)); 6856 dres->whatNext = Dis_StopHere; 6857 dres->jk_StopHere = Ijk_SigTRAP; 6858 DIP("brk #%u\n", imm16); 6859 return True; 6860 } 6861 6862 /* ------------------- YIELD ------------------- */ 6863 /* 31 23 15 7 6864 1101 0101 0000 0011 0010 0000 0011 1111 6865 */ 6866 if (INSN(31,0) == 0xD503203F) { 6867 /* Request yield followed by continuation at the next insn. */ 6868 putPC(mkU64(guest_PC_curr_instr + 4)); 6869 dres->whatNext = Dis_StopHere; 6870 dres->jk_StopHere = Ijk_Yield; 6871 DIP("yield\n"); 6872 return True; 6873 } 6874 6875 //fail: 6876 vex_printf("ARM64 front end: branch_etc\n"); 6877 return False; 6878 # undef INSN 6879 } 6880 6881 6882 /*------------------------------------------------------------*/ 6883 /*--- SIMD and FP instructions: helper functions ---*/ 6884 /*------------------------------------------------------------*/ 6885 6886 /* Some constructors for interleave/deinterleave expressions. */ 6887 6888 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) { 6889 // returns a0 b0 6890 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10)); 6891 } 6892 6893 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) { 6894 // returns a1 b1 6895 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10)); 6896 } 6897 6898 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) { 6899 // returns a2 a0 b2 b0 6900 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210)); 6901 } 6902 6903 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) { 6904 // returns a3 a1 b3 b1 6905 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210)); 6906 } 6907 6908 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) { 6909 // returns a1 b1 a0 b0 6910 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210)); 6911 } 6912 6913 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) { 6914 // returns a3 b3 a2 b2 6915 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210)); 6916 } 6917 6918 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6919 // returns a6 a4 a2 a0 b6 b4 b2 b0 6920 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210)); 6921 } 6922 6923 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6924 // returns a7 a5 a3 a1 b7 b5 b3 b1 6925 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210)); 6926 } 6927 6928 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6929 // returns a3 b3 a2 b2 a1 b1 a0 b0 6930 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210)); 6931 } 6932 6933 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) { 6934 // returns a7 b7 a6 b6 a5 b5 a4 b4 6935 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210)); 6936 } 6937 6938 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210, 6939 IRTemp bFEDCBA9876543210 ) { 6940 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0 6941 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210), 6942 mkexpr(bFEDCBA9876543210)); 6943 } 6944 6945 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210, 6946 IRTemp bFEDCBA9876543210 ) { 6947 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1 6948 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210), 6949 mkexpr(bFEDCBA9876543210)); 6950 } 6951 6952 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210, 6953 IRTemp bFEDCBA9876543210 ) { 
6954 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0 6955 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210), 6956 mkexpr(bFEDCBA9876543210)); 6957 } 6958 6959 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210, 6960 IRTemp bFEDCBA9876543210 ) { 6961 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8 6962 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210), 6963 mkexpr(bFEDCBA9876543210)); 6964 } 6965 6966 /* Generate N copies of |bit| in the bottom of a ULong. */ 6967 static ULong Replicate ( ULong bit, Int N ) 6968 { 6969 vassert(bit <= 1 && N >= 1 && N < 64); 6970 if (bit == 0) { 6971 return 0; 6972 } else { 6973 /* Careful. This won't work for N == 64. */ 6974 return (1ULL << N) - 1; 6975 } 6976 } 6977 6978 static ULong Replicate32x2 ( ULong bits32 ) 6979 { 6980 vassert(0 == (bits32 & ~0xFFFFFFFFULL)); 6981 return (bits32 << 32) | bits32; 6982 } 6983 6984 static ULong Replicate16x4 ( ULong bits16 ) 6985 { 6986 vassert(0 == (bits16 & ~0xFFFFULL)); 6987 return Replicate32x2((bits16 << 16) | bits16); 6988 } 6989 6990 static ULong Replicate8x8 ( ULong bits8 ) 6991 { 6992 vassert(0 == (bits8 & ~0xFFULL)); 6993 return Replicate16x4((bits8 << 8) | bits8); 6994 } 6995 6996 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of 6997 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N 6998 is 64. In the former case, the upper 32 bits of the returned value 6999 are guaranteed to be zero. */ 7000 static ULong VFPExpandImm ( ULong imm8, Int N ) 7001 { 7002 vassert(imm8 <= 0xFF); 7003 vassert(N == 32 || N == 64); 7004 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2. 7005 Int F = N - E - 1; 7006 ULong imm8_6 = (imm8 >> 6) & 1; 7007 /* sign: 1 bit */ 7008 /* exp: E bits */ 7009 /* frac: F bits */ 7010 ULong sign = (imm8 >> 7) & 1; 7011 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1); 7012 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6); 7013 vassert(sign < (1ULL << 1)); 7014 vassert(exp < (1ULL << E)); 7015 vassert(frac < (1ULL << F)); 7016 vassert(1 + E + F == N); 7017 ULong res = (sign << (E+F)) | (exp << F) | frac; 7018 return res; 7019 } 7020 7021 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value. 7022 This might fail, as indicated by the returned Bool. Page 2530 of 7023 the manual. */ 7024 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res, 7025 UInt op, UInt cmode, UInt imm8 ) 7026 { 7027 vassert(op <= 1); 7028 vassert(cmode <= 15); 7029 vassert(imm8 <= 255); 7030 7031 *res = 0; /* will overwrite iff returning True */ 7032 7033 ULong imm64 = 0; 7034 Bool testimm8 = False; 7035 7036 switch (cmode >> 1) { 7037 case 0: 7038 testimm8 = False; imm64 = Replicate32x2(imm8); break; 7039 case 1: 7040 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break; 7041 case 2: 7042 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break; 7043 case 3: 7044 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break; 7045 case 4: 7046 testimm8 = False; imm64 = Replicate16x4(imm8); break; 7047 case 5: 7048 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break; 7049 case 6: 7050 testimm8 = True; 7051 if ((cmode & 1) == 0) 7052 imm64 = Replicate32x2((imm8 << 8) | 0xFF); 7053 else 7054 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF); 7055 break; 7056 case 7: 7057 testimm8 = False; 7058 if ((cmode & 1) == 0 && op == 0) 7059 imm64 = Replicate8x8(imm8); 7060 if ((cmode & 1) == 0 && op == 1) { 7061 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00; 7062 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 
0xFF : 0x00; 7063 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00; 7064 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00; 7065 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00; 7066 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00; 7067 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00; 7068 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00; 7069 } 7070 if ((cmode & 1) == 1 && op == 0) { 7071 ULong imm8_7 = (imm8 >> 7) & 1; 7072 ULong imm8_6 = (imm8 >> 6) & 1; 7073 ULong imm8_50 = imm8 & 63; 7074 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19)) 7075 | ((imm8_6 ^ 1) << (5 + 6 + 19)) 7076 | (Replicate(imm8_6, 5) << (6 + 19)) 7077 | (imm8_50 << 19); 7078 imm64 = Replicate32x2(imm32); 7079 } 7080 if ((cmode & 1) == 1 && op == 1) { 7081 // imm64 = imm8<7>:NOT(imm8<6>) 7082 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48); 7083 ULong imm8_7 = (imm8 >> 7) & 1; 7084 ULong imm8_6 = (imm8 >> 6) & 1; 7085 ULong imm8_50 = imm8 & 63; 7086 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62) 7087 | (Replicate(imm8_6, 8) << 54) 7088 | (imm8_50 << 48); 7089 } 7090 break; 7091 default: 7092 vassert(0); 7093 } 7094 7095 if (testimm8 && imm8 == 0) 7096 return False; 7097 7098 *res = imm64; 7099 return True; 7100 } 7101 7102 /* Help a bit for decoding laneage for vector operations that can be 7103 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q 7104 and SZ bits, typically for vector floating point. */ 7105 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF, 7106 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper, 7107 /*OUT*/const HChar** arrSpec, 7108 Bool bitQ, Bool bitSZ ) 7109 { 7110 vassert(bitQ == True || bitQ == False); 7111 vassert(bitSZ == True || bitSZ == False); 7112 if (bitQ && bitSZ) { // 2x64 7113 if (tyI) *tyI = Ity_I64; 7114 if (tyF) *tyF = Ity_F64; 7115 if (nLanes) *nLanes = 2; 7116 if (zeroUpper) *zeroUpper = False; 7117 if (arrSpec) *arrSpec = "2d"; 7118 return True; 7119 } 7120 if (bitQ && !bitSZ) { // 4x32 7121 if (tyI) *tyI = Ity_I32; 7122 if (tyF) *tyF = Ity_F32; 7123 if (nLanes) *nLanes = 4; 7124 if (zeroUpper) *zeroUpper = False; 7125 if (arrSpec) *arrSpec = "4s"; 7126 return True; 7127 } 7128 if (!bitQ && !bitSZ) { // 2x32 7129 if (tyI) *tyI = Ity_I32; 7130 if (tyF) *tyF = Ity_F32; 7131 if (nLanes) *nLanes = 2; 7132 if (zeroUpper) *zeroUpper = True; 7133 if (arrSpec) *arrSpec = "2s"; 7134 return True; 7135 } 7136 // Else impliedly 1x64, which isn't allowed. 7137 return False; 7138 } 7139 7140 /* Helper for decoding laneage for shift-style vector operations 7141 that involve an immediate shift amount. */ 7142 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2, 7143 UInt immh, UInt immb ) 7144 { 7145 vassert(immh < (1<<4)); 7146 vassert(immb < (1<<3)); 7147 UInt immhb = (immh << 3) | immb; 7148 if (immh & 8) { 7149 if (shift) *shift = 128 - immhb; 7150 if (szBlg2) *szBlg2 = 3; 7151 return True; 7152 } 7153 if (immh & 4) { 7154 if (shift) *shift = 64 - immhb; 7155 if (szBlg2) *szBlg2 = 2; 7156 return True; 7157 } 7158 if (immh & 2) { 7159 if (shift) *shift = 32 - immhb; 7160 if (szBlg2) *szBlg2 = 1; 7161 return True; 7162 } 7163 if (immh & 1) { 7164 if (shift) *shift = 16 - immhb; 7165 if (szBlg2) *szBlg2 = 0; 7166 return True; 7167 } 7168 return False; 7169 } 7170 7171 /* Generate IR to fold all lanes of the V128 value in 'src' as 7172 characterised by the operator 'op', and return the result in the 7173 bottom bits of a V128, with all other bits set to zero. 
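   As an illustrative sketch of the strategy (not part of the interface):
   for a 4x32 fold of src = [x3 x2 x1 x0] with op = Iop_Add32x4, the code
   below first builds the lane-duplicated vectors [x3 x3 x3 x3],
   [x2 x2 x2 x2], [x1 x1 x1 x1] and [x0 x0 x0 x0] using CatOdd/CatEven at
   64- and then 32-bit granularity, combines them pairwise with |op|, and
   finally applies Iop_ZeroHI96ofV128, so lane 0 of the result holds
   x3+x2+x1+x0 and every other lane is zero.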
*/ 7174 static IRTemp math_FOLDV ( IRTemp src, IROp op ) 7175 { 7176 /* The basic idea is to use repeated applications of Iop_CatEven* 7177 and Iop_CatOdd* operators to 'src' so as to clone each lane into 7178 a complete vector. Then fold all those vectors with 'op' and 7179 zero out all but the least significant lane. */ 7180 switch (op) { 7181 case Iop_Min8Sx16: case Iop_Min8Ux16: 7182 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: { 7183 /* NB: temp naming here is misleading -- the naming is for 8 7184 lanes of 16 bit, whereas what is being operated on is 16 7185 lanes of 8 bits. */ 7186 IRTemp x76543210 = src; 7187 IRTemp x76547654 = newTempV128(); 7188 IRTemp x32103210 = newTempV128(); 7189 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 7190 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210)); 7191 IRTemp x76767676 = newTempV128(); 7192 IRTemp x54545454 = newTempV128(); 7193 IRTemp x32323232 = newTempV128(); 7194 IRTemp x10101010 = newTempV128(); 7195 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 7196 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 7197 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 7198 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 7199 IRTemp x77777777 = newTempV128(); 7200 IRTemp x66666666 = newTempV128(); 7201 IRTemp x55555555 = newTempV128(); 7202 IRTemp x44444444 = newTempV128(); 7203 IRTemp x33333333 = newTempV128(); 7204 IRTemp x22222222 = newTempV128(); 7205 IRTemp x11111111 = newTempV128(); 7206 IRTemp x00000000 = newTempV128(); 7207 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 7208 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 7209 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 7210 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 7211 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 7212 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 7213 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 7214 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 7215 /* Naming not misleading after here. 
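   From this point on, each xAllN temp holds byte lane N of the original
   vector duplicated into all 16 byte lanes.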
*/ 7216 IRTemp xAllF = newTempV128(); 7217 IRTemp xAllE = newTempV128(); 7218 IRTemp xAllD = newTempV128(); 7219 IRTemp xAllC = newTempV128(); 7220 IRTemp xAllB = newTempV128(); 7221 IRTemp xAllA = newTempV128(); 7222 IRTemp xAll9 = newTempV128(); 7223 IRTemp xAll8 = newTempV128(); 7224 IRTemp xAll7 = newTempV128(); 7225 IRTemp xAll6 = newTempV128(); 7226 IRTemp xAll5 = newTempV128(); 7227 IRTemp xAll4 = newTempV128(); 7228 IRTemp xAll3 = newTempV128(); 7229 IRTemp xAll2 = newTempV128(); 7230 IRTemp xAll1 = newTempV128(); 7231 IRTemp xAll0 = newTempV128(); 7232 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777)); 7233 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777)); 7234 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666)); 7235 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666)); 7236 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555)); 7237 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555)); 7238 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444)); 7239 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444)); 7240 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333)); 7241 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333)); 7242 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222)); 7243 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222)); 7244 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111)); 7245 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111)); 7246 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000)); 7247 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000)); 7248 IRTemp maxFE = newTempV128(); 7249 IRTemp maxDC = newTempV128(); 7250 IRTemp maxBA = newTempV128(); 7251 IRTemp max98 = newTempV128(); 7252 IRTemp max76 = newTempV128(); 7253 IRTemp max54 = newTempV128(); 7254 IRTemp max32 = newTempV128(); 7255 IRTemp max10 = newTempV128(); 7256 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE))); 7257 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC))); 7258 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA))); 7259 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8))); 7260 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6))); 7261 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4))); 7262 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2))); 7263 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0))); 7264 IRTemp maxFEDC = newTempV128(); 7265 IRTemp maxBA98 = newTempV128(); 7266 IRTemp max7654 = newTempV128(); 7267 IRTemp max3210 = newTempV128(); 7268 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC))); 7269 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98))); 7270 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 7271 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7272 IRTemp maxFEDCBA98 = newTempV128(); 7273 IRTemp max76543210 = newTempV128(); 7274 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98))); 7275 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 7276 IRTemp maxAllLanes = newTempV128(); 7277 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98), 7278 mkexpr(max76543210))); 7279 IRTemp res = newTempV128(); 7280 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes))); 7281 return res; 7282 } 7283 case Iop_Min16Sx8: case Iop_Min16Ux8: 7284 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: { 7285 IRTemp x76543210 = src; 7286 IRTemp x76547654 = newTempV128(); 7287 IRTemp x32103210 = newTempV128(); 7288 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210)); 7289 assign(x32103210, mk_CatEvenLanes64x2(x76543210, 
x76543210)); 7290 IRTemp x76767676 = newTempV128(); 7291 IRTemp x54545454 = newTempV128(); 7292 IRTemp x32323232 = newTempV128(); 7293 IRTemp x10101010 = newTempV128(); 7294 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654)); 7295 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654)); 7296 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210)); 7297 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210)); 7298 IRTemp x77777777 = newTempV128(); 7299 IRTemp x66666666 = newTempV128(); 7300 IRTemp x55555555 = newTempV128(); 7301 IRTemp x44444444 = newTempV128(); 7302 IRTemp x33333333 = newTempV128(); 7303 IRTemp x22222222 = newTempV128(); 7304 IRTemp x11111111 = newTempV128(); 7305 IRTemp x00000000 = newTempV128(); 7306 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676)); 7307 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676)); 7308 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454)); 7309 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454)); 7310 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232)); 7311 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232)); 7312 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010)); 7313 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010)); 7314 IRTemp max76 = newTempV128(); 7315 IRTemp max54 = newTempV128(); 7316 IRTemp max32 = newTempV128(); 7317 IRTemp max10 = newTempV128(); 7318 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666))); 7319 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444))); 7320 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222))); 7321 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000))); 7322 IRTemp max7654 = newTempV128(); 7323 IRTemp max3210 = newTempV128(); 7324 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54))); 7325 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7326 IRTemp max76543210 = newTempV128(); 7327 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210))); 7328 IRTemp res = newTempV128(); 7329 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210))); 7330 return res; 7331 } 7332 case Iop_Max32Fx4: case Iop_Min32Fx4: 7333 case Iop_Min32Sx4: case Iop_Min32Ux4: 7334 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: { 7335 IRTemp x3210 = src; 7336 IRTemp x3232 = newTempV128(); 7337 IRTemp x1010 = newTempV128(); 7338 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210)); 7339 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210)); 7340 IRTemp x3333 = newTempV128(); 7341 IRTemp x2222 = newTempV128(); 7342 IRTemp x1111 = newTempV128(); 7343 IRTemp x0000 = newTempV128(); 7344 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232)); 7345 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232)); 7346 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010)); 7347 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010)); 7348 IRTemp max32 = newTempV128(); 7349 IRTemp max10 = newTempV128(); 7350 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222))); 7351 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000))); 7352 IRTemp max3210 = newTempV128(); 7353 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10))); 7354 IRTemp res = newTempV128(); 7355 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210))); 7356 return res; 7357 } 7358 case Iop_Add64x2: { 7359 IRTemp x10 = src; 7360 IRTemp x00 = newTempV128(); 7361 IRTemp x11 = newTempV128(); 7362 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10))); 7363 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10))); 7364 IRTemp max10 = newTempV128(); 
7365 assign(max10, binop(op, mkexpr(x11), mkexpr(x00))); 7366 IRTemp res = newTempV128(); 7367 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10))); 7368 return res; 7369 } 7370 default: 7371 vassert(0); 7372 } 7373 } 7374 7375 7376 /* Generate IR for TBL and TBX. This deals with the 128 bit case 7377 only. */ 7378 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src, 7379 IRTemp oor_values ) 7380 { 7381 vassert(len >= 0 && len <= 3); 7382 7383 /* Generate some useful constants as concisely as possible. */ 7384 IRTemp half15 = newTemp(Ity_I64); 7385 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL)); 7386 IRTemp half16 = newTemp(Ity_I64); 7387 assign(half16, mkU64(0x1010101010101010ULL)); 7388 7389 /* A zero vector */ 7390 IRTemp allZero = newTempV128(); 7391 assign(allZero, mkV128(0x0000)); 7392 /* A vector containing 15 in each 8-bit lane */ 7393 IRTemp all15 = newTempV128(); 7394 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15))); 7395 /* A vector containing 16 in each 8-bit lane */ 7396 IRTemp all16 = newTempV128(); 7397 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16))); 7398 /* A vector containing 32 in each 8-bit lane */ 7399 IRTemp all32 = newTempV128(); 7400 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16))); 7401 /* A vector containing 48 in each 8-bit lane */ 7402 IRTemp all48 = newTempV128(); 7403 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32))); 7404 /* A vector containing 64 in each 8-bit lane */ 7405 IRTemp all64 = newTempV128(); 7406 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32))); 7407 7408 /* Group the 16/32/48/64 vectors so as to be indexable. */ 7409 IRTemp allXX[4] = { all16, all32, all48, all64 }; 7410 7411 /* Compute the result for each table vector, with zeroes in places 7412 where the index values are out of range, and OR them into the 7413 running vector. */ 7414 IRTemp running_result = newTempV128(); 7415 assign(running_result, mkV128(0)); 7416 7417 UInt tabent; 7418 for (tabent = 0; tabent <= len; tabent++) { 7419 vassert(tabent >= 0 && tabent < 4); 7420 IRTemp bias = newTempV128(); 7421 assign(bias, 7422 mkexpr(tabent == 0 ? allZero : allXX[tabent-1])); 7423 IRTemp biased_indices = newTempV128(); 7424 assign(biased_indices, 7425 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias))); 7426 IRTemp valid_mask = newTempV128(); 7427 assign(valid_mask, 7428 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices))); 7429 IRTemp safe_biased_indices = newTempV128(); 7430 assign(safe_biased_indices, 7431 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15))); 7432 IRTemp results_or_junk = newTempV128(); 7433 assign(results_or_junk, 7434 binop(Iop_Perm8x16, mkexpr(tab[tabent]), 7435 mkexpr(safe_biased_indices))); 7436 IRTemp results_or_zero = newTempV128(); 7437 assign(results_or_zero, 7438 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask))); 7439 /* And OR that into the running result. */ 7440 IRTemp tmp = newTempV128(); 7441 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero), 7442 mkexpr(running_result))); 7443 running_result = tmp; 7444 } 7445 7446 /* So now running_result holds the overall result where the indices 7447 are in range, and zero in out-of-range lanes. Now we need to 7448 compute an overall validity mask and use this to copy in the 7449 lanes in the oor_values for out of range indices. This is 7450 unnecessary for TBL but will get folded out by iropt, so we lean 7451 on that and generate the same code for TBL and TBX here. 
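   In effect, per 8-bit lane, the code below computes
      result = running_result | (oor_values & ~(src < 16*(len+1) ? 0xFF : 0x00))
   which is an identity for TBL (oor_values is all zeroes there) and does
   the element-wise select for TBX.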
*/ 7452 IRTemp overall_valid_mask = newTempV128(); 7453 assign(overall_valid_mask, 7454 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src))); 7455 IRTemp result = newTempV128(); 7456 assign(result, 7457 binop(Iop_OrV128, 7458 mkexpr(running_result), 7459 binop(Iop_AndV128, 7460 mkexpr(oor_values), 7461 unop(Iop_NotV128, mkexpr(overall_valid_mask))))); 7462 return result; 7463 } 7464 7465 7466 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be 7467 an op which takes two I64s and produces a V128. That is, a widening 7468 operator. Generate IR which applies |opI64x2toV128| to either the 7469 lower (if |is2| is False) or upper (if |is2| is True) halves of 7470 |argL| and |argR|, and return the value in a new IRTemp. 7471 */ 7472 static 7473 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128, 7474 IRExpr* argL, IRExpr* argR ) 7475 { 7476 IRTemp res = newTempV128(); 7477 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64; 7478 assign(res, binop(opI64x2toV128, unop(slice, argL), 7479 unop(slice, argR))); 7480 return res; 7481 } 7482 7483 7484 /* Generate signed/unsigned absolute difference vector IR. */ 7485 static 7486 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE ) 7487 { 7488 vassert(size <= 3); 7489 IRTemp argL = newTempV128(); 7490 IRTemp argR = newTempV128(); 7491 IRTemp msk = newTempV128(); 7492 IRTemp res = newTempV128(); 7493 assign(argL, argLE); 7494 assign(argR, argRE); 7495 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size), 7496 mkexpr(argL), mkexpr(argR))); 7497 assign(res, 7498 binop(Iop_OrV128, 7499 binop(Iop_AndV128, 7500 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)), 7501 mkexpr(msk)), 7502 binop(Iop_AndV128, 7503 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)), 7504 unop(Iop_NotV128, mkexpr(msk))))); 7505 return res; 7506 } 7507 7508 7509 /* Generate IR that takes a V128 and sign- or zero-widens 7510 either the lower or upper set of lanes to twice-as-wide, 7511 resulting in a new V128 value. */ 7512 static 7513 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf, 7514 UInt sizeNarrow, IRExpr* srcE ) 7515 { 7516 IRTemp src = newTempV128(); 7517 IRTemp res = newTempV128(); 7518 assign(src, srcE); 7519 switch (sizeNarrow) { 7520 case X10: 7521 assign(res, 7522 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2, 7523 binop(fromUpperHalf ? Iop_InterleaveHI32x4 7524 : Iop_InterleaveLO32x4, 7525 mkexpr(src), 7526 mkexpr(src)), 7527 mkU8(32))); 7528 break; 7529 case X01: 7530 assign(res, 7531 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4, 7532 binop(fromUpperHalf ? Iop_InterleaveHI16x8 7533 : Iop_InterleaveLO16x8, 7534 mkexpr(src), 7535 mkexpr(src)), 7536 mkU8(16))); 7537 break; 7538 case X00: 7539 assign(res, 7540 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8, 7541 binop(fromUpperHalf ? Iop_InterleaveHI8x16 7542 : Iop_InterleaveLO8x16, 7543 mkexpr(src), 7544 mkexpr(src)), 7545 mkU8(8))); 7546 break; 7547 default: 7548 vassert(0); 7549 } 7550 return res; 7551 } 7552 7553 7554 /* Generate IR that takes a V128 and sign- or zero-widens 7555 either the even or odd lanes to twice-as-wide, 7556 resulting in a new V128 value. */ 7557 static 7558 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd, 7559 UInt sizeNarrow, IRExpr* srcE ) 7560 { 7561 IRTemp src = newTempV128(); 7562 IRTemp res = newTempV128(); 7563 IROp opSAR = mkVecSARN(sizeNarrow+1); 7564 IROp opSHR = mkVecSHRN(sizeNarrow+1); 7565 IROp opSHL = mkVecSHLN(sizeNarrow+1); 7566 IROp opSxR = zWiden ? 
opSHR : opSAR; 7567 UInt amt = 0; 7568 switch (sizeNarrow) { 7569 case X10: amt = 32; break; 7570 case X01: amt = 16; break; 7571 case X00: amt = 8; break; 7572 default: vassert(0); 7573 } 7574 assign(src, srcE); 7575 if (fromOdd) { 7576 assign(res, binop(opSxR, mkexpr(src), mkU8(amt))); 7577 } else { 7578 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)), 7579 mkU8(amt))); 7580 } 7581 return res; 7582 } 7583 7584 7585 /* Generate IR that takes two V128s and narrows (takes lower half) 7586 of each lane, producing a single V128 value. */ 7587 static 7588 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow ) 7589 { 7590 IRTemp res = newTempV128(); 7591 assign(res, binop(mkVecCATEVENLANES(sizeNarrow), 7592 mkexpr(argHi), mkexpr(argLo))); 7593 return res; 7594 } 7595 7596 7597 /* Return a temp which holds the vector dup of the lane of width 7598 (1 << size) obtained from src[laneNo]. */ 7599 static 7600 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo ) 7601 { 7602 vassert(size <= 3); 7603 /* Normalise |laneNo| so it is of the form 7604 x000 for D, xx00 for S, xxx0 for H, and xxxx for B. 7605 This puts the bits we want to inspect at constant offsets 7606 regardless of the value of |size|. 7607 */ 7608 UInt ix = laneNo << size; 7609 vassert(ix <= 15); 7610 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 7611 switch (size) { 7612 case 0: /* B */ 7613 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16; 7614 /* fallthrough */ 7615 case 1: /* H */ 7616 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8; 7617 /* fallthrough */ 7618 case 2: /* S */ 7619 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4; 7620 /* fallthrough */ 7621 case 3: /* D */ 7622 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2; 7623 break; 7624 default: 7625 vassert(0); 7626 } 7627 IRTemp res = newTempV128(); 7628 assign(res, src); 7629 Int i; 7630 for (i = 3; i >= 0; i--) { 7631 if (ops[i] == Iop_INVALID) 7632 break; 7633 IRTemp tmp = newTempV128(); 7634 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res))); 7635 res = tmp; 7636 } 7637 return res; 7638 } 7639 7640 7641 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size 7642 selector encoded as shown below. Return a new V128 holding the 7643 selected lane from |srcV| dup'd out to V128, and also return the 7644 lane number, log2 of the lane size in bytes, and width-character via 7645 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5 7646 is an invalid selector, in which case return 7647 IRTemp_INVALID, 0, 0 and '?' respectively. 
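   For example, using the encoding summarised below, imm5 = 00111 selects
   .b[3], imm5 = 00110 selects .h[1], and imm5 = 11000 selects .d[1].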
7648 7649 imm5 = xxxx1 signifies .b[xxxx] 7650 = xxx10 .h[xxx] 7651 = xx100 .s[xx] 7652 = x1000 .d[x] 7653 otherwise invalid 7654 */ 7655 static 7656 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo, 7657 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh, 7658 IRExpr* srcV, UInt imm5 ) 7659 { 7660 *laneNo = 0; 7661 *laneSzLg2 = 0; 7662 *laneCh = '?'; 7663 7664 if (imm5 & 1) { 7665 *laneNo = (imm5 >> 1) & 15; 7666 *laneSzLg2 = 0; 7667 *laneCh = 'b'; 7668 } 7669 else if (imm5 & 2) { 7670 *laneNo = (imm5 >> 2) & 7; 7671 *laneSzLg2 = 1; 7672 *laneCh = 'h'; 7673 } 7674 else if (imm5 & 4) { 7675 *laneNo = (imm5 >> 3) & 3; 7676 *laneSzLg2 = 2; 7677 *laneCh = 's'; 7678 } 7679 else if (imm5 & 8) { 7680 *laneNo = (imm5 >> 4) & 1; 7681 *laneSzLg2 = 3; 7682 *laneCh = 'd'; 7683 } 7684 else { 7685 /* invalid */ 7686 return IRTemp_INVALID; 7687 } 7688 7689 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo); 7690 } 7691 7692 7693 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */ 7694 static 7695 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm ) 7696 { 7697 IRType ty = Ity_INVALID; 7698 IRTemp rcS = IRTemp_INVALID; 7699 switch (size) { 7700 case X01: 7701 vassert(imm <= 0xFFFFULL); 7702 ty = Ity_I16; 7703 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm )); 7704 break; 7705 case X10: 7706 vassert(imm <= 0xFFFFFFFFULL); 7707 ty = Ity_I32; 7708 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm )); 7709 break; 7710 case X11: 7711 ty = Ity_I64; 7712 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break; 7713 default: 7714 vassert(0); 7715 } 7716 IRTemp rcV = math_DUP_TO_V128(rcS, ty); 7717 return rcV; 7718 } 7719 7720 7721 /* Let |new64| be a V128 in which only the lower 64 bits are interesting, 7722 and the upper can contain any value -- it is ignored. If |is2| is False, 7723 generate IR to put |new64| in the lower half of vector reg |dd| and zero 7724 the upper half. If |is2| is True, generate IR to put |new64| in the upper 7725 half of vector reg |dd| and leave the lower half unchanged. This 7726 simulates the behaviour of the "foo/foo2" instructions in which the 7727 destination is half the width of sources, for example addhn/addhn2. 7728 */ 7729 static 7730 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 ) 7731 { 7732 if (is2) { 7733 /* Get the old contents of Vdd, zero the upper half, and replace 7734 it with 'x'. */ 7735 IRTemp t_zero_oldLO = newTempV128(); 7736 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd))); 7737 IRTemp t_newHI_zero = newTempV128(); 7738 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64), 7739 mkV128(0x0000))); 7740 IRTemp res = newTempV128(); 7741 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO), 7742 mkexpr(t_newHI_zero))); 7743 putQReg128(dd, mkexpr(res)); 7744 } else { 7745 /* This is simple. */ 7746 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64))); 7747 } 7748 } 7749 7750 7751 /* Compute vector SQABS at lane size |size| for |srcE|, returning 7752 the q result in |*qabs| and the normal result in |*nabs|. 
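   The two results differ only when a lane holds the most negative value:
   for instance with size X00 (8-bit lanes), an input lane of 0x80 yields
   0x80 in nabs (plain negation wraps) but 0x7F in qabs (the saturating
   subtract clamps), and it is that difference which callers can feed into
   the QCFLAG update.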
*/ 7753 static 7754 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs, 7755 IRExpr* srcE, UInt size ) 7756 { 7757 IRTemp src, mask, maskn, nsub, qsub; 7758 src = mask = maskn = nsub = qsub = IRTemp_INVALID; 7759 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs); 7760 assign(src, srcE); 7761 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src))); 7762 assign(maskn, unop(Iop_NotV128, mkexpr(mask))); 7763 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src))); 7764 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src))); 7765 assign(*nabs, binop(Iop_OrV128, 7766 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)), 7767 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn)))); 7768 assign(*qabs, binop(Iop_OrV128, 7769 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)), 7770 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn)))); 7771 } 7772 7773 7774 /* Compute vector SQNEG at lane size |size| for |srcE|, returning 7775 the q result in |*qneg| and the normal result in |*nneg|. */ 7776 static 7777 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg, 7778 IRExpr* srcE, UInt size ) 7779 { 7780 IRTemp src = IRTemp_INVALID; 7781 newTempsV128_3(&src, nneg, qneg); 7782 assign(src, srcE); 7783 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src))); 7784 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src))); 7785 } 7786 7787 7788 /* Zero all except the least significant lane of |srcE|, where |size| 7789 indicates the lane size in the usual way. */ 7790 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE ) 7791 { 7792 vassert(size < 4); 7793 IRTemp t = newTempV128(); 7794 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE)); 7795 return t; 7796 } 7797 7798 7799 /* Generate IR to compute vector widening MULL from either the lower 7800 (is2==False) or upper (is2==True) halves of vecN and vecM. The 7801 widening multiplies are unsigned when isU==True and signed when 7802 isU==False. |size| is the narrow lane size indication. Optionally, 7803 the product may be added to or subtracted from vecD, at the wide lane 7804 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas| 7805 is 'm' (only multiply) then the accumulate part does not happen, and 7806 |vecD| is expected to == IRTemp_INVALID. 7807 7808 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants 7809 are allowed. The result is returned in a new IRTemp, which is 7810 returned in *res. */ 7811 static 7812 void math_MULL_ACC ( /*OUT*/IRTemp* res, 7813 Bool is2, Bool isU, UInt size, HChar mas, 7814 IRTemp vecN, IRTemp vecM, IRTemp vecD ) 7815 { 7816 vassert(res && *res == IRTemp_INVALID); 7817 vassert(size <= 2); 7818 vassert(mas == 'm' || mas == 'a' || mas == 's'); 7819 if (mas == 'm') vassert(vecD == IRTemp_INVALID); 7820 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size); 7821 IROp accOp = (mas == 'a') ? mkVecADD(size+1) 7822 : (mas == 's' ? mkVecSUB(size+1) 7823 : Iop_INVALID); 7824 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp, 7825 mkexpr(vecN), mkexpr(vecM)); 7826 *res = newTempV128(); 7827 assign(*res, mas == 'm' ? mkexpr(mul) 7828 : binop(accOp, mkexpr(vecD), mkexpr(mul))); 7829 } 7830 7831 7832 /* Same as math_MULL_ACC, except the multiply is signed widening, 7833 the multiplied value is then doubled, before being added to or 7834 subtracted from the accumulated value. And everything is 7835 saturated. In all cases, saturation residuals are returned 7836 via (sat1q, sat1n), and in the accumulate cases, 7837 via (sat2q, sat2n) too. 
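   (The sat1 pair captures saturation from the saturating doubling multiply
   itself; the sat2 pair captures saturation from the subsequent saturating
   accumulate.)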
All results are returned in new temporaries. 7838 In the no-accumulate case, *sat2q and *sat2n are never instantiated, 7839 so the caller can tell this has happened. */ 7840 static 7841 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res, 7842 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n, 7843 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n, 7844 Bool is2, UInt size, HChar mas, 7845 IRTemp vecN, IRTemp vecM, IRTemp vecD ) 7846 { 7847 vassert(size <= 2); 7848 vassert(mas == 'm' || mas == 'a' || mas == 's'); 7849 /* Compute 7850 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2 7851 sat1n = vecN.D[is2] *s vecM.d[is2] * 2 7852 IOW take either the low or high halves of vecN and vecM, signed widen, 7853 multiply, double that, and signedly saturate. Also compute the same 7854 but without saturation. 7855 */ 7856 vassert(sat2q && *sat2q == IRTemp_INVALID); 7857 vassert(sat2n && *sat2n == IRTemp_INVALID); 7858 newTempsV128_3(sat1q, sat1n, res); 7859 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size), 7860 mkexpr(vecN), mkexpr(vecM)); 7861 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size), 7862 mkexpr(vecN), mkexpr(vecM)); 7863 assign(*sat1q, mkexpr(tq)); 7864 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn))); 7865 7866 /* If there is no accumulation, the final result is sat1q, 7867 and there's no assignment to sat2q or sat2n. */ 7868 if (mas == 'm') { 7869 assign(*res, mkexpr(*sat1q)); 7870 return; 7871 } 7872 7873 /* Compute 7874 sat2q = vecD +sq/-sq sat1q 7875 sat2n = vecD +/- sat1n 7876 result = sat2q 7877 */ 7878 newTempsV128_2(sat2q, sat2n); 7879 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1), 7880 mkexpr(vecD), mkexpr(*sat1q))); 7881 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1), 7882 mkexpr(vecD), mkexpr(*sat1n))); 7883 assign(*res, mkexpr(*sat2q)); 7884 } 7885 7886 7887 /* Generate IR for widening signed vector multiplies. The operands 7888 have their lane width signedly widened, and they are then multiplied 7889 at the wider width, returning results in two new IRTemps. */ 7890 static 7891 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO, 7892 UInt sizeNarrow, IRTemp argL, IRTemp argR ) 7893 { 7894 vassert(sizeNarrow <= 2); 7895 newTempsV128_2(resHI, resLO); 7896 IRTemp argLhi = newTemp(Ity_I64); 7897 IRTemp argLlo = newTemp(Ity_I64); 7898 IRTemp argRhi = newTemp(Ity_I64); 7899 IRTemp argRlo = newTemp(Ity_I64); 7900 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL))); 7901 assign(argLlo, unop(Iop_V128to64, mkexpr(argL))); 7902 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR))); 7903 assign(argRlo, unop(Iop_V128to64, mkexpr(argR))); 7904 IROp opMulls = mkVecMULLS(sizeNarrow); 7905 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi))); 7906 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo))); 7907 } 7908 7909 7910 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply, 7911 double that, possibly add a rounding constant (R variants), and take 7912 the high half. 
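   Roughly, for 16-bit lanes (size == X01) each result lane is
      (2*n*m + (isR ? 0x8000 : 0)) >> 16
   computed at 32-bit width; the non-saturating variant is rebuilt below
   from MULLS so that callers can compare it against the saturating IROp
   result when updating QCFLAG.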
*/ 7913 static 7914 void math_SQDMULH ( /*OUT*/IRTemp* res, 7915 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n, 7916 Bool isR, UInt size, IRTemp vN, IRTemp vM ) 7917 { 7918 vassert(size == X01 || size == X10); /* s or h only */ 7919 7920 newTempsV128_3(res, sat1q, sat1n); 7921 7922 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID; 7923 math_MULLS(&mullsHI, &mullsLO, size, vN, vM); 7924 7925 IRTemp addWide = mkVecADD(size+1); 7926 7927 if (isR) { 7928 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7929 7930 Int rcShift = size == X01 ? 15 : 31; 7931 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift); 7932 assign(*sat1n, 7933 binop(mkVecCATODDLANES(size), 7934 binop(addWide, 7935 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7936 mkexpr(roundConst)), 7937 binop(addWide, 7938 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)), 7939 mkexpr(roundConst)))); 7940 } else { 7941 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7942 7943 assign(*sat1n, 7944 binop(mkVecCATODDLANES(size), 7945 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7946 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)))); 7947 } 7948 7949 assign(*res, mkexpr(*sat1q)); 7950 } 7951 7952 7953 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in 7954 a new temp in *res, and the Q difference pair in new temps in 7955 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the 7956 three operations it is. */ 7957 static 7958 void math_QSHL_IMM ( /*OUT*/IRTemp* res, 7959 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2, 7960 IRTemp src, UInt size, UInt shift, const HChar* nm ) 7961 { 7962 vassert(size <= 3); 7963 UInt laneBits = 8 << size; 7964 vassert(shift < laneBits); 7965 newTempsV128_3(res, qDiff1, qDiff2); 7966 IRTemp z128 = newTempV128(); 7967 assign(z128, mkV128(0x0000)); 7968 7969 /* UQSHL */ 7970 if (vex_streq(nm, "uqshl")) { 7971 IROp qop = mkVecQSHLNSATUU(size); 7972 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7973 if (shift == 0) { 7974 /* No shift means no saturation. */ 7975 assign(*qDiff1, mkexpr(z128)); 7976 assign(*qDiff2, mkexpr(z128)); 7977 } else { 7978 /* Saturation has occurred if any of the shifted-out bits are 7979 nonzero. We get the shifted-out bits by right-shifting the 7980 original value. */ 7981 UInt rshift = laneBits - shift; 7982 vassert(rshift >= 1 && rshift < laneBits); 7983 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 7984 assign(*qDiff2, mkexpr(z128)); 7985 } 7986 return; 7987 } 7988 7989 /* SQSHL */ 7990 if (vex_streq(nm, "sqshl")) { 7991 IROp qop = mkVecQSHLNSATSS(size); 7992 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7993 if (shift == 0) { 7994 /* No shift means no saturation. */ 7995 assign(*qDiff1, mkexpr(z128)); 7996 assign(*qDiff2, mkexpr(z128)); 7997 } else { 7998 /* Saturation has occurred if any of the shifted-out bits are 7999 different from the top bit of the original value. */ 8000 UInt rshift = laneBits - 1 - shift; 8001 vassert(rshift >= 0 && rshift < laneBits-1); 8002 /* qDiff1 is the shifted out bits, and the top bit of the original 8003 value, preceded by zeroes. */ 8004 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 8005 /* qDiff2 is the top bit of the original value, cloned the 8006 correct number of times. 
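   In other words: sign-extend the top bit across the lane and shift it
   right by the same amount as qDiff1, so the two line up and can be
   compared bit for bit.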
*/ 8007 assign(*qDiff2, binop(mkVecSHRN(size), 8008 binop(mkVecSARN(size), mkexpr(src), 8009 mkU8(laneBits-1)), 8010 mkU8(rshift))); 8011 /* This also succeeds in comparing the top bit of the original 8012 value to itself, which is a bit stupid, but not wrong. */ 8013 } 8014 return; 8015 } 8016 8017 /* SQSHLU */ 8018 if (vex_streq(nm, "sqshlu")) { 8019 IROp qop = mkVecQSHLNSATSU(size); 8020 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 8021 if (shift == 0) { 8022 /* If there's no shift, saturation depends on the top bit 8023 of the source. */ 8024 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1))); 8025 assign(*qDiff2, mkexpr(z128)); 8026 } else { 8027 /* Saturation has occurred if any of the shifted-out bits are 8028 nonzero. We get the shifted-out bits by right-shifting the 8029 original value. */ 8030 UInt rshift = laneBits - shift; 8031 vassert(rshift >= 1 && rshift < laneBits); 8032 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 8033 assign(*qDiff2, mkexpr(z128)); 8034 } 8035 return; 8036 } 8037 8038 vassert(0); 8039 } 8040 8041 8042 /* Generate IR to do SRHADD and URHADD. */ 8043 static 8044 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb ) 8045 { 8046 /* Generate this: 8047 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) 8048 */ 8049 vassert(size <= 3); 8050 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size); 8051 IROp opADD = mkVecADD(size); 8052 /* The only tricky bit is to generate the correct vector 1 constant. */ 8053 const ULong ones64[4] 8054 = { 0x0101010101010101ULL, 0x0001000100010001ULL, 8055 0x0000000100000001ULL, 0x0000000000000001ULL }; 8056 IRTemp imm64 = newTemp(Ity_I64); 8057 assign(imm64, mkU64(ones64[size])); 8058 IRTemp vecOne = newTempV128(); 8059 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64))); 8060 IRTemp scaOne = newTemp(Ity_I8); 8061 assign(scaOne, mkU8(1)); 8062 IRTemp res = newTempV128(); 8063 assign(res, 8064 binop(opADD, 8065 binop(opSHR, mkexpr(aa), mkexpr(scaOne)), 8066 binop(opADD, 8067 binop(opSHR, mkexpr(bb), mkexpr(scaOne)), 8068 binop(opSHR, 8069 binop(opADD, 8070 binop(opADD, 8071 binop(Iop_AndV128, mkexpr(aa), 8072 mkexpr(vecOne)), 8073 binop(Iop_AndV128, mkexpr(bb), 8074 mkexpr(vecOne)) 8075 ), 8076 mkexpr(vecOne) 8077 ), 8078 mkexpr(scaOne) 8079 ) 8080 ) 8081 ) 8082 ); 8083 return res; 8084 } 8085 8086 8087 /* QCFLAG tracks the SIMD sticky saturation status. Update the status 8088 thusly: if, after application of |opZHI| to both |qres| and |nres|, 8089 they have the same value, leave QCFLAG unchanged. Otherwise, set it 8090 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128 8091 operators, or Iop_INVALID, in which case |qres| and |nres| are used 8092 unmodified. The presence |opZHI| means this function can be used to 8093 generate QCFLAG update code for both scalar and vector SIMD operations. 
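   In essence the generated IR amounts to
      QCFLAG |= opZHI(qres ^ nres)
   (with opZHI skipped when it is Iop_INVALID), so QCFLAG becomes, and
   stays, nonzero whenever a saturating result differs from its
   non-saturating counterpart.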
8094 */ 8095 static 8096 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI ) 8097 { 8098 IRTemp diff = newTempV128(); 8099 IRTemp oldQCFLAG = newTempV128(); 8100 IRTemp newQCFLAG = newTempV128(); 8101 if (opZHI == Iop_INVALID) { 8102 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))); 8103 } else { 8104 vassert(opZHI == Iop_ZeroHI64ofV128 8105 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128); 8106 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)))); 8107 } 8108 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128)); 8109 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff))); 8110 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG))); 8111 } 8112 8113 8114 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres| 8115 are used unmodified, hence suitable for QCFLAG updates for whole-vector 8116 operations. */ 8117 static 8118 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres ) 8119 { 8120 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID); 8121 } 8122 8123 8124 /* Generate IR to rearrange two vector values in a way which is useful 8125 for doing S/D add-pair etc operations. There are 3 cases: 8126 8127 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0] 8128 8129 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0] 8130 8131 2s: [m2 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0] 8132 8133 The cases are distinguished as follows: 8134 isD == True, bitQ == 1 => 2d 8135 isD == False, bitQ == 1 => 4s 8136 isD == False, bitQ == 0 => 2s 8137 */ 8138 static 8139 void math_REARRANGE_FOR_FLOATING_PAIRWISE ( 8140 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR, 8141 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ 8142 ) 8143 { 8144 vassert(rearrL && *rearrL == IRTemp_INVALID); 8145 vassert(rearrR && *rearrR == IRTemp_INVALID); 8146 *rearrL = newTempV128(); 8147 *rearrR = newTempV128(); 8148 if (isD) { 8149 // 2d case 8150 vassert(bitQ == 1); 8151 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN))); 8152 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN))); 8153 } 8154 else if (!isD && bitQ == 1) { 8155 // 4s case 8156 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN))); 8157 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN))); 8158 } else { 8159 // 2s case 8160 vassert(!isD && bitQ == 0); 8161 IRTemp m1n1m0n0 = newTempV128(); 8162 IRTemp m0n0m1n1 = newTempV128(); 8163 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4, 8164 mkexpr(vecM), mkexpr(vecN))); 8165 assign(m0n0m1n1, triop(Iop_SliceV128, 8166 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8))); 8167 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0))); 8168 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1))); 8169 } 8170 } 8171 8172 8173 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */ 8174 static Double two_to_the_minus ( Int n ) 8175 { 8176 if (n == 1) return 0.5; 8177 vassert(n >= 2 && n <= 64); 8178 Int half = n / 2; 8179 return two_to_the_minus(half) * two_to_the_minus(n - half); 8180 } 8181 8182 8183 /* Returns 2.0 ^ n for n in 1 .. 
64 */ 8184 static Double two_to_the_plus ( Int n ) 8185 { 8186 if (n == 1) return 2.0; 8187 vassert(n >= 2 && n <= 64); 8188 Int half = n / 2; 8189 return two_to_the_plus(half) * two_to_the_plus(n - half); 8190 } 8191 8192 8193 /*------------------------------------------------------------*/ 8194 /*--- SIMD and FP instructions ---*/ 8195 /*------------------------------------------------------------*/ 8196 8197 static 8198 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn) 8199 { 8200 /* 31 29 23 21 20 15 14 10 9 4 8201 0 q 101110 op2 0 m 0 imm4 0 n d 8202 Decode fields: op2 8203 */ 8204 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8205 if (INSN(31,31) != 0 8206 || INSN(29,24) != BITS6(1,0,1,1,1,0) 8207 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) { 8208 return False; 8209 } 8210 UInt bitQ = INSN(30,30); 8211 UInt op2 = INSN(23,22); 8212 UInt mm = INSN(20,16); 8213 UInt imm4 = INSN(14,11); 8214 UInt nn = INSN(9,5); 8215 UInt dd = INSN(4,0); 8216 8217 if (op2 == BITS2(0,0)) { 8218 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */ 8219 IRTemp sHi = newTempV128(); 8220 IRTemp sLo = newTempV128(); 8221 IRTemp res = newTempV128(); 8222 assign(sHi, getQReg128(mm)); 8223 assign(sLo, getQReg128(nn)); 8224 if (bitQ == 1) { 8225 if (imm4 == 0) { 8226 assign(res, mkexpr(sLo)); 8227 } else { 8228 vassert(imm4 >= 1 && imm4 <= 15); 8229 assign(res, triop(Iop_SliceV128, 8230 mkexpr(sHi), mkexpr(sLo), mkU8(imm4))); 8231 } 8232 putQReg128(dd, mkexpr(res)); 8233 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4); 8234 } else { 8235 if (imm4 >= 8) return False; 8236 if (imm4 == 0) { 8237 assign(res, mkexpr(sLo)); 8238 } else { 8239 vassert(imm4 >= 1 && imm4 <= 7); 8240 IRTemp hi64lo64 = newTempV128(); 8241 assign(hi64lo64, binop(Iop_InterleaveLO64x2, 8242 mkexpr(sHi), mkexpr(sLo))); 8243 assign(res, triop(Iop_SliceV128, 8244 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4))); 8245 } 8246 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 8247 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4); 8248 } 8249 return True; 8250 } 8251 8252 return False; 8253 # undef INSN 8254 } 8255 8256 8257 static 8258 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn) 8259 { 8260 /* 31 29 23 21 20 15 14 12 11 9 4 8261 0 q 001110 op2 0 m 0 len op 00 n d 8262 Decode fields: op2,len,op 8263 */ 8264 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8265 if (INSN(31,31) != 0 8266 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8267 || INSN(21,21) != 0 8268 || INSN(15,15) != 0 8269 || INSN(11,10) != BITS2(0,0)) { 8270 return False; 8271 } 8272 UInt bitQ = INSN(30,30); 8273 UInt op2 = INSN(23,22); 8274 UInt mm = INSN(20,16); 8275 UInt len = INSN(14,13); 8276 UInt bitOP = INSN(12,12); 8277 UInt nn = INSN(9,5); 8278 UInt dd = INSN(4,0); 8279 8280 if (op2 == X00) { 8281 /* -------- 00,xx,0 TBL, xx register table -------- */ 8282 /* -------- 00,xx,1 TBX, xx register table -------- */ 8283 /* 31 28 20 15 14 12 9 4 8284 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8285 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8286 where Ta = 16b(q=1) or 8b(q=0) 8287 */ 8288 Bool isTBX = bitOP == 1; 8289 /* The out-of-range values to use. */ 8290 IRTemp oor_values = newTempV128(); 8291 assign(oor_values, isTBX ? 
getQReg128(dd) : mkV128(0)); 8292 /* src value */ 8293 IRTemp src = newTempV128(); 8294 assign(src, getQReg128(mm)); 8295 /* The table values */ 8296 IRTemp tab[4]; 8297 UInt i; 8298 for (i = 0; i <= len; i++) { 8299 vassert(i < 4); 8300 tab[i] = newTempV128(); 8301 assign(tab[i], getQReg128((nn + i) % 32)); 8302 } 8303 IRTemp res = math_TBL_TBX(tab, len, src, oor_values); 8304 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8305 const HChar* Ta = bitQ ==1 ? "16b" : "8b"; 8306 const HChar* nm = isTBX ? "tbx" : "tbl"; 8307 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n", 8308 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); 8309 return True; 8310 } 8311 8312 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8313 return False; 8314 # undef INSN 8315 } 8316 8317 8318 static 8319 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn) 8320 { 8321 /* 31 29 23 21 20 15 14 11 9 4 8322 0 q 001110 size 0 m 0 opcode 10 n d 8323 Decode fields: opcode 8324 */ 8325 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8326 if (INSN(31,31) != 0 8327 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8328 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) { 8329 return False; 8330 } 8331 UInt bitQ = INSN(30,30); 8332 UInt size = INSN(23,22); 8333 UInt mm = INSN(20,16); 8334 UInt opcode = INSN(14,12); 8335 UInt nn = INSN(9,5); 8336 UInt dd = INSN(4,0); 8337 8338 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) { 8339 /* -------- 001 UZP1 std7_std7_std7 -------- */ 8340 /* -------- 101 UZP2 std7_std7_std7 -------- */ 8341 if (bitQ == 0 && size == X11) return False; // implied 1d case 8342 Bool isUZP1 = opcode == BITS3(0,0,1); 8343 IROp op = isUZP1 ? mkVecCATEVENLANES(size) 8344 : mkVecCATODDLANES(size); 8345 IRTemp preL = newTempV128(); 8346 IRTemp preR = newTempV128(); 8347 IRTemp res = newTempV128(); 8348 if (bitQ == 0) { 8349 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm), 8350 getQReg128(nn))); 8351 assign(preR, mkexpr(preL)); 8352 } else { 8353 assign(preL, getQReg128(mm)); 8354 assign(preR, getQReg128(nn)); 8355 } 8356 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8357 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8358 const HChar* nm = isUZP1 ? "uzp1" : "uzp2"; 8359 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8360 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8361 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8362 return True; 8363 } 8364 8365 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) { 8366 /* -------- 010 TRN1 std7_std7_std7 -------- */ 8367 /* -------- 110 TRN2 std7_std7_std7 -------- */ 8368 if (bitQ == 0 && size == X11) return False; // implied 1d case 8369 Bool isTRN1 = opcode == BITS3(0,1,0); 8370 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size) 8371 : mkVecCATODDLANES(size); 8372 IROp op2 = mkVecINTERLEAVEHI(size); 8373 IRTemp srcM = newTempV128(); 8374 IRTemp srcN = newTempV128(); 8375 IRTemp res = newTempV128(); 8376 assign(srcM, getQReg128(mm)); 8377 assign(srcN, getQReg128(nn)); 8378 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)), 8379 binop(op1, mkexpr(srcN), mkexpr(srcN)))); 8380 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8381 const HChar* nm = isTRN1 ? 
"trn1" : "trn2"; 8382 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8383 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8384 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8385 return True; 8386 } 8387 8388 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) { 8389 /* -------- 011 ZIP1 std7_std7_std7 -------- */ 8390 /* -------- 111 ZIP2 std7_std7_std7 -------- */ 8391 if (bitQ == 0 && size == X11) return False; // implied 1d case 8392 Bool isZIP1 = opcode == BITS3(0,1,1); 8393 IROp op = isZIP1 ? mkVecINTERLEAVELO(size) 8394 : mkVecINTERLEAVEHI(size); 8395 IRTemp preL = newTempV128(); 8396 IRTemp preR = newTempV128(); 8397 IRTemp res = newTempV128(); 8398 if (bitQ == 0 && !isZIP1) { 8399 IRTemp z128 = newTempV128(); 8400 assign(z128, mkV128(0x0000)); 8401 // preL = Vm shifted left 32 bits 8402 // preR = Vn shifted left 32 bits 8403 assign(preL, triop(Iop_SliceV128, 8404 getQReg128(mm), mkexpr(z128), mkU8(12))); 8405 assign(preR, triop(Iop_SliceV128, 8406 getQReg128(nn), mkexpr(z128), mkU8(12))); 8407 8408 } else { 8409 assign(preL, getQReg128(mm)); 8410 assign(preR, getQReg128(nn)); 8411 } 8412 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8413 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8414 const HChar* nm = isZIP1 ? "zip1" : "zip2"; 8415 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8416 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8417 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8418 return True; 8419 } 8420 8421 return False; 8422 # undef INSN 8423 } 8424 8425 8426 static 8427 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn) 8428 { 8429 /* 31 28 23 21 16 11 9 4 8430 0 q u 01110 size 11000 opcode 10 n d 8431 Decode fields: u,size,opcode 8432 */ 8433 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8434 if (INSN(31,31) != 0 8435 || INSN(28,24) != BITS5(0,1,1,1,0) 8436 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) { 8437 return False; 8438 } 8439 UInt bitQ = INSN(30,30); 8440 UInt bitU = INSN(29,29); 8441 UInt size = INSN(23,22); 8442 UInt opcode = INSN(16,12); 8443 UInt nn = INSN(9,5); 8444 UInt dd = INSN(4,0); 8445 8446 if (opcode == BITS5(0,0,0,1,1)) { 8447 /* -------- 0,xx,00011 SADDLV -------- */ 8448 /* -------- 1,xx,00011 UADDLV -------- */ 8449 /* size is the narrow size */ 8450 if (size == X11 || (size == X10 && bitQ == 0)) return False; 8451 Bool isU = bitU == 1; 8452 IRTemp src = newTempV128(); 8453 assign(src, getQReg128(nn)); 8454 /* The basic plan is to widen the lower half, and if Q = 1, 8455 the upper half too. Add them together (if Q = 1), and in 8456 either case fold with add at twice the lane width. 8457 */ 8458 IRExpr* widened 8459 = mkexpr(math_WIDEN_LO_OR_HI_LANES( 8460 isU, False/*!fromUpperHalf*/, size, mkexpr(src))); 8461 if (bitQ == 1) { 8462 widened 8463 = binop(mkVecADD(size+1), 8464 widened, 8465 mkexpr(math_WIDEN_LO_OR_HI_LANES( 8466 isU, True/*fromUpperHalf*/, size, mkexpr(src))) 8467 ); 8468 } 8469 /* Now fold. */ 8470 IRTemp tWi = newTempV128(); 8471 assign(tWi, widened); 8472 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1)); 8473 putQReg128(dd, mkexpr(res)); 8474 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8475 const HChar ch = "bhsd"[size]; 8476 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv", 8477 nameQReg128(dd), ch, nameQReg128(nn), arr); 8478 return True; 8479 } 8480 8481 UInt ix = 0; 8482 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; } 8483 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 
3 : 4; } 8484 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; } 8485 /**/ 8486 if (ix != 0) { 8487 /* -------- 0,xx,01010: SMAXV -------- (1) */ 8488 /* -------- 1,xx,01010: UMAXV -------- (2) */ 8489 /* -------- 0,xx,11010: SMINV -------- (3) */ 8490 /* -------- 1,xx,11010: UMINV -------- (4) */ 8491 /* -------- 0,xx,11011: ADDV -------- (5) */ 8492 vassert(ix >= 1 && ix <= 5); 8493 if (size == X11) return False; // 1d,2d cases not allowed 8494 if (size == X10 && bitQ == 0) return False; // 2s case not allowed 8495 const IROp opMAXS[3] 8496 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; 8497 const IROp opMAXU[3] 8498 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; 8499 const IROp opMINS[3] 8500 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; 8501 const IROp opMINU[3] 8502 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 }; 8503 const IROp opADD[3] 8504 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 }; 8505 vassert(size < 3); 8506 IROp op = Iop_INVALID; 8507 const HChar* nm = NULL; 8508 switch (ix) { 8509 case 1: op = opMAXS[size]; nm = "smaxv"; break; 8510 case 2: op = opMAXU[size]; nm = "umaxv"; break; 8511 case 3: op = opMINS[size]; nm = "sminv"; break; 8512 case 4: op = opMINU[size]; nm = "uminv"; break; 8513 case 5: op = opADD[size]; nm = "addv"; break; 8514 default: vassert(0); 8515 } 8516 vassert(op != Iop_INVALID && nm != NULL); 8517 IRTemp tN1 = newTempV128(); 8518 assign(tN1, getQReg128(nn)); 8519 /* If Q == 0, we're just folding lanes in the lower half of 8520 the value. In which case, copy the lower half of the 8521 source into the upper half, so we can then treat it the 8522 same as the full width case. Except for the addition case, 8523 in which we have to zero out the upper half. */ 8524 IRTemp tN2 = newTempV128(); 8525 assign(tN2, bitQ == 0 8526 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1)) 8527 : mk_CatEvenLanes64x2(tN1,tN1)) 8528 : mkexpr(tN1)); 8529 IRTemp res = math_FOLDV(tN2, op); 8530 if (res == IRTemp_INVALID) 8531 return False; /* means math_FOLDV 8532 doesn't handle this case yet */ 8533 putQReg128(dd, mkexpr(res)); 8534 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 }; 8535 IRType laneTy = tys[size]; 8536 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8537 DIP("%s %s, %s.%s\n", nm, 8538 nameQRegLO(dd, laneTy), nameQReg128(nn), arr); 8539 return True; 8540 } 8541 8542 if ((size == X00 || size == X10) 8543 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) { 8544 /* -------- 0,00,01100: FMAXNMV s_4s -------- */ 8545 /* -------- 0,10,01100: FMINNMV s_4s -------- */ 8546 /* -------- 1,00,01111: FMAXV s_4s -------- */ 8547 /* -------- 1,10,01111: FMINV s_4s -------- */ 8548 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 8549 if (bitQ == 0) return False; // Only 4s is allowed 8550 Bool isMIN = (size & 2) == 2; 8551 Bool isNM = opcode == BITS5(0,1,1,0,0); 8552 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2); 8553 IRTemp src = newTempV128(); 8554 assign(src, getQReg128(nn)); 8555 IRTemp res = math_FOLDV(src, opMXX); 8556 putQReg128(dd, mkexpr(res)); 8557 DIP("%s%sv s%u, %u.4s\n", 8558 isMIN ? "fmin" : "fmax", isNM ?
"nm" : "", dd, nn); 8559 return True; 8560 } 8561 8562 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8563 return False; 8564 # undef INSN 8565 } 8566 8567 8568 static 8569 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn) 8570 { 8571 /* 31 28 20 15 14 10 9 4 8572 0 q op 01110000 imm5 0 imm4 1 n d 8573 Decode fields: q,op,imm4 8574 */ 8575 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8576 if (INSN(31,31) != 0 8577 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0) 8578 || INSN(15,15) != 0 || INSN(10,10) != 1) { 8579 return False; 8580 } 8581 UInt bitQ = INSN(30,30); 8582 UInt bitOP = INSN(29,29); 8583 UInt imm5 = INSN(20,16); 8584 UInt imm4 = INSN(14,11); 8585 UInt nn = INSN(9,5); 8586 UInt dd = INSN(4,0); 8587 8588 /* -------- x,0,0000: DUP (element, vector) -------- */ 8589 /* 31 28 20 15 9 4 8590 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index] 8591 */ 8592 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) { 8593 UInt laneNo = 0; 8594 UInt laneSzLg2 = 0; 8595 HChar laneCh = '?'; 8596 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh, 8597 getQReg128(nn), imm5); 8598 if (res == IRTemp_INVALID) 8599 return False; 8600 if (bitQ == 0 && laneSzLg2 == X11) 8601 return False; /* .1d case */ 8602 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8603 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2); 8604 DIP("dup %s.%s, %s.%c[%u]\n", 8605 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo); 8606 return True; 8607 } 8608 8609 /* -------- x,0,0001: DUP (general, vector) -------- */ 8610 /* 31 28 20 15 9 4 8611 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn 8612 Q=0 writes 64, Q=1 writes 128 8613 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W 8614 xxx10 4H(q=0) or 8H(q=1), R=W 8615 xx100 2S(q=0) or 4S(q=1), R=W 8616 x1000 Invalid(q=0) or 2D(q=1), R=X 8617 x0000 Invalid(q=0) or Invalid(q=1) 8618 Require op=0, imm4=0001 8619 */ 8620 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) { 8621 Bool isQ = bitQ == 1; 8622 IRTemp w0 = newTemp(Ity_I64); 8623 const HChar* arT = "??"; 8624 IRType laneTy = Ity_INVALID; 8625 if (imm5 & 1) { 8626 arT = isQ ? "16b" : "8b"; 8627 laneTy = Ity_I8; 8628 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn)))); 8629 } 8630 else if (imm5 & 2) { 8631 arT = isQ ? "8h" : "4h"; 8632 laneTy = Ity_I16; 8633 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn)))); 8634 } 8635 else if (imm5 & 4) { 8636 arT = isQ ? "4s" : "2s"; 8637 laneTy = Ity_I32; 8638 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn)))); 8639 } 8640 else if ((imm5 & 8) && isQ) { 8641 arT = "2d"; 8642 laneTy = Ity_I64; 8643 assign(w0, getIReg64orZR(nn)); 8644 } 8645 else { 8646 /* invalid; leave laneTy unchanged. */ 8647 } 8648 /* */ 8649 if (laneTy != Ity_INVALID) { 8650 IRTemp w1 = math_DUP_TO_64(w0, laneTy); 8651 putQReg128(dd, binop(Iop_64HLtoV128, 8652 isQ ? 
mkexpr(w1) : mkU64(0), mkexpr(w1))); 8653 DIP("dup %s.%s, %s\n", 8654 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn)); 8655 return True; 8656 } 8657 /* invalid */ 8658 return False; 8659 } 8660 8661 /* -------- 1,0,0011: INS (general) -------- */ 8662 /* 31 28 20 15 9 4 8663 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn 8664 where Ts,ix = case imm5 of xxxx1 -> B, xxxx 8665 xxx10 -> H, xxx 8666 xx100 -> S, xx 8667 x1000 -> D, x 8668 */ 8669 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) { 8670 HChar ts = '?'; 8671 UInt laneNo = 16; 8672 IRExpr* src = NULL; 8673 if (imm5 & 1) { 8674 src = unop(Iop_64to8, getIReg64orZR(nn)); 8675 laneNo = (imm5 >> 1) & 15; 8676 ts = 'b'; 8677 } 8678 else if (imm5 & 2) { 8679 src = unop(Iop_64to16, getIReg64orZR(nn)); 8680 laneNo = (imm5 >> 2) & 7; 8681 ts = 'h'; 8682 } 8683 else if (imm5 & 4) { 8684 src = unop(Iop_64to32, getIReg64orZR(nn)); 8685 laneNo = (imm5 >> 3) & 3; 8686 ts = 's'; 8687 } 8688 else if (imm5 & 8) { 8689 src = getIReg64orZR(nn); 8690 laneNo = (imm5 >> 4) & 1; 8691 ts = 'd'; 8692 } 8693 /* */ 8694 if (src) { 8695 vassert(laneNo < 16); 8696 putQRegLane(dd, laneNo, src); 8697 DIP("ins %s.%c[%u], %s\n", 8698 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn)); 8699 return True; 8700 } 8701 /* invalid */ 8702 return False; 8703 } 8704 8705 /* -------- x,0,0101: SMOV -------- */ 8706 /* -------- x,0,0111: UMOV -------- */ 8707 /* 31 28 20 15 9 4 8708 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index] 8709 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index] 8710 dest is Xd when q==1, Wd when q==0 8711 UMOV: 8712 Ts,index,ops = case q:imm5 of 8713 0:xxxx1 -> B, xxxx, 8Uto64 8714 1:xxxx1 -> invalid 8715 0:xxx10 -> H, xxx, 16Uto64 8716 1:xxx10 -> invalid 8717 0:xx100 -> S, xx, 32Uto64 8718 1:xx100 -> invalid 8719 1:x1000 -> D, x, copy64 8720 other -> invalid 8721 SMOV: 8722 Ts,index,ops = case q:imm5 of 8723 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32) 8724 1:xxxx1 -> B, xxxx, 8Sto64 8725 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32) 8726 1:xxx10 -> H, xxx, 16Sto64 8727 0:xx100 -> invalid 8728 1:xx100 -> S, xx, 32Sto64 8729 1:x1000 -> invalid 8730 other -> invalid 8731 */ 8732 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) { 8733 Bool isU = (imm4 & 2) == 2; 8734 const HChar* arTs = "??"; 8735 UInt laneNo = 16; /* invalid */ 8736 // Setting 'res' to non-NULL determines valid/invalid 8737 IRExpr* res = NULL; 8738 if (!bitQ && (imm5 & 1)) { // 0:xxxx1 8739 laneNo = (imm5 >> 1) & 15; 8740 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 8741 res = isU ? unop(Iop_8Uto64, lane) 8742 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane)); 8743 arTs = "b"; 8744 } 8745 else if (bitQ && (imm5 & 1)) { // 1:xxxx1 8746 laneNo = (imm5 >> 1) & 15; 8747 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8); 8748 res = isU ? NULL 8749 : unop(Iop_8Sto64, lane); 8750 arTs = "b"; 8751 } 8752 else if (!bitQ && (imm5 & 2)) { // 0:xxx10 8753 laneNo = (imm5 >> 2) & 7; 8754 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 8755 res = isU ? unop(Iop_16Uto64, lane) 8756 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane)); 8757 arTs = "h"; 8758 } 8759 else if (bitQ && (imm5 & 2)) { // 1:xxx10 8760 laneNo = (imm5 >> 2) & 7; 8761 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16); 8762 res = isU ? NULL 8763 : unop(Iop_16Sto64, lane); 8764 arTs = "h"; 8765 } 8766 else if (!bitQ && (imm5 & 4)) { // 0:xx100 8767 laneNo = (imm5 >> 3) & 3; 8768 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 8769 res = isU ? 
unop(Iop_32Uto64, lane) 8770 : NULL; 8771 arTs = "s"; 8772 } 8773 else if (bitQ && (imm5 & 4)) { // 1:xxx10 8774 laneNo = (imm5 >> 3) & 3; 8775 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32); 8776 res = isU ? NULL 8777 : unop(Iop_32Sto64, lane); 8778 arTs = "s"; 8779 } 8780 else if (bitQ && (imm5 & 8)) { // 1:x1000 8781 laneNo = (imm5 >> 4) & 1; 8782 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64); 8783 res = isU ? lane 8784 : NULL; 8785 arTs = "d"; 8786 } 8787 /* */ 8788 if (res) { 8789 vassert(laneNo < 16); 8790 putIReg64orZR(dd, res); 8791 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's', 8792 nameIRegOrZR(bitQ == 1, dd), 8793 nameQReg128(nn), arTs, laneNo); 8794 return True; 8795 } 8796 /* invalid */ 8797 return False; 8798 } 8799 8800 /* -------- 1,1,xxxx: INS (element) -------- */ 8801 /* 31 28 20 14 9 4 8802 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2] 8803 where Ts,ix1,ix2 8804 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0] 8805 xxx10 -> H, xxx, imm4[3:1] 8806 xx100 -> S, xx, imm4[3:2] 8807 x1000 -> D, x, imm4[3:3] 8808 */ 8809 if (bitQ == 1 && bitOP == 1) { 8810 HChar ts = '?'; 8811 IRType ity = Ity_INVALID; 8812 UInt ix1 = 16; 8813 UInt ix2 = 16; 8814 if (imm5 & 1) { 8815 ts = 'b'; 8816 ity = Ity_I8; 8817 ix1 = (imm5 >> 1) & 15; 8818 ix2 = (imm4 >> 0) & 15; 8819 } 8820 else if (imm5 & 2) { 8821 ts = 'h'; 8822 ity = Ity_I16; 8823 ix1 = (imm5 >> 2) & 7; 8824 ix2 = (imm4 >> 1) & 7; 8825 } 8826 else if (imm5 & 4) { 8827 ts = 's'; 8828 ity = Ity_I32; 8829 ix1 = (imm5 >> 3) & 3; 8830 ix2 = (imm4 >> 2) & 3; 8831 } 8832 else if (imm5 & 8) { 8833 ts = 'd'; 8834 ity = Ity_I64; 8835 ix1 = (imm5 >> 4) & 1; 8836 ix2 = (imm4 >> 3) & 1; 8837 } 8838 /* */ 8839 if (ity != Ity_INVALID) { 8840 vassert(ix1 < 16); 8841 vassert(ix2 < 16); 8842 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity)); 8843 DIP("ins %s.%c[%u], %s.%c[%u]\n", 8844 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2); 8845 return True; 8846 } 8847 /* invalid */ 8848 return False; 8849 } 8850 8851 return False; 8852 # undef INSN 8853 } 8854 8855 8856 static 8857 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn) 8858 { 8859 /* 31 28 18 15 11 9 4 8860 0q op 01111 00000 abc cmode 01 defgh d 8861 Decode fields: q,op,cmode 8862 Bit 11 is really "o2", but it is always zero. 
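      abc (bits 18:16) and defgh (bits 9:5) together form the 8-bit
      immediate abcdefgh, which AdvSIMDExpandImm expands to a 64-bit
      pattern under the control of op:cmode.  The cases below then
      apply that pattern to the destination as a move, inverted move,
      ORR or BIC, replicated to both 64-bit halves when Q == 1 (the
      upper half is zeroed when Q == 0).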
8863 */ 8864 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8865 if (INSN(31,31) != 0 8866 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0) 8867 || INSN(11,10) != BITS2(0,1)) { 8868 return False; 8869 } 8870 UInt bitQ = INSN(30,30); 8871 UInt bitOP = INSN(29,29); 8872 UInt cmode = INSN(15,12); 8873 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5); 8874 UInt dd = INSN(4,0); 8875 8876 ULong imm64lo = 0; 8877 UInt op_cmode = (bitOP << 4) | cmode; 8878 Bool ok = False; 8879 Bool isORR = False; 8880 Bool isBIC = False; 8881 Bool isMOV = False; 8882 Bool isMVN = False; 8883 Bool isFMOV = False; 8884 switch (op_cmode) { 8885 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */ 8886 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */ 8887 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */ 8888 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */ 8889 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0): 8890 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0 8891 ok = True; isMOV = True; break; 8892 8893 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */ 8894 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */ 8895 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */ 8896 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */ 8897 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1): 8898 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1 8899 ok = True; isORR = True; break; 8900 8901 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */ 8902 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */ 8903 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0 8904 ok = True; isMOV = True; break; 8905 8906 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */ 8907 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */ 8908 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1 8909 ok = True; isORR = True; break; 8910 8911 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */ 8912 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */ 8913 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x 8914 ok = True; isMOV = True; break; 8915 8916 /* -------- x,0,1110 MOVI 8-bit -------- */ 8917 case BITS5(0,1,1,1,0): 8918 ok = True; isMOV = True; break; 8919 8920 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */ 8921 case BITS5(0,1,1,1,1): // 0:1111 8922 ok = True; isFMOV = True; break; 8923 8924 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */ 8925 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */ 8926 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */ 8927 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */ 8928 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0): 8929 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0 8930 ok = True; isMVN = True; break; 8931 8932 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */ 8933 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */ 8934 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */ 8935 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */ 8936 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1): 8937 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1 8938 ok = True; isBIC = True; break; 8939 8940 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */ 8941 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */ 8942 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0 8943 ok = True; isMVN = True; break; 8944 8945 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- 
*/ 8946 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */ 8947 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1 8948 ok = True; isBIC = True; break; 8949 8950 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */ 8951 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */ 8952 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x 8953 ok = True; isMVN = True; break; 8954 8955 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */ 8956 /* -------- 1,1,1110 MOVI 64-bit vector -------- */ 8957 case BITS5(1,1,1,1,0): 8958 ok = True; isMOV = True; break; 8959 8960 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */ 8961 case BITS5(1,1,1,1,1): // 1:1111 8962 ok = bitQ == 1; isFMOV = True; break; 8963 8964 default: 8965 break; 8966 } 8967 if (ok) { 8968 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0) 8969 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0)); 8970 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh); 8971 } 8972 if (ok) { 8973 if (isORR || isBIC) { 8974 ULong inv 8975 = isORR ? 0ULL : ~0ULL; 8976 IRExpr* immV128 8977 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo)); 8978 IRExpr* res 8979 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128); 8980 const HChar* nm = isORR ? "orr" : "bic"; 8981 if (bitQ == 0) { 8982 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res)); 8983 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo); 8984 } else { 8985 putQReg128(dd, res); 8986 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm, 8987 nameQReg128(dd), imm64lo, imm64lo); 8988 } 8989 } 8990 else if (isMOV || isMVN || isFMOV) { 8991 if (isMVN) imm64lo = ~imm64lo; 8992 ULong imm64hi = bitQ == 0 ? 0 : imm64lo; 8993 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi), 8994 mkU64(imm64lo)); 8995 putQReg128(dd, immV128); 8996 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo); 8997 } 8998 return True; 8999 } 9000 /* else fall through */ 9001 9002 return False; 9003 # undef INSN 9004 } 9005 9006 9007 static 9008 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn) 9009 { 9010 /* 31 28 20 15 14 10 9 4 9011 01 op 11110000 imm5 0 imm4 1 n d 9012 Decode fields: op,imm4 9013 */ 9014 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9015 if (INSN(31,30) != BITS2(0,1) 9016 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0) 9017 || INSN(15,15) != 0 || INSN(10,10) != 1) { 9018 return False; 9019 } 9020 UInt bitOP = INSN(29,29); 9021 UInt imm5 = INSN(20,16); 9022 UInt imm4 = INSN(14,11); 9023 UInt nn = INSN(9,5); 9024 UInt dd = INSN(4,0); 9025 9026 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) { 9027 /* -------- 0,0000 DUP (element, scalar) -------- */ 9028 IRTemp w0 = newTemp(Ity_I64); 9029 const HChar* arTs = "??"; 9030 IRType laneTy = Ity_INVALID; 9031 UInt laneNo = 16; /* invalid */ 9032 if (imm5 & 1) { 9033 arTs = "b"; 9034 laneNo = (imm5 >> 1) & 15; 9035 laneTy = Ity_I8; 9036 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy))); 9037 } 9038 else if (imm5 & 2) { 9039 arTs = "h"; 9040 laneNo = (imm5 >> 2) & 7; 9041 laneTy = Ity_I16; 9042 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy))); 9043 } 9044 else if (imm5 & 4) { 9045 arTs = "s"; 9046 laneNo = (imm5 >> 3) & 3; 9047 laneTy = Ity_I32; 9048 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy))); 9049 } 9050 else if (imm5 & 8) { 9051 arTs = "d"; 9052 laneNo = (imm5 >> 4) & 1; 9053 laneTy = Ity_I64; 9054 assign(w0, getQRegLane(nn, laneNo, laneTy)); 9055 } 9056 else { 9057 /* invalid; leave laneTy unchanged. 
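            That can only happen when imm5<3:0> is 0000, which encodes
            no defined lane size.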
*/ 9058 } 9059 /* */ 9060 if (laneTy != Ity_INVALID) { 9061 vassert(laneNo < 16); 9062 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0))); 9063 DIP("dup %s, %s.%s[%u]\n", 9064 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo); 9065 return True; 9066 } 9067 /* else fall through */ 9068 } 9069 9070 return False; 9071 # undef INSN 9072 } 9073 9074 9075 static 9076 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn) 9077 { 9078 /* 31 28 23 21 16 11 9 4 9079 01 u 11110 sz 11000 opcode 10 n d 9080 Decode fields: u,sz,opcode 9081 */ 9082 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9083 if (INSN(31,30) != BITS2(0,1) 9084 || INSN(28,24) != BITS5(1,1,1,1,0) 9085 || INSN(21,17) != BITS5(1,1,0,0,0) 9086 || INSN(11,10) != BITS2(1,0)) { 9087 return False; 9088 } 9089 UInt bitU = INSN(29,29); 9090 UInt sz = INSN(23,22); 9091 UInt opcode = INSN(16,12); 9092 UInt nn = INSN(9,5); 9093 UInt dd = INSN(4,0); 9094 9095 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) { 9096 /* -------- 0,11,11011 ADDP d_2d -------- */ 9097 IRTemp xy = newTempV128(); 9098 IRTemp xx = newTempV128(); 9099 assign(xy, getQReg128(nn)); 9100 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy))); 9101 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9102 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx)))); 9103 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn)); 9104 return True; 9105 } 9106 9107 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) { 9108 /* -------- 1,00,01101 ADDP s_2s -------- */ 9109 /* -------- 1,01,01101 ADDP d_2d -------- */ 9110 Bool isD = sz == X01; 9111 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2); 9112 IROp opADD = mkVecADDF(isD ? 3 : 2); 9113 IRTemp src = newTempV128(); 9114 IRTemp argL = newTempV128(); 9115 IRTemp argR = newTempV128(); 9116 assign(src, getQReg128(nn)); 9117 assign(argL, unop(opZHI, mkexpr(src))); 9118 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src), 9119 mkU8(isD ? 8 : 4)))); 9120 putQReg128(dd, unop(opZHI, 9121 triop(opADD, mkexpr(mk_get_IR_rounding_mode()), 9122 mkexpr(argL), mkexpr(argR)))); 9123 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn); 9124 return True; 9125 } 9126 9127 if (bitU == 1 9128 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) { 9129 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */ 9130 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */ 9131 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */ 9132 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */ 9133 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 9134 Bool isD = (sz & 1) == 1; 9135 Bool isMIN = (sz & 2) == 2; 9136 Bool isNM = opcode == BITS5(0,1,1,0,0); 9137 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2); 9138 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2); 9139 IRTemp src = newTempV128(); 9140 IRTemp argL = newTempV128(); 9141 IRTemp argR = newTempV128(); 9142 assign(src, getQReg128(nn)); 9143 assign(argL, unop(opZHI, mkexpr(src))); 9144 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src), 9145 mkU8(isD ? 8 : 4)))); 9146 putQReg128(dd, unop(opZHI, 9147 binop(opMXX, mkexpr(argL), mkexpr(argR)))); 9148 HChar c = isD ? 'd' : 's'; 9149 DIP("%s%sp %c%u, v%u.2%c\n", 9150 isMIN ? "fmin" : "fmax", isNM ? 
"nm" : "", c, dd, nn, c); 9151 return True; 9152 } 9153 9154 return False; 9155 # undef INSN 9156 } 9157 9158 9159 static 9160 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn) 9161 { 9162 /* 31 28 22 18 15 10 9 4 9163 01 u 111110 immh immb opcode 1 n d 9164 Decode fields: u,immh,opcode 9165 */ 9166 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9167 if (INSN(31,30) != BITS2(0,1) 9168 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) { 9169 return False; 9170 } 9171 UInt bitU = INSN(29,29); 9172 UInt immh = INSN(22,19); 9173 UInt immb = INSN(18,16); 9174 UInt opcode = INSN(15,11); 9175 UInt nn = INSN(9,5); 9176 UInt dd = INSN(4,0); 9177 UInt immhb = (immh << 3) | immb; 9178 9179 if ((immh & 8) == 8 9180 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) { 9181 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */ 9182 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */ 9183 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */ 9184 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */ 9185 Bool isU = bitU == 1; 9186 Bool isAcc = opcode == BITS5(0,0,0,1,0); 9187 UInt sh = 128 - immhb; 9188 vassert(sh >= 1 && sh <= 64); 9189 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2; 9190 IRExpr* src = getQReg128(nn); 9191 IRTemp shf = newTempV128(); 9192 IRTemp res = newTempV128(); 9193 if (sh == 64 && isU) { 9194 assign(shf, mkV128(0x0000)); 9195 } else { 9196 UInt nudge = 0; 9197 if (sh == 64) { 9198 vassert(!isU); 9199 nudge = 1; 9200 } 9201 assign(shf, binop(op, src, mkU8(sh - nudge))); 9202 } 9203 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf)) 9204 : mkexpr(shf)); 9205 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9206 const HChar* nm = isAcc ? (isU ? "usra" : "ssra") 9207 : (isU ? "ushr" : "sshr"); 9208 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh); 9209 return True; 9210 } 9211 9212 if ((immh & 8) == 8 9213 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) { 9214 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */ 9215 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */ 9216 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */ 9217 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */ 9218 Bool isU = bitU == 1; 9219 Bool isAcc = opcode == BITS5(0,0,1,1,0); 9220 UInt sh = 128 - immhb; 9221 vassert(sh >= 1 && sh <= 64); 9222 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2; 9223 vassert(sh >= 1 && sh <= 64); 9224 IRExpr* src = getQReg128(nn); 9225 IRTemp imm8 = newTemp(Ity_I8); 9226 assign(imm8, mkU8((UChar)(-sh))); 9227 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8)); 9228 IRTemp shf = newTempV128(); 9229 IRTemp res = newTempV128(); 9230 assign(shf, binop(op, src, amt)); 9231 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf)) 9232 : mkexpr(shf)); 9233 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9234 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra") 9235 : (isU ? "urshr" : "srshr"); 9236 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh); 9237 return True; 9238 } 9239 9240 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) { 9241 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */ 9242 UInt sh = 128 - immhb; 9243 vassert(sh >= 1 && sh <= 64); 9244 if (sh == 64) { 9245 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd))); 9246 } else { 9247 /* sh is in range 1 .. 
63 */ 9248 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1)); 9249 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask)); 9250 IRTemp res = newTempV128(); 9251 assign(res, binop(Iop_OrV128, 9252 binop(Iop_AndV128, getQReg128(dd), nmaskV), 9253 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh)))); 9254 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9255 } 9256 DIP("sri d%u, d%u, #%u\n", dd, nn, sh); 9257 return True; 9258 } 9259 9260 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) { 9261 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */ 9262 UInt sh = immhb - 64; 9263 vassert(sh >= 0 && sh < 64); 9264 putQReg128(dd, 9265 unop(Iop_ZeroHI64ofV128, 9266 sh == 0 ? getQReg128(nn) 9267 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh)))); 9268 DIP("shl d%u, d%u, #%u\n", dd, nn, sh); 9269 return True; 9270 } 9271 9272 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) { 9273 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */ 9274 UInt sh = immhb - 64; 9275 vassert(sh >= 0 && sh < 64); 9276 if (sh == 0) { 9277 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn))); 9278 } else { 9279 /* sh is in range 1 .. 63 */ 9280 ULong nmask = (1ULL << sh) - 1; 9281 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask)); 9282 IRTemp res = newTempV128(); 9283 assign(res, binop(Iop_OrV128, 9284 binop(Iop_AndV128, getQReg128(dd), nmaskV), 9285 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh)))); 9286 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9287 } 9288 DIP("sli d%u, d%u, #%u\n", dd, nn, sh); 9289 return True; 9290 } 9291 9292 if (opcode == BITS5(0,1,1,1,0) 9293 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) { 9294 /* -------- 0,01110 SQSHL #imm -------- */ 9295 /* -------- 1,01110 UQSHL #imm -------- */ 9296 /* -------- 1,01100 SQSHLU #imm -------- */ 9297 UInt size = 0; 9298 UInt shift = 0; 9299 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 9300 if (!ok) return False; 9301 vassert(size >= 0 && size <= 3); 9302 /* The shift encoding has opposite sign for the leftwards case. 9303 Adjust shift to compensate. */ 9304 UInt lanebits = 8 << size; 9305 shift = lanebits - shift; 9306 vassert(shift >= 0 && shift < lanebits); 9307 const HChar* nm = NULL; 9308 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl"; 9309 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl"; 9310 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu"; 9311 else vassert(0); 9312 IRTemp qDiff1 = IRTemp_INVALID; 9313 IRTemp qDiff2 = IRTemp_INVALID; 9314 IRTemp res = IRTemp_INVALID; 9315 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn)); 9316 /* This relies on the fact that the zeroed out lanes generate zeroed 9317 result lanes and don't saturate, so there's no point in trimming 9318 the resulting res, qDiff1 or qDiff2 values. 
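         For example, a lane that is zero on input is still zero after
         the shift, and its saturated and unsaturated results agree, so
         it contributes nothing to the QC comparison made by
         updateQCFLAGwithDifference below.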
*/ 9319 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm); 9320 putQReg128(dd, mkexpr(res)); 9321 updateQCFLAGwithDifference(qDiff1, qDiff2); 9322 const HChar arr = "bhsd"[size]; 9323 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift); 9324 return True; 9325 } 9326 9327 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1) 9328 || (bitU == 1 9329 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) { 9330 /* -------- 0,10010 SQSHRN #imm -------- */ 9331 /* -------- 1,10010 UQSHRN #imm -------- */ 9332 /* -------- 0,10011 SQRSHRN #imm -------- */ 9333 /* -------- 1,10011 UQRSHRN #imm -------- */ 9334 /* -------- 1,10000 SQSHRUN #imm -------- */ 9335 /* -------- 1,10001 SQRSHRUN #imm -------- */ 9336 UInt size = 0; 9337 UInt shift = 0; 9338 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 9339 if (!ok || size == X11) return False; 9340 vassert(size >= X00 && size <= X10); 9341 vassert(shift >= 1 && shift <= (8 << size)); 9342 const HChar* nm = "??"; 9343 IROp op = Iop_INVALID; 9344 /* Decide on the name and the operation. */ 9345 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 9346 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size); 9347 } 9348 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 9349 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size); 9350 } 9351 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) { 9352 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size); 9353 } 9354 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 9355 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size); 9356 } 9357 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) { 9358 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size); 9359 } 9360 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) { 9361 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size); 9362 } 9363 else vassert(0); 9364 /* Compute the result (Q, shifted value) pair. */ 9365 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn)); 9366 IRTemp pair = newTempV128(); 9367 assign(pair, binop(op, mkexpr(src128), mkU8(shift))); 9368 /* Update the result reg */ 9369 IRTemp res64in128 = newTempV128(); 9370 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair))); 9371 putQReg128(dd, mkexpr(res64in128)); 9372 /* Update the Q flag. */ 9373 IRTemp q64q64 = newTempV128(); 9374 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair))); 9375 IRTemp z128 = newTempV128(); 9376 assign(z128, mkV128(0x0000)); 9377 updateQCFLAGwithDifference(q64q64, z128); 9378 /* */ 9379 const HChar arrNarrow = "bhsd"[size]; 9380 const HChar arrWide = "bhsd"[size+1]; 9381 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift); 9382 return True; 9383 } 9384 9385 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) { 9386 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */ 9387 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */ 9388 UInt size = 0; 9389 UInt fbits = 0; 9390 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 9391 /* The following holds because immh is never zero. */ 9392 vassert(ok); 9393 /* The following holds because immh >= 0100. */ 9394 vassert(size == X10 || size == X11); 9395 Bool isD = size == X11; 9396 Bool isU = bitU == 1; 9397 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 9398 Double scale = two_to_the_minus(fbits); 9399 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 9400 : IRExpr_Const(IRConst_F32( (Float)scale )); 9401 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 9402 IROp opCVT = isU ? (isD ? 
Iop_I64UtoF64 : Iop_I32UtoF32) 9403 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 9404 IRType tyF = isD ? Ity_F64 : Ity_F32; 9405 IRType tyI = isD ? Ity_I64 : Ity_I32; 9406 IRTemp src = newTemp(tyI); 9407 IRTemp res = newTemp(tyF); 9408 IRTemp rm = mk_get_IR_rounding_mode(); 9409 assign(src, getQRegLane(nn, 0, tyI)); 9410 assign(res, triop(opMUL, mkexpr(rm), 9411 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE)); 9412 putQRegLane(dd, 0, mkexpr(res)); 9413 if (!isD) { 9414 putQRegLane(dd, 1, mkU32(0)); 9415 } 9416 putQRegLane(dd, 1, mkU64(0)); 9417 const HChar ch = isD ? 'd' : 's'; 9418 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf", 9419 ch, dd, ch, nn, fbits); 9420 return True; 9421 } 9422 9423 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) { 9424 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */ 9425 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */ 9426 UInt size = 0; 9427 UInt fbits = 0; 9428 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 9429 /* The following holds because immh is never zero. */ 9430 vassert(ok); 9431 /* The following holds because immh >= 0100. */ 9432 vassert(size == X10 || size == X11); 9433 Bool isD = size == X11; 9434 Bool isU = bitU == 1; 9435 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 9436 Double scale = two_to_the_plus(fbits); 9437 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 9438 : IRExpr_Const(IRConst_F32( (Float)scale )); 9439 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 9440 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U) 9441 : (isD ? Iop_F64toI64S : Iop_F32toI32S); 9442 IRType tyF = isD ? Ity_F64 : Ity_F32; 9443 IRType tyI = isD ? Ity_I64 : Ity_I32; 9444 IRTemp src = newTemp(tyF); 9445 IRTemp res = newTemp(tyI); 9446 IRTemp rm = newTemp(Ity_I32); 9447 assign(src, getQRegLane(nn, 0, tyF)); 9448 assign(rm, mkU32(Irrm_ZERO)); 9449 assign(res, binop(opCVT, mkexpr(rm), 9450 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE))); 9451 putQRegLane(dd, 0, mkexpr(res)); 9452 if (!isD) { 9453 putQRegLane(dd, 1, mkU32(0)); 9454 } 9455 putQRegLane(dd, 1, mkU64(0)); 9456 const HChar ch = isD ? 'd' : 's'; 9457 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs", 9458 ch, dd, ch, nn, fbits); 9459 return True; 9460 } 9461 9462 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9463 return False; 9464 # undef INSN 9465 } 9466 9467 9468 static 9469 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn) 9470 { 9471 /* 31 29 28 23 21 20 15 11 9 4 9472 01 U 11110 size 1 m opcode 00 n d 9473 Decode fields: u,opcode 9474 */ 9475 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9476 if (INSN(31,30) != BITS2(0,1) 9477 || INSN(28,24) != BITS5(1,1,1,1,0) 9478 || INSN(21,21) != 1 9479 || INSN(11,10) != BITS2(0,0)) { 9480 return False; 9481 } 9482 UInt bitU = INSN(29,29); 9483 UInt size = INSN(23,22); 9484 UInt mm = INSN(20,16); 9485 UInt opcode = INSN(15,12); 9486 UInt nn = INSN(9,5); 9487 UInt dd = INSN(4,0); 9488 vassert(size < 4); 9489 9490 if (bitU == 0 9491 && (opcode == BITS4(1,1,0,1) 9492 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) { 9493 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks) 9494 /* -------- 0,1001 SQDMLAL -------- */ // 1 9495 /* -------- 0,1011 SQDMLSL -------- */ // 2 9496 /* Widens, and size refers to the narrowed lanes. 
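         For example, size == X01 means 16-bit sources producing a
         32-bit result (sqdmlal s0, h1, h2), and size == X10 means
         32-bit sources producing a 64-bit result; the X00 and X11
         cases are rejected just below.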
*/ 9497 UInt ks = 3; 9498 switch (opcode) { 9499 case BITS4(1,1,0,1): ks = 0; break; 9500 case BITS4(1,0,0,1): ks = 1; break; 9501 case BITS4(1,0,1,1): ks = 2; break; 9502 default: vassert(0); 9503 } 9504 vassert(ks >= 0 && ks <= 2); 9505 if (size == X00 || size == X11) return False; 9506 vassert(size <= 2); 9507 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n; 9508 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 9509 newTempsV128_3(&vecN, &vecM, &vecD); 9510 assign(vecN, getQReg128(nn)); 9511 assign(vecM, getQReg128(mm)); 9512 assign(vecD, getQReg128(dd)); 9513 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 9514 False/*!is2*/, size, "mas"[ks], 9515 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 9516 IROp opZHI = mkVecZEROHIxxOFV128(size+1); 9517 putQReg128(dd, unop(opZHI, mkexpr(res))); 9518 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 9519 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 9520 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 9521 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI); 9522 } 9523 const HChar* nm = ks == 0 ? "sqdmull" 9524 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 9525 const HChar arrNarrow = "bhsd"[size]; 9526 const HChar arrWide = "bhsd"[size+1]; 9527 DIP("%s %c%u, %c%u, %c%u\n", 9528 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm); 9529 return True; 9530 } 9531 9532 return False; 9533 # undef INSN 9534 } 9535 9536 9537 static 9538 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn) 9539 { 9540 /* 31 29 28 23 21 20 15 10 9 4 9541 01 U 11110 size 1 m opcode 1 n d 9542 Decode fields: u,size,opcode 9543 */ 9544 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9545 if (INSN(31,30) != BITS2(0,1) 9546 || INSN(28,24) != BITS5(1,1,1,1,0) 9547 || INSN(21,21) != 1 9548 || INSN(10,10) != 1) { 9549 return False; 9550 } 9551 UInt bitU = INSN(29,29); 9552 UInt size = INSN(23,22); 9553 UInt mm = INSN(20,16); 9554 UInt opcode = INSN(15,11); 9555 UInt nn = INSN(9,5); 9556 UInt dd = INSN(4,0); 9557 vassert(size < 4); 9558 9559 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) { 9560 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */ 9561 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */ 9562 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */ 9563 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */ 9564 Bool isADD = opcode == BITS5(0,0,0,0,1); 9565 Bool isU = bitU == 1; 9566 IROp qop = Iop_INVALID; 9567 IROp nop = Iop_INVALID; 9568 if (isADD) { 9569 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size); 9570 nop = mkVecADD(size); 9571 } else { 9572 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size); 9573 nop = mkVecSUB(size); 9574 } 9575 IRTemp argL = newTempV128(); 9576 IRTemp argR = newTempV128(); 9577 IRTemp qres = newTempV128(); 9578 IRTemp nres = newTempV128(); 9579 assign(argL, getQReg128(nn)); 9580 assign(argR, getQReg128(mm)); 9581 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9582 size, binop(qop, mkexpr(argL), mkexpr(argR))))); 9583 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9584 size, binop(nop, mkexpr(argL), mkexpr(argR))))); 9585 putQReg128(dd, mkexpr(qres)); 9586 updateQCFLAGwithDifference(qres, nres); 9587 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd") 9588 : (isU ? 
"uqsub" : "sqsub"); 9589 const HChar arr = "bhsd"[size]; 9590 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); 9591 return True; 9592 } 9593 9594 if (size == X11 && opcode == BITS5(0,0,1,1,0)) { 9595 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s 9596 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u 9597 Bool isGT = bitU == 0; 9598 IRExpr* argL = getQReg128(nn); 9599 IRExpr* argR = getQReg128(mm); 9600 IRTemp res = newTempV128(); 9601 assign(res, 9602 isGT ? binop(Iop_CmpGT64Sx2, argL, argR) 9603 : binop(Iop_CmpGT64Ux2, argL, argR)); 9604 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9605 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi", 9606 nameQRegLO(dd, Ity_I64), 9607 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9608 return True; 9609 } 9610 9611 if (size == X11 && opcode == BITS5(0,0,1,1,1)) { 9612 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s 9613 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u 9614 Bool isGE = bitU == 0; 9615 IRExpr* argL = getQReg128(nn); 9616 IRExpr* argR = getQReg128(mm); 9617 IRTemp res = newTempV128(); 9618 assign(res, 9619 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)) 9620 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL))); 9621 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9622 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs", 9623 nameQRegLO(dd, Ity_I64), 9624 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9625 return True; 9626 } 9627 9628 if (size == X11 && (opcode == BITS5(0,1,0,0,0) 9629 || opcode == BITS5(0,1,0,1,0))) { 9630 /* -------- 0,xx,01000 SSHL d_d_d -------- */ 9631 /* -------- 0,xx,01010 SRSHL d_d_d -------- */ 9632 /* -------- 1,xx,01000 USHL d_d_d -------- */ 9633 /* -------- 1,xx,01010 URSHL d_d_d -------- */ 9634 Bool isU = bitU == 1; 9635 Bool isR = opcode == BITS5(0,1,0,1,0); 9636 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size)) 9637 : (isU ? mkVecSHU(size) : mkVecSHS(size)); 9638 IRTemp res = newTempV128(); 9639 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 9640 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9641 const HChar* nm = isR ? (isU ? "urshl" : "srshl") 9642 : (isU ? "ushl" : "sshl"); 9643 DIP("%s %s, %s, %s\n", nm, 9644 nameQRegLO(dd, Ity_I64), 9645 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9646 return True; 9647 } 9648 9649 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) { 9650 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */ 9651 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */ 9652 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */ 9653 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */ 9654 Bool isU = bitU == 1; 9655 Bool isR = opcode == BITS5(0,1,0,1,1); 9656 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size)) 9657 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size)); 9658 /* This is a bit tricky. Since we're only interested in the lowest 9659 lane of the result, we zero out all the rest in the operands, so 9660 as to ensure that other lanes don't pollute the returned Q value. 9661 This works because it means, for the lanes we don't care about, we 9662 are shifting zero by zero, which can never saturate. 
*/ 9663 IRTemp res256 = newTemp(Ity_V256); 9664 IRTemp resSH = newTempV128(); 9665 IRTemp resQ = newTempV128(); 9666 IRTemp zero = newTempV128(); 9667 assign( 9668 res256, 9669 binop(op, 9670 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))), 9671 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm))))); 9672 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256))); 9673 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256))); 9674 assign(zero, mkV128(0x0000)); 9675 putQReg128(dd, mkexpr(resSH)); 9676 updateQCFLAGwithDifference(resQ, zero); 9677 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl") 9678 : (isU ? "uqshl" : "sqshl"); 9679 const HChar arr = "bhsd"[size]; 9680 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); 9681 return True; 9682 } 9683 9684 if (size == X11 && opcode == BITS5(1,0,0,0,0)) { 9685 /* -------- 0,11,10000 ADD d_d_d -------- */ 9686 /* -------- 1,11,10000 SUB d_d_d -------- */ 9687 Bool isSUB = bitU == 1; 9688 IRTemp res = newTemp(Ity_I64); 9689 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64, 9690 getQRegLane(nn, 0, Ity_I64), 9691 getQRegLane(mm, 0, Ity_I64))); 9692 putQRegLane(dd, 0, mkexpr(res)); 9693 putQRegLane(dd, 1, mkU64(0)); 9694 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add", 9695 nameQRegLO(dd, Ity_I64), 9696 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9697 return True; 9698 } 9699 9700 if (size == X11 && opcode == BITS5(1,0,0,0,1)) { 9701 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0 9702 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // == 9703 Bool isEQ = bitU == 1; 9704 IRExpr* argL = getQReg128(nn); 9705 IRExpr* argR = getQReg128(mm); 9706 IRTemp res = newTempV128(); 9707 assign(res, 9708 isEQ ? binop(Iop_CmpEQ64x2, argL, argR) 9709 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2, 9710 binop(Iop_AndV128, argL, argR), 9711 mkV128(0x0000)))); 9712 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9713 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst", 9714 nameQRegLO(dd, Ity_I64), 9715 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); 9716 return True; 9717 } 9718 9719 if (opcode == BITS5(1,0,1,1,0)) { 9720 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */ 9721 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */ 9722 if (size == X00 || size == X11) return False; 9723 Bool isR = bitU == 1; 9724 IRTemp res, sat1q, sat1n, vN, vM; 9725 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 9726 newTempsV128_2(&vN, &vM); 9727 assign(vN, getQReg128(nn)); 9728 assign(vM, getQReg128(mm)); 9729 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 9730 putQReg128(dd, 9731 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)))); 9732 updateQCFLAGwithDifference( 9733 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)), 9734 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n))); 9735 const HChar arr = "bhsd"[size]; 9736 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 9737 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm); 9738 return True; 9739 } 9740 9741 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) { 9742 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */ 9743 IRType ity = size == X11 ? 
Ity_F64 : Ity_F32; 9744 IRTemp res = newTemp(ity); 9745 assign(res, unop(mkABSF(ity), 9746 triop(mkSUBF(ity), 9747 mkexpr(mk_get_IR_rounding_mode()), 9748 getQRegLO(nn,ity), getQRegLO(mm,ity)))); 9749 putQReg128(dd, mkV128(0x0000)); 9750 putQRegLO(dd, mkexpr(res)); 9751 DIP("fabd %s, %s, %s\n", 9752 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9753 return True; 9754 } 9755 9756 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) { 9757 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */ 9758 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 9759 IRType ity = size == X01 ? Ity_F64 : Ity_F32; 9760 IRTemp res = newTemp(ity); 9761 assign(res, triop(mkMULF(ity), 9762 mkexpr(mk_get_IR_rounding_mode()), 9763 getQRegLO(nn,ity), getQRegLO(mm,ity))); 9764 putQReg128(dd, mkV128(0x0000)); 9765 putQRegLO(dd, mkexpr(res)); 9766 DIP("fmulx %s, %s, %s\n", 9767 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9768 return True; 9769 } 9770 9771 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) { 9772 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */ 9773 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */ 9774 Bool isD = size == X01; 9775 IRType ity = isD ? Ity_F64 : Ity_F32; 9776 Bool isGE = bitU == 1; 9777 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4) 9778 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4); 9779 IRTemp res = newTempV128(); 9780 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd 9781 : binop(opCMP, getQReg128(nn), getQReg128(mm))); 9782 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9783 mkexpr(res)))); 9784 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq", 9785 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9786 return True; 9787 } 9788 9789 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) { 9790 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */ 9791 Bool isD = size == X11; 9792 IRType ity = isD ? Ity_F64 : Ity_F32; 9793 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 9794 IRTemp res = newTempV128(); 9795 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd 9796 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9797 mkexpr(res)))); 9798 DIP("%s %s, %s, %s\n", "fcmgt", 9799 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9800 return True; 9801 } 9802 9803 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) { 9804 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */ 9805 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */ 9806 Bool isD = (size & 1) == 1; 9807 IRType ity = isD ? Ity_F64 : Ity_F32; 9808 Bool isGT = (size & 2) == 2; 9809 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4) 9810 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4); 9811 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 9812 IRTemp res = newTempV128(); 9813 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)), 9814 unop(opABS, getQReg128(nn)))); // swapd 9815 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9816 mkexpr(res)))); 9817 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge", 9818 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 9819 return True; 9820 } 9821 9822 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) { 9823 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */ 9824 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */ 9825 Bool isSQRT = (size & 2) == 2; 9826 Bool isD = (size & 1) == 1; 9827 IROp op = isSQRT ? (isD ? 
Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4) 9828 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4); 9829 IRTemp res = newTempV128(); 9830 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 9831 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 9832 mkexpr(res)))); 9833 HChar c = isD ? 'd' : 's'; 9834 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps", 9835 c, dd, c, nn, c, mm); 9836 return True; 9837 } 9838 9839 return False; 9840 # undef INSN 9841 } 9842 9843 9844 static 9845 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) 9846 { 9847 /* 31 29 28 23 21 16 11 9 4 9848 01 U 11110 size 10000 opcode 10 n d 9849 Decode fields: u,size,opcode 9850 */ 9851 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 9852 if (INSN(31,30) != BITS2(0,1) 9853 || INSN(28,24) != BITS5(1,1,1,1,0) 9854 || INSN(21,17) != BITS5(1,0,0,0,0) 9855 || INSN(11,10) != BITS2(1,0)) { 9856 return False; 9857 } 9858 UInt bitU = INSN(29,29); 9859 UInt size = INSN(23,22); 9860 UInt opcode = INSN(16,12); 9861 UInt nn = INSN(9,5); 9862 UInt dd = INSN(4,0); 9863 vassert(size < 4); 9864 9865 if (opcode == BITS5(0,0,0,1,1)) { 9866 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */ 9867 /* -------- 1,xx,00011: USQADD std4_std4 -------- */ 9868 /* These are a bit tricky (to say the least). See comments on 9869 the vector variants (in dis_AdvSIMD_two_reg_misc) below for 9870 details. */ 9871 Bool isUSQADD = bitU == 1; 9872 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size) 9873 : mkVecQADDEXTUSSATSS(size); 9874 IROp nop = mkVecADD(size); 9875 IRTemp argL = newTempV128(); 9876 IRTemp argR = newTempV128(); 9877 assign(argL, getQReg128(nn)); 9878 assign(argR, getQReg128(dd)); 9879 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9880 size, binop(qop, mkexpr(argL), mkexpr(argR))); 9881 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 9882 size, binop(nop, mkexpr(argL), mkexpr(argR))); 9883 putQReg128(dd, mkexpr(qres)); 9884 updateQCFLAGwithDifference(qres, nres); 9885 const HChar arr = "bhsd"[size]; 9886 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn); 9887 return True; 9888 } 9889 9890 if (opcode == BITS5(0,0,1,1,1)) { 9891 /* -------- 0,xx,00111 SQABS std4_std4 -------- */ 9892 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */ 9893 Bool isNEG = bitU == 1; 9894 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID; 9895 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW, 9896 getQReg128(nn), size ); 9897 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW)); 9898 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW)); 9899 putQReg128(dd, mkexpr(qres)); 9900 updateQCFLAGwithDifference(qres, nres); 9901 const HChar arr = "bhsd"[size]; 9902 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn); 9903 return True; 9904 } 9905 9906 if (size == X11 && opcode == BITS5(0,1,0,0,0)) { 9907 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0 9908 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0 9909 Bool isGT = bitU == 0; 9910 IRExpr* argL = getQReg128(nn); 9911 IRExpr* argR = mkV128(0x0000); 9912 IRTemp res = newTempV128(); 9913 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR) 9914 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))); 9915 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9916 DIP("cm%s d%u, d%u, #0\n", isGT ? 
"gt" : "ge", dd, nn); 9917 return True; 9918 } 9919 9920 if (size == X11 && opcode == BITS5(0,1,0,0,1)) { 9921 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0 9922 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0 9923 Bool isEQ = bitU == 0; 9924 IRExpr* argL = getQReg128(nn); 9925 IRExpr* argR = mkV128(0x0000); 9926 IRTemp res = newTempV128(); 9927 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR) 9928 : unop(Iop_NotV128, 9929 binop(Iop_CmpGT64Sx2, argL, argR))); 9930 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); 9931 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn); 9932 return True; 9933 } 9934 9935 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) { 9936 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0 9937 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9938 binop(Iop_CmpGT64Sx2, mkV128(0x0000), 9939 getQReg128(nn)))); 9940 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn); 9941 return True; 9942 } 9943 9944 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) { 9945 /* -------- 0,11,01011 ABS d_d -------- */ 9946 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9947 unop(Iop_Abs64x2, getQReg128(nn)))); 9948 DIP("abs d%u, d%u\n", dd, nn); 9949 return True; 9950 } 9951 9952 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) { 9953 /* -------- 1,11,01011 NEG d_d -------- */ 9954 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 9955 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn)))); 9956 DIP("neg d%u, d%u\n", dd, nn); 9957 return True; 9958 } 9959 9960 UInt ix = 0; /*INVALID*/ 9961 if (size >= X10) { 9962 switch (opcode) { 9963 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break; 9964 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break; 9965 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; 9966 default: break; 9967 } 9968 } 9969 if (ix > 0) { 9970 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */ 9971 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */ 9972 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */ 9973 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */ 9974 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */ 9975 Bool isD = size == X11; 9976 IRType ity = isD ? Ity_F64 : Ity_F32; 9977 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; 9978 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 9979 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 9980 IROp opCmp = Iop_INVALID; 9981 Bool swap = False; 9982 const HChar* nm = "??"; 9983 switch (ix) { 9984 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break; 9985 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break; 9986 case 3: nm = "fcmlt"; opCmp = opCmpLT; break; 9987 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break; 9988 case 5: nm = "fcmle"; opCmp = opCmpLE; break; 9989 default: vassert(0); 9990 } 9991 IRExpr* zero = mkV128(0x0000); 9992 IRTemp res = newTempV128(); 9993 assign(res, swap ? binop(opCmp, zero, getQReg128(nn)) 9994 : binop(opCmp, getQReg128(nn), zero)); 9995 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 
X11 : X10, 9996 mkexpr(res)))); 9997 9998 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity)); 9999 return True; 10000 } 10001 10002 if (opcode == BITS5(1,0,1,0,0) 10003 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) { 10004 /* -------- 0,xx,10100: SQXTN -------- */ 10005 /* -------- 1,xx,10100: UQXTN -------- */ 10006 /* -------- 1,xx,10010: SQXTUN -------- */ 10007 if (size == X11) return False; 10008 vassert(size < 3); 10009 IROp opN = Iop_INVALID; 10010 Bool zWiden = True; 10011 const HChar* nm = "??"; 10012 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) { 10013 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False; 10014 } 10015 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) { 10016 opN = mkVecQNARROWUNUU(size); nm = "uqxtn"; 10017 } 10018 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 10019 opN = mkVecQNARROWUNSU(size); nm = "sqxtun"; 10020 } 10021 else vassert(0); 10022 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 10023 size+1, getQReg128(nn)); 10024 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE( 10025 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src)))); 10026 putQReg128(dd, mkexpr(resN)); 10027 /* This widens zero lanes to zero, and compares it against zero, so all 10028 of the non-participating lanes make no contribution to the 10029 Q flag state. */ 10030 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/, 10031 size, mkexpr(resN)); 10032 updateQCFLAGwithDifference(src, resW); 10033 const HChar arrNarrow = "bhsd"[size]; 10034 const HChar arrWide = "bhsd"[size+1]; 10035 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn); 10036 return True; 10037 } 10038 10039 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) { 10040 /* -------- 1,01,10110 FCVTXN s_d -------- */ 10041 /* Using Irrm_NEAREST here isn't right. The docs say "round to 10042 odd" but I don't know what that really means. */ 10043 putQRegLO(dd, 10044 binop(Iop_F64toF32, mkU32(Irrm_NEAREST), 10045 getQRegLO(nn, Ity_F64))); 10046 putQRegLane(dd, 1, mkU32(0)); 10047 putQRegLane(dd, 1, mkU64(0)); 10048 DIP("fcvtxn s%u, d%u\n", dd, nn); 10049 return True; 10050 } 10051 10052 ix = 0; /*INVALID*/ 10053 switch (opcode) { 10054 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break; 10055 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break; 10056 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break; 10057 default: break; 10058 } 10059 if (ix > 0) { 10060 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */ 10061 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */ 10062 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */ 10063 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */ 10064 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */ 10065 /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */ 10066 /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */ 10067 /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */ 10068 /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */ 10069 /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */ 10070 Bool isD = (size & 1) == 1; 10071 IRType tyF = isD ? Ity_F64 : Ity_F32; 10072 IRType tyI = isD ? Ity_I64 : Ity_I32; 10073 IRRoundingMode irrm = 8; /*impossible*/ 10074 HChar ch = '?'; 10075 switch (ix) { 10076 case 1: ch = 'n'; irrm = Irrm_NEAREST; break; 10077 case 2: ch = 'm'; irrm = Irrm_NegINF; break; 10078 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? 
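            FCVTA{S,U} should round to nearest with ties away from
            zero, but Irrm_NEAREST rounds ties to even, so results can
            differ for values exactly halfway between two integers.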
*/ 10079 case 4: ch = 'p'; irrm = Irrm_PosINF; break; 10080 case 5: ch = 'z'; irrm = Irrm_ZERO; break; 10081 default: vassert(0); 10082 } 10083 IROp cvt = Iop_INVALID; 10084 if (bitU == 1) { 10085 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U; 10086 } else { 10087 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S; 10088 } 10089 IRTemp src = newTemp(tyF); 10090 IRTemp res = newTemp(tyI); 10091 assign(src, getQRegLane(nn, 0, tyF)); 10092 assign(res, binop(cvt, mkU32(irrm), mkexpr(src))); 10093 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */ 10094 if (!isD) { 10095 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */ 10096 } 10097 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */ 10098 HChar sOrD = isD ? 'd' : 's'; 10099 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's', 10100 sOrD, dd, sOrD, nn); 10101 return True; 10102 } 10103 10104 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) { 10105 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */ 10106 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */ 10107 Bool isU = bitU == 1; 10108 Bool isD = (size & 1) == 1; 10109 IRType tyI = isD ? Ity_I64 : Ity_I32; 10110 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32) 10111 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 10112 IRTemp rm = mk_get_IR_rounding_mode(); 10113 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI))); 10114 if (!isD) { 10115 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */ 10116 } 10117 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */ 10118 HChar c = isD ? 'd' : 's'; 10119 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn); 10120 return True; 10121 } 10122 10123 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) { 10124 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */ 10125 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */ 10126 Bool isSQRT = bitU == 1; 10127 Bool isD = (size & 1) == 1; 10128 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4) 10129 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4); 10130 IRTemp resV = newTempV128(); 10131 assign(resV, unop(op, getQReg128(nn))); 10132 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10, 10133 mkexpr(resV)))); 10134 HChar c = isD ? 'd' : 's'; 10135 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn); 10136 return True; 10137 } 10138 10139 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) { 10140 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */ 10141 Bool isD = (size & 1) == 1; 10142 IRType ty = isD ? Ity_F64 : Ity_F32; 10143 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32; 10144 IRTemp res = newTemp(ty); 10145 IRTemp rm = mk_get_IR_rounding_mode(); 10146 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty))); 10147 putQReg128(dd, mkV128(0x0000)); 10148 putQRegLane(dd, 0, mkexpr(res)); 10149 HChar c = isD ? 'd' : 's'; 10150 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn); 10151 return True; 10152 } 10153 10154 return False; 10155 # undef INSN 10156 } 10157 10158 10159 static 10160 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn) 10161 { 10162 /* 31 28 23 21 20 19 15 11 9 4 10163 01 U 11111 size L M m opcode H 0 n d 10164 Decode fields are: u,size,opcode 10165 M is really part of the mm register number. Individual 10166 cases need to inspect L and H though. 
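      Specifically: for the FP (sz-encoded) cases below, the lane index
      is H:L for S lanes or just H for D lanes (L must be 0), and the
      element register is M:Rm; for the 16-bit integer cases the index
      is H:L:M and the element register is Rm alone, so only v0..v15
      can be named.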
10167 */ 10168 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10169 if (INSN(31,30) != BITS2(0,1) 10170 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) { 10171 return False; 10172 } 10173 UInt bitU = INSN(29,29); 10174 UInt size = INSN(23,22); 10175 UInt bitL = INSN(21,21); 10176 UInt bitM = INSN(20,20); 10177 UInt mmLO4 = INSN(19,16); 10178 UInt opcode = INSN(15,12); 10179 UInt bitH = INSN(11,11); 10180 UInt nn = INSN(9,5); 10181 UInt dd = INSN(4,0); 10182 vassert(size < 4); 10183 vassert(bitH < 2 && bitM < 2 && bitL < 2); 10184 10185 if (bitU == 0 && size >= X10 10186 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) { 10187 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */ 10188 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */ 10189 Bool isD = (size & 1) == 1; 10190 Bool isSUB = opcode == BITS4(0,1,0,1); 10191 UInt index; 10192 if (!isD) index = (bitH << 1) | bitL; 10193 else if (isD && bitL == 0) index = bitH; 10194 else return False; // sz:L == x11 => unallocated encoding 10195 vassert(index < (isD ? 2 : 4)); 10196 IRType ity = isD ? Ity_F64 : Ity_F32; 10197 IRTemp elem = newTemp(ity); 10198 UInt mm = (bitM << 4) | mmLO4; 10199 assign(elem, getQRegLane(mm, index, ity)); 10200 IRTemp dupd = math_DUP_TO_V128(elem, ity); 10201 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 10202 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 10203 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 10204 IRTemp rm = mk_get_IR_rounding_mode(); 10205 IRTemp t1 = newTempV128(); 10206 IRTemp t2 = newTempV128(); 10207 // FIXME: double rounding; use FMA primops instead 10208 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 10209 assign(t2, triop(isSUB ? opSUB : opADD, 10210 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 10211 putQReg128(dd, 10212 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2, 10213 mkexpr(t2)))); 10214 const HChar c = isD ? 'd' : 's'; 10215 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla", 10216 c, dd, c, nn, nameQReg128(mm), c, index); 10217 return True; 10218 } 10219 10220 if (size >= X10 && opcode == BITS4(1,0,0,1)) { 10221 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */ 10222 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */ 10223 Bool isD = (size & 1) == 1; 10224 Bool isMULX = bitU == 1; 10225 UInt index; 10226 if (!isD) index = (bitH << 1) | bitL; 10227 else if (isD && bitL == 0) index = bitH; 10228 else return False; // sz:L == x11 => unallocated encoding 10229 vassert(index < (isD ? 2 : 4)); 10230 IRType ity = isD ? Ity_F64 : Ity_F32; 10231 IRTemp elem = newTemp(ity); 10232 UInt mm = (bitM << 4) | mmLO4; 10233 assign(elem, getQRegLane(mm, index, ity)); 10234 IRTemp dupd = math_DUP_TO_V128(elem, ity); 10235 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 10236 IRTemp rm = mk_get_IR_rounding_mode(); 10237 IRTemp t1 = newTempV128(); 10238 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 10239 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 10240 putQReg128(dd, 10241 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2, 10242 mkexpr(t1)))); 10243 const HChar c = isD ? 'd' : 's'; 10244 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? 
"fmulx" : "fmul", 10245 c, dd, c, nn, nameQReg128(mm), c, index); 10246 return True; 10247 } 10248 10249 if (bitU == 0 10250 && (opcode == BITS4(1,0,1,1) 10251 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) { 10252 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks) 10253 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1 10254 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2 10255 /* Widens, and size refers to the narrowed lanes. */ 10256 UInt ks = 3; 10257 switch (opcode) { 10258 case BITS4(1,0,1,1): ks = 0; break; 10259 case BITS4(0,0,1,1): ks = 1; break; 10260 case BITS4(0,1,1,1): ks = 2; break; 10261 default: vassert(0); 10262 } 10263 vassert(ks >= 0 && ks <= 2); 10264 UInt mm = 32; // invalid 10265 UInt ix = 16; // invalid 10266 switch (size) { 10267 case X00: 10268 return False; // h_b_b[] case is not allowed 10269 case X01: 10270 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 10271 case X10: 10272 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 10273 case X11: 10274 return False; // q_d_d[] case is not allowed 10275 default: 10276 vassert(0); 10277 } 10278 vassert(mm < 32 && ix < 16); 10279 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n; 10280 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 10281 newTempsV128_2(&vecN, &vecD); 10282 assign(vecN, getQReg128(nn)); 10283 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 10284 assign(vecD, getQReg128(dd)); 10285 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 10286 False/*!is2*/, size, "mas"[ks], 10287 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 10288 IROp opZHI = mkVecZEROHIxxOFV128(size+1); 10289 putQReg128(dd, unop(opZHI, mkexpr(res))); 10290 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 10291 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 10292 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 10293 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI); 10294 } 10295 const HChar* nm = ks == 0 ? "sqmull" 10296 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 10297 const HChar arrNarrow = "bhsd"[size]; 10298 const HChar arrWide = "bhsd"[size+1]; 10299 DIP("%s %c%u, %c%u, v%u.%c[%u]\n", 10300 nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix); 10301 return True; 10302 } 10303 10304 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { 10305 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ 10306 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ 10307 UInt mm = 32; // invalid 10308 UInt ix = 16; // invalid 10309 switch (size) { 10310 case X00: 10311 return False; // b case is not allowed 10312 case X01: 10313 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 10314 case X10: 10315 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 10316 case X11: 10317 return False; // q case is not allowed 10318 default: 10319 vassert(0); 10320 } 10321 vassert(mm < 32 && ix < 16); 10322 Bool isR = opcode == BITS4(1,1,0,1); 10323 IRTemp res, sat1q, sat1n, vN, vM; 10324 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 10325 vN = newTempV128(); 10326 assign(vN, getQReg128(nn)); 10327 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 10328 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 10329 IROp opZHI = mkVecZEROHIxxOFV128(size); 10330 putQReg128(dd, unop(opZHI, mkexpr(res))); 10331 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 10332 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 10333 HChar ch = size == X01 ? 
'h' : 's';
10334 DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10335 return True;
10336 }
10337
10338 return False;
10339 # undef INSN
10340 }
10341
10342
10343 static
10344 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10345 {
10346 /* 31 28 22 18 15 10 9 4
10347 0 q u 011110 immh immb opcode 1 n d
10348 Decode fields: u,opcode
10349 */
10350 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10351 if (INSN(31,31) != 0
10352 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10353 return False;
10354 }
10355 UInt bitQ = INSN(30,30);
10356 UInt bitU = INSN(29,29);
10357 UInt immh = INSN(22,19);
10358 UInt immb = INSN(18,16);
10359 UInt opcode = INSN(15,11);
10360 UInt nn = INSN(9,5);
10361 UInt dd = INSN(4,0);
10362
10363 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10364 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10365 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10366 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10367 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10368 /* laneTy, shift = case immh:immb of
10369 0001:xxx -> B, SHR:8-xxx
10370 001x:xxx -> H, SHR:16-xxxx
10371 01xx:xxx -> S, SHR:32-xxxxx
10372 1xxx:xxx -> D, SHR:64-xxxxxx
10373 other -> invalid
10374 */
10375 UInt size = 0;
10376 UInt shift = 0;
10377 Bool isQ = bitQ == 1;
10378 Bool isU = bitU == 1;
10379 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10380 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10381 if (!ok || (bitQ == 0 && size == X11)) return False;
10382 vassert(size >= 0 && size <= 3);
10383 UInt lanebits = 8 << size;
10384 vassert(shift >= 1 && shift <= lanebits);
10385 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10386 IRExpr* src = getQReg128(nn);
10387 IRTemp shf = newTempV128();
10388 IRTemp res = newTempV128();
10389 if (shift == lanebits && isU) {
10390 assign(shf, mkV128(0x0000));
10391 } else {
10392 UInt nudge = 0;
10393 if (shift == lanebits) {
10394 vassert(!isU);
10395 nudge = 1;
10396 }
10397 assign(shf, binop(op, src, mkU8(shift - nudge)));
10398 }
10399 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10400 : mkexpr(shf));
10401 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10402 HChar laneCh = "bhsd"[size];
10403 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10404 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10405 : (isU ? "ushr" : "sshr");
10406 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10407 nameQReg128(dd), nLanes, laneCh,
10408 nameQReg128(nn), nLanes, laneCh, shift);
10409 return True;
10410 }
10411
10412 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10413 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10414 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10415 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10416 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10417 /* laneTy, shift = case immh:immb of
10418 0001:xxx -> B, SHR:8-xxx
10419 001x:xxx -> H, SHR:16-xxxx
10420 01xx:xxx -> S, SHR:32-xxxxx
10421 1xxx:xxx -> D, SHR:64-xxxxxx
10422 other -> invalid
10423 */
10424 UInt size = 0;
10425 UInt shift = 0;
10426 Bool isQ = bitQ == 1;
10427 Bool isU = bitU == 1;
10428 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10429 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10430 if (!ok || (bitQ == 0 && size == X11)) return False;
10431 vassert(size >= 0 && size <= 3);
10432 UInt lanebits = 8 << size;
10433 vassert(shift >= 1 && shift <= lanebits);
10434 IROp op = isU ?
mkVecRSHU(size) : mkVecRSHS(size); 10435 IRExpr* src = getQReg128(nn); 10436 IRTemp imm8 = newTemp(Ity_I8); 10437 assign(imm8, mkU8((UChar)(-shift))); 10438 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8)); 10439 IRTemp shf = newTempV128(); 10440 IRTemp res = newTempV128(); 10441 assign(shf, binop(op, src, amt)); 10442 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf)) 10443 : mkexpr(shf)); 10444 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10445 HChar laneCh = "bhsd"[size]; 10446 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10447 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra") 10448 : (isU ? "urshr" : "srshr"); 10449 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, 10450 nameQReg128(dd), nLanes, laneCh, 10451 nameQReg128(nn), nLanes, laneCh, shift); 10452 return True; 10453 } 10454 10455 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) { 10456 /* -------- 1,01000 SRI std7_std7_#imm -------- */ 10457 /* laneTy, shift = case immh:immb of 10458 0001:xxx -> B, SHR:8-xxx 10459 001x:xxx -> H, SHR:16-xxxx 10460 01xx:xxx -> S, SHR:32-xxxxx 10461 1xxx:xxx -> D, SHR:64-xxxxxx 10462 other -> invalid 10463 */ 10464 UInt size = 0; 10465 UInt shift = 0; 10466 Bool isQ = bitQ == 1; 10467 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10468 if (!ok || (bitQ == 0 && size == X11)) return False; 10469 vassert(size >= 0 && size <= 3); 10470 UInt lanebits = 8 << size; 10471 vassert(shift >= 1 && shift <= lanebits); 10472 IRExpr* src = getQReg128(nn); 10473 IRTemp res = newTempV128(); 10474 if (shift == lanebits) { 10475 assign(res, getQReg128(dd)); 10476 } else { 10477 assign(res, binop(mkVecSHRN(size), src, mkU8(shift))); 10478 IRExpr* nmask = binop(mkVecSHLN(size), 10479 mkV128(0xFFFF), mkU8(lanebits - shift)); 10480 IRTemp tmp = newTempV128(); 10481 assign(tmp, binop(Iop_OrV128, 10482 mkexpr(res), 10483 binop(Iop_AndV128, getQReg128(dd), nmask))); 10484 res = tmp; 10485 } 10486 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10487 HChar laneCh = "bhsd"[size]; 10488 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10489 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri", 10490 nameQReg128(dd), nLanes, laneCh, 10491 nameQReg128(nn), nLanes, laneCh, shift); 10492 return True; 10493 } 10494 10495 if (opcode == BITS5(0,1,0,1,0)) { 10496 /* -------- 0,01010 SHL std7_std7_#imm -------- */ 10497 /* -------- 1,01010 SLI std7_std7_#imm -------- */ 10498 /* laneTy, shift = case immh:immb of 10499 0001:xxx -> B, xxx 10500 001x:xxx -> H, xxxx 10501 01xx:xxx -> S, xxxxx 10502 1xxx:xxx -> D, xxxxxx 10503 other -> invalid 10504 */ 10505 UInt size = 0; 10506 UInt shift = 0; 10507 Bool isSLI = bitU == 1; 10508 Bool isQ = bitQ == 1; 10509 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10510 if (!ok || (bitQ == 0 && size == X11)) return False; 10511 vassert(size >= 0 && size <= 3); 10512 /* The shift encoding has opposite sign for the leftwards case. 10513 Adjust shift to compensate. 
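(getLaneInfo_IMMH_IMMB returns the right-shift reading of immh:immb, that is
2*lanebits - immh:immb, whereas the left shift encoded here is immh:immb - lanebits;
hence shift is replaced by lanebits - shift just below.)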
*/ 10514 UInt lanebits = 8 << size; 10515 shift = lanebits - shift; 10516 vassert(shift >= 0 && shift < lanebits); 10517 IROp op = mkVecSHLN(size); 10518 IRExpr* src = getQReg128(nn); 10519 IRTemp res = newTempV128(); 10520 if (shift == 0) { 10521 assign(res, src); 10522 } else { 10523 assign(res, binop(op, src, mkU8(shift))); 10524 if (isSLI) { 10525 IRExpr* nmask = binop(mkVecSHRN(size), 10526 mkV128(0xFFFF), mkU8(lanebits - shift)); 10527 IRTemp tmp = newTempV128(); 10528 assign(tmp, binop(Iop_OrV128, 10529 mkexpr(res), 10530 binop(Iop_AndV128, getQReg128(dd), nmask))); 10531 res = tmp; 10532 } 10533 } 10534 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10535 HChar laneCh = "bhsd"[size]; 10536 UInt nLanes = (isQ ? 128 : 64) / lanebits; 10537 const HChar* nm = isSLI ? "sli" : "shl"; 10538 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm, 10539 nameQReg128(dd), nLanes, laneCh, 10540 nameQReg128(nn), nLanes, laneCh, shift); 10541 return True; 10542 } 10543 10544 if (opcode == BITS5(0,1,1,1,0) 10545 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) { 10546 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */ 10547 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */ 10548 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */ 10549 UInt size = 0; 10550 UInt shift = 0; 10551 Bool isQ = bitQ == 1; 10552 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10553 if (!ok || (bitQ == 0 && size == X11)) return False; 10554 vassert(size >= 0 && size <= 3); 10555 /* The shift encoding has opposite sign for the leftwards case. 10556 Adjust shift to compensate. */ 10557 UInt lanebits = 8 << size; 10558 shift = lanebits - shift; 10559 vassert(shift >= 0 && shift < lanebits); 10560 const HChar* nm = NULL; 10561 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl"; 10562 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl"; 10563 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu"; 10564 else vassert(0); 10565 IRTemp qDiff1 = IRTemp_INVALID; 10566 IRTemp qDiff2 = IRTemp_INVALID; 10567 IRTemp res = IRTemp_INVALID; 10568 IRTemp src = newTempV128(); 10569 assign(src, getQReg128(nn)); 10570 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm); 10571 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 10572 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2, 10573 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128); 10574 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10575 DIP("%s %s.%s, %s.%s, #%u\n", nm, 10576 nameQReg128(dd), arr, nameQReg128(nn), arr, shift); 10577 return True; 10578 } 10579 10580 if (bitU == 0 10581 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) { 10582 /* -------- 0,10000 SHRN{,2} #imm -------- */ 10583 /* -------- 0,10001 RSHRN{,2} #imm -------- */ 10584 /* Narrows, and size is the narrow size. */ 10585 UInt size = 0; 10586 UInt shift = 0; 10587 Bool is2 = bitQ == 1; 10588 Bool isR = opcode == BITS5(1,0,0,0,1); 10589 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10590 if (!ok || size == X11) return False; 10591 vassert(shift >= 1); 10592 IRTemp t1 = newTempV128(); 10593 IRTemp t2 = newTempV128(); 10594 IRTemp t3 = newTempV128(); 10595 assign(t1, getQReg128(nn)); 10596 assign(t2, isR ? 
binop(mkVecADD(size+1), 10597 mkexpr(t1), 10598 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1)))) 10599 : mkexpr(t1)); 10600 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift))); 10601 IRTemp t4 = math_NARROW_LANES(t3, t3, size); 10602 putLO64andZUorPutHI64(is2, dd, t4); 10603 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10604 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10605 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn", 10606 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift); 10607 return True; 10608 } 10609 10610 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1) 10611 || (bitU == 1 10612 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) { 10613 /* -------- 0,10010 SQSHRN{,2} #imm -------- */ 10614 /* -------- 1,10010 UQSHRN{,2} #imm -------- */ 10615 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */ 10616 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */ 10617 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */ 10618 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */ 10619 UInt size = 0; 10620 UInt shift = 0; 10621 Bool is2 = bitQ == 1; 10622 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb); 10623 if (!ok || size == X11) return False; 10624 vassert(shift >= 1 && shift <= (8 << size)); 10625 const HChar* nm = "??"; 10626 IROp op = Iop_INVALID; 10627 /* Decide on the name and the operation. */ 10628 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 10629 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size); 10630 } 10631 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 10632 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size); 10633 } 10634 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) { 10635 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size); 10636 } 10637 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 10638 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size); 10639 } 10640 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) { 10641 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size); 10642 } 10643 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) { 10644 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size); 10645 } 10646 else vassert(0); 10647 /* Compute the result (Q, shifted value) pair. */ 10648 IRTemp src128 = newTempV128(); 10649 assign(src128, getQReg128(nn)); 10650 IRTemp pair = newTempV128(); 10651 assign(pair, binop(op, mkexpr(src128), mkU8(shift))); 10652 /* Update the result reg */ 10653 IRTemp res64in128 = newTempV128(); 10654 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair))); 10655 putLO64andZUorPutHI64(is2, dd, res64in128); 10656 /* Update the Q flag. 
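The QAND..NARROW ops used above return the narrowed result in the lower 64 bits of
the pair and the saturation indication in the upper 64 bits; so duplicate the upper
half and compare it against zero.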
*/ 10657 IRTemp q64q64 = newTempV128(); 10658 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair))); 10659 IRTemp z128 = newTempV128(); 10660 assign(z128, mkV128(0x0000)); 10661 updateQCFLAGwithDifference(q64q64, z128); 10662 /* */ 10663 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10664 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10665 DIP("%s %s.%s, %s.%s, #%u\n", nm, 10666 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift); 10667 return True; 10668 } 10669 10670 if (opcode == BITS5(1,0,1,0,0)) { 10671 /* -------- 0,10100 SSHLL{,2} #imm -------- */ 10672 /* -------- 1,10100 USHLL{,2} #imm -------- */ 10673 /* 31 28 22 18 15 9 4 10674 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh 10675 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh 10676 where Ta,Tb,sh 10677 = case immh of 1xxx -> invalid 10678 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31) 10679 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15) 10680 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7) 10681 0000 -> AdvSIMD modified immediate (???) 10682 */ 10683 Bool isQ = bitQ == 1; 10684 Bool isU = bitU == 1; 10685 UInt immhb = (immh << 3) | immb; 10686 IRTemp src = newTempV128(); 10687 IRTemp zero = newTempV128(); 10688 IRExpr* res = NULL; 10689 UInt sh = 0; 10690 const HChar* ta = "??"; 10691 const HChar* tb = "??"; 10692 assign(src, getQReg128(nn)); 10693 assign(zero, mkV128(0x0000)); 10694 if (immh & 8) { 10695 /* invalid; don't assign to res */ 10696 } 10697 else if (immh & 4) { 10698 sh = immhb - 32; 10699 vassert(sh < 32); /* so 32-sh is 1..32 */ 10700 ta = "2d"; 10701 tb = isQ ? "4s" : "2s"; 10702 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero) 10703 : mk_InterleaveLO32x4(src, zero); 10704 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh)); 10705 } 10706 else if (immh & 2) { 10707 sh = immhb - 16; 10708 vassert(sh < 16); /* so 16-sh is 1..16 */ 10709 ta = "4s"; 10710 tb = isQ ? "8h" : "4h"; 10711 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero) 10712 : mk_InterleaveLO16x8(src, zero); 10713 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh)); 10714 } 10715 else if (immh & 1) { 10716 sh = immhb - 8; 10717 vassert(sh < 8); /* so 8-sh is 1..8 */ 10718 ta = "8h"; 10719 tb = isQ ? "16b" : "8b"; 10720 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero) 10721 : mk_InterleaveLO8x16(src, zero); 10722 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh)); 10723 } else { 10724 vassert(immh == 0); 10725 /* invalid; don't assign to res */ 10726 } 10727 /* */ 10728 if (res) { 10729 putQReg128(dd, res); 10730 DIP("%cshll%s %s.%s, %s.%s, #%u\n", 10731 isU ? 'u' : 's', isQ ? "2" : "", 10732 nameQReg128(dd), ta, nameQReg128(nn), tb, sh); 10733 return True; 10734 } 10735 return False; 10736 } 10737 10738 if (opcode == BITS5(1,1,1,0,0)) { 10739 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */ 10740 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */ 10741 /* If immh is of the form 00xx, the insn is invalid. */ 10742 if (immh < BITS4(0,1,0,0)) return False; 10743 UInt size = 0; 10744 UInt fbits = 0; 10745 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 10746 /* The following holds because immh is never zero. */ 10747 vassert(ok); 10748 /* The following holds because immh >= 0100. */ 10749 vassert(size == X10 || size == X11); 10750 Bool isD = size == X11; 10751 Bool isU = bitU == 1; 10752 Bool isQ = bitQ == 1; 10753 if (isD && !isQ) return False; /* reject .1d case */ 10754 vassert(fbits >= 1 && fbits <= (isD ? 
64 : 32)); 10755 Double scale = two_to_the_minus(fbits); 10756 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 10757 : IRExpr_Const(IRConst_F32( (Float)scale )); 10758 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 10759 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32) 10760 : (isD ? Iop_I64StoF64 : Iop_I32StoF32); 10761 IRType tyF = isD ? Ity_F64 : Ity_F32; 10762 IRType tyI = isD ? Ity_I64 : Ity_I32; 10763 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2); 10764 vassert(nLanes == 2 || nLanes == 4); 10765 for (UInt i = 0; i < nLanes; i++) { 10766 IRTemp src = newTemp(tyI); 10767 IRTemp res = newTemp(tyF); 10768 IRTemp rm = mk_get_IR_rounding_mode(); 10769 assign(src, getQRegLane(nn, i, tyI)); 10770 assign(res, triop(opMUL, mkexpr(rm), 10771 binop(opCVT, mkexpr(rm), mkexpr(src)), 10772 scaleE)); 10773 putQRegLane(dd, i, mkexpr(res)); 10774 } 10775 if (!isQ) { 10776 putQRegLane(dd, 1, mkU64(0)); 10777 } 10778 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10779 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf", 10780 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits); 10781 return True; 10782 } 10783 10784 if (opcode == BITS5(1,1,1,1,1)) { 10785 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */ 10786 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */ 10787 /* If immh is of the form 00xx, the insn is invalid. */ 10788 if (immh < BITS4(0,1,0,0)) return False; 10789 UInt size = 0; 10790 UInt fbits = 0; 10791 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb); 10792 /* The following holds because immh is never zero. */ 10793 vassert(ok); 10794 /* The following holds because immh >= 0100. */ 10795 vassert(size == X10 || size == X11); 10796 Bool isD = size == X11; 10797 Bool isU = bitU == 1; 10798 Bool isQ = bitQ == 1; 10799 if (isD && !isQ) return False; /* reject .1d case */ 10800 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32)); 10801 Double scale = two_to_the_plus(fbits); 10802 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale)) 10803 : IRExpr_Const(IRConst_F32( (Float)scale )); 10804 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32; 10805 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U) 10806 : (isD ? Iop_F64toI64S : Iop_F32toI32S); 10807 IRType tyF = isD ? Ity_F64 : Ity_F32; 10808 IRType tyI = isD ? Ity_I64 : Ity_I32; 10809 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2); 10810 vassert(nLanes == 2 || nLanes == 4); 10811 for (UInt i = 0; i < nLanes; i++) { 10812 IRTemp src = newTemp(tyF); 10813 IRTemp res = newTemp(tyI); 10814 IRTemp rm = newTemp(Ity_I32); 10815 assign(src, getQRegLane(nn, i, tyF)); 10816 assign(rm, mkU32(Irrm_ZERO)); 10817 assign(res, binop(opCVT, mkexpr(rm), 10818 triop(opMUL, mkexpr(rm), 10819 mkexpr(src), scaleE))); 10820 putQRegLane(dd, i, mkexpr(res)); 10821 } 10822 if (!isQ) { 10823 putQRegLane(dd, 1, mkU64(0)); 10824 } 10825 const HChar* arr = nameArr_Q_SZ(bitQ, size); 10826 DIP("%s %s.%s, %s.%s, #%u\n", isU ? 
"fcvtzu" : "fcvtzs", 10827 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits); 10828 return True; 10829 } 10830 10831 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10832 return False; 10833 # undef INSN 10834 } 10835 10836 10837 static 10838 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn) 10839 { 10840 /* 31 30 29 28 23 21 20 15 11 9 4 10841 0 Q U 01110 size 1 m opcode 00 n d 10842 Decode fields: u,opcode 10843 */ 10844 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 10845 if (INSN(31,31) != 0 10846 || INSN(28,24) != BITS5(0,1,1,1,0) 10847 || INSN(21,21) != 1 10848 || INSN(11,10) != BITS2(0,0)) { 10849 return False; 10850 } 10851 UInt bitQ = INSN(30,30); 10852 UInt bitU = INSN(29,29); 10853 UInt size = INSN(23,22); 10854 UInt mm = INSN(20,16); 10855 UInt opcode = INSN(15,12); 10856 UInt nn = INSN(9,5); 10857 UInt dd = INSN(4,0); 10858 vassert(size < 4); 10859 Bool is2 = bitQ == 1; 10860 10861 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) { 10862 /* -------- 0,0000 SADDL{2} -------- */ 10863 /* -------- 1,0000 UADDL{2} -------- */ 10864 /* -------- 0,0010 SSUBL{2} -------- */ 10865 /* -------- 1,0010 USUBL{2} -------- */ 10866 /* Widens, and size refers to the narrowed lanes. */ 10867 if (size == X11) return False; 10868 vassert(size <= 2); 10869 Bool isU = bitU == 1; 10870 Bool isADD = opcode == BITS4(0,0,0,0); 10871 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn)); 10872 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10873 IRTemp res = newTempV128(); 10874 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10875 mkexpr(argL), mkexpr(argR))); 10876 putQReg128(dd, mkexpr(res)); 10877 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10878 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10879 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl") 10880 : (isU ? "usubl" : "ssubl"); 10881 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10882 nameQReg128(dd), arrWide, 10883 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 10884 return True; 10885 } 10886 10887 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) { 10888 /* -------- 0,0001 SADDW{2} -------- */ 10889 /* -------- 1,0001 UADDW{2} -------- */ 10890 /* -------- 0,0011 SSUBW{2} -------- */ 10891 /* -------- 1,0011 USUBW{2} -------- */ 10892 /* Widens, and size refers to the narrowed lanes. */ 10893 if (size == X11) return False; 10894 vassert(size <= 2); 10895 Bool isU = bitU == 1; 10896 Bool isADD = opcode == BITS4(0,0,0,1); 10897 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10898 IRTemp res = newTempV128(); 10899 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10900 getQReg128(nn), mkexpr(argR))); 10901 putQReg128(dd, mkexpr(res)); 10902 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10903 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10904 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw") 10905 : (isU ? "usubw" : "ssubw"); 10906 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10907 nameQReg128(dd), arrWide, 10908 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow); 10909 return True; 10910 } 10911 10912 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) { 10913 /* -------- 0,0100 ADDHN{2} -------- */ 10914 /* -------- 1,0100 RADDHN{2} -------- */ 10915 /* -------- 0,0110 SUBHN{2} -------- */ 10916 /* -------- 1,0110 RSUBHN{2} -------- */ 10917 /* Narrows, and size refers to the narrowed lanes. 
*/ 10918 if (size == X11) return False; 10919 vassert(size <= 2); 10920 const UInt shift[3] = { 8, 16, 32 }; 10921 Bool isADD = opcode == BITS4(0,1,0,0); 10922 Bool isR = bitU == 1; 10923 /* Combined elements in wide lanes */ 10924 IRTemp wide = newTempV128(); 10925 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1), 10926 getQReg128(nn), getQReg128(mm)); 10927 if (isR) { 10928 wideE = binop(mkVecADD(size+1), 10929 wideE, 10930 mkexpr(math_VEC_DUP_IMM(size+1, 10931 1ULL << (shift[size]-1)))); 10932 } 10933 assign(wide, wideE); 10934 /* Top halves of elements, still in wide lanes */ 10935 IRTemp shrd = newTempV128(); 10936 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size]))); 10937 /* Elements now compacted into lower 64 bits */ 10938 IRTemp new64 = newTempV128(); 10939 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd))); 10940 putLO64andZUorPutHI64(is2, dd, new64); 10941 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10942 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10943 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn") 10944 : (isR ? "rsubhn" : "subhn"); 10945 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10946 nameQReg128(dd), arrNarrow, 10947 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide); 10948 return True; 10949 } 10950 10951 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) { 10952 /* -------- 0,0101 SABAL{2} -------- */ 10953 /* -------- 1,0101 UABAL{2} -------- */ 10954 /* -------- 0,0111 SABDL{2} -------- */ 10955 /* -------- 1,0111 UABDL{2} -------- */ 10956 /* Widens, and size refers to the narrowed lanes. */ 10957 if (size == X11) return False; 10958 vassert(size <= 2); 10959 Bool isU = bitU == 1; 10960 Bool isACC = opcode == BITS4(0,1,0,1); 10961 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn)); 10962 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm)); 10963 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR)); 10964 IRTemp res = newTempV128(); 10965 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd)) 10966 : mkexpr(abd)); 10967 putQReg128(dd, mkexpr(res)); 10968 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 10969 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 10970 const HChar* nm = isACC ? (isU ? "uabal" : "sabal") 10971 : (isU ? "uabdl" : "sabdl"); 10972 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 10973 nameQReg128(dd), arrWide, 10974 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 10975 return True; 10976 } 10977 10978 if (opcode == BITS4(1,1,0,0) 10979 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) { 10980 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks) 10981 /* -------- 1,1100 UMULL{2} -------- */ // 0 10982 /* -------- 0,1000 SMLAL{2} -------- */ // 1 10983 /* -------- 1,1000 UMLAL{2} -------- */ // 1 10984 /* -------- 0,1010 SMLSL{2} -------- */ // 2 10985 /* -------- 1,1010 UMLSL{2} -------- */ // 2 10986 /* Widens, and size refers to the narrowed lanes. 
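ks selects the accumulation form: 0 = plain widening multiply (MULL),
1 = multiply-accumulate (MLAL), 2 = multiply-subtract (MLSL).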
*/ 10987 UInt ks = 3; 10988 switch (opcode) { 10989 case BITS4(1,1,0,0): ks = 0; break; 10990 case BITS4(1,0,0,0): ks = 1; break; 10991 case BITS4(1,0,1,0): ks = 2; break; 10992 default: vassert(0); 10993 } 10994 vassert(ks >= 0 && ks <= 2); 10995 if (size == X11) return False; 10996 vassert(size <= 2); 10997 Bool isU = bitU == 1; 10998 IRTemp vecN = newTempV128(); 10999 IRTemp vecM = newTempV128(); 11000 IRTemp vecD = newTempV128(); 11001 assign(vecN, getQReg128(nn)); 11002 assign(vecM, getQReg128(mm)); 11003 assign(vecD, getQReg128(dd)); 11004 IRTemp res = IRTemp_INVALID; 11005 math_MULL_ACC(&res, is2, isU, size, "mas"[ks], 11006 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 11007 putQReg128(dd, mkexpr(res)); 11008 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11009 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 11010 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl"); 11011 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "", 11012 nameQReg128(dd), arrWide, 11013 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 11014 return True; 11015 } 11016 11017 if (bitU == 0 11018 && (opcode == BITS4(1,1,0,1) 11019 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) { 11020 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks) 11021 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1 11022 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2 11023 /* Widens, and size refers to the narrowed lanes. */ 11024 UInt ks = 3; 11025 switch (opcode) { 11026 case BITS4(1,1,0,1): ks = 0; break; 11027 case BITS4(1,0,0,1): ks = 1; break; 11028 case BITS4(1,0,1,1): ks = 2; break; 11029 default: vassert(0); 11030 } 11031 vassert(ks >= 0 && ks <= 2); 11032 if (size == X00 || size == X11) return False; 11033 vassert(size <= 2); 11034 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n; 11035 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 11036 newTempsV128_3(&vecN, &vecM, &vecD); 11037 assign(vecN, getQReg128(nn)); 11038 assign(vecM, getQReg128(mm)); 11039 assign(vecD, getQReg128(dd)); 11040 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 11041 is2, size, "mas"[ks], 11042 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 11043 putQReg128(dd, mkexpr(res)); 11044 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 11045 updateQCFLAGwithDifference(sat1q, sat1n); 11046 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 11047 updateQCFLAGwithDifference(sat2q, sat2n); 11048 } 11049 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11050 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 11051 const HChar* nm = ks == 0 ? "sqdmull" 11052 : (ks == 1 ? "sqdmlal" : "sqdmlsl"); 11053 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "", 11054 nameQReg128(dd), arrWide, 11055 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow); 11056 return True; 11057 } 11058 11059 if (bitU == 0 && opcode == BITS4(1,1,1,0)) { 11060 /* -------- 0,1110 PMULL{2} -------- */ 11061 /* Widens, and size refers to the narrowed lanes. */ 11062 if (size != X00) return False; 11063 IRTemp res 11064 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8, 11065 getQReg128(nn), getQReg128(mm)); 11066 putQReg128(dd, mkexpr(res)); 11067 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11068 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 11069 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? 
"2" : "", 11070 nameQReg128(dd), arrNarrow, 11071 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide); 11072 return True; 11073 } 11074 11075 return False; 11076 # undef INSN 11077 } 11078 11079 11080 static 11081 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn) 11082 { 11083 /* 31 30 29 28 23 21 20 15 10 9 4 11084 0 Q U 01110 size 1 m opcode 1 n d 11085 Decode fields: u,size,opcode 11086 */ 11087 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 11088 if (INSN(31,31) != 0 11089 || INSN(28,24) != BITS5(0,1,1,1,0) 11090 || INSN(21,21) != 1 11091 || INSN(10,10) != 1) { 11092 return False; 11093 } 11094 UInt bitQ = INSN(30,30); 11095 UInt bitU = INSN(29,29); 11096 UInt size = INSN(23,22); 11097 UInt mm = INSN(20,16); 11098 UInt opcode = INSN(15,11); 11099 UInt nn = INSN(9,5); 11100 UInt dd = INSN(4,0); 11101 vassert(size < 4); 11102 11103 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) { 11104 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */ 11105 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */ 11106 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */ 11107 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */ 11108 if (size == X11) return False; 11109 Bool isADD = opcode == BITS5(0,0,0,0,0); 11110 Bool isU = bitU == 1; 11111 /* Widen both args out, do the math, narrow to final result. */ 11112 IRTemp argL = newTempV128(); 11113 IRTemp argLhi = IRTemp_INVALID; 11114 IRTemp argLlo = IRTemp_INVALID; 11115 IRTemp argR = newTempV128(); 11116 IRTemp argRhi = IRTemp_INVALID; 11117 IRTemp argRlo = IRTemp_INVALID; 11118 IRTemp resHi = newTempV128(); 11119 IRTemp resLo = newTempV128(); 11120 IRTemp res = IRTemp_INVALID; 11121 assign(argL, getQReg128(nn)); 11122 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL)); 11123 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL)); 11124 assign(argR, getQReg128(mm)); 11125 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR)); 11126 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR)); 11127 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1); 11128 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1); 11129 assign(resHi, binop(opSxR, 11130 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)), 11131 mkU8(1))); 11132 assign(resLo, binop(opSxR, 11133 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)), 11134 mkU8(1))); 11135 res = math_NARROW_LANES ( resHi, resLo, size ); 11136 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11137 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd") 11138 : (isU ? "uhsub" : "shsub"); 11139 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11140 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11141 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11142 return True; 11143 } 11144 11145 if (opcode == BITS5(0,0,0,1,0)) { 11146 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */ 11147 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */ 11148 if (bitQ == 0 && size == X11) return False; // implied 1d case 11149 Bool isU = bitU == 1; 11150 IRTemp argL = newTempV128(); 11151 IRTemp argR = newTempV128(); 11152 assign(argL, getQReg128(nn)); 11153 assign(argR, getQReg128(mm)); 11154 IRTemp res = math_RHADD(size, isU, argL, argR); 11155 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11156 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11157 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? 
"urhadd" : "srhadd", 11158 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11159 return True; 11160 } 11161 11162 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) { 11163 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */ 11164 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */ 11165 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */ 11166 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */ 11167 if (bitQ == 0 && size == X11) return False; // implied 1d case 11168 Bool isADD = opcode == BITS5(0,0,0,0,1); 11169 Bool isU = bitU == 1; 11170 IROp qop = Iop_INVALID; 11171 IROp nop = Iop_INVALID; 11172 if (isADD) { 11173 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size); 11174 nop = mkVecADD(size); 11175 } else { 11176 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size); 11177 nop = mkVecSUB(size); 11178 } 11179 IRTemp argL = newTempV128(); 11180 IRTemp argR = newTempV128(); 11181 IRTemp qres = newTempV128(); 11182 IRTemp nres = newTempV128(); 11183 assign(argL, getQReg128(nn)); 11184 assign(argR, getQReg128(mm)); 11185 assign(qres, math_MAYBE_ZERO_HI64_fromE( 11186 bitQ, binop(qop, mkexpr(argL), mkexpr(argR)))); 11187 assign(nres, math_MAYBE_ZERO_HI64_fromE( 11188 bitQ, binop(nop, mkexpr(argL), mkexpr(argR)))); 11189 putQReg128(dd, mkexpr(qres)); 11190 updateQCFLAGwithDifference(qres, nres); 11191 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd") 11192 : (isU ? "uqsub" : "sqsub"); 11193 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11194 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11195 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11196 return True; 11197 } 11198 11199 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) { 11200 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */ 11201 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */ 11202 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */ 11203 /* -------- 0,10,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */ 11204 Bool isORx = (size & 2) == 2; 11205 Bool invert = (size & 1) == 1; 11206 IRTemp res = newTempV128(); 11207 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128, 11208 getQReg128(nn), 11209 invert ? unop(Iop_NotV128, getQReg128(mm)) 11210 : getQReg128(mm))); 11211 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11212 const HChar* names[4] = { "and", "bic", "orr", "orn" }; 11213 const HChar* ar = bitQ == 1 ? 
"16b" : "8b"; 11214 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)], 11215 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar); 11216 return True; 11217 } 11218 11219 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) { 11220 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */ 11221 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */ 11222 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */ 11223 /* -------- 1,10,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */ 11224 IRTemp argD = newTempV128(); 11225 IRTemp argN = newTempV128(); 11226 IRTemp argM = newTempV128(); 11227 assign(argD, getQReg128(dd)); 11228 assign(argN, getQReg128(nn)); 11229 assign(argM, getQReg128(mm)); 11230 const IROp opXOR = Iop_XorV128; 11231 const IROp opAND = Iop_AndV128; 11232 const IROp opNOT = Iop_NotV128; 11233 IRTemp res = newTempV128(); 11234 switch (size) { 11235 case BITS2(0,0): /* EOR */ 11236 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN))); 11237 break; 11238 case BITS2(0,1): /* BSL */ 11239 assign(res, binop(opXOR, mkexpr(argM), 11240 binop(opAND, 11241 binop(opXOR, mkexpr(argM), mkexpr(argN)), 11242 mkexpr(argD)))); 11243 break; 11244 case BITS2(1,0): /* BIT */ 11245 assign(res, binop(opXOR, mkexpr(argD), 11246 binop(opAND, 11247 binop(opXOR, mkexpr(argD), mkexpr(argN)), 11248 mkexpr(argM)))); 11249 break; 11250 case BITS2(1,1): /* BIF */ 11251 assign(res, binop(opXOR, mkexpr(argD), 11252 binop(opAND, 11253 binop(opXOR, mkexpr(argD), mkexpr(argN)), 11254 unop(opNOT, mkexpr(argM))))); 11255 break; 11256 default: 11257 vassert(0); 11258 } 11259 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11260 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" }; 11261 const HChar* arr = bitQ == 1 ? "16b" : "8b"; 11262 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size], 11263 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11264 return True; 11265 } 11266 11267 if (opcode == BITS5(0,0,1,1,0)) { 11268 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s 11269 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u 11270 if (bitQ == 0 && size == X11) return False; // implied 1d case 11271 Bool isGT = bitU == 0; 11272 IRExpr* argL = getQReg128(nn); 11273 IRExpr* argR = getQReg128(mm); 11274 IRTemp res = newTempV128(); 11275 assign(res, 11276 isGT ? binop(mkVecCMPGTS(size), argL, argR) 11277 : binop(mkVecCMPGTU(size), argL, argR)); 11278 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11279 const HChar* nm = isGT ? "cmgt" : "cmhi"; 11280 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11281 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11282 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11283 return True; 11284 } 11285 11286 if (opcode == BITS5(0,0,1,1,1)) { 11287 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s 11288 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u 11289 if (bitQ == 0 && size == X11) return False; // implied 1d case 11290 Bool isGE = bitU == 0; 11291 IRExpr* argL = getQReg128(nn); 11292 IRExpr* argR = getQReg128(mm); 11293 IRTemp res = newTempV128(); 11294 assign(res, 11295 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL)) 11296 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL))); 11297 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11298 const HChar* nm = isGE ? 
"cmge" : "cmhs"; 11299 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11300 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11301 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11302 return True; 11303 } 11304 11305 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) { 11306 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */ 11307 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */ 11308 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */ 11309 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */ 11310 if (bitQ == 0 && size == X11) return False; // implied 1d case 11311 Bool isU = bitU == 1; 11312 Bool isR = opcode == BITS5(0,1,0,1,0); 11313 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size)) 11314 : (isU ? mkVecSHU(size) : mkVecSHS(size)); 11315 IRTemp res = newTempV128(); 11316 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 11317 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11318 const HChar* nm = isR ? (isU ? "urshl" : "srshl") 11319 : (isU ? "ushl" : "sshl"); 11320 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11321 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11322 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11323 return True; 11324 } 11325 11326 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) { 11327 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */ 11328 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */ 11329 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */ 11330 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */ 11331 if (bitQ == 0 && size == X11) return False; // implied 1d case 11332 Bool isU = bitU == 1; 11333 Bool isR = opcode == BITS5(0,1,0,1,1); 11334 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size)) 11335 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size)); 11336 /* This is a bit tricky. If we're only interested in the lowest 64 bits 11337 of the result (viz, bitQ == 0), then we must adjust the operands to 11338 ensure that the upper part of the result, that we don't care about, 11339 doesn't pollute the returned Q value. To do this, zero out the upper 11340 operand halves beforehand. This works because it means, for the 11341 lanes we don't care about, we are shifting zero by zero, which can 11342 never saturate. */ 11343 IRTemp res256 = newTemp(Ity_V256); 11344 IRTemp resSH = newTempV128(); 11345 IRTemp resQ = newTempV128(); 11346 IRTemp zero = newTempV128(); 11347 assign(res256, binop(op, 11348 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)), 11349 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm)))); 11350 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256))); 11351 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256))); 11352 assign(zero, mkV128(0x0000)); 11353 putQReg128(dd, mkexpr(resSH)); 11354 updateQCFLAGwithDifference(resQ, zero); 11355 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl") 11356 : (isU ? 
"uqshl" : "sqshl"); 11357 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11358 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11359 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11360 return True; 11361 } 11362 11363 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) { 11364 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */ 11365 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */ 11366 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */ 11367 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */ 11368 if (bitQ == 0 && size == X11) return False; // implied 1d case 11369 Bool isU = bitU == 1; 11370 Bool isMAX = (opcode & 1) == 0; 11371 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size)) 11372 : (isU ? mkVecMINU(size) : mkVecMINS(size)); 11373 IRTemp t = newTempV128(); 11374 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 11375 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t)); 11376 const HChar* nm = isMAX ? (isU ? "umax" : "smax") 11377 : (isU ? "umin" : "smin"); 11378 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11379 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11380 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11381 return True; 11382 } 11383 11384 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) { 11385 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */ 11386 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */ 11387 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */ 11388 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */ 11389 if (size == X11) return False; // 1d/2d cases not allowed 11390 Bool isU = bitU == 1; 11391 Bool isACC = opcode == BITS5(0,1,1,1,1); 11392 vassert(size <= 2); 11393 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm)); 11394 IRTemp t2 = newTempV128(); 11395 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd)) 11396 : mkexpr(t1)); 11397 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11398 const HChar* nm = isACC ? (isU ? "uaba" : "saba") 11399 : (isU ? "uabd" : "sabd"); 11400 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11401 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11402 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11403 return True; 11404 } 11405 11406 if (opcode == BITS5(1,0,0,0,0)) { 11407 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */ 11408 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */ 11409 if (bitQ == 0 && size == X11) return False; // implied 1d case 11410 Bool isSUB = bitU == 1; 11411 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size); 11412 IRTemp t = newTempV128(); 11413 assign(t, binop(op, getQReg128(nn), getQReg128(mm))); 11414 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t)); 11415 const HChar* nm = isSUB ? "sub" : "add"; 11416 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11417 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11418 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11419 return True; 11420 } 11421 11422 if (opcode == BITS5(1,0,0,0,1)) { 11423 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0 11424 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // == 11425 if (bitQ == 0 && size == X11) return False; // implied 1d case 11426 Bool isEQ = bitU == 1; 11427 IRExpr* argL = getQReg128(nn); 11428 IRExpr* argR = getQReg128(mm); 11429 IRTemp res = newTempV128(); 11430 assign(res, 11431 isEQ ? 
binop(mkVecCMPEQ(size), argL, argR) 11432 : unop(Iop_NotV128, binop(mkVecCMPEQ(size), 11433 binop(Iop_AndV128, argL, argR), 11434 mkV128(0x0000)))); 11435 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11436 const HChar* nm = isEQ ? "cmeq" : "cmtst"; 11437 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11438 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11439 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11440 return True; 11441 } 11442 11443 if (opcode == BITS5(1,0,0,1,0)) { 11444 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */ 11445 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */ 11446 if (bitQ == 0 && size == X11) return False; // implied 1d case 11447 Bool isMLS = bitU == 1; 11448 IROp opMUL = mkVecMUL(size); 11449 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size); 11450 IRTemp res = newTempV128(); 11451 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) { 11452 assign(res, binop(opADDSUB, 11453 getQReg128(dd), 11454 binop(opMUL, getQReg128(nn), getQReg128(mm)))); 11455 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11456 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11457 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla", 11458 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11459 return True; 11460 } 11461 return False; 11462 } 11463 11464 if (opcode == BITS5(1,0,0,1,1)) { 11465 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */ 11466 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */ 11467 if (bitQ == 0 && size == X11) return False; // implied 1d case 11468 Bool isPMUL = bitU == 1; 11469 const IROp opsPMUL[4] 11470 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID }; 11471 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size); 11472 IRTemp res = newTempV128(); 11473 if (opMUL != Iop_INVALID) { 11474 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm))); 11475 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11476 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11477 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul", 11478 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11479 return True; 11480 } 11481 return False; 11482 } 11483 11484 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) { 11485 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */ 11486 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */ 11487 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */ 11488 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */ 11489 if (size == X11) return False; 11490 Bool isU = bitU == 1; 11491 Bool isMAX = opcode == BITS5(1,0,1,0,0); 11492 IRTemp vN = newTempV128(); 11493 IRTemp vM = newTempV128(); 11494 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size)) 11495 : (isU ? mkVecMINU(size) : mkVecMINS(size)); 11496 assign(vN, getQReg128(nn)); 11497 assign(vM, getQReg128(mm)); 11498 IRTemp res128 = newTempV128(); 11499 assign(res128, 11500 binop(op, 11501 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)), 11502 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN)))); 11503 /* In the half-width case, use CatEL32x4 to extract the half-width 11504 result from the full-width result. */ 11505 IRExpr* res 11506 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128, 11507 binop(Iop_CatEvenLanes32x4, mkexpr(res128), 11508 mkexpr(res128))) 11509 : mkexpr(res128); 11510 putQReg128(dd, res); 11511 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11512 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp") 11513 : (isU ? 
"uminp" : "sminp"); 11514 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11515 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11516 return True; 11517 } 11518 11519 if (opcode == BITS5(1,0,1,1,0)) { 11520 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */ 11521 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */ 11522 if (size == X00 || size == X11) return False; 11523 Bool isR = bitU == 1; 11524 IRTemp res, sat1q, sat1n, vN, vM; 11525 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 11526 newTempsV128_2(&vN, &vM); 11527 assign(vN, getQReg128(nn)); 11528 assign(vM, getQReg128(mm)); 11529 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 11530 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11531 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID; 11532 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 11533 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11534 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 11535 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 11536 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11537 return True; 11538 } 11539 11540 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) { 11541 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */ 11542 if (bitQ == 0 && size == X11) return False; // implied 1d case 11543 IRTemp vN = newTempV128(); 11544 IRTemp vM = newTempV128(); 11545 assign(vN, getQReg128(nn)); 11546 assign(vM, getQReg128(mm)); 11547 IRTemp res128 = newTempV128(); 11548 assign(res128, 11549 binop(mkVecADD(size), 11550 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)), 11551 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN)))); 11552 /* In the half-width case, use CatEL32x4 to extract the half-width 11553 result from the full-width result. */ 11554 IRExpr* res 11555 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128, 11556 binop(Iop_CatEvenLanes32x4, mkexpr(res128), 11557 mkexpr(res128))) 11558 : mkexpr(res128); 11559 putQReg128(dd, res); 11560 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11561 DIP("addp %s.%s, %s.%s, %s.%s\n", 11562 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11563 return True; 11564 } 11565 11566 if (bitU == 0 11567 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) { 11568 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11569 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11570 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11571 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11572 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 11573 Bool isD = (size & 1) == 1; 11574 if (bitQ == 0 && isD) return False; // implied 1d case 11575 Bool isMIN = (size & 2) == 2; 11576 Bool isNM = opcode == BITS5(1,1,0,0,0); 11577 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10); 11578 IRTemp res = newTempV128(); 11579 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm))); 11580 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11581 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11582 DIP("%s%s %s.%s, %s.%s, %s.%s\n", 11583 isMIN ? "fmin" : "fmax", isNM ? 
"nm" : "", 11584 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11585 return True; 11586 } 11587 11588 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) { 11589 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11590 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11591 Bool isD = (size & 1) == 1; 11592 Bool isSUB = (size & 2) == 2; 11593 if (bitQ == 0 && isD) return False; // implied 1d case 11594 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 11595 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 11596 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 11597 IRTemp rm = mk_get_IR_rounding_mode(); 11598 IRTemp t1 = newTempV128(); 11599 IRTemp t2 = newTempV128(); 11600 // FIXME: double rounding; use FMA primops instead 11601 assign(t1, triop(opMUL, 11602 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11603 assign(t2, triop(isSUB ? opSUB : opADD, 11604 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 11605 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11606 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11607 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla", 11608 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11609 return True; 11610 } 11611 11612 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) { 11613 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11614 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11615 Bool isD = (size & 1) == 1; 11616 Bool isSUB = (size & 2) == 2; 11617 if (bitQ == 0 && isD) return False; // implied 1d case 11618 const IROp ops[4] 11619 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 }; 11620 IROp op = ops[size]; 11621 IRTemp rm = mk_get_IR_rounding_mode(); 11622 IRTemp t1 = newTempV128(); 11623 IRTemp t2 = newTempV128(); 11624 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11625 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1)); 11626 putQReg128(dd, mkexpr(t2)); 11627 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11628 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd", 11629 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11630 return True; 11631 } 11632 11633 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) { 11634 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11635 Bool isD = (size & 1) == 1; 11636 if (bitQ == 0 && isD) return False; // implied 1d case 11637 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 11638 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 11639 IRTemp rm = mk_get_IR_rounding_mode(); 11640 IRTemp t1 = newTempV128(); 11641 IRTemp t2 = newTempV128(); 11642 // FIXME: use Abd primop instead? 11643 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11644 assign(t2, unop(opABS, mkexpr(t1))); 11645 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 11646 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11647 DIP("fabd %s.%s, %s.%s, %s.%s\n", 11648 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11649 return True; 11650 } 11651 11652 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) { 11653 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11654 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11655 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 
11656 Bool isD = (size & 1) == 1; 11657 Bool isMULX = bitU == 0; 11658 if (bitQ == 0 && isD) return False; // implied 1d case 11659 IRTemp rm = mk_get_IR_rounding_mode(); 11660 IRTemp t1 = newTempV128(); 11661 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4, 11662 mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11663 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11664 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11665 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul", 11666 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11667 return True; 11668 } 11669 11670 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) { 11671 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11672 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11673 Bool isD = (size & 1) == 1; 11674 if (bitQ == 0 && isD) return False; // implied 1d case 11675 Bool isGE = bitU == 1; 11676 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4) 11677 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4); 11678 IRTemp t1 = newTempV128(); 11679 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd 11680 : binop(opCMP, getQReg128(nn), getQReg128(mm))); 11681 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11682 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11683 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq", 11684 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11685 return True; 11686 } 11687 11688 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) { 11689 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11690 Bool isD = (size & 1) == 1; 11691 if (bitQ == 0 && isD) return False; // implied 1d case 11692 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 11693 IRTemp t1 = newTempV128(); 11694 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd 11695 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11696 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11697 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt", 11698 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11699 return True; 11700 } 11701 11702 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) { 11703 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11704 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11705 Bool isD = (size & 1) == 1; 11706 Bool isGT = (size & 2) == 2; 11707 if (bitQ == 0 && isD) return False; // implied 1d case 11708 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4) 11709 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4); 11710 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4; 11711 IRTemp t1 = newTempV128(); 11712 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)), 11713 unop(opABS, getQReg128(nn)))); // swapd 11714 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1)); 11715 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11716 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? 
"facgt" : "facge", 11717 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11718 return True; 11719 } 11720 11721 if (bitU == 1 11722 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) { 11723 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11724 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11725 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11726 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11727 /* FMAXNM, FMINNM: FIXME -- KLUDGED */ 11728 Bool isD = (size & 1) == 1; 11729 if (bitQ == 0 && isD) return False; // implied 1d case 11730 Bool isMIN = (size & 2) == 2; 11731 Bool isNM = opcode == BITS5(1,1,0,0,0); 11732 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2); 11733 IRTemp srcN = newTempV128(); 11734 IRTemp srcM = newTempV128(); 11735 IRTemp preL = IRTemp_INVALID; 11736 IRTemp preR = IRTemp_INVALID; 11737 assign(srcN, getQReg128(nn)); 11738 assign(srcM, getQReg128(mm)); 11739 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, 11740 srcM, srcN, isD, bitQ); 11741 putQReg128( 11742 dd, math_MAYBE_ZERO_HI64_fromE( 11743 bitQ, 11744 binop(opMXX, mkexpr(preL), mkexpr(preR)))); 11745 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11746 DIP("%s%sp %s.%s, %s.%s, %s.%s\n", 11747 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", 11748 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11749 return True; 11750 } 11751 11752 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) { 11753 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11754 Bool isD = size == X01; 11755 if (bitQ == 0 && isD) return False; // implied 1d case 11756 IRTemp srcN = newTempV128(); 11757 IRTemp srcM = newTempV128(); 11758 IRTemp preL = IRTemp_INVALID; 11759 IRTemp preR = IRTemp_INVALID; 11760 assign(srcN, getQReg128(nn)); 11761 assign(srcM, getQReg128(mm)); 11762 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR, 11763 srcM, srcN, isD, bitQ); 11764 putQReg128( 11765 dd, math_MAYBE_ZERO_HI64_fromE( 11766 bitQ, 11767 triop(mkVecADDF(isD ? 3 : 2), 11768 mkexpr(mk_get_IR_rounding_mode()), 11769 mkexpr(preL), mkexpr(preR)))); 11770 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11771 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp", 11772 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11773 return True; 11774 } 11775 11776 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) { 11777 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11778 Bool isD = (size & 1) == 1; 11779 if (bitQ == 0 && isD) return False; // implied 1d case 11780 vassert(size <= 1); 11781 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 }; 11782 IROp op = ops[size]; 11783 IRTemp rm = mk_get_IR_rounding_mode(); 11784 IRTemp t1 = newTempV128(); 11785 IRTemp t2 = newTempV128(); 11786 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm))); 11787 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1)); 11788 putQReg128(dd, mkexpr(t2)); 11789 const HChar* arr = bitQ == 0 ? "2s" : (isD ? 
"2d" : "4s"); 11790 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv", 11791 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11792 return True; 11793 } 11794 11795 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) { 11796 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11797 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */ 11798 Bool isSQRT = (size & 2) == 2; 11799 Bool isD = (size & 1) == 1; 11800 if (bitQ == 0 && isD) return False; // implied 1d case 11801 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4) 11802 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4); 11803 IRTemp res = newTempV128(); 11804 assign(res, binop(op, getQReg128(nn), getQReg128(mm))); 11805 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11806 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 11807 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps", 11808 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 11809 return True; 11810 } 11811 11812 return False; 11813 # undef INSN 11814 } 11815 11816 11817 static 11818 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) 11819 { 11820 /* 31 30 29 28 23 21 16 11 9 4 11821 0 Q U 01110 size 10000 opcode 10 n d 11822 Decode fields: U,size,opcode 11823 */ 11824 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 11825 if (INSN(31,31) != 0 11826 || INSN(28,24) != BITS5(0,1,1,1,0) 11827 || INSN(21,17) != BITS5(1,0,0,0,0) 11828 || INSN(11,10) != BITS2(1,0)) { 11829 return False; 11830 } 11831 UInt bitQ = INSN(30,30); 11832 UInt bitU = INSN(29,29); 11833 UInt size = INSN(23,22); 11834 UInt opcode = INSN(16,12); 11835 UInt nn = INSN(9,5); 11836 UInt dd = INSN(4,0); 11837 vassert(size < 4); 11838 11839 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) { 11840 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */ 11841 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */ 11842 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */ 11843 const IROp iops[3] = { Iop_Reverse8sIn64_x2, 11844 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 }; 11845 vassert(size <= 2); 11846 IRTemp res = newTempV128(); 11847 assign(res, unop(iops[size], getQReg128(nn))); 11848 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11849 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11850 DIP("%s %s.%s, %s.%s\n", "rev64", 11851 nameQReg128(dd), arr, nameQReg128(nn), arr); 11852 return True; 11853 } 11854 11855 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) { 11856 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */ 11857 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */ 11858 Bool isH = size == X01; 11859 IRTemp res = newTempV128(); 11860 IROp iop = isH ? 
Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4; 11861 assign(res, unop(iop, getQReg128(nn))); 11862 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11863 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11864 DIP("%s %s.%s, %s.%s\n", "rev32", 11865 nameQReg128(dd), arr, nameQReg128(nn), arr); 11866 return True; 11867 } 11868 11869 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) { 11870 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */ 11871 IRTemp res = newTempV128(); 11872 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn))); 11873 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11874 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11875 DIP("%s %s.%s, %s.%s\n", "rev16", 11876 nameQReg128(dd), arr, nameQReg128(nn), arr); 11877 return True; 11878 } 11879 11880 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) { 11881 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */ 11882 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */ 11883 /* -------- 0,xx,00110: SADALP std6_std6 -------- */ 11884 /* -------- 1,xx,00110: UADALP std6_std6 -------- */ 11885 /* Widens, and size refers to the narrow size. */ 11886 if (size == X11) return False; // no 1d or 2d cases 11887 Bool isU = bitU == 1; 11888 Bool isACC = opcode == BITS5(0,0,1,1,0); 11889 IRTemp src = newTempV128(); 11890 IRTemp sum = newTempV128(); 11891 IRTemp res = newTempV128(); 11892 assign(src, getQReg128(nn)); 11893 assign(sum, 11894 binop(mkVecADD(size+1), 11895 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES( 11896 isU, True/*fromOdd*/, size, mkexpr(src))), 11897 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES( 11898 isU, False/*!fromOdd*/, size, mkexpr(src))))); 11899 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd)) 11900 : mkexpr(sum)); 11901 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11902 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 11903 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1); 11904 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp") 11905 : (isU ? "uaddlp" : "saddlp"), 11906 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow); 11907 return True; 11908 } 11909 11910 if (opcode == BITS5(0,0,0,1,1)) { 11911 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */ 11912 /* -------- 1,xx,00011: USQADD std7_std7 -------- */ 11913 if (bitQ == 0 && size == X11) return False; // implied 1d case 11914 Bool isUSQADD = bitU == 1; 11915 /* This is switched (in the US vs SU sense) deliberately. 11916 SUQADD corresponds to the ExtUSsatSS variants and 11917 USQADD corresponds to the ExtSUsatUU variants. 11918 See libvex_ir for more details. */ 11919 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size) 11920 : mkVecQADDEXTUSSATSS(size); 11921 IROp nop = mkVecADD(size); 11922 IRTemp argL = newTempV128(); 11923 IRTemp argR = newTempV128(); 11924 IRTemp qres = newTempV128(); 11925 IRTemp nres = newTempV128(); 11926 /* Because the two arguments to the addition are implicitly 11927 extended differently (one signedly, the other unsignedly) it is 11928 important to present them to the primop in the correct order. */ 11929 assign(argL, getQReg128(nn)); 11930 assign(argR, getQReg128(dd)); 11931 assign(qres, math_MAYBE_ZERO_HI64_fromE( 11932 bitQ, binop(qop, mkexpr(argL), mkexpr(argR)))); 11933 assign(nres, math_MAYBE_ZERO_HI64_fromE( 11934 bitQ, binop(nop, mkexpr(argL), mkexpr(argR)))); 11935 putQReg128(dd, mkexpr(qres)); 11936 updateQCFLAGwithDifference(qres, nres); 11937 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11938 DIP("%s %s.%s, %s.%s\n", isUSQADD ? 
"usqadd" : "suqadd", 11939 nameQReg128(dd), arr, nameQReg128(nn), arr); 11940 return True; 11941 } 11942 11943 if (opcode == BITS5(0,0,1,0,0)) { 11944 /* -------- 0,xx,00100: CLS std6_std6 -------- */ 11945 /* -------- 1,xx,00100: CLZ std6_std6 -------- */ 11946 if (size == X11) return False; // no 1d or 2d cases 11947 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 }; 11948 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 }; 11949 Bool isCLZ = bitU == 1; 11950 IRTemp res = newTempV128(); 11951 vassert(size <= 2); 11952 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn))); 11953 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11954 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11955 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls", 11956 nameQReg128(dd), arr, nameQReg128(nn), arr); 11957 return True; 11958 } 11959 11960 if (size == X00 && opcode == BITS5(0,0,1,0,1)) { 11961 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */ 11962 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */ 11963 IRTemp res = newTempV128(); 11964 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn))); 11965 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11966 const HChar* arr = nameArr_Q_SZ(bitQ, 0); 11967 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not", 11968 nameQReg128(dd), arr, nameQReg128(nn), arr); 11969 return True; 11970 } 11971 11972 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) { 11973 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */ 11974 IRTemp res = newTempV128(); 11975 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn))); 11976 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 11977 const HChar* arr = nameArr_Q_SZ(bitQ, 0); 11978 DIP("%s %s.%s, %s.%s\n", "rbit", 11979 nameQReg128(dd), arr, nameQReg128(nn), arr); 11980 return True; 11981 } 11982 11983 if (opcode == BITS5(0,0,1,1,1)) { 11984 /* -------- 0,xx,00111 SQABS std7_std7 -------- */ 11985 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */ 11986 if (bitQ == 0 && size == X11) return False; // implied 1d case 11987 Bool isNEG = bitU == 1; 11988 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID; 11989 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW, 11990 getQReg128(nn), size ); 11991 IRTemp qres = newTempV128(), nres = newTempV128(); 11992 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW)); 11993 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW)); 11994 putQReg128(dd, mkexpr(qres)); 11995 updateQCFLAGwithDifference(qres, nres); 11996 const HChar* arr = nameArr_Q_SZ(bitQ, size); 11997 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs", 11998 nameQReg128(dd), arr, nameQReg128(nn), arr); 11999 return True; 12000 } 12001 12002 if (opcode == BITS5(0,1,0,0,0)) { 12003 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0 12004 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0 12005 if (bitQ == 0 && size == X11) return False; // implied 1d case 12006 Bool isGT = bitU == 0; 12007 IRExpr* argL = getQReg128(nn); 12008 IRExpr* argR = mkV128(0x0000); 12009 IRTemp res = newTempV128(); 12010 IROp opGTS = mkVecCMPGTS(size); 12011 assign(res, isGT ? binop(opGTS, argL, argR) 12012 : unop(Iop_NotV128, binop(opGTS, argR, argL))); 12013 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12014 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12015 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? 
"gt" : "ge", 12016 nameQReg128(dd), arr, nameQReg128(nn), arr); 12017 return True; 12018 } 12019 12020 if (opcode == BITS5(0,1,0,0,1)) { 12021 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0 12022 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0 12023 if (bitQ == 0 && size == X11) return False; // implied 1d case 12024 Bool isEQ = bitU == 0; 12025 IRExpr* argL = getQReg128(nn); 12026 IRExpr* argR = mkV128(0x0000); 12027 IRTemp res = newTempV128(); 12028 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR) 12029 : unop(Iop_NotV128, 12030 binop(mkVecCMPGTS(size), argL, argR))); 12031 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12032 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12033 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le", 12034 nameQReg128(dd), arr, nameQReg128(nn), arr); 12035 return True; 12036 } 12037 12038 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) { 12039 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0 12040 if (bitQ == 0 && size == X11) return False; // implied 1d case 12041 IRExpr* argL = getQReg128(nn); 12042 IRExpr* argR = mkV128(0x0000); 12043 IRTemp res = newTempV128(); 12044 assign(res, binop(mkVecCMPGTS(size), argR, argL)); 12045 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12046 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12047 DIP("cm%s %s.%s, %s.%s, #0\n", "lt", 12048 nameQReg128(dd), arr, nameQReg128(nn), arr); 12049 return True; 12050 } 12051 12052 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) { 12053 /* -------- 0,xx,01011: ABS std7_std7 -------- */ 12054 if (bitQ == 0 && size == X11) return False; // implied 1d case 12055 IRTemp res = newTempV128(); 12056 assign(res, unop(mkVecABS(size), getQReg128(nn))); 12057 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12058 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12059 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr); 12060 return True; 12061 } 12062 12063 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) { 12064 /* -------- 1,xx,01011: NEG std7_std7 -------- */ 12065 if (bitQ == 0 && size == X11) return False; // implied 1d case 12066 IRTemp res = newTempV128(); 12067 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn))); 12068 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12069 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12070 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr); 12071 return True; 12072 } 12073 12074 UInt ix = 0; /*INVALID*/ 12075 if (size >= X10) { 12076 switch (opcode) { 12077 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break; 12078 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break; 12079 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break; 12080 default: break; 12081 } 12082 } 12083 if (ix > 0) { 12084 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */ 12085 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */ 12086 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */ 12087 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */ 12088 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */ 12089 if (bitQ == 0 && size == X11) return False; // implied 1d case 12090 Bool isD = size == X11; 12091 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; 12092 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; 12093 IROp opCmpLT = isD ? 
Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; 12094 IROp opCmp = Iop_INVALID; 12095 Bool swap = False; 12096 const HChar* nm = "??"; 12097 switch (ix) { 12098 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break; 12099 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break; 12100 case 3: nm = "fcmlt"; opCmp = opCmpLT; break; 12101 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break; 12102 case 5: nm = "fcmle"; opCmp = opCmpLE; break; 12103 default: vassert(0); 12104 } 12105 IRExpr* zero = mkV128(0x0000); 12106 IRTemp res = newTempV128(); 12107 assign(res, swap ? binop(opCmp, zero, getQReg128(nn)) 12108 : binop(opCmp, getQReg128(nn), zero)); 12109 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12110 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12111 DIP("%s %s.%s, %s.%s, #0.0\n", nm, 12112 nameQReg128(dd), arr, nameQReg128(nn), arr); 12113 return True; 12114 } 12115 12116 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) { 12117 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */ 12118 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */ 12119 if (bitQ == 0 && size == X11) return False; // implied 1d case 12120 Bool isFNEG = bitU == 1; 12121 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2) 12122 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2); 12123 IRTemp res = newTempV128(); 12124 assign(res, unop(op, getQReg128(nn))); 12125 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12126 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12127 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs", 12128 nameQReg128(dd), arr, nameQReg128(nn), arr); 12129 return True; 12130 } 12131 12132 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) { 12133 /* -------- 0,xx,10010: XTN{,2} -------- */ 12134 if (size == X11) return False; 12135 vassert(size < 3); 12136 Bool is2 = bitQ == 1; 12137 IROp opN = mkVecNARROWUN(size); 12138 IRTemp resN = newTempV128(); 12139 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn)))); 12140 putLO64andZUorPutHI64(is2, dd, resN); 12141 const HChar* nm = "xtn"; 12142 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12143 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12144 DIP("%s%s %s.%s, %s.%s\n", is2 ? 
"2" : "", nm, 12145 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12146 return True; 12147 } 12148 12149 if (opcode == BITS5(1,0,1,0,0) 12150 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) { 12151 /* -------- 0,xx,10100: SQXTN{,2} -------- */ 12152 /* -------- 1,xx,10100: UQXTN{,2} -------- */ 12153 /* -------- 1,xx,10010: SQXTUN{,2} -------- */ 12154 if (size == X11) return False; 12155 vassert(size < 3); 12156 Bool is2 = bitQ == 1; 12157 IROp opN = Iop_INVALID; 12158 Bool zWiden = True; 12159 const HChar* nm = "??"; 12160 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) { 12161 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False; 12162 } 12163 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) { 12164 opN = mkVecQNARROWUNUU(size); nm = "uqxtn"; 12165 } 12166 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) { 12167 opN = mkVecQNARROWUNSU(size); nm = "sqxtun"; 12168 } 12169 else vassert(0); 12170 IRTemp src = newTempV128(); 12171 assign(src, getQReg128(nn)); 12172 IRTemp resN = newTempV128(); 12173 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src)))); 12174 putLO64andZUorPutHI64(is2, dd, resN); 12175 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/, 12176 size, mkexpr(resN)); 12177 updateQCFLAGwithDifference(src, resW); 12178 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12179 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12180 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm, 12181 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12182 return True; 12183 } 12184 12185 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) { 12186 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */ 12187 /* Widens, and size is the narrow size. */ 12188 if (size == X11) return False; 12189 Bool is2 = bitQ == 1; 12190 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size); 12191 IROp opSHL = mkVecSHLN(size+1); 12192 IRTemp src = newTempV128(); 12193 IRTemp res = newTempV128(); 12194 assign(src, getQReg128(nn)); 12195 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)), 12196 mkU8(8 << size))); 12197 putQReg128(dd, mkexpr(res)); 12198 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12199 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12200 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "", 12201 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size); 12202 return True; 12203 } 12204 12205 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) { 12206 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */ 12207 UInt nLanes = size == X00 ? 4 : 2; 12208 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64; 12209 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32; 12210 IRTemp rm = mk_get_IR_rounding_mode(); 12211 IRTemp src[nLanes]; 12212 for (UInt i = 0; i < nLanes; i++) { 12213 src[i] = newTemp(srcTy); 12214 assign(src[i], getQRegLane(nn, i, srcTy)); 12215 } 12216 for (UInt i = 0; i < nLanes; i++) { 12217 putQRegLane(dd, nLanes * bitQ + i, 12218 binop(opCvt, mkexpr(rm), mkexpr(src[i]))); 12219 } 12220 if (bitQ == 0) { 12221 putQRegLane(dd, 1, mkU64(0)); 12222 } 12223 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12224 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12225 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12226 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12227 return True; 12228 } 12229 12230 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) { 12231 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */ 12232 /* Using Irrm_NEAREST here isn't right. 
The docs say "round to 12233 odd" but I don't know what that really means. */ 12234 IRType srcTy = Ity_F64; 12235 IROp opCvt = Iop_F64toF32; 12236 IRTemp src[2]; 12237 for (UInt i = 0; i < 2; i++) { 12238 src[i] = newTemp(srcTy); 12239 assign(src[i], getQRegLane(nn, i, srcTy)); 12240 } 12241 for (UInt i = 0; i < 2; i++) { 12242 putQRegLane(dd, 2 * bitQ + i, 12243 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i]))); 12244 } 12245 if (bitQ == 0) { 12246 putQRegLane(dd, 1, mkU64(0)); 12247 } 12248 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12249 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12250 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12251 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide); 12252 return True; 12253 } 12254 12255 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) { 12256 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */ 12257 UInt nLanes = size == X00 ? 4 : 2; 12258 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32; 12259 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64; 12260 IRTemp src[nLanes]; 12261 for (UInt i = 0; i < nLanes; i++) { 12262 src[i] = newTemp(srcTy); 12263 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy)); 12264 } 12265 for (UInt i = 0; i < nLanes; i++) { 12266 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i]))); 12267 } 12268 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size); 12269 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1); 12270 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "", 12271 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow); 12272 return True; 12273 } 12274 12275 ix = 0; 12276 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) { 12277 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0)); 12278 // = 1 + bitU[0]:size[1]:opcode[0] 12279 vassert(ix >= 1 && ix <= 8); 12280 if (ix == 7) ix = 0; 12281 } 12282 if (ix > 0) { 12283 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */ 12284 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */ 12285 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */ 12286 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */ 12287 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */ 12288 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */ 12289 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */ 12290 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */ 12291 /* rm plan: 12292 FRINTN: tieeven -- !! FIXME KLUDGED !! 12293 FRINTM: -inf 12294 FRINTP: +inf 12295 FRINTZ: zero 12296 FRINTA: tieaway -- !! FIXME KLUDGED !! 12297 FRINTX: per FPCR + "exact = TRUE" 12298 FRINTI: per FPCR 12299 */ 12300 Bool isD = (size & 1) == 1; 12301 if (bitQ == 0 && isD) return False; // implied 1d case 12302 12303 IRTemp irrmRM = mk_get_IR_rounding_mode(); 12304 12305 UChar ch = '?'; 12306 IRTemp irrm = newTemp(Ity_I32); 12307 switch (ix) { 12308 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break; 12309 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break; 12310 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break; 12311 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break; 12312 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 12313 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break; 12314 // I am unsure about the following, due to the "integral exact" 12315 // description in the manual. What does it mean? 
(frintx, that is) 12316 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break; 12317 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break; 12318 default: vassert(0); 12319 } 12320 12321 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt; 12322 if (isD) { 12323 for (UInt i = 0; i < 2; i++) { 12324 putQRegLane(dd, i, binop(opRND, mkexpr(irrm), 12325 getQRegLane(nn, i, Ity_F64))); 12326 } 12327 } else { 12328 UInt n = bitQ==1 ? 4 : 2; 12329 for (UInt i = 0; i < n; i++) { 12330 putQRegLane(dd, i, binop(opRND, mkexpr(irrm), 12331 getQRegLane(nn, i, Ity_F32))); 12332 } 12333 if (bitQ == 0) 12334 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3 12335 } 12336 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12337 DIP("frint%c %s.%s, %s.%s\n", ch, 12338 nameQReg128(dd), arr, nameQReg128(nn), arr); 12339 return True; 12340 } 12341 12342 ix = 0; /*INVALID*/ 12343 switch (opcode) { 12344 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break; 12345 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break; 12346 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break; 12347 default: break; 12348 } 12349 if (ix > 0) { 12350 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */ 12351 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */ 12352 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */ 12353 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */ 12354 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */ 12355 /* -------- 1,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */ 12356 /* -------- 1,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */ 12357 /* -------- 1,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */ 12358 /* -------- 1,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */ 12359 /* -------- 1,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */ 12360 Bool isD = (size & 1) == 1; 12361 if (bitQ == 0 && isD) return False; // implied 1d case 12362 12363 IRRoundingMode irrm = 8; /*impossible*/ 12364 HChar ch = '?'; 12365 switch (ix) { 12366 case 1: ch = 'n'; irrm = Irrm_NEAREST; break; 12367 case 2: ch = 'm'; irrm = Irrm_NegINF; break; 12368 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */ 12369 case 4: ch = 'p'; irrm = Irrm_PosINF; break; 12370 case 5: ch = 'z'; irrm = Irrm_ZERO; break; 12371 default: vassert(0); 12372 } 12373 IROp cvt = Iop_INVALID; 12374 if (bitU == 1) { 12375 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U; 12376 } else { 12377 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S; 12378 } 12379 if (isD) { 12380 for (UInt i = 0; i < 2; i++) { 12381 putQRegLane(dd, i, binop(cvt, mkU32(irrm), 12382 getQRegLane(nn, i, Ity_F64))); 12383 } 12384 } else { 12385 UInt n = bitQ==1 ? 4 : 2; 12386 for (UInt i = 0; i < n; i++) { 12387 putQRegLane(dd, i, binop(cvt, mkU32(irrm), 12388 getQRegLane(nn, i, Ity_F32))); 12389 } 12390 if (bitQ == 0) 12391 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3 12392 } 12393 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12394 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's', 12395 nameQReg128(dd), arr, nameQReg128(nn), arr); 12396 return True; 12397 } 12398 12399 if (size == X10 && opcode == BITS5(1,1,1,0,0)) { 12400 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */ 12401 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */ 12402 Bool isREC = bitU == 0; 12403 IROp op = isREC ? 
Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4; 12404 IRTemp res = newTempV128(); 12405 assign(res, unop(op, getQReg128(nn))); 12406 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12407 const HChar* nm = isREC ? "urecpe" : "ursqrte"; 12408 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12409 DIP("%s %s.%s, %s.%s\n", nm, 12410 nameQReg128(dd), arr, nameQReg128(nn), arr); 12411 return True; 12412 } 12413 12414 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) { 12415 /* -------- 0,0x,11101: SCVTF -------- */ 12416 /* -------- 1,0x,11101: UCVTF -------- */ 12417 /* 31 28 22 21 15 9 4 12418 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn 12419 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn 12420 with laneage: 12421 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D 12422 */ 12423 Bool isQ = bitQ == 1; 12424 Bool isU = bitU == 1; 12425 Bool isF64 = (size & 1) == 1; 12426 if (isQ || !isF64) { 12427 IRType tyF = Ity_INVALID, tyI = Ity_INVALID; 12428 UInt nLanes = 0; 12429 Bool zeroHI = False; 12430 const HChar* arrSpec = NULL; 12431 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec, 12432 isQ, isF64 ); 12433 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32) 12434 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32); 12435 IRTemp rm = mk_get_IR_rounding_mode(); 12436 UInt i; 12437 vassert(ok); /* the 'if' above should ensure this */ 12438 for (i = 0; i < nLanes; i++) { 12439 putQRegLane(dd, i, 12440 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI))); 12441 } 12442 if (zeroHI) { 12443 putQRegLane(dd, 1, mkU64(0)); 12444 } 12445 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's', 12446 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec); 12447 return True; 12448 } 12449 /* else fall through */ 12450 } 12451 12452 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) { 12453 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */ 12454 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */ 12455 Bool isSQRT = bitU == 1; 12456 Bool isD = (size & 1) == 1; 12457 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4) 12458 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4); 12459 if (bitQ == 0 && isD) return False; // implied 1d case 12460 IRTemp resV = newTempV128(); 12461 assign(resV, unop(op, getQReg128(nn))); 12462 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV)); 12463 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12464 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe", 12465 nameQReg128(dd), arr, nameQReg128(nn), arr); 12466 return True; 12467 } 12468 12469 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) { 12470 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */ 12471 Bool isD = (size & 1) == 1; 12472 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4; 12473 if (bitQ == 0 && isD) return False; // implied 1d case 12474 IRTemp resV = newTempV128(); 12475 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()), 12476 getQReg128(nn))); 12477 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV)); 12478 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s"); 12479 DIP("%s %s.%s, %s.%s\n", "fsqrt", 12480 nameQReg128(dd), arr, nameQReg128(nn), arr); 12481 return True; 12482 } 12483 12484 return False; 12485 # undef INSN 12486 } 12487 12488 12489 static 12490 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn) 12491 { 12492 /* 31 28 23 21 20 19 15 11 9 4 12493 0 Q U 01111 size L M m opcode H 0 n d 12494 Decode fields are: u,size,opcode 12495 M is really part of the mm register number. 
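      Concretely, as the cases below compute it: for the 32-bit-lane
      forms the element index is H:L and the register number is M:m,
      so all of V0..V31 can be named; for the 16-bit-lane forms the
      index is H:L:M and only m (V0..V15) names the register.  As an
      illustrative example, size=01, H=1, L=0, M=1, m=0011 refers to
      element V3.H[5].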
Individual 12496 cases need to inspect L and H though. 12497 */ 12498 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12499 if (INSN(31,31) != 0 12500 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) { 12501 return False; 12502 } 12503 UInt bitQ = INSN(30,30); 12504 UInt bitU = INSN(29,29); 12505 UInt size = INSN(23,22); 12506 UInt bitL = INSN(21,21); 12507 UInt bitM = INSN(20,20); 12508 UInt mmLO4 = INSN(19,16); 12509 UInt opcode = INSN(15,12); 12510 UInt bitH = INSN(11,11); 12511 UInt nn = INSN(9,5); 12512 UInt dd = INSN(4,0); 12513 vassert(size < 4); 12514 vassert(bitH < 2 && bitM < 2 && bitL < 2); 12515 12516 if (bitU == 0 && size >= X10 12517 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) { 12518 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12519 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12520 if (bitQ == 0 && size == X11) return False; // implied 1d case 12521 Bool isD = (size & 1) == 1; 12522 Bool isSUB = opcode == BITS4(0,1,0,1); 12523 UInt index; 12524 if (!isD) index = (bitH << 1) | bitL; 12525 else if (isD && bitL == 0) index = bitH; 12526 else return False; // sz:L == x11 => unallocated encoding 12527 vassert(index < (isD ? 2 : 4)); 12528 IRType ity = isD ? Ity_F64 : Ity_F32; 12529 IRTemp elem = newTemp(ity); 12530 UInt mm = (bitM << 4) | mmLO4; 12531 assign(elem, getQRegLane(mm, index, ity)); 12532 IRTemp dupd = math_DUP_TO_V128(elem, ity); 12533 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4; 12534 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4; 12535 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4; 12536 IRTemp rm = mk_get_IR_rounding_mode(); 12537 IRTemp t1 = newTempV128(); 12538 IRTemp t2 = newTempV128(); 12539 // FIXME: double rounding; use FMA primops instead 12540 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd))); 12541 assign(t2, triop(isSUB ? opSUB : opADD, 12542 mkexpr(rm), getQReg128(dd), mkexpr(t1))); 12543 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2)); 12544 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 12545 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla", 12546 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), 12547 isD ? 'd' : 's', index); 12548 return True; 12549 } 12550 12551 if (size >= X10 && opcode == BITS4(1,0,0,1)) { 12552 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12553 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */ 12554 if (bitQ == 0 && size == X11) return False; // implied 1d case 12555 Bool isD = (size & 1) == 1; 12556 Bool isMULX = bitU == 1; 12557 UInt index; 12558 if (!isD) index = (bitH << 1) | bitL; 12559 else if (isD && bitL == 0) index = bitH; 12560 else return False; // sz:L == x11 => unallocated encoding 12561 vassert(index < (isD ? 2 : 4)); 12562 IRType ity = isD ? Ity_F64 : Ity_F32; 12563 IRTemp elem = newTemp(ity); 12564 UInt mm = (bitM << 4) | mmLO4; 12565 assign(elem, getQRegLane(mm, index, ity)); 12566 IRTemp dupd = math_DUP_TO_V128(elem, ity); 12567 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right. 12568 IRTemp res = newTempV128(); 12569 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4, 12570 mkexpr(mk_get_IR_rounding_mode()), 12571 getQReg128(nn), mkexpr(dupd))); 12572 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12573 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s"); 12574 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", 12575 isMULX ? 
"fmulx" : "fmul", nameQReg128(dd), arr, 12576 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index); 12577 return True; 12578 } 12579 12580 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0))) 12581 || (bitU == 0 && opcode == BITS4(1,0,0,0))) { 12582 /* -------- 1,xx,0000 MLA s/h variants only -------- */ 12583 /* -------- 1,xx,0100 MLS s/h variants only -------- */ 12584 /* -------- 0,xx,1000 MUL s/h variants only -------- */ 12585 Bool isMLA = opcode == BITS4(0,0,0,0); 12586 Bool isMLS = opcode == BITS4(0,1,0,0); 12587 UInt mm = 32; // invalid 12588 UInt ix = 16; // invalid 12589 switch (size) { 12590 case X00: 12591 return False; // b case is not allowed 12592 case X01: 12593 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12594 case X10: 12595 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12596 case X11: 12597 return False; // d case is not allowed 12598 default: 12599 vassert(0); 12600 } 12601 vassert(mm < 32 && ix < 16); 12602 IROp opMUL = mkVecMUL(size); 12603 IROp opADD = mkVecADD(size); 12604 IROp opSUB = mkVecSUB(size); 12605 HChar ch = size == X01 ? 'h' : 's'; 12606 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12607 IRTemp vecD = newTempV128(); 12608 IRTemp vecN = newTempV128(); 12609 IRTemp res = newTempV128(); 12610 assign(vecD, getQReg128(dd)); 12611 assign(vecN, getQReg128(nn)); 12612 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM)); 12613 if (isMLA || isMLS) { 12614 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod)); 12615 } else { 12616 assign(res, prod); 12617 } 12618 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12619 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12620 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla" 12621 : (isMLS ? "mls" : "mul"), 12622 nameQReg128(dd), arr, 12623 nameQReg128(nn), arr, nameQReg128(dd), ch, ix); 12624 return True; 12625 } 12626 12627 if (opcode == BITS4(1,0,1,0) 12628 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) { 12629 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks) 12630 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0 12631 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1 12632 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1 12633 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2 12634 /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2 12635 /* Widens, and size refers to the narrowed lanes. 
*/ 12636 UInt ks = 3; 12637 switch (opcode) { 12638 case BITS4(1,0,1,0): ks = 0; break; 12639 case BITS4(0,0,1,0): ks = 1; break; 12640 case BITS4(0,1,1,0): ks = 2; break; 12641 default: vassert(0); 12642 } 12643 vassert(ks >= 0 && ks <= 2); 12644 Bool isU = bitU == 1; 12645 Bool is2 = bitQ == 1; 12646 UInt mm = 32; // invalid 12647 UInt ix = 16; // invalid 12648 switch (size) { 12649 case X00: 12650 return False; // h_b_b[] case is not allowed 12651 case X01: 12652 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12653 case X10: 12654 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12655 case X11: 12656 return False; // q_d_d[] case is not allowed 12657 default: 12658 vassert(0); 12659 } 12660 vassert(mm < 32 && ix < 16); 12661 IRTemp vecN = newTempV128(); 12662 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12663 IRTemp vecD = newTempV128(); 12664 assign(vecN, getQReg128(nn)); 12665 assign(vecD, getQReg128(dd)); 12666 IRTemp res = IRTemp_INVALID; 12667 math_MULL_ACC(&res, is2, isU, size, "mas"[ks], 12668 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 12669 putQReg128(dd, mkexpr(res)); 12670 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl"); 12671 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12672 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12673 HChar ch = size == X01 ? 'h' : 's'; 12674 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n", 12675 isU ? 'u' : 's', nm, is2 ? "2" : "", 12676 nameQReg128(dd), arrWide, 12677 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix); 12678 return True; 12679 } 12680 12681 if (bitU == 0 12682 && (opcode == BITS4(1,0,1,1) 12683 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) { 12684 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks) 12685 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1 12686 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2 12687 /* Widens, and size refers to the narrowed lanes. */ 12688 UInt ks = 3; 12689 switch (opcode) { 12690 case BITS4(1,0,1,1): ks = 0; break; 12691 case BITS4(0,0,1,1): ks = 1; break; 12692 case BITS4(0,1,1,1): ks = 2; break; 12693 default: vassert(0); 12694 } 12695 vassert(ks >= 0 && ks <= 2); 12696 Bool is2 = bitQ == 1; 12697 UInt mm = 32; // invalid 12698 UInt ix = 16; // invalid 12699 switch (size) { 12700 case X00: 12701 return False; // h_b_b[] case is not allowed 12702 case X01: 12703 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12704 case X10: 12705 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12706 case X11: 12707 return False; // q_d_d[] case is not allowed 12708 default: 12709 vassert(0); 12710 } 12711 vassert(mm < 32 && ix < 16); 12712 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n; 12713 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID; 12714 newTempsV128_2(&vecN, &vecD); 12715 assign(vecN, getQReg128(nn)); 12716 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12717 assign(vecD, getQReg128(dd)); 12718 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n, 12719 is2, size, "mas"[ks], 12720 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD); 12721 putQReg128(dd, mkexpr(res)); 12722 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID); 12723 updateQCFLAGwithDifference(sat1q, sat1n); 12724 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) { 12725 updateQCFLAGwithDifference(sat2q, sat2n); 12726 } 12727 const HChar* nm = ks == 0 ? "sqdmull" 12728 : (ks == 1 ? 
"sqdmlal" : "sqdmlsl"); 12729 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size); 12730 const HChar* arrWide = nameArr_Q_SZ(1, size+1); 12731 HChar ch = size == X01 ? 'h' : 's'; 12732 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n", 12733 nm, is2 ? "2" : "", 12734 nameQReg128(dd), arrWide, 12735 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix); 12736 return True; 12737 } 12738 12739 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) { 12740 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */ 12741 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */ 12742 UInt mm = 32; // invalid 12743 UInt ix = 16; // invalid 12744 switch (size) { 12745 case X00: 12746 return False; // b case is not allowed 12747 case X01: 12748 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break; 12749 case X10: 12750 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break; 12751 case X11: 12752 return False; // q case is not allowed 12753 default: 12754 vassert(0); 12755 } 12756 vassert(mm < 32 && ix < 16); 12757 Bool isR = opcode == BITS4(1,1,0,1); 12758 IRTemp res, sat1q, sat1n, vN, vM; 12759 res = sat1q = sat1n = vN = vM = IRTemp_INVALID; 12760 vN = newTempV128(); 12761 assign(vN, getQReg128(nn)); 12762 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix); 12763 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM); 12764 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 12765 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID; 12766 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI); 12767 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh"; 12768 const HChar* arr = nameArr_Q_SZ(bitQ, size); 12769 HChar ch = size == X01 ? 'h' : 's'; 12770 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm, 12771 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix); 12772 return True; 12773 } 12774 12775 return False; 12776 # undef INSN 12777 } 12778 12779 12780 static 12781 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn) 12782 { 12783 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12784 return False; 12785 # undef INSN 12786 } 12787 12788 12789 static 12790 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn) 12791 { 12792 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12793 return False; 12794 # undef INSN 12795 } 12796 12797 12798 static 12799 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn) 12800 { 12801 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12802 return False; 12803 # undef INSN 12804 } 12805 12806 12807 static 12808 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn) 12809 { 12810 /* 31 28 23 21 20 15 13 9 4 12811 000 11110 ty 1 m op 1000 n opcode2 12812 The first 3 bits are really "M 0 S", but M and S are always zero. 
12813 Decode fields are: ty,op,opcode2 12814 */ 12815 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12816 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 12817 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) { 12818 return False; 12819 } 12820 UInt ty = INSN(23,22); 12821 UInt mm = INSN(20,16); 12822 UInt op = INSN(15,14); 12823 UInt nn = INSN(9,5); 12824 UInt opcode2 = INSN(4,0); 12825 vassert(ty < 4); 12826 12827 if (ty <= X01 && op == X00 12828 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) { 12829 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */ 12830 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */ 12831 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */ 12832 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */ 12833 /* 31 23 20 15 9 4 12834 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm 12835 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0 12836 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm 12837 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0 12838 12839 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm 12840 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0 12841 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm 12842 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0 12843 12844 FCMPE generates Invalid Operation exn if either arg is any kind 12845 of NaN. FCMP generates Invalid Operation exn if either arg is a 12846 signalling NaN. We ignore this detail here and produce the same 12847 IR for both. 12848 */ 12849 Bool isD = (ty & 1) == 1; 12850 Bool isCMPE = (opcode2 & 16) == 16; 12851 Bool cmpZero = (opcode2 & 8) == 8; 12852 IRType ity = isD ? Ity_F64 : Ity_F32; 12853 Bool valid = True; 12854 if (cmpZero && mm != 0) valid = False; 12855 if (valid) { 12856 IRTemp argL = newTemp(ity); 12857 IRTemp argR = newTemp(ity); 12858 IRTemp irRes = newTemp(Ity_I32); 12859 assign(argL, getQRegLO(nn, ity)); 12860 assign(argR, 12861 cmpZero 12862 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0))) 12863 : getQRegLO(mm, ity)); 12864 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 12865 mkexpr(argL), mkexpr(argR))); 12866 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes); 12867 IRTemp nzcv_28x0 = newTemp(Ity_I64); 12868 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28))); 12869 setFlags_COPY(nzcv_28x0); 12870 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity), 12871 cmpZero ? "#0.0" : nameQRegLO(mm, ity)); 12872 return True; 12873 } 12874 return False; 12875 } 12876 12877 return False; 12878 # undef INSN 12879 } 12880 12881 12882 static 12883 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn) 12884 { 12885 /* 31 28 23 21 20 15 11 9 4 3 12886 000 11110 ty 1 m cond 01 n op nzcv 12887 The first 3 bits are really "M 0 S", but M and S are always zero. 12888 Decode fields are: ty,op 12889 */ 12890 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12891 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 12892 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) { 12893 return False; 12894 } 12895 UInt ty = INSN(23,22); 12896 UInt mm = INSN(20,16); 12897 UInt cond = INSN(15,12); 12898 UInt nn = INSN(9,5); 12899 UInt op = INSN(4,4); 12900 UInt nzcv = INSN(3,0); 12901 vassert(ty < 4 && op <= 1); 12902 12903 if (ty <= BITS2(0,1)) { 12904 /* -------- 00,0 FCCMP s_s -------- */ 12905 /* -------- 00,1 FCCMPE s_s -------- */ 12906 /* -------- 01,0 FCCMP d_d -------- */ 12907 /* -------- 01,1 FCCMPE d_d -------- */ 12908 12909 /* FCCMPE generates Invalid Operation exn if either arg is any kind 12910 of NaN. 
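      Operationally both are, roughly (ARM-ARM style pseudocode):
         if ConditionHolds(cond) then NZCV = FPCompare(Vn, Vm)
                                 else NZCV = #nzcv
      which is what the IRExpr_ITE on condT below implements.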
FCCMP generates Invalid Operation exn if either arg is a 12911 signalling NaN. We ignore this detail here and produce the same 12912 IR for both. 12913 */ 12914 Bool isD = (ty & 1) == 1; 12915 Bool isCMPE = op == 1; 12916 IRType ity = isD ? Ity_F64 : Ity_F32; 12917 IRTemp argL = newTemp(ity); 12918 IRTemp argR = newTemp(ity); 12919 IRTemp irRes = newTemp(Ity_I32); 12920 assign(argL, getQRegLO(nn, ity)); 12921 assign(argR, getQRegLO(mm, ity)); 12922 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32, 12923 mkexpr(argL), mkexpr(argR))); 12924 IRTemp condT = newTemp(Ity_I1); 12925 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 12926 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes); 12927 12928 IRTemp nzcvT_28x0 = newTemp(Ity_I64); 12929 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28))); 12930 12931 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28); 12932 12933 IRTemp nzcv_28x0 = newTemp(Ity_I64); 12934 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT), 12935 mkexpr(nzcvT_28x0), nzcvF_28x0)); 12936 setFlags_COPY(nzcv_28x0); 12937 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "", 12938 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond)); 12939 return True; 12940 } 12941 12942 return False; 12943 # undef INSN 12944 } 12945 12946 12947 static 12948 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn) 12949 { 12950 /* 31 23 21 20 15 11 9 5 12951 000 11110 ty 1 m cond 11 n d 12952 The first 3 bits are really "M 0 S", but M and S are always zero. 12953 Decode fields: ty 12954 */ 12955 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12956 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1 12957 || INSN(11,10) != BITS2(1,1)) { 12958 return False; 12959 } 12960 UInt ty = INSN(23,22); 12961 UInt mm = INSN(20,16); 12962 UInt cond = INSN(15,12); 12963 UInt nn = INSN(9,5); 12964 UInt dd = INSN(4,0); 12965 if (ty <= X01) { 12966 /* -------- 00: FCSEL s_s -------- */ 12967 /* -------- 01: FCSEL d_d -------- */ 12968 IRType ity = ty == X01 ? Ity_F64 : Ity_F32; 12969 IRTemp srcT = newTemp(ity); 12970 IRTemp srcF = newTemp(ity); 12971 IRTemp res = newTemp(ity); 12972 assign(srcT, getQRegLO(nn, ity)); 12973 assign(srcF, getQRegLO(mm, ity)); 12974 assign(res, IRExpr_ITE( 12975 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 12976 mkexpr(srcT), mkexpr(srcF))); 12977 putQReg128(dd, mkV128(0x0000)); 12978 putQRegLO(dd, mkexpr(res)); 12979 DIP("fcsel %s, %s, %s, %s\n", 12980 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity), 12981 nameCC(cond)); 12982 return True; 12983 } 12984 return False; 12985 # undef INSN 12986 } 12987 12988 12989 static 12990 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn) 12991 { 12992 /* 31 28 23 21 20 14 9 4 12993 000 11110 ty 1 opcode 10000 n d 12994 The first 3 bits are really "M 0 S", but M and S are always zero. 
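      (ty gives the operand precision: 00 = 32-bit, 01 = 64-bit,
       11 = 16-bit; those are the only combinations handled below.)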
12995 Decode fields: ty,opcode 12996 */ 12997 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 12998 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 12999 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) { 13000 return False; 13001 } 13002 UInt ty = INSN(23,22); 13003 UInt opcode = INSN(20,15); 13004 UInt nn = INSN(9,5); 13005 UInt dd = INSN(4,0); 13006 13007 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) { 13008 /* -------- 0x,000000: FMOV d_d, s_s -------- */ 13009 /* -------- 0x,000001: FABS d_d, s_s -------- */ 13010 /* -------- 0x,000010: FNEG d_d, s_s -------- */ 13011 /* -------- 0x,000011: FSQRT d_d, s_s -------- */ 13012 IRType ity = ty == X01 ? Ity_F64 : Ity_F32; 13013 IRTemp src = newTemp(ity); 13014 IRTemp res = newTemp(ity); 13015 const HChar* nm = "??"; 13016 assign(src, getQRegLO(nn, ity)); 13017 switch (opcode) { 13018 case BITS6(0,0,0,0,0,0): 13019 nm = "fmov"; assign(res, mkexpr(src)); break; 13020 case BITS6(0,0,0,0,0,1): 13021 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break; 13022 case BITS6(0,0,0,0,1,0): 13023 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break; 13024 case BITS6(0,0,0,0,1,1): 13025 nm = "fsqrt"; 13026 assign(res, binop(mkSQRTF(ity), 13027 mkexpr(mk_get_IR_rounding_mode()), 13028 mkexpr(src))); break; 13029 default: 13030 vassert(0); 13031 } 13032 putQReg128(dd, mkV128(0x0000)); 13033 putQRegLO(dd, mkexpr(res)); 13034 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity)); 13035 return True; 13036 } 13037 13038 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0) 13039 || opcode == BITS6(0,0,0,1,0,1))) 13040 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1) 13041 || opcode == BITS6(0,0,0,1,0,1))) 13042 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1) 13043 || opcode == BITS6(0,0,0,1,0,0)))) { 13044 /* -------- 11,000100: FCVT s_h -------- */ 13045 /* -------- 11,000101: FCVT d_h -------- */ 13046 /* -------- 00,000111: FCVT h_s -------- */ 13047 /* -------- 00,000101: FCVT d_s -------- */ 13048 /* -------- 01,000111: FCVT h_d -------- */ 13049 /* -------- 01,000100: FCVT s_d -------- */ 13050 /* 31 23 21 16 14 9 4 13051 000 11110 11 10001 00 10000 n d FCVT Sd, Hn 13052 --------- 11 ----- 01 --------- FCVT Dd, Hn 13053 --------- 00 ----- 11 --------- FCVT Hd, Sn 13054 --------- 00 ----- 01 --------- FCVT Dd, Sn 13055 --------- 01 ----- 11 --------- FCVT Hd, Dn 13056 --------- 01 ----- 00 --------- FCVT Sd, Dn 13057 Rounding, when dst is smaller than src, is per the FPCR. 13058 */ 13059 UInt b2322 = ty; 13060 UInt b1615 = opcode & BITS2(1,1); 13061 switch ((b2322 << 2) | b1615) { 13062 case BITS4(0,0,0,1): // S -> D 13063 case BITS4(1,1,0,1): { // H -> D 13064 Bool srcIsH = b2322 == BITS2(1,1); 13065 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32; 13066 IRTemp res = newTemp(Ity_F64); 13067 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64, 13068 getQRegLO(nn, srcTy))); 13069 putQReg128(dd, mkV128(0x0000)); 13070 putQRegLO(dd, mkexpr(res)); 13071 DIP("fcvt %s, %s\n", 13072 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy)); 13073 return True; 13074 } 13075 case BITS4(0,1,0,0): // D -> S 13076 case BITS4(0,1,1,1): { // D -> H 13077 Bool dstIsH = b1615 == BITS2(1,1); 13078 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32; 13079 IRTemp res = newTemp(dstTy); 13080 assign(res, binop(dstIsH ? 
Iop_F64toF16 : Iop_F64toF32, 13081 mkexpr(mk_get_IR_rounding_mode()), 13082 getQRegLO(nn, Ity_F64))); 13083 putQReg128(dd, mkV128(0x0000)); 13084 putQRegLO(dd, mkexpr(res)); 13085 DIP("fcvt %s, %s\n", 13086 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64)); 13087 return True; 13088 } 13089 case BITS4(0,0,1,1): // S -> H 13090 case BITS4(1,1,0,0): { // H -> S 13091 Bool toH = b1615 == BITS2(1,1); 13092 IRType srcTy = toH ? Ity_F32 : Ity_F16; 13093 IRType dstTy = toH ? Ity_F16 : Ity_F32; 13094 IRTemp res = newTemp(dstTy); 13095 if (toH) { 13096 assign(res, binop(Iop_F32toF16, 13097 mkexpr(mk_get_IR_rounding_mode()), 13098 getQRegLO(nn, srcTy))); 13099 13100 } else { 13101 assign(res, unop(Iop_F16toF32, 13102 getQRegLO(nn, srcTy))); 13103 } 13104 putQReg128(dd, mkV128(0x0000)); 13105 putQRegLO(dd, mkexpr(res)); 13106 DIP("fcvt %s, %s\n", 13107 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy)); 13108 return True; 13109 } 13110 default: 13111 break; 13112 } 13113 /* else unhandled */ 13114 return False; 13115 } 13116 13117 if (ty <= X01 13118 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1) 13119 && opcode != BITS6(0,0,1,1,0,1)) { 13120 /* -------- 0x,001000 FRINTN d_d, s_s -------- */ 13121 /* -------- 0x,001001 FRINTP d_d, s_s -------- */ 13122 /* -------- 0x,001010 FRINTM d_d, s_s -------- */ 13123 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */ 13124 /* -------- 0x,001100 FRINTA d_d, s_s -------- */ 13125 /* -------- 0x,001110 FRINTX d_d, s_s -------- */ 13126 /* -------- 0x,001111 FRINTI d_d, s_s -------- */ 13127 /* 31 23 21 17 14 9 4 13128 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR) 13129 rm 13130 x==0 => S-registers, x==1 => D-registers 13131 rm (17:15) encodings: 13132 111 per FPCR (FRINTI) 13133 001 +inf (FRINTP) 13134 010 -inf (FRINTM) 13135 011 zero (FRINTZ) 13136 000 tieeven (FRINTN) -- !! FIXME KLUDGED !! 13137 100 tieaway (FRINTA) -- !! FIXME KLUDGED !! 13138 110 per FPCR + "exact = TRUE" (FRINTX) 13139 101 unallocated 13140 */ 13141 Bool isD = (ty & 1) == 1; 13142 UInt rm = opcode & BITS6(0,0,0,1,1,1); 13143 IRType ity = isD ? Ity_F64 : Ity_F32; 13144 IRExpr* irrmE = NULL; 13145 UChar ch = '?'; 13146 switch (rm) { 13147 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; 13148 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; 13149 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; 13150 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0 13151 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break; 13152 // I am unsure about the following, due to the "integral exact" 13153 // description in the manual. What does it mean? (frintx, that is) 13154 case BITS3(1,1,0): 13155 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; 13156 case BITS3(1,1,1): 13157 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; 13158 // The following is a kludge. There's no Irrm_ value to represent 13159 // this ("to nearest, with ties to even") 13160 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break; 13161 default: break; 13162 } 13163 if (irrmE) { 13164 IRTemp src = newTemp(ity); 13165 IRTemp dst = newTemp(ity); 13166 assign(src, getQRegLO(nn, ity)); 13167 assign(dst, binop(isD ? 
Iop_RoundF64toInt : Iop_RoundF32toInt, 13168 irrmE, mkexpr(src))); 13169 putQReg128(dd, mkV128(0x0000)); 13170 putQRegLO(dd, mkexpr(dst)); 13171 DIP("frint%c %s, %s\n", 13172 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity)); 13173 return True; 13174 } 13175 return False; 13176 } 13177 13178 return False; 13179 # undef INSN 13180 } 13181 13182 13183 static 13184 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn) 13185 { 13186 /* 31 28 23 21 20 15 11 9 4 13187 000 11110 ty 1 m opcode 10 n d 13188 The first 3 bits are really "M 0 S", but M and S are always zero. 13189 Decode fields: ty, opcode 13190 */ 13191 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13192 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 13193 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) { 13194 return False; 13195 } 13196 UInt ty = INSN(23,22); 13197 UInt mm = INSN(20,16); 13198 UInt opcode = INSN(15,12); 13199 UInt nn = INSN(9,5); 13200 UInt dd = INSN(4,0); 13201 13202 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) { 13203 /* ------- 0x,0000: FMUL d_d, s_s ------- */ 13204 /* ------- 0x,0001: FDIV d_d, s_s ------- */ 13205 /* ------- 0x,0010: FADD d_d, s_s ------- */ 13206 /* ------- 0x,0011: FSUB d_d, s_s ------- */ 13207 /* ------- 0x,0100: FMAX d_d, s_s ------- */ 13208 /* ------- 0x,0101: FMIN d_d, s_s ------- */ 13209 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */ 13210 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */ 13211 IRType ity = ty == X00 ? Ity_F32 : Ity_F64; 13212 IROp iop = Iop_INVALID; 13213 const HChar* nm = "???"; 13214 switch (opcode) { 13215 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break; 13216 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break; 13217 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break; 13218 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break; 13219 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break; 13220 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break; 13221 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!! 13222 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!! 13223 default: vassert(0); 13224 } 13225 if (opcode <= BITS4(0,0,1,1)) { 13226 // This is really not good code. TODO: avoid width-changing 13227 IRTemp res = newTemp(ity); 13228 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()), 13229 getQRegLO(nn, ity), getQRegLO(mm, ity))); 13230 putQReg128(dd, mkV128(0)); 13231 putQRegLO(dd, mkexpr(res)); 13232 } else { 13233 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2), 13234 binop(iop, getQReg128(nn), getQReg128(mm)))); 13235 } 13236 DIP("%s %s, %s, %s\n", 13237 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 13238 return True; 13239 } 13240 13241 if (ty <= X01 && opcode == BITS4(1,0,0,0)) { 13242 /* ------- 0x,1000: FNMUL d_d, s_s ------- */ 13243 IRType ity = ty == X00 ? 
Ity_F32 : Ity_F64; 13244 IROp iop = mkMULF(ity); 13245 IROp iopn = mkNEGF(ity); 13246 const HChar* nm = "fnmul"; 13247 IRExpr* resE = unop(iopn, 13248 triop(iop, mkexpr(mk_get_IR_rounding_mode()), 13249 getQRegLO(nn, ity), getQRegLO(mm, ity))); 13250 IRTemp res = newTemp(ity); 13251 assign(res, resE); 13252 putQReg128(dd, mkV128(0)); 13253 putQRegLO(dd, mkexpr(res)); 13254 DIP("%s %s, %s, %s\n", 13255 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity)); 13256 return True; 13257 } 13258 13259 return False; 13260 # undef INSN 13261 } 13262 13263 13264 static 13265 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn) 13266 { 13267 /* 31 28 23 21 20 15 14 9 4 13268 000 11111 ty o1 m o0 a n d 13269 The first 3 bits are really "M 0 S", but M and S are always zero. 13270 Decode fields: ty,o1,o0 13271 */ 13272 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13273 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) { 13274 return False; 13275 } 13276 UInt ty = INSN(23,22); 13277 UInt bitO1 = INSN(21,21); 13278 UInt mm = INSN(20,16); 13279 UInt bitO0 = INSN(15,15); 13280 UInt aa = INSN(14,10); 13281 UInt nn = INSN(9,5); 13282 UInt dd = INSN(4,0); 13283 vassert(ty < 4); 13284 13285 if (ty <= X01) { 13286 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */ 13287 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */ 13288 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */ 13289 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */ 13290 /* -------------------- F{N}M{ADD,SUB} -------------------- */ 13291 /* 31 22 20 15 14 9 4 ix 13292 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa 13293 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa 13294 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa 13295 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa 13296 where Fx=Dx when sz=1, Fx=Sx when sz=0 13297 13298 -----SPEC------ ----IMPL---- 13299 fmadd a + n * m a + n * m 13300 fmsub a + (-n) * m a - n * m 13301 fnmadd (-a) + (-n) * m -(a + n * m) 13302 fnmsub (-a) + n * m -(a - n * m) 13303 */ 13304 Bool isD = (ty & 1) == 1; 13305 UInt ix = (bitO1 << 1) | bitO0; 13306 IRType ity = isD ? Ity_F64 : Ity_F32; 13307 IROp opADD = mkADDF(ity); 13308 IROp opSUB = mkSUBF(ity); 13309 IROp opMUL = mkMULF(ity); 13310 IROp opNEG = mkNEGF(ity); 13311 IRTemp res = newTemp(ity); 13312 IRExpr* eA = getQRegLO(aa, ity); 13313 IRExpr* eN = getQRegLO(nn, ity); 13314 IRExpr* eM = getQRegLO(mm, ity); 13315 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); 13316 IRExpr* eNxM = triop(opMUL, rm, eN, eM); 13317 switch (ix) { 13318 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; 13319 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; 13320 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; 13321 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; 13322 default: vassert(0); 13323 } 13324 putQReg128(dd, mkV128(0x0000)); 13325 putQRegLO(dd, mkexpr(res)); 13326 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" }; 13327 DIP("%s %s, %s, %s, %s\n", 13328 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity), 13329 nameQRegLO(mm, ity), nameQRegLO(aa, ity)); 13330 return True; 13331 } 13332 13333 return False; 13334 # undef INSN 13335 } 13336 13337 13338 static 13339 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn) 13340 { 13341 /* 31 28 23 21 20 12 9 4 13342 000 11110 ty 1 imm8 100 imm5 d 13343 The first 3 bits are really "M 0 S", but M and S are always zero. 
13344 */ 13345 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13346 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) 13347 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) { 13348 return False; 13349 } 13350 UInt ty = INSN(23,22); 13351 UInt imm8 = INSN(20,13); 13352 UInt imm5 = INSN(9,5); 13353 UInt dd = INSN(4,0); 13354 13355 /* ------- 00,00000: FMOV s_imm ------- */ 13356 /* ------- 01,00000: FMOV d_imm ------- */ 13357 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) { 13358 Bool isD = (ty & 1) == 1; 13359 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32); 13360 if (!isD) { 13361 vassert(0 == (imm & 0xFFFFFFFF00000000ULL)); 13362 } 13363 putQReg128(dd, mkV128(0)); 13364 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL)); 13365 DIP("fmov %s, #0x%llx\n", 13366 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm); 13367 return True; 13368 } 13369 13370 return False; 13371 # undef INSN 13372 } 13373 13374 13375 static 13376 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn) 13377 { 13378 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13379 /* 31 30 29 28 23 21 20 18 15 9 4 13380 sf 0 0 11110 type 0 rmode opcode scale n d 13381 The first 3 bits are really "sf 0 S", but S is always zero. 13382 Decode fields: sf,type,rmode,opcode 13383 */ 13384 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13385 if (INSN(30,29) != BITS2(0,0) 13386 || INSN(28,24) != BITS5(1,1,1,1,0) 13387 || INSN(21,21) != 0) { 13388 return False; 13389 } 13390 UInt bitSF = INSN(31,31); 13391 UInt ty = INSN(23,22); // type 13392 UInt rm = INSN(20,19); // rmode 13393 UInt op = INSN(18,16); // opcode 13394 UInt sc = INSN(15,10); // scale 13395 UInt nn = INSN(9,5); 13396 UInt dd = INSN(4,0); 13397 13398 if (ty <= X01 && rm == X11 13399 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) { 13400 /* -------- (ix) sf ty rm opc -------- */ 13401 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */ 13402 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */ 13403 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */ 13404 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */ 13405 13406 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */ 13407 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */ 13408 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */ 13409 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */ 13410 Bool isI64 = bitSF == 1; 13411 Bool isF64 = (ty & 1) == 1; 13412 Bool isU = (op & 1) == 1; 13413 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0); 13414 13415 Int fbits = 64 - sc; 13416 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32)); 13417 13418 Double scale = two_to_the_plus(fbits); 13419 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale)) 13420 : IRExpr_Const(IRConst_F32( (Float)scale )); 13421 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32; 13422 13423 const IROp ops[8] 13424 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S, 13425 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U }; 13426 IRTemp irrm = newTemp(Ity_I32); 13427 assign(irrm, mkU32(Irrm_ZERO)); 13428 13429 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32); 13430 IRExpr* res = binop(ops[ix], mkexpr(irrm), 13431 triop(opMUL, mkexpr(irrm), src, scaleE)); 13432 putIRegOrZR(isI64, dd, res); 13433 13434 DIP("fcvtz%c %s, %s, #%d\n", 13435 isU ? 'u' : 's', nameIRegOrZR(isI64, dd), 13436 nameQRegLO(nn, isF64 ? 
Ity_F64 : Ity_F32), fbits); 13437 return True; 13438 } 13439 13440 /* ------ sf,ty,rm,opc ------ */ 13441 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */ 13442 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */ 13443 /* (ix) sf S 28 ty rm opc 15 9 4 13444 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits 13445 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits 13446 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits 13447 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits 13448 13449 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits 13450 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits 13451 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits 13452 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits 13453 13454 These are signed/unsigned conversion from integer registers to 13455 FP registers, all 4 32/64-bit combinations, rounded per FPCR, 13456 scaled per |scale|. 13457 */ 13458 if (ty <= X01 && rm == X00 13459 && (op == BITS3(0,1,0) || op == BITS3(0,1,1)) 13460 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) { 13461 Bool isI64 = bitSF == 1; 13462 Bool isF64 = (ty & 1) == 1; 13463 Bool isU = (op & 1) == 1; 13464 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0); 13465 13466 Int fbits = 64 - sc; 13467 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32)); 13468 13469 Double scale = two_to_the_minus(fbits); 13470 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale)) 13471 : IRExpr_Const(IRConst_F32( (Float)scale )); 13472 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32; 13473 13474 const IROp ops[8] 13475 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64, 13476 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 }; 13477 IRExpr* src = getIRegOrZR(isI64, nn); 13478 IRExpr* res = (isF64 && !isI64) 13479 ? unop(ops[ix], src) 13480 : binop(ops[ix], 13481 mkexpr(mk_get_IR_rounding_mode()), src); 13482 putQReg128(dd, mkV128(0)); 13483 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE)); 13484 13485 DIP("%ccvtf %s, %s, #%d\n", 13486 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32), 13487 nameIRegOrZR(isI64, nn), fbits); 13488 return True; 13489 } 13490 13491 return False; 13492 # undef INSN 13493 } 13494 13495 13496 static 13497 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn) 13498 { 13499 /* 31 30 29 28 23 21 20 18 15 9 4 13500 sf 0 0 11110 type 1 rmode opcode 000000 n d 13501 The first 3 bits are really "sf 0 S", but S is always zero. 
13502 Decode fields: sf,type,rmode,opcode 13503 */ 13504 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13505 if (INSN(30,29) != BITS2(0,0) 13506 || INSN(28,24) != BITS5(1,1,1,1,0) 13507 || INSN(21,21) != 1 13508 || INSN(15,10) != BITS6(0,0,0,0,0,0)) { 13509 return False; 13510 } 13511 UInt bitSF = INSN(31,31); 13512 UInt ty = INSN(23,22); // type 13513 UInt rm = INSN(20,19); // rmode 13514 UInt op = INSN(18,16); // opcode 13515 UInt nn = INSN(9,5); 13516 UInt dd = INSN(4,0); 13517 13518 // op = 000, 001 13519 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */ 13520 /* 30 23 20 18 15 9 4 13521 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to 13522 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest) 13523 ---------------- 01 -------------- FCVTP-------- (round to +inf) 13524 ---------------- 10 -------------- FCVTM-------- (round to -inf) 13525 ---------------- 11 -------------- FCVTZ-------- (round to zero) 13526 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away) 13527 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away) 13528 13529 Rd is Xd when sf==1, Wd when sf==0 13530 Fn is Dn when x==1, Sn when x==0 13531 20:19 carry the rounding mode, using the same encoding as FPCR 13532 */ 13533 if (ty <= X01 13534 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True) 13535 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0)) 13536 ) 13537 ) { 13538 Bool isI64 = bitSF == 1; 13539 Bool isF64 = (ty & 1) == 1; 13540 Bool isU = (op & 1) == 1; 13541 /* Decide on the IR rounding mode to use. */ 13542 IRRoundingMode irrm = 8; /*impossible*/ 13543 HChar ch = '?'; 13544 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) { 13545 switch (rm) { 13546 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break; 13547 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break; 13548 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break; 13549 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break; 13550 default: vassert(0); 13551 } 13552 } else { 13553 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1)); 13554 switch (rm) { 13555 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break; 13556 default: vassert(0); 13557 } 13558 } 13559 vassert(irrm != 8); 13560 /* Decide on the conversion primop, based on the source size, 13561 dest size and signedness (8 possibilities). Case coding: 13562 F32 ->s I32 0 13563 F32 ->u I32 1 13564 F32 ->s I64 2 13565 F32 ->u I64 3 13566 F64 ->s I32 4 13567 F64 ->u I32 5 13568 F64 ->s I64 6 13569 F64 ->u I64 7 13570 */ 13571 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0); 13572 vassert(ix < 8); 13573 const IROp iops[8] 13574 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U, 13575 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U }; 13576 IROp iop = iops[ix]; 13577 // A bit of ATCery: bounce all cases we haven't seen an example of. 
13578 if (/* F32toI32S */ 13579 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ 13580 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ 13581 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ 13582 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */ 13583 /* F32toI32U */ 13584 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ 13585 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ 13586 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */ 13587 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */ 13588 /* F32toI64S */ 13589 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ 13590 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */ 13591 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */ 13592 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */ 13593 /* F32toI64U */ 13594 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ 13595 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */ 13596 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */ 13597 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */ 13598 /* F64toI32S */ 13599 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ 13600 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ 13601 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ 13602 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */ 13603 /* F64toI32U */ 13604 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ 13605 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ 13606 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ 13607 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */ 13608 /* F64toI64S */ 13609 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ 13610 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ 13611 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ 13612 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */ 13613 /* F64toI64U */ 13614 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ 13615 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */ 13616 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ 13617 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */ 13618 ) { 13619 /* validated */ 13620 } else { 13621 return False; 13622 } 13623 IRType srcTy = isF64 ? Ity_F64 : Ity_F32; 13624 IRType dstTy = isI64 ? Ity_I64 : Ity_I32; 13625 IRTemp src = newTemp(srcTy); 13626 IRTemp dst = newTemp(dstTy); 13627 assign(src, getQRegLO(nn, srcTy)); 13628 assign(dst, binop(iop, mkU32(irrm), mkexpr(src))); 13629 putIRegOrZR(isI64, dd, mkexpr(dst)); 13630 DIP("fcvt%c%c %s, %s\n", ch, isU ? 
'u' : 's', 13631 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy)); 13632 return True; 13633 } 13634 13635 // op = 010, 011 13636 /* -------------- {S,U}CVTF (scalar, integer) -------------- */ 13637 /* (ix) sf S 28 ty rm op 15 9 4 13638 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 13639 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 13640 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 13641 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 13642 13643 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 13644 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 13645 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 13646 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 13647 13648 These are signed/unsigned conversion from integer registers to 13649 FP registers, all 4 32/64-bit combinations, rounded per FPCR. 13650 */ 13651 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) { 13652 Bool isI64 = bitSF == 1; 13653 Bool isF64 = (ty & 1) == 1; 13654 Bool isU = (op & 1) == 1; 13655 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0); 13656 const IROp ops[8] 13657 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64, 13658 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 }; 13659 IRExpr* src = getIRegOrZR(isI64, nn); 13660 IRExpr* res = (isF64 && !isI64) 13661 ? unop(ops[ix], src) 13662 : binop(ops[ix], 13663 mkexpr(mk_get_IR_rounding_mode()), src); 13664 putQReg128(dd, mkV128(0)); 13665 putQRegLO(dd, res); 13666 DIP("%ccvtf %s, %s\n", 13667 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32), 13668 nameIRegOrZR(isI64, nn)); 13669 return True; 13670 } 13671 13672 // op = 110, 111 13673 /* -------- FMOV (general) -------- */ 13674 /* case sf S ty rm op 15 9 4 13675 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn 13676 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn 13677 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn 13678 13679 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn 13680 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn 13681 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1] 13682 */ 13683 if (1) { 13684 UInt ix = 0; // case 13685 if (bitSF == 0) { 13686 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 13687 ix = 1; 13688 else 13689 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 13690 ix = 4; 13691 } else { 13692 vassert(bitSF == 1); 13693 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1)) 13694 ix = 2; 13695 else 13696 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0)) 13697 ix = 5; 13698 else 13699 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1)) 13700 ix = 3; 13701 else 13702 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0)) 13703 ix = 6; 13704 } 13705 if (ix > 0) { 13706 switch (ix) { 13707 case 1: 13708 putQReg128(dd, mkV128(0)); 13709 putQRegLO(dd, getIReg32orZR(nn)); 13710 DIP("fmov s%u, w%u\n", dd, nn); 13711 break; 13712 case 2: 13713 putQReg128(dd, mkV128(0)); 13714 putQRegLO(dd, getIReg64orZR(nn)); 13715 DIP("fmov d%u, x%u\n", dd, nn); 13716 break; 13717 case 3: 13718 putQRegHI64(dd, getIReg64orZR(nn)); 13719 DIP("fmov v%u.d[1], x%u\n", dd, nn); 13720 break; 13721 case 4: 13722 putIReg32orZR(dd, getQRegLO(nn, Ity_I32)); 13723 DIP("fmov w%u, s%u\n", dd, nn); 13724 break; 13725 case 5: 13726 putIReg64orZR(dd, getQRegLO(nn, Ity_I64)); 13727 DIP("fmov x%u, d%u\n", dd, nn); 13728 break; 13729 case 6: 13730 putIReg64orZR(dd, getQRegHI64(nn)); 13731 DIP("fmov x%u, v%u.d[1]\n", dd, nn); 13732 break; 13733 
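            // Note on cases 3 and 6 above (FMOV Vd.D[1], Xn and
            // FMOV Xd, Vn.D[1]): they move only the upper 64 bits of the
            // vector register, and the lower half is left unchanged, which
            // is why case 3 has no putQReg128 zeroing step.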
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.
Returns True iff the instruction was decoded, in which case 13822 *dres will be set accordingly, or False, in which case *dres should 13823 be ignored by the caller. */ 13824 13825 static 13826 Bool disInstr_ARM64_WRK ( 13827 /*MB_OUT*/DisResult* dres, 13828 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 13829 Bool resteerCisOk, 13830 void* callback_opaque, 13831 const UChar* guest_instr, 13832 const VexArchInfo* archinfo, 13833 const VexAbiInfo* abiinfo 13834 ) 13835 { 13836 // A macro to fish bits out of 'insn'. 13837 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 13838 13839 //ZZ DisResult dres; 13840 //ZZ UInt insn; 13841 //ZZ //Bool allow_VFP = False; 13842 //ZZ //UInt hwcaps = archinfo->hwcaps; 13843 //ZZ IRTemp condT; /* :: Ity_I32 */ 13844 //ZZ UInt summary; 13845 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text 13846 //ZZ 13847 //ZZ /* What insn variants are we supporting today? */ 13848 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP)); 13849 //ZZ // etc etc 13850 13851 /* Set result defaults. */ 13852 dres->whatNext = Dis_Continue; 13853 dres->len = 4; 13854 dres->continueAt = 0; 13855 dres->jk_StopHere = Ijk_INVALID; 13856 13857 /* At least this is simple on ARM64: insns are all 4 bytes long, and 13858 4-aligned. So just fish the whole thing out of memory right now 13859 and have done. */ 13860 UInt insn = getUIntLittleEndianly( guest_instr ); 13861 13862 if (0) vex_printf("insn: 0x%x\n", insn); 13863 13864 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr); 13865 13866 vassert(0 == (guest_PC_curr_instr & 3ULL)); 13867 13868 /* ----------------------------------------------------------- */ 13869 13870 /* Spot "Special" instructions (see comment at top of file). */ 13871 { 13872 const UChar* code = guest_instr; 13873 /* Spot the 16-byte preamble: 13874 93CC0D8C ror x12, x12, #3 13875 93CC358C ror x12, x12, #13 13876 93CCCD8C ror x12, x12, #51 13877 93CCF58C ror x12, x12, #61 13878 */ 13879 UInt word1 = 0x93CC0D8C; 13880 UInt word2 = 0x93CC358C; 13881 UInt word3 = 0x93CCCD8C; 13882 UInt word4 = 0x93CCF58C; 13883 if (getUIntLittleEndianly(code+ 0) == word1 && 13884 getUIntLittleEndianly(code+ 4) == word2 && 13885 getUIntLittleEndianly(code+ 8) == word3 && 13886 getUIntLittleEndianly(code+12) == word4) { 13887 /* Got a "Special" instruction preamble. Which one is it? */ 13888 if (getUIntLittleEndianly(code+16) == 0xAA0A014A 13889 /* orr x10,x10,x10 */) { 13890 /* X3 = client_request ( X4 ) */ 13891 DIP("x3 = client_request ( x4 )\n"); 13892 putPC(mkU64( guest_PC_curr_instr + 20 )); 13893 dres->jk_StopHere = Ijk_ClientReq; 13894 dres->whatNext = Dis_StopHere; 13895 return True; 13896 } 13897 else 13898 if (getUIntLittleEndianly(code+16) == 0xAA0B016B 13899 /* orr x11,x11,x11 */) { 13900 /* X3 = guest_NRADDR */ 13901 DIP("x3 = guest_NRADDR\n"); 13902 dres->len = 20; 13903 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 13904 return True; 13905 } 13906 else 13907 if (getUIntLittleEndianly(code+16) == 0xAA0C018C 13908 /* orr x12,x12,x12 */) { 13909 /* branch-and-link-to-noredir X8 */ 13910 DIP("branch-and-link-to-noredir x8\n"); 13911 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20)); 13912 putPC(getIReg64orZR(8)); 13913 dres->jk_StopHere = Ijk_NoRedir; 13914 dres->whatNext = Dis_StopHere; 13915 return True; 13916 } 13917 else 13918 if (getUIntLittleEndianly(code+16) == 0xAA090129 13919 /* orr x9,x9,x9 */) { 13920 /* IR injection */ 13921 DIP("IR injection\n"); 13922 vex_inject_ir(irsb, Iend_LE); 13923 // Invalidate the current insn. 
The reason is that the IRop we're 13924 // injecting here can change. In which case the translation has to 13925 // be redone. For ease of handling, we simply invalidate all the 13926 // time. 13927 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr))); 13928 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20))); 13929 putPC(mkU64( guest_PC_curr_instr + 20 )); 13930 dres->whatNext = Dis_StopHere; 13931 dres->jk_StopHere = Ijk_InvalICache; 13932 return True; 13933 } 13934 /* We don't know what it is. */ 13935 return False; 13936 /*NOTREACHED*/ 13937 } 13938 } 13939 13940 /* ----------------------------------------------------------- */ 13941 13942 /* Main ARM64 instruction decoder starts here. */ 13943 13944 Bool ok = False; 13945 13946 /* insn[28:25] determines the top-level grouping, so let's start 13947 off with that. 13948 13949 For all of these dis_ARM64_ functions, we pass *dres with the 13950 normal default results "insn OK, 4 bytes long, keep decoding" so 13951 they don't need to change it. However, decodes of control-flow 13952 insns may cause *dres to change. 13953 */ 13954 switch (INSN(28,25)) { 13955 case BITS4(1,0,0,0): case BITS4(1,0,0,1): 13956 // Data processing - immediate 13957 ok = dis_ARM64_data_processing_immediate(dres, insn); 13958 break; 13959 case BITS4(1,0,1,0): case BITS4(1,0,1,1): 13960 // Branch, exception generation and system instructions 13961 ok = dis_ARM64_branch_etc(dres, insn, archinfo); 13962 break; 13963 case BITS4(0,1,0,0): case BITS4(0,1,1,0): 13964 case BITS4(1,1,0,0): case BITS4(1,1,1,0): 13965 // Loads and stores 13966 ok = dis_ARM64_load_store(dres, insn); 13967 break; 13968 case BITS4(0,1,0,1): case BITS4(1,1,0,1): 13969 // Data processing - register 13970 ok = dis_ARM64_data_processing_register(dres, insn); 13971 break; 13972 case BITS4(0,1,1,1): case BITS4(1,1,1,1): 13973 // Data processing - SIMD and floating point 13974 ok = dis_ARM64_simd_and_fp(dres, insn); 13975 break; 13976 case BITS4(0,0,0,0): case BITS4(0,0,0,1): 13977 case BITS4(0,0,1,0): case BITS4(0,0,1,1): 13978 // UNALLOCATED 13979 break; 13980 default: 13981 vassert(0); /* Can't happen */ 13982 } 13983 13984 /* If the next-level down decoders failed, make sure |dres| didn't 13985 get changed. */ 13986 if (!ok) { 13987 vassert(dres->whatNext == Dis_Continue); 13988 vassert(dres->len == 4); 13989 vassert(dres->continueAt == 0); 13990 vassert(dres->jk_StopHere == Ijk_INVALID); 13991 } 13992 13993 return ok; 13994 13995 # undef INSN 13996 } 13997 13998 13999 /*------------------------------------------------------------*/ 14000 /*--- Top-level fn ---*/ 14001 /*------------------------------------------------------------*/ 14002 14003 /* Disassemble a single instruction into IR. The instruction 14004 is located in host memory at &guest_code[delta]. 
*/ 14005 14006 DisResult disInstr_ARM64 ( IRSB* irsb_IN, 14007 Bool (*resteerOkFn) ( void*, Addr ), 14008 Bool resteerCisOk, 14009 void* callback_opaque, 14010 const UChar* guest_code_IN, 14011 Long delta_IN, 14012 Addr guest_IP, 14013 VexArch guest_arch, 14014 const VexArchInfo* archinfo, 14015 const VexAbiInfo* abiinfo, 14016 VexEndness host_endness_IN, 14017 Bool sigill_diag_IN ) 14018 { 14019 DisResult dres; 14020 vex_bzero(&dres, sizeof(dres)); 14021 14022 /* Set globals (see top of this file) */ 14023 vassert(guest_arch == VexArchARM64); 14024 14025 irsb = irsb_IN; 14026 host_endness = host_endness_IN; 14027 guest_PC_curr_instr = (Addr64)guest_IP; 14028 14029 /* Sanity checks */ 14030 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */ 14031 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15); 14032 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15); 14033 14034 /* Try to decode */ 14035 Bool ok = disInstr_ARM64_WRK( &dres, 14036 resteerOkFn, resteerCisOk, callback_opaque, 14037 &guest_code_IN[delta_IN], 14038 archinfo, abiinfo ); 14039 if (ok) { 14040 /* All decode successes end up here. */ 14041 vassert(dres.len == 4 || dres.len == 20); 14042 switch (dres.whatNext) { 14043 case Dis_Continue: 14044 putPC( mkU64(dres.len + guest_PC_curr_instr) ); 14045 break; 14046 case Dis_ResteerU: 14047 case Dis_ResteerC: 14048 putPC(mkU64(dres.continueAt)); 14049 break; 14050 case Dis_StopHere: 14051 break; 14052 default: 14053 vassert(0); 14054 } 14055 DIP("\n"); 14056 } else { 14057 /* All decode failures end up here. */ 14058 if (sigill_diag_IN) { 14059 Int i, j; 14060 UChar buf[64]; 14061 UInt insn 14062 = getUIntLittleEndianly( &guest_code_IN[delta_IN] ); 14063 vex_bzero(buf, sizeof(buf)); 14064 for (i = j = 0; i < 32; i++) { 14065 if (i > 0) { 14066 if ((i & 7) == 0) buf[j++] = ' '; 14067 else if ((i & 3) == 0) buf[j++] = '\''; 14068 } 14069 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0'; 14070 } 14071 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn); 14072 vex_printf("disInstr(arm64): %s\n", buf); 14073 } 14074 14075 /* Tell the dispatcher that this insn cannot be decoded, and so 14076 has not been executed, and (is currently) the next to be 14077 executed. PC should be up-to-date since it is made so at the 14078 start of each insn, but nevertheless be paranoid and update 14079 it again right now. */ 14080 putPC( mkU64(guest_PC_curr_instr) ); 14081 dres.len = 0; 14082 dres.whatNext = Dis_StopHere; 14083 dres.jk_StopHere = Ijk_NoDecode; 14084 dres.continueAt = 0; 14085 } 14086 return dres; 14087 } 14088 14089 14090 /*--------------------------------------------------------------------*/ 14091 /*--- end guest_arm64_toIR.c ---*/ 14092 /*--------------------------------------------------------------------*/ 14093