1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*--------------------------------------------------------------------*/ 4 /*--- begin guest_arm64_toIR.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2013-2013 OpenWorks 12 info (at) open-works.net 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 27 02110-1301, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 /* KNOWN LIMITATIONS 2014-Nov-16 33 34 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN. 35 36 Also FP comparison "unordered" .. is implemented as normal FP 37 comparison. 38 39 Both should be fixed. They behave incorrectly in the presence of 40 NaNs. 41 42 FMULX is treated the same as FMUL. That's also not correct. 43 44 * Floating multiply-add (etc) insns. Are split into a multiply and 45 an add, and so suffer double rounding and hence sometimes the 46 least significant mantissa bit is incorrect. Fix: use the IR 47 multiply-add IROps instead. 48 49 * FRINTA, FRINTN are kludged .. they just round to nearest. No special 50 handling for the "ties" case. FRINTX might be dubious too. 51 52 * Ditto FCVTXN. No idea what "round to odd" means. 
This implementation 53 just rounds to nearest. 54 */ 55 56 /* "Special" instructions. 57 58 This instruction decoder can decode four special instructions 59 which mean nothing natively (are no-ops as far as regs/mem are 60 concerned) but have meaning for supporting Valgrind. A special 61 instruction is flagged by a 16-byte preamble: 62 63 93CC0D8C 93CC358C 93CCCD8C 93CCF58C 64 (ror x12, x12, #3; ror x12, x12, #13 65 ror x12, x12, #51; ror x12, x12, #61) 66 67 Following that, one of the following 3 are allowed 68 (standard interpretation in parentheses): 69 70 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 ) 71 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR 72 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8 73 AA090129 (orr x9,x9,x9) IR injection 74 75 Any other bytes following the 16-byte preamble are illegal and 76 constitute a failure in instruction decoding. This all assumes 77 that the preamble will never occur except in specific code 78 fragments designed for Valgrind to catch. 79 */ 80 81 /* Translates ARM64 code to IR. */ 82 83 #include "libvex_basictypes.h" 84 #include "libvex_ir.h" 85 #include "libvex.h" 86 #include "libvex_guest_arm64.h" 87 88 #include "main_util.h" 89 #include "main_globals.h" 90 #include "guest_generic_bb_to_IR.h" 91 #include "guest_arm64_defs.h" 92 93 94 /*------------------------------------------------------------*/ 95 /*--- Globals ---*/ 96 /*------------------------------------------------------------*/ 97 98 /* These are set at the start of the translation of a instruction, so 99 that we don't have to pass them around endlessly. CONST means does 100 not change during translation of the instruction. 101 */ 102 103 /* CONST: what is the host's endianness? We need to know this in 104 order to do sub-register accesses to the SIMD/FP registers 105 correctly. */ 106 static VexEndness host_endness; 107 108 /* CONST: The guest address for the instruction currently being 109 translated. 
*/ 110 static Addr64 guest_PC_curr_instr; 111 112 /* MOD: The IRSB* into which we're generating code. */ 113 static IRSB* irsb; 114 115 116 /*------------------------------------------------------------*/ 117 /*--- Debugging output ---*/ 118 /*------------------------------------------------------------*/ 119 120 #define DIP(format, args...) \ 121 if (vex_traceflags & VEX_TRACE_FE) \ 122 vex_printf(format, ## args) 123 124 #define DIS(buf, format, args...) \ 125 if (vex_traceflags & VEX_TRACE_FE) \ 126 vex_sprintf(buf, format, ## args) 127 128 129 /*------------------------------------------------------------*/ 130 /*--- Helper bits and pieces for deconstructing the ---*/ 131 /*--- arm insn stream. ---*/ 132 /*------------------------------------------------------------*/ 133 134 /* Do a little-endian load of a 32-bit word, regardless of the 135 endianness of the underlying host. */ 136 static inline UInt getUIntLittleEndianly ( const UChar* p ) 137 { 138 UInt w = 0; 139 w = (w << 8) | p[3]; 140 w = (w << 8) | p[2]; 141 w = (w << 8) | p[1]; 142 w = (w << 8) | p[0]; 143 return w; 144 } 145 146 /* Sign extend a N-bit value up to 64 bits, by copying 147 bit N-1 into all higher positions. */ 148 static ULong sx_to_64 ( ULong x, UInt n ) 149 { 150 vassert(n > 1 && n < 64); 151 Long r = (Long)x; 152 r = (r << (64-n)) >> (64-n); 153 return (ULong)r; 154 } 155 156 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the 157 //ZZ endianness of the underlying host. 
*/ 158 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p ) 159 //ZZ { 160 //ZZ UShort w = 0; 161 //ZZ w = (w << 8) | p[1]; 162 //ZZ w = (w << 8) | p[0]; 163 //ZZ return w; 164 //ZZ } 165 //ZZ 166 //ZZ static UInt ROR32 ( UInt x, UInt sh ) { 167 //ZZ vassert(sh >= 0 && sh < 32); 168 //ZZ if (sh == 0) 169 //ZZ return x; 170 //ZZ else 171 //ZZ return (x << (32-sh)) | (x >> sh); 172 //ZZ } 173 //ZZ 174 //ZZ static Int popcount32 ( UInt x ) 175 //ZZ { 176 //ZZ Int res = 0, i; 177 //ZZ for (i = 0; i < 32; i++) { 178 //ZZ res += (x & 1); 179 //ZZ x >>= 1; 180 //ZZ } 181 //ZZ return res; 182 //ZZ } 183 //ZZ 184 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b ) 185 //ZZ { 186 //ZZ UInt mask = 1 << ix; 187 //ZZ x &= ~mask; 188 //ZZ x |= ((b << ix) & mask); 189 //ZZ return x; 190 //ZZ } 191 192 #define BITS2(_b1,_b0) \ 193 (((_b1) << 1) | (_b0)) 194 195 #define BITS3(_b2,_b1,_b0) \ 196 (((_b2) << 2) | ((_b1) << 1) | (_b0)) 197 198 #define BITS4(_b3,_b2,_b1,_b0) \ 199 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) 200 201 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 202 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ 203 | BITS4((_b3),(_b2),(_b1),(_b0))) 204 205 #define BITS5(_b4,_b3,_b2,_b1,_b0) \ 206 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) 207 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ 208 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 209 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 210 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 211 212 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 213 (((_b8) << 8) \ 214 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 215 216 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 217 (((_b9) << 9) | ((_b8) << 8) \ 218 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 219 220 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 221 (((_b10) << 10) \ 222 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 223 224 #define BITS12(_b11, 
_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 225 (((_b11) << 11) \ 226 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)) 227 228 #define X00 BITS2(0,0) 229 #define X01 BITS2(0,1) 230 #define X10 BITS2(1,0) 231 #define X11 BITS2(1,1) 232 233 // produces _uint[_bMax:_bMin] 234 #define SLICE_UInt(_uint,_bMax,_bMin) \ 235 (( ((UInt)(_uint)) >> (_bMin)) \ 236 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) 237 238 239 /*------------------------------------------------------------*/ 240 /*--- Helper bits and pieces for creating IR fragments. ---*/ 241 /*------------------------------------------------------------*/ 242 243 static IRExpr* mkV128 ( UShort w ) 244 { 245 return IRExpr_Const(IRConst_V128(w)); 246 } 247 248 static IRExpr* mkU64 ( ULong i ) 249 { 250 return IRExpr_Const(IRConst_U64(i)); 251 } 252 253 static IRExpr* mkU32 ( UInt i ) 254 { 255 return IRExpr_Const(IRConst_U32(i)); 256 } 257 258 static IRExpr* mkU16 ( UInt i ) 259 { 260 vassert(i < 65536); 261 return IRExpr_Const(IRConst_U16(i)); 262 } 263 264 static IRExpr* mkU8 ( UInt i ) 265 { 266 vassert(i < 256); 267 return IRExpr_Const(IRConst_U8( (UChar)i )); 268 } 269 270 static IRExpr* mkexpr ( IRTemp tmp ) 271 { 272 return IRExpr_RdTmp(tmp); 273 } 274 275 static IRExpr* unop ( IROp op, IRExpr* a ) 276 { 277 return IRExpr_Unop(op, a); 278 } 279 280 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 281 { 282 return IRExpr_Binop(op, a1, a2); 283 } 284 285 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 286 { 287 return IRExpr_Triop(op, a1, a2, a3); 288 } 289 290 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 291 { 292 return IRExpr_Load(Iend_LE, ty, addr); 293 } 294 295 /* Add a statement to the list held by "irbb". 
*/
/* Append statement 'st' to the IRSB under construction (the
   file-global 'irsb'). */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate "dst = e" as an IR statement. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Generate a little-endian store of 'data' at 'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type.
*/ 355 static IRTemp newTemp ( IRType ty ) 356 { 357 vassert(isPlausibleIRType(ty)); 358 return newIRTemp( irsb->tyenv, ty ); 359 } 360 361 /* This is used in many places, so the brevity is an advantage. */ 362 static IRTemp newTempV128(void) 363 { 364 return newTemp(Ity_V128); 365 } 366 367 /* Initialise V128 temporaries en masse. */ 368 static 369 void newTempsV128_2(IRTemp* t1, IRTemp* t2) 370 { 371 vassert(t1 && *t1 == IRTemp_INVALID); 372 vassert(t2 && *t2 == IRTemp_INVALID); 373 *t1 = newTempV128(); 374 *t2 = newTempV128(); 375 } 376 377 static 378 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3) 379 { 380 vassert(t1 && *t1 == IRTemp_INVALID); 381 vassert(t2 && *t2 == IRTemp_INVALID); 382 vassert(t3 && *t3 == IRTemp_INVALID); 383 *t1 = newTempV128(); 384 *t2 = newTempV128(); 385 *t3 = newTempV128(); 386 } 387 388 static 389 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4) 390 { 391 vassert(t1 && *t1 == IRTemp_INVALID); 392 vassert(t2 && *t2 == IRTemp_INVALID); 393 vassert(t3 && *t3 == IRTemp_INVALID); 394 vassert(t4 && *t4 == IRTemp_INVALID); 395 *t1 = newTempV128(); 396 *t2 = newTempV128(); 397 *t3 = newTempV128(); 398 *t4 = newTempV128(); 399 } 400 401 static 402 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3, 403 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7) 404 { 405 vassert(t1 && *t1 == IRTemp_INVALID); 406 vassert(t2 && *t2 == IRTemp_INVALID); 407 vassert(t3 && *t3 == IRTemp_INVALID); 408 vassert(t4 && *t4 == IRTemp_INVALID); 409 vassert(t5 && *t5 == IRTemp_INVALID); 410 vassert(t6 && *t6 == IRTemp_INVALID); 411 vassert(t7 && *t7 == IRTemp_INVALID); 412 *t1 = newTempV128(); 413 *t2 = newTempV128(); 414 *t3 = newTempV128(); 415 *t4 = newTempV128(); 416 *t5 = newTempV128(); 417 *t6 = newTempV128(); 418 *t7 = newTempV128(); 419 } 420 421 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type 422 //ZZ IRRoundingMode. 
*/ 423 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 424 //ZZ { 425 //ZZ return mkU32(Irrm_NEAREST); 426 //ZZ } 427 //ZZ 428 //ZZ /* Generate an expression for SRC rotated right by ROT. */ 429 //ZZ static IRExpr* genROR32( IRTemp src, Int rot ) 430 //ZZ { 431 //ZZ vassert(rot >= 0 && rot < 32); 432 //ZZ if (rot == 0) 433 //ZZ return mkexpr(src); 434 //ZZ return 435 //ZZ binop(Iop_Or32, 436 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)), 437 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot))); 438 //ZZ } 439 //ZZ 440 //ZZ static IRExpr* mkU128 ( ULong i ) 441 //ZZ { 442 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i)); 443 //ZZ } 444 //ZZ 445 //ZZ /* Generate a 4-aligned version of the given expression if 446 //ZZ the given condition is true. Else return it unchanged. */ 447 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b ) 448 //ZZ { 449 //ZZ if (b) 450 //ZZ return binop(Iop_And32, e, mkU32(~3)); 451 //ZZ else 452 //ZZ return e; 453 //ZZ } 454 455 /* Other IR construction helpers. 
*/ 456 static IROp mkAND ( IRType ty ) { 457 switch (ty) { 458 case Ity_I32: return Iop_And32; 459 case Ity_I64: return Iop_And64; 460 default: vpanic("mkAND"); 461 } 462 } 463 464 static IROp mkOR ( IRType ty ) { 465 switch (ty) { 466 case Ity_I32: return Iop_Or32; 467 case Ity_I64: return Iop_Or64; 468 default: vpanic("mkOR"); 469 } 470 } 471 472 static IROp mkXOR ( IRType ty ) { 473 switch (ty) { 474 case Ity_I32: return Iop_Xor32; 475 case Ity_I64: return Iop_Xor64; 476 default: vpanic("mkXOR"); 477 } 478 } 479 480 static IROp mkSHL ( IRType ty ) { 481 switch (ty) { 482 case Ity_I32: return Iop_Shl32; 483 case Ity_I64: return Iop_Shl64; 484 default: vpanic("mkSHL"); 485 } 486 } 487 488 static IROp mkSHR ( IRType ty ) { 489 switch (ty) { 490 case Ity_I32: return Iop_Shr32; 491 case Ity_I64: return Iop_Shr64; 492 default: vpanic("mkSHR"); 493 } 494 } 495 496 static IROp mkSAR ( IRType ty ) { 497 switch (ty) { 498 case Ity_I32: return Iop_Sar32; 499 case Ity_I64: return Iop_Sar64; 500 default: vpanic("mkSAR"); 501 } 502 } 503 504 static IROp mkNOT ( IRType ty ) { 505 switch (ty) { 506 case Ity_I32: return Iop_Not32; 507 case Ity_I64: return Iop_Not64; 508 default: vpanic("mkNOT"); 509 } 510 } 511 512 static IROp mkADD ( IRType ty ) { 513 switch (ty) { 514 case Ity_I32: return Iop_Add32; 515 case Ity_I64: return Iop_Add64; 516 default: vpanic("mkADD"); 517 } 518 } 519 520 static IROp mkSUB ( IRType ty ) { 521 switch (ty) { 522 case Ity_I32: return Iop_Sub32; 523 case Ity_I64: return Iop_Sub64; 524 default: vpanic("mkSUB"); 525 } 526 } 527 528 static IROp mkADDF ( IRType ty ) { 529 switch (ty) { 530 case Ity_F32: return Iop_AddF32; 531 case Ity_F64: return Iop_AddF64; 532 default: vpanic("mkADDF"); 533 } 534 } 535 536 static IROp mkSUBF ( IRType ty ) { 537 switch (ty) { 538 case Ity_F32: return Iop_SubF32; 539 case Ity_F64: return Iop_SubF64; 540 default: vpanic("mkSUBF"); 541 } 542 } 543 544 static IROp mkMULF ( IRType ty ) { 545 switch (ty) { 546 case Ity_F32: 
return Iop_MulF32; 547 case Ity_F64: return Iop_MulF64; 548 default: vpanic("mkMULF"); 549 } 550 } 551 552 static IROp mkDIVF ( IRType ty ) { 553 switch (ty) { 554 case Ity_F32: return Iop_DivF32; 555 case Ity_F64: return Iop_DivF64; 556 default: vpanic("mkMULF"); 557 } 558 } 559 560 static IROp mkNEGF ( IRType ty ) { 561 switch (ty) { 562 case Ity_F32: return Iop_NegF32; 563 case Ity_F64: return Iop_NegF64; 564 default: vpanic("mkNEGF"); 565 } 566 } 567 568 static IROp mkABSF ( IRType ty ) { 569 switch (ty) { 570 case Ity_F32: return Iop_AbsF32; 571 case Ity_F64: return Iop_AbsF64; 572 default: vpanic("mkNEGF"); 573 } 574 } 575 576 static IROp mkSQRTF ( IRType ty ) { 577 switch (ty) { 578 case Ity_F32: return Iop_SqrtF32; 579 case Ity_F64: return Iop_SqrtF64; 580 default: vpanic("mkNEGF"); 581 } 582 } 583 584 static IROp mkVecADD ( UInt size ) { 585 const IROp ops[4] 586 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; 587 vassert(size < 4); 588 return ops[size]; 589 } 590 591 static IROp mkVecQADDU ( UInt size ) { 592 const IROp ops[4] 593 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 }; 594 vassert(size < 4); 595 return ops[size]; 596 } 597 598 static IROp mkVecQADDS ( UInt size ) { 599 const IROp ops[4] 600 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 }; 601 vassert(size < 4); 602 return ops[size]; 603 } 604 605 static IROp mkVecQADDEXTSUSATUU ( UInt size ) { 606 const IROp ops[4] 607 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8, 608 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 }; 609 vassert(size < 4); 610 return ops[size]; 611 } 612 613 static IROp mkVecQADDEXTUSSATSS ( UInt size ) { 614 const IROp ops[4] 615 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8, 616 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 }; 617 vassert(size < 4); 618 return ops[size]; 619 } 620 621 static IROp mkVecSUB ( UInt size ) { 622 const IROp ops[4] 623 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 }; 624 vassert(size < 
4); 625 return ops[size]; 626 } 627 628 static IROp mkVecQSUBU ( UInt size ) { 629 const IROp ops[4] 630 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 }; 631 vassert(size < 4); 632 return ops[size]; 633 } 634 635 static IROp mkVecQSUBS ( UInt size ) { 636 const IROp ops[4] 637 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 }; 638 vassert(size < 4); 639 return ops[size]; 640 } 641 642 static IROp mkVecSARN ( UInt size ) { 643 const IROp ops[4] 644 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 }; 645 vassert(size < 4); 646 return ops[size]; 647 } 648 649 static IROp mkVecSHRN ( UInt size ) { 650 const IROp ops[4] 651 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 }; 652 vassert(size < 4); 653 return ops[size]; 654 } 655 656 static IROp mkVecSHLN ( UInt size ) { 657 const IROp ops[4] 658 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 }; 659 vassert(size < 4); 660 return ops[size]; 661 } 662 663 static IROp mkVecCATEVENLANES ( UInt size ) { 664 const IROp ops[4] 665 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, 666 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 }; 667 vassert(size < 4); 668 return ops[size]; 669 } 670 671 static IROp mkVecCATODDLANES ( UInt size ) { 672 const IROp ops[4] 673 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, 674 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 }; 675 vassert(size < 4); 676 return ops[size]; 677 } 678 679 static IROp mkVecINTERLEAVELO ( UInt size ) { 680 const IROp ops[4] 681 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 682 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 }; 683 vassert(size < 4); 684 return ops[size]; 685 } 686 687 static IROp mkVecINTERLEAVEHI ( UInt size ) { 688 const IROp ops[4] 689 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 690 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 }; 691 vassert(size < 4); 692 return ops[size]; 693 } 694 695 static IROp mkVecMAXU ( UInt size ) { 696 const IROp ops[4] 697 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, 
Iop_Max64Ux2 }; 698 vassert(size < 4); 699 return ops[size]; 700 } 701 702 static IROp mkVecMAXS ( UInt size ) { 703 const IROp ops[4] 704 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; 705 vassert(size < 4); 706 return ops[size]; 707 } 708 709 static IROp mkVecMINU ( UInt size ) { 710 const IROp ops[4] 711 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; 712 vassert(size < 4); 713 return ops[size]; 714 } 715 716 static IROp mkVecMINS ( UInt size ) { 717 const IROp ops[4] 718 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; 719 vassert(size < 4); 720 return ops[size]; 721 } 722 723 static IROp mkVecMUL ( UInt size ) { 724 const IROp ops[4] 725 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID }; 726 vassert(size < 3); 727 return ops[size]; 728 } 729 730 static IROp mkVecMULLU ( UInt sizeNarrow ) { 731 const IROp ops[4] 732 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID }; 733 vassert(sizeNarrow < 3); 734 return ops[sizeNarrow]; 735 } 736 737 static IROp mkVecMULLS ( UInt sizeNarrow ) { 738 const IROp ops[4] 739 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID }; 740 vassert(sizeNarrow < 3); 741 return ops[sizeNarrow]; 742 } 743 744 static IROp mkVecQDMULLS ( UInt sizeNarrow ) { 745 const IROp ops[4] 746 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID }; 747 vassert(sizeNarrow < 3); 748 return ops[sizeNarrow]; 749 } 750 751 static IROp mkVecCMPEQ ( UInt size ) { 752 const IROp ops[4] 753 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 }; 754 vassert(size < 4); 755 return ops[size]; 756 } 757 758 static IROp mkVecCMPGTU ( UInt size ) { 759 const IROp ops[4] 760 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 }; 761 vassert(size < 4); 762 return ops[size]; 763 } 764 765 static IROp mkVecCMPGTS ( UInt size ) { 766 const IROp ops[4] 767 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 }; 768 vassert(size < 4); 769 return ops[size]; 770 
} 771 772 static IROp mkVecABS ( UInt size ) { 773 const IROp ops[4] 774 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 }; 775 vassert(size < 4); 776 return ops[size]; 777 } 778 779 static IROp mkVecZEROHIxxOFV128 ( UInt size ) { 780 const IROp ops[4] 781 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128, 782 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 }; 783 vassert(size < 4); 784 return ops[size]; 785 } 786 787 static IRExpr* mkU ( IRType ty, ULong imm ) { 788 switch (ty) { 789 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL)); 790 case Ity_I64: return mkU64(imm); 791 default: vpanic("mkU"); 792 } 793 } 794 795 static IROp mkVecQDMULHIS ( UInt size ) { 796 const IROp ops[4] 797 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID }; 798 vassert(size < 4); 799 return ops[size]; 800 } 801 802 static IROp mkVecQRDMULHIS ( UInt size ) { 803 const IROp ops[4] 804 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID }; 805 vassert(size < 4); 806 return ops[size]; 807 } 808 809 static IROp mkVecQANDUQSH ( UInt size ) { 810 const IROp ops[4] 811 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8, 812 Iop_QandUQsh32x4, Iop_QandUQsh64x2 }; 813 vassert(size < 4); 814 return ops[size]; 815 } 816 817 static IROp mkVecQANDSQSH ( UInt size ) { 818 const IROp ops[4] 819 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8, 820 Iop_QandSQsh32x4, Iop_QandSQsh64x2 }; 821 vassert(size < 4); 822 return ops[size]; 823 } 824 825 static IROp mkVecQANDUQRSH ( UInt size ) { 826 const IROp ops[4] 827 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8, 828 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 }; 829 vassert(size < 4); 830 return ops[size]; 831 } 832 833 static IROp mkVecQANDSQRSH ( UInt size ) { 834 const IROp ops[4] 835 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8, 836 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 }; 837 vassert(size < 4); 838 return ops[size]; 839 } 840 841 static IROp mkVecSHU ( UInt size ) { 842 const IROp ops[4] 843 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 }; 844 
vassert(size < 4); 845 return ops[size]; 846 } 847 848 static IROp mkVecSHS ( UInt size ) { 849 const IROp ops[4] 850 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 }; 851 vassert(size < 4); 852 return ops[size]; 853 } 854 855 static IROp mkVecRSHU ( UInt size ) { 856 const IROp ops[4] 857 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 }; 858 vassert(size < 4); 859 return ops[size]; 860 } 861 862 static IROp mkVecRSHS ( UInt size ) { 863 const IROp ops[4] 864 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 }; 865 vassert(size < 4); 866 return ops[size]; 867 } 868 869 static IROp mkVecNARROWUN ( UInt sizeNarrow ) { 870 const IROp ops[4] 871 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, 872 Iop_NarrowUn64to32x2, Iop_INVALID }; 873 vassert(sizeNarrow < 4); 874 return ops[sizeNarrow]; 875 } 876 877 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) { 878 const IROp ops[4] 879 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, 880 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID }; 881 vassert(sizeNarrow < 4); 882 return ops[sizeNarrow]; 883 } 884 885 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) { 886 const IROp ops[4] 887 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, 888 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID }; 889 vassert(sizeNarrow < 4); 890 return ops[sizeNarrow]; 891 } 892 893 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) { 894 const IROp ops[4] 895 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, 896 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID }; 897 vassert(sizeNarrow < 4); 898 return ops[sizeNarrow]; 899 } 900 901 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) { 902 const IROp ops[4] 903 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4, 904 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID }; 905 vassert(sizeNarrow < 4); 906 return ops[sizeNarrow]; 907 } 908 909 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) { 910 const IROp ops[4] 911 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4, 912 
Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID }; 913 vassert(sizeNarrow < 4); 914 return ops[sizeNarrow]; 915 } 916 917 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) { 918 const IROp ops[4] 919 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4, 920 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID }; 921 vassert(sizeNarrow < 4); 922 return ops[sizeNarrow]; 923 } 924 925 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) { 926 const IROp ops[4] 927 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4, 928 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID }; 929 vassert(sizeNarrow < 4); 930 return ops[sizeNarrow]; 931 } 932 933 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) { 934 const IROp ops[4] 935 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4, 936 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID }; 937 vassert(sizeNarrow < 4); 938 return ops[sizeNarrow]; 939 } 940 941 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) { 942 const IROp ops[4] 943 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4, 944 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID }; 945 vassert(sizeNarrow < 4); 946 return ops[sizeNarrow]; 947 } 948 949 static IROp mkVecQSHLNSATUU ( UInt size ) { 950 const IROp ops[4] 951 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8, 952 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 }; 953 vassert(size < 4); 954 return ops[size]; 955 } 956 957 static IROp mkVecQSHLNSATSS ( UInt size ) { 958 const IROp ops[4] 959 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8, 960 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 }; 961 vassert(size < 4); 962 return ops[size]; 963 } 964 965 static IROp mkVecQSHLNSATSU ( UInt size ) { 966 const IROp ops[4] 967 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8, 968 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 }; 969 vassert(size < 4); 970 return ops[size]; 971 } 972 973 static IROp mkVecADDF ( UInt size ) { 974 const IROp ops[4] 975 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 }; 976 
vassert(size < 4); 977 return ops[size]; 978 } 979 980 static IROp mkVecMAXF ( UInt size ) { 981 const IROp ops[4] 982 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 }; 983 vassert(size < 4); 984 return ops[size]; 985 } 986 987 static IROp mkVecMINF ( UInt size ) { 988 const IROp ops[4] 989 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 }; 990 vassert(size < 4); 991 return ops[size]; 992 } 993 994 /* Generate IR to create 'arg rotated right by imm', for sane values 995 of 'ty' and 'imm'. */ 996 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm ) 997 { 998 UInt w = 0; 999 if (ty == Ity_I64) { 1000 w = 64; 1001 } else { 1002 vassert(ty == Ity_I32); 1003 w = 32; 1004 } 1005 vassert(w != 0); 1006 vassert(imm < w); 1007 if (imm == 0) { 1008 return arg; 1009 } 1010 IRTemp res = newTemp(ty); 1011 assign(res, binop(mkOR(ty), 1012 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)), 1013 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) )); 1014 return res; 1015 } 1016 1017 /* Generate IR to set the returned temp to either all-zeroes or 1018 all ones, as a copy of arg<imm>. */ 1019 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm ) 1020 { 1021 UInt w = 0; 1022 if (ty == Ity_I64) { 1023 w = 64; 1024 } else { 1025 vassert(ty == Ity_I32); 1026 w = 32; 1027 } 1028 vassert(w != 0); 1029 vassert(imm < w); 1030 IRTemp res = newTemp(ty); 1031 assign(res, binop(mkSAR(ty), 1032 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)), 1033 mkU8(w - 1))); 1034 return res; 1035 } 1036 1037 /* U-widen 8/16/32/64 bit int expr to 64. */ 1038 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e ) 1039 { 1040 switch (srcTy) { 1041 case Ity_I64: return e; 1042 case Ity_I32: return unop(Iop_32Uto64, e); 1043 case Ity_I16: return unop(Iop_16Uto64, e); 1044 case Ity_I8: return unop(Iop_8Uto64, e); 1045 default: vpanic("widenUto64(arm64)"); 1046 } 1047 } 1048 1049 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some 1050 of these combinations make sense. 
*/
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I8:  return unop(Iop_64to8,  e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I64: return e;
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24
offsetof(VexGuestARM64State,guest_X24) 1092 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25) 1093 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26) 1094 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27) 1095 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28) 1096 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29) 1097 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30) 1098 1099 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP) 1100 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC) 1101 1102 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP) 1103 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1) 1104 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2) 1105 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP) 1106 1107 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0) 1108 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR) 1109 1110 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0) 1111 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1) 1112 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2) 1113 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3) 1114 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4) 1115 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5) 1116 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6) 1117 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7) 1118 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8) 1119 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9) 1120 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10) 1121 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11) 1122 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12) 1123 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13) 1124 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14) 1125 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15) 1126 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16) 1127 #define OFFB_Q17 
offsetof(VexGuestARM64State,guest_Q17) 1128 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18) 1129 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19) 1130 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20) 1131 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21) 1132 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22) 1133 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23) 1134 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24) 1135 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25) 1136 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26) 1137 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27) 1138 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28) 1139 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29) 1140 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30) 1141 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31) 1142 1143 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR) 1144 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG) 1145 1146 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) 1147 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) 1148 1149 1150 /* ---------------- Integer registers ---------------- */ 1151 1152 static Int offsetIReg64 ( UInt iregNo ) 1153 { 1154 /* Do we care about endianness here? We do if sub-parts of integer 1155 registers are accessed. 
*/ 1156 switch (iregNo) { 1157 case 0: return OFFB_X0; 1158 case 1: return OFFB_X1; 1159 case 2: return OFFB_X2; 1160 case 3: return OFFB_X3; 1161 case 4: return OFFB_X4; 1162 case 5: return OFFB_X5; 1163 case 6: return OFFB_X6; 1164 case 7: return OFFB_X7; 1165 case 8: return OFFB_X8; 1166 case 9: return OFFB_X9; 1167 case 10: return OFFB_X10; 1168 case 11: return OFFB_X11; 1169 case 12: return OFFB_X12; 1170 case 13: return OFFB_X13; 1171 case 14: return OFFB_X14; 1172 case 15: return OFFB_X15; 1173 case 16: return OFFB_X16; 1174 case 17: return OFFB_X17; 1175 case 18: return OFFB_X18; 1176 case 19: return OFFB_X19; 1177 case 20: return OFFB_X20; 1178 case 21: return OFFB_X21; 1179 case 22: return OFFB_X22; 1180 case 23: return OFFB_X23; 1181 case 24: return OFFB_X24; 1182 case 25: return OFFB_X25; 1183 case 26: return OFFB_X26; 1184 case 27: return OFFB_X27; 1185 case 28: return OFFB_X28; 1186 case 29: return OFFB_X29; 1187 case 30: return OFFB_X30; 1188 /* but not 31 */ 1189 default: vassert(0); 1190 } 1191 } 1192 1193 static Int offsetIReg64orSP ( UInt iregNo ) 1194 { 1195 return iregNo == 31 ? 
OFFB_XSP : offsetIReg64(iregNo); 1196 } 1197 1198 static const HChar* nameIReg64orZR ( UInt iregNo ) 1199 { 1200 vassert(iregNo < 32); 1201 static const HChar* names[32] 1202 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 1203 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 1204 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 1205 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" }; 1206 return names[iregNo]; 1207 } 1208 1209 static const HChar* nameIReg64orSP ( UInt iregNo ) 1210 { 1211 if (iregNo == 31) { 1212 return "sp"; 1213 } 1214 vassert(iregNo < 31); 1215 return nameIReg64orZR(iregNo); 1216 } 1217 1218 static IRExpr* getIReg64orSP ( UInt iregNo ) 1219 { 1220 vassert(iregNo < 32); 1221 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1222 } 1223 1224 static IRExpr* getIReg64orZR ( UInt iregNo ) 1225 { 1226 if (iregNo == 31) { 1227 return mkU64(0); 1228 } 1229 vassert(iregNo < 31); 1230 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ); 1231 } 1232 1233 static void putIReg64orSP ( UInt iregNo, IRExpr* e ) 1234 { 1235 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1236 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1237 } 1238 1239 static void putIReg64orZR ( UInt iregNo, IRExpr* e ) 1240 { 1241 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1242 if (iregNo == 31) { 1243 return; 1244 } 1245 vassert(iregNo < 31); 1246 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) ); 1247 } 1248 1249 static const HChar* nameIReg32orZR ( UInt iregNo ) 1250 { 1251 vassert(iregNo < 32); 1252 static const HChar* names[32] 1253 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 1254 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 1255 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 1256 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" }; 1257 return names[iregNo]; 1258 } 1259 1260 static const HChar* nameIReg32orSP ( UInt iregNo ) 1261 { 1262 if (iregNo == 31) { 1263 return "wsp"; 1264 } 1265 vassert(iregNo < 31); 1266 
return nameIReg32orZR(iregNo); 1267 } 1268 1269 static IRExpr* getIReg32orSP ( UInt iregNo ) 1270 { 1271 vassert(iregNo < 32); 1272 return unop(Iop_64to32, 1273 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1274 } 1275 1276 static IRExpr* getIReg32orZR ( UInt iregNo ) 1277 { 1278 if (iregNo == 31) { 1279 return mkU32(0); 1280 } 1281 vassert(iregNo < 31); 1282 return unop(Iop_64to32, 1283 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 )); 1284 } 1285 1286 static void putIReg32orSP ( UInt iregNo, IRExpr* e ) 1287 { 1288 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1290 } 1291 1292 static void putIReg32orZR ( UInt iregNo, IRExpr* e ) 1293 { 1294 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 1295 if (iregNo == 31) { 1296 return; 1297 } 1298 vassert(iregNo < 31); 1299 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) ); 1300 } 1301 1302 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo ) 1303 { 1304 vassert(is64 == True || is64 == False); 1305 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo); 1306 } 1307 1308 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo ) 1309 { 1310 vassert(is64 == True || is64 == False); 1311 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo); 1312 } 1313 1314 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo ) 1315 { 1316 vassert(is64 == True || is64 == False); 1317 return is64 ? 
getIReg64orZR(iregNo) : getIReg32orZR(iregNo); 1318 } 1319 1320 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e ) 1321 { 1322 vassert(is64 == True || is64 == False); 1323 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e); 1324 } 1325 1326 static void putPC ( IRExpr* e ) 1327 { 1328 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 1329 stmt( IRStmt_Put(OFFB_PC, e) ); 1330 } 1331 1332 1333 /* ---------------- Vector (Q) registers ---------------- */ 1334 1335 static Int offsetQReg128 ( UInt qregNo ) 1336 { 1337 /* We don't care about endianness at this point. It only becomes 1338 relevant when dealing with sections of these registers.*/ 1339 switch (qregNo) { 1340 case 0: return OFFB_Q0; 1341 case 1: return OFFB_Q1; 1342 case 2: return OFFB_Q2; 1343 case 3: return OFFB_Q3; 1344 case 4: return OFFB_Q4; 1345 case 5: return OFFB_Q5; 1346 case 6: return OFFB_Q6; 1347 case 7: return OFFB_Q7; 1348 case 8: return OFFB_Q8; 1349 case 9: return OFFB_Q9; 1350 case 10: return OFFB_Q10; 1351 case 11: return OFFB_Q11; 1352 case 12: return OFFB_Q12; 1353 case 13: return OFFB_Q13; 1354 case 14: return OFFB_Q14; 1355 case 15: return OFFB_Q15; 1356 case 16: return OFFB_Q16; 1357 case 17: return OFFB_Q17; 1358 case 18: return OFFB_Q18; 1359 case 19: return OFFB_Q19; 1360 case 20: return OFFB_Q20; 1361 case 21: return OFFB_Q21; 1362 case 22: return OFFB_Q22; 1363 case 23: return OFFB_Q23; 1364 case 24: return OFFB_Q24; 1365 case 25: return OFFB_Q25; 1366 case 26: return OFFB_Q26; 1367 case 27: return OFFB_Q27; 1368 case 28: return OFFB_Q28; 1369 case 29: return OFFB_Q29; 1370 case 30: return OFFB_Q30; 1371 case 31: return OFFB_Q31; 1372 default: vassert(0); 1373 } 1374 } 1375 1376 /* Write to a complete Qreg. */ 1377 static void putQReg128 ( UInt qregNo, IRExpr* e ) 1378 { 1379 vassert(qregNo < 32); 1380 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); 1381 stmt( IRStmt_Put(offsetQReg128(qregNo), e) ); 1382 } 1383 1384 /* Read a complete Qreg. 
*/
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      /* NOTE(review): despite the comment above, the 4-byte case
         currently returns the integer type; Ity_F32 is left disabled
         here — presumably deliberate, confirm before changing. */
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                laneSzB = 1;  break;
      case Ity_F16: case Ity_I16: laneSzB = 2;  break;
      case Ity_F32: case Ity_I32: laneSzB = 4;  break;
      case Ity_F64: case Ity_I64: laneSzB = 8;  break;
      case Ity_V128:              laneSzB = 16; break;
      default:                    break;
   }
   /* Fails for any lane type not listed above. */
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   /* The entire lane must fit within the 16-byte register. */
   vassert(maxOff < 16);
   return base + minOff;
}

/* Put to the least significant lane of a Qreg.
*/
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

/* Name of the least-significant |laneTy|-sized part of Qreg |qregNo|,
   using the standard AArch64 b/h/s/d/q register-name prefixes. */
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

/* Read the most significant 8 bytes of a Qreg as an I64. */
static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

/* Write the most significant 8 bytes of a Qreg. */
static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg.
*/
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them:
      bit 0 -> bit 1 (masked by 2), bit 1 -> bit 0 (masked by 1). */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

/* Printable name of an ARM64 condition code. */
static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs";  // or 'hs'
      case ARM64CondCC: return "cc";  // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64.
*/ 1770 static IRExpr* mk_arm64g_calculate_flag_c ( void ) 1771 { 1772 IRExpr** args 1773 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1774 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1775 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1776 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1777 IRExpr* call 1778 = mkIRExprCCall( 1779 Ity_I64, 1780 0/*regparm*/, 1781 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c, 1782 args 1783 ); 1784 /* Exclude OP and NDEP from definedness checking. We're only 1785 interested in DEP1 and DEP2. */ 1786 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1787 return call; 1788 } 1789 1790 1791 //ZZ /* Build IR to calculate just the overflow flag from stored 1792 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1793 //ZZ Ity_I32. */ 1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void ) 1795 //ZZ { 1796 //ZZ IRExpr** args 1797 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), 1798 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1799 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1800 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); 1801 //ZZ IRExpr* call 1802 //ZZ = mkIRExprCCall( 1803 //ZZ Ity_I32, 1804 //ZZ 0/*regparm*/, 1805 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v, 1806 //ZZ args 1807 //ZZ ); 1808 //ZZ /* Exclude OP and NDEP from definedness checking. We're only 1809 //ZZ interested in DEP1 and DEP2. */ 1810 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1811 //ZZ return call; 1812 //ZZ } 1813 1814 1815 /* Build IR to calculate N Z C V in bits 31:28 of the 1816 returned word. 
*/ 1817 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void ) 1818 { 1819 IRExpr** args 1820 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1821 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1822 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1823 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1824 IRExpr* call 1825 = mkIRExprCCall( 1826 Ity_I64, 1827 0/*regparm*/, 1828 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv, 1829 args 1830 ); 1831 /* Exclude OP and NDEP from definedness checking. We're only 1832 interested in DEP1 and DEP2. */ 1833 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1834 return call; 1835 } 1836 1837 1838 /* Build IR to set the flags thunk, in the most general case. */ 1839 static 1840 void setFlags_D1_D2_ND ( UInt cc_op, 1841 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep ) 1842 { 1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64)); 1844 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64)); 1845 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64)); 1846 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER); 1847 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) )); 1848 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); 1849 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); 1850 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); 1851 } 1852 1853 /* Build IR to set the flags thunk after ADD or SUB. 
*/ 1854 static 1855 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR ) 1856 { 1857 IRTemp argL64 = IRTemp_INVALID; 1858 IRTemp argR64 = IRTemp_INVALID; 1859 IRTemp z64 = newTemp(Ity_I64); 1860 if (is64) { 1861 argL64 = argL; 1862 argR64 = argR; 1863 } else { 1864 argL64 = newTemp(Ity_I64); 1865 argR64 = newTemp(Ity_I64); 1866 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1867 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1868 } 1869 assign(z64, mkU64(0)); 1870 UInt cc_op = ARM64G_CC_OP_NUMBER; 1871 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; } 1872 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; } 1873 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; } 1874 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; } 1875 else { vassert(0); } 1876 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64); 1877 } 1878 1879 /* Build IR to set the flags thunk after ADC or SBC. */ 1880 static 1881 void setFlags_ADC_SBC ( Bool is64, Bool isSBC, 1882 IRTemp argL, IRTemp argR, IRTemp oldC ) 1883 { 1884 IRTemp argL64 = IRTemp_INVALID; 1885 IRTemp argR64 = IRTemp_INVALID; 1886 IRTemp oldC64 = IRTemp_INVALID; 1887 if (is64) { 1888 argL64 = argL; 1889 argR64 = argR; 1890 oldC64 = oldC; 1891 } else { 1892 argL64 = newTemp(Ity_I64); 1893 argR64 = newTemp(Ity_I64); 1894 oldC64 = newTemp(Ity_I64); 1895 assign(argL64, unop(Iop_32Uto64, mkexpr(argL))); 1896 assign(argR64, unop(Iop_32Uto64, mkexpr(argR))); 1897 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC))); 1898 } 1899 UInt cc_op = ARM64G_CC_OP_NUMBER; 1900 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; } 1901 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; } 1902 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; } 1903 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; } 1904 else { vassert(0); } 1905 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64); 1906 } 1907 1908 /* Build IR to set the flags thunk after ADD or SUB, if the given 1909 condition evaluates to True 
   at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* Widen 32-bit operands to 64 bits, as the thunk is I64-typed. */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case: simply
      copy the caller-supplied NZCV value, placed at bits 31:28 as
      OP_COPY expects. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values: each field is selected at run time by
      |cond|. */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor
variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask).
   This is the building block for the lane-swap helpers below: with
   a suitable (mask, sh) pair it exchanges adjacent sh-bit-wide
   groups within the 64-bit value |x|.  Returns a new Ity_I64 temp
   holding the result. */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   /* sh == 0 would be a no-op and sh >= 64 an out-of-range shift. */
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
           )
   );
   return res;
}

/* Generates byte swaps within 32-bit lanes: swap adjacent bytes,
   then adjacent 16-bit halves, so each 4-byte group of the 64-bit
   value ends up byte-reversed. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes: adjacent bytes are
   exchanged throughout the 64-bit value. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap (full reversal of all 8 bytes),
   composed of byte, 16-bit and 32-bit exchanges. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap: first reverse the bits inside each
   byte (1-, 2-, 4-bit exchanges), then reverse the bytes. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits.  The fill is done by repeated
   shift-and-OR doubling of the populated width. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                        binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                        binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                        binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                        binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      /* Already full width; nothing to duplicate. */
      return src;
   }
   vassert(0);
}


/* Duplicates the src element exactly so as to fill a V128 value.
   F64/I64 elements are placed in both 64-bit halves; F32 is first
   replicated to 64 bits; narrow integer types are widened and then
   duplicated via math_DUP_TO_64. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                         mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half (bitQ == 0) or pass it through unchanged
   (bitQ == 1). */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead.
*/
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}


/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.
      (Strictly, the Shr-by-5-and-mask-3 below picks up bits 6:5; but
      bit 5 is zero in all four encodings in the table above, so only
      bit 6 contributes.)

      termL is a sequence cooked up by GNU superopt.  It converts ix
      into an almost correct value NZCV value (incredibly), except
      for the case of UN, where it produces 0100 instead of the
      required 0011.

      termR is therefore a correction term, also computed from ix.  It
      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
      the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

/* Rotate the low |width| bits of |x| right by |rot| places; any
   bits at or above |width| in the result are cleared. */
static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}

/* Replicate the low |esize| bits of |x| across the whole 64-bit
   result.  |esize| must be one of 2, 4, 8, 16, 32, 64. */
static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

/* Returns the index (63..0) of the highest set bit of |x|, or -1
   if |x| is zero. */
static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

/* Decode an ARM64 (immN, immr, imms) bitmask triple into its
   wmask/tmask pair, as per the architecture's "DecodeBitMasks"
   pseudocode.  Either output pointer may be NULL if that mask is
   not required.  |immediate| selects the extra reserved-encoding
   check used for logical-immediate forms.  Returns False if the
   triple is not a valid encoding. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = index of highest set bit of immN:NOT(imms) -- fixes the
      element size as 2^len. */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For the immediate forms, imms == all-ones would denote an
      all-ones mask, which is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift.
*/
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   /* elem_s = a run of S+1 low ones, written so as to avoid an
      undefined shift by 64 when S == 63. */
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   /* elem_d likewise: d+1 low ones, shift-safe for d == 63. */
   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* Rotate and replicate the per-element masks to fill 64 bits. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}


/* Translate one instruction from the "Data processing (immediate)"
   group.  Returns True iff the instruction was decoded and IR was
   emitted for it. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh == 1 means the immediate is LSL'd by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting form writes ZR-encoded destination. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         /* ADRP: page-of-PC plus (offset << 12). */
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ?
res); 2535 DIP("%s %s, %s, 0x%x\n", names[op], 2536 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm); 2537 } 2538 } 2539 return True; 2540 } 2541 after_logic_imm: 2542 2543 /* -------------------- MOV{Z,N,K} -------------------- */ 2544 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) { 2545 /* 31 30 28 22 20 4 2546 | | | | | | 2547 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw)) 2548 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw)) 2549 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw)) 2550 */ 2551 Bool is64 = INSN(31,31) == 1; 2552 UInt subopc = INSN(30,29); 2553 UInt hw = INSN(22,21); 2554 UInt imm16 = INSN(20,5); 2555 UInt dd = INSN(4,0); 2556 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) { 2557 /* invalid; fall through */ 2558 } else { 2559 ULong imm64 = ((ULong)imm16) << (16 * hw); 2560 if (!is64) 2561 vassert(imm64 < 0x100000000ULL); 2562 switch (subopc) { 2563 case BITS2(1,0): // MOVZ 2564 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2565 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2566 break; 2567 case BITS2(0,0): // MOVN 2568 imm64 = ~imm64; 2569 if (!is64) 2570 imm64 &= 0xFFFFFFFFULL; 2571 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64)); 2572 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64); 2573 break; 2574 case BITS2(1,1): // MOVK 2575 /* This is more complex. We are inserting a slice into 2576 the destination register, so we need to have the old 2577 value of it. 
*/ 2578 if (is64) { 2579 IRTemp old = newTemp(Ity_I64); 2580 assign(old, getIReg64orZR(dd)); 2581 ULong mask = 0xFFFFULL << (16 * hw); 2582 IRExpr* res 2583 = binop(Iop_Or64, 2584 binop(Iop_And64, mkexpr(old), mkU64(~mask)), 2585 mkU64(imm64)); 2586 putIReg64orZR(dd, res); 2587 DIP("movk %s, 0x%x, lsl %u\n", 2588 nameIReg64orZR(dd), imm16, 16*hw); 2589 } else { 2590 IRTemp old = newTemp(Ity_I32); 2591 assign(old, getIReg32orZR(dd)); 2592 vassert(hw <= 1); 2593 UInt mask = 0xFFFF << (16 * hw); 2594 IRExpr* res 2595 = binop(Iop_Or32, 2596 binop(Iop_And32, mkexpr(old), mkU32(~mask)), 2597 mkU32((UInt)imm64)); 2598 putIReg32orZR(dd, res); 2599 DIP("movk %s, 0x%x, lsl %u\n", 2600 nameIReg32orZR(dd), imm16, 16*hw); 2601 } 2602 break; 2603 default: 2604 vassert(0); 2605 } 2606 return True; 2607 } 2608 } 2609 2610 /* -------------------- {U,S,}BFM -------------------- */ 2611 /* 30 28 22 21 15 9 4 2612 2613 sf 10 100110 N immr imms nn dd 2614 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2615 UBFM Xd, Xn, #immr, #imms when sf=1, N=1 2616 2617 sf 00 100110 N immr imms nn dd 2618 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2619 SBFM Xd, Xn, #immr, #imms when sf=1, N=1 2620 2621 sf 01 100110 N immr imms nn dd 2622 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0 2623 BFM Xd, Xn, #immr, #imms when sf=1, N=1 2624 */ 2625 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) { 2626 UInt sf = INSN(31,31); 2627 UInt opc = INSN(30,29); 2628 UInt N = INSN(22,22); 2629 UInt immR = INSN(21,16); 2630 UInt immS = INSN(15,10); 2631 UInt nn = INSN(9,5); 2632 UInt dd = INSN(4,0); 2633 Bool inZero = False; 2634 Bool extend = False; 2635 const HChar* nm = "???"; 2636 /* skip invalid combinations */ 2637 switch (opc) { 2638 case BITS2(0,0): 2639 inZero = True; extend = True; nm = "sbfm"; break; 2640 case BITS2(0,1): 2641 inZero = False; extend = False; nm = "bfm"; break; 2642 case BITS2(1,0): 2643 inZero = True; extend = False; nm = "ubfm"; break; 2644 case 
BITS2(1,1): 2645 goto after_bfm; /* invalid */ 2646 default: 2647 vassert(0); 2648 } 2649 if (sf == 1 && N != 1) goto after_bfm; 2650 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0 2651 || ((immS >> 5) & 1) != 0)) goto after_bfm; 2652 ULong wmask = 0, tmask = 0; 2653 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask, 2654 N, immS, immR, False, sf == 1 ? 64 : 32); 2655 if (!ok) goto after_bfm; /* hmmm */ 2656 2657 Bool is64 = sf == 1; 2658 IRType ty = is64 ? Ity_I64 : Ity_I32; 2659 2660 IRTemp dst = newTemp(ty); 2661 IRTemp src = newTemp(ty); 2662 IRTemp bot = newTemp(ty); 2663 IRTemp top = newTemp(ty); 2664 IRTemp res = newTemp(ty); 2665 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd)); 2666 assign(src, getIRegOrZR(is64, nn)); 2667 /* perform bitfield move on low bits */ 2668 assign(bot, binop(mkOR(ty), 2669 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)), 2670 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)), 2671 mkU(ty, wmask)))); 2672 /* determine extension bits (sign, zero or dest register) */ 2673 assign(top, mkexpr(extend ? 
mathREPLICATE(ty, src, immS) : dst)); 2674 /* combine extension bits and result bits */ 2675 assign(res, binop(mkOR(ty), 2676 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)), 2677 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask)))); 2678 putIRegOrZR(is64, dd, mkexpr(res)); 2679 DIP("%s %s, %s, immR=%u, immS=%u\n", 2680 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS); 2681 return True; 2682 } 2683 after_bfm: 2684 2685 /* ---------------------- EXTR ---------------------- */ 2686 /* 30 28 22 20 15 9 4 2687 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6 2688 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32 2689 */ 2690 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) { 2691 Bool is64 = INSN(31,31) == 1; 2692 UInt mm = INSN(20,16); 2693 UInt imm6 = INSN(15,10); 2694 UInt nn = INSN(9,5); 2695 UInt dd = INSN(4,0); 2696 Bool valid = True; 2697 if (INSN(31,31) != INSN(22,22)) 2698 valid = False; 2699 if (!is64 && imm6 >= 32) 2700 valid = False; 2701 if (!valid) goto after_extr; 2702 IRType ty = is64 ? 
Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         /* Pure copy of the low source; a shift by the full register
            width would be undefined, so special-case it. */
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

/* Returns the assembly name for shift-kind encodings 0..3. */
static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}


/* Translate one instruction from the "Data processing (register)"
   group.  Returns True iff the instruction was decoded and IR was
   emitted for it. */
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29, 29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ?
Ity_I64 : Ity_I32; 2819 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) { 2820 /* invalid; fall through */ 2821 } else { 2822 IRTemp argL = newTemp(ty); 2823 assign(argL, getIRegOrZR(is64, rN)); 2824 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False); 2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2826 IRTemp res = newTemp(ty); 2827 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2828 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2829 if (bS) { 2830 setFlags_ADD_SUB(is64, isSUB, argL, argR); 2831 } 2832 DIP("%s%s %s, %s, %s, %s #%u\n", 2833 bOP ? "sub" : "add", bS ? "s" : "", 2834 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2835 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2836 return True; 2837 } 2838 } 2839 2840 /* ------------------- ADC/SBC(reg) ------------------- */ 2841 /* x==0 => 32 bit op x==1 => 64 bit op 2842 2843 31 30 29 28 23 21 20 15 9 4 2844 | | | | | | | | | | 2845 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm 2846 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm 2847 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm 2848 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm 2849 */ 2850 2851 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) { 2852 UInt bX = INSN(31,31); 2853 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */ 2854 UInt bS = INSN(29,29); /* set flags */ 2855 UInt rM = INSN(20,16); 2856 UInt rN = INSN(9,5); 2857 UInt rD = INSN(4,0); 2858 2859 Bool isSUB = bOP == 1; 2860 Bool is64 = bX == 1; 2861 IRType ty = is64 ? Ity_I64 : Ity_I32; 2862 2863 IRTemp oldC = newTemp(ty); 2864 assign(oldC, 2865 is64 ? mk_arm64g_calculate_flag_c() 2866 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) ); 2867 2868 IRTemp argL = newTemp(ty); 2869 assign(argL, getIRegOrZR(is64, rN)); 2870 IRTemp argR = newTemp(ty); 2871 assign(argR, getIRegOrZR(is64, rM)); 2872 2873 IROp op = isSUB ? mkSUB(ty) : mkADD(ty); 2874 IRTemp res = newTemp(ty); 2875 if (isSUB) { 2876 IRExpr* one = is64 ? mkU64(1) : mkU32(1); 2877 IROp xorOp = is64 ? 
Iop_Xor64 : Iop_Xor32; 2878 assign(res, 2879 binop(op, 2880 binop(op, mkexpr(argL), mkexpr(argR)), 2881 binop(xorOp, mkexpr(oldC), one))); 2882 } else { 2883 assign(res, 2884 binop(op, 2885 binop(op, mkexpr(argL), mkexpr(argR)), 2886 mkexpr(oldC))); 2887 } 2888 2889 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res)); 2890 2891 if (bS) { 2892 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC); 2893 } 2894 2895 DIP("%s%s %s, %s, %s\n", 2896 bOP ? "sbc" : "adc", bS ? "s" : "", 2897 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2898 nameIRegOrZR(is64, rM)); 2899 return True; 2900 } 2901 2902 /* -------------------- LOGIC(reg) -------------------- */ 2903 /* x==0 => 32 bit op x==1 => 64 bit op 2904 N==0 => inv? is no-op (no inversion) 2905 N==1 => inv? is NOT 2906 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR 2907 2908 31 30 28 23 21 20 15 9 4 2909 | | | | | | | | | 2910 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6)) 2911 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6)) 2912 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6)) 2913 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6)) 2914 With N=1, the names are: BIC ORN EON BICS 2915 */ 2916 if (INSN(28,24) == BITS5(0,1,0,1,0)) { 2917 UInt bX = INSN(31,31); 2918 UInt sh = INSN(23,22); 2919 UInt bN = INSN(21,21); 2920 UInt rM = INSN(20,16); 2921 UInt imm6 = INSN(15,10); 2922 UInt rN = INSN(9,5); 2923 UInt rD = INSN(4,0); 2924 Bool is64 = bX == 1; 2925 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 2926 if (!is64 && imm6 > 31) { 2927 /* invalid; fall though */ 2928 } else { 2929 IRTemp argL = newTemp(ty); 2930 assign(argL, getIRegOrZR(is64, rN)); 2931 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1); 2932 IROp op = Iop_INVALID; 2933 switch (INSN(30,29)) { 2934 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break; 2935 case BITS2(0,1): op = mkOR(ty); break; 2936 case BITS2(1,0): op = mkXOR(ty); break; 2937 default: vassert(0); 2938 } 2939 IRTemp res = newTemp(ty); 2940 assign(res, binop(op, mkexpr(argL), mkexpr(argR))); 2941 if (INSN(30,29) == BITS2(1,1)) { 2942 setFlags_LOGIC(is64, res); 2943 } 2944 putIRegOrZR(is64, rD, mkexpr(res)); 2945 2946 static const HChar* names_op[8] 2947 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" }; 2948 vassert(((bN << 2) | INSN(30,29)) < 8); 2949 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)]; 2950 /* Special-case the printing of "MOV" */ 2951 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) { 2952 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD), 2953 nameIRegOrZR(is64, rM)); 2954 } else { 2955 DIP("%s %s, %s, %s, %s #%u\n", nm_op, 2956 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN), 2957 nameIRegOrZR(is64, rM), nameSH(sh), imm6); 2958 } 2959 return True; 2960 } 2961 } 2962 2963 /* -------------------- {U,S}MULH -------------------- */ 2964 /* 31 23 22 20 15 9 4 2965 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm 2966 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm 2967 */ 2968 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) 2969 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) { 2970 Bool isU = INSN(23,23) == 1; 2971 UInt mm = INSN(20,16); 2972 UInt nn = INSN(9,5); 2973 UInt dd = INSN(4,0); 2974 putIReg64orZR(dd, unop(Iop_128HIto64, 2975 binop(isU ? Iop_MullU64 : Iop_MullS64, 2976 getIReg64orZR(nn), getIReg64orZR(mm)))); 2977 DIP("%cmulh %s, %s, %s\n", 2978 isU ? 
'u' : 's', 2979 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm)); 2980 return True; 2981 } 2982 2983 /* -------------------- M{ADD,SUB} -------------------- */ 2984 /* 31 30 20 15 14 9 4 2985 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n 2986 sf 00 11011 000 m 1 a n r MADD Rd,Rn,Rm,Ra d = a-m*n 2987 */ 2988 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) { 2989 Bool is64 = INSN(31,31) == 1; 2990 UInt mm = INSN(20,16); 2991 Bool isAdd = INSN(15,15) == 0; 2992 UInt aa = INSN(14,10); 2993 UInt nn = INSN(9,5); 2994 UInt dd = INSN(4,0); 2995 if (is64) { 2996 putIReg64orZR( 2997 dd, 2998 binop(isAdd ? Iop_Add64 : Iop_Sub64, 2999 getIReg64orZR(aa), 3000 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn)))); 3001 } else { 3002 putIReg32orZR( 3003 dd, 3004 binop(isAdd ? Iop_Add32 : Iop_Sub32, 3005 getIReg32orZR(aa), 3006 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn)))); 3007 } 3008 DIP("%s %s, %s, %s, %s\n", 3009 isAdd ? "madd" : "msub", 3010 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3011 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa)); 3012 return True; 3013 } 3014 3015 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */ 3016 /* 31 30 28 20 15 11 9 4 3017 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm 3018 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm 3019 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm 3020 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm 3021 In all cases, the operation is: Rd = if cond then Rn else OP(Rm) 3022 */ 3023 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) { 3024 Bool is64 = INSN(31,31) == 1; 3025 UInt b30 = INSN(30,30); 3026 UInt mm = INSN(20,16); 3027 UInt cond = INSN(15,12); 3028 UInt b10 = INSN(10,10); 3029 UInt nn = INSN(9,5); 3030 UInt dd = INSN(4,0); 3031 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */ 3032 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 3033 IRExpr* argL = getIRegOrZR(is64, nn); 3034 IRExpr* argR = getIRegOrZR(is64, mm); 3035 switch (op) { 3036 case BITS2(0,0): 3037 break; 3038 case BITS2(0,1): 3039 argR = binop(mkADD(ty), argR, mkU(ty,1)); 3040 break; 3041 case BITS2(1,0): 3042 argR = unop(mkNOT(ty), argR); 3043 break; 3044 case BITS2(1,1): 3045 argR = binop(mkSUB(ty), mkU(ty,0), argR); 3046 break; 3047 default: 3048 vassert(0); 3049 } 3050 putIRegOrZR( 3051 is64, dd, 3052 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)), 3053 argL, argR) 3054 ); 3055 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" }; 3056 DIP("%s %s, %s, %s, %s\n", op_nm[op], 3057 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), 3058 nameIRegOrZR(is64, mm), nameCC(cond)); 3059 return True; 3060 } 3061 3062 /* -------------- ADD/SUB(extended reg) -------------- */ 3063 /* 28 20 15 12 9 4 3064 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld 3065 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld 3066 3067 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld 3068 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld 3069 3070 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld 3071 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld 3072 3073 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld 3074 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld 3075 3076 The 'm' operand is extended per opt, thusly: 3077 3078 000 Xm & 0xFF UXTB 3079 001 Xm & 0xFFFF UXTH 3080 010 Xm & (2^32)-1 UXTW 3081 011 Xm UXTX 3082 3083 100 Xm sx from bit 7 SXTB 3084 101 Xm sx from bit 15 SXTH 3085 110 Xm sx from bit 31 SXTW 3086 111 Xm SXTX 3087 3088 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity 3089 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX 3090 are the identity operation on Wm. 3091 3092 After extension, the value is shifted left by imm3 bits, which 3093 may only be in the range 0 .. 4 inclusive. 
3094 */ 3095 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) { 3096 Bool is64 = INSN(31,31) == 1; 3097 Bool isSub = INSN(30,30) == 1; 3098 Bool setCC = INSN(29,29) == 1; 3099 UInt mm = INSN(20,16); 3100 UInt opt = INSN(15,13); 3101 UInt imm3 = INSN(12,10); 3102 UInt nn = INSN(9,5); 3103 UInt dd = INSN(4,0); 3104 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx", 3105 "sxtb", "sxth", "sxtw", "sxtx" }; 3106 /* Do almost the same thing in the 32- and 64-bit cases. */ 3107 IRTemp xN = newTemp(Ity_I64); 3108 IRTemp xM = newTemp(Ity_I64); 3109 assign(xN, getIReg64orSP(nn)); 3110 assign(xM, getIReg64orZR(mm)); 3111 IRExpr* xMw = mkexpr(xM); /* "xM widened" */ 3112 Int shSX = 0; 3113 /* widen Xm .. */ 3114 switch (opt) { 3115 case BITS3(0,0,0): // UXTB 3116 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break; 3117 case BITS3(0,0,1): // UXTH 3118 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break; 3119 case BITS3(0,1,0): // UXTW -- noop for the 32bit case 3120 if (is64) { 3121 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw)); 3122 } 3123 break; 3124 case BITS3(0,1,1): // UXTX -- always a noop 3125 break; 3126 case BITS3(1,0,0): // SXTB 3127 shSX = 56; goto sxTo64; 3128 case BITS3(1,0,1): // SXTH 3129 shSX = 48; goto sxTo64; 3130 case BITS3(1,1,0): // SXTW -- noop for the 32bit case 3131 if (is64) { 3132 shSX = 32; goto sxTo64; 3133 } 3134 break; 3135 case BITS3(1,1,1): // SXTX -- always a noop 3136 break; 3137 sxTo64: 3138 vassert(shSX >= 32); 3139 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)), 3140 mkU8(shSX)); 3141 break; 3142 default: 3143 vassert(0); 3144 } 3145 /* and now shift */ 3146 IRTemp argL = xN; 3147 IRTemp argR = newTemp(Ity_I64); 3148 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3))); 3149 IRTemp res = newTemp(Ity_I64); 3150 assign(res, binop(isSub ? 
Iop_Sub64 : Iop_Add64, 3151 mkexpr(argL), mkexpr(argR))); 3152 if (is64) { 3153 if (setCC) { 3154 putIReg64orZR(dd, mkexpr(res)); 3155 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR); 3156 } else { 3157 putIReg64orSP(dd, mkexpr(res)); 3158 } 3159 } else { 3160 if (setCC) { 3161 IRTemp argL32 = newTemp(Ity_I32); 3162 IRTemp argR32 = newTemp(Ity_I32); 3163 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res))); 3164 assign(argL32, unop(Iop_64to32, mkexpr(argL))); 3165 assign(argR32, unop(Iop_64to32, mkexpr(argR))); 3166 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32); 3167 } else { 3168 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res))); 3169 } 3170 } 3171 DIP("%s%s %s, %s, %s %s lsl %u\n", 3172 isSub ? "sub" : "add", setCC ? "s" : "", 3173 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd), 3174 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm), 3175 nameExt[opt], imm3); 3176 return True; 3177 } 3178 3179 /* ---------------- CCMP/CCMN(imm) ---------------- */ 3180 /* Bizarrely, these appear in the "data processing register" 3181 category, even though they are operations against an 3182 immediate. */ 3183 /* 31 29 20 15 11 9 3 3184 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond 3185 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond 3186 3187 Operation is: 3188 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv 3189 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv 3190 */ 3191 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3192 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) { 3193 Bool is64 = INSN(31,31) == 1; 3194 Bool isSUB = INSN(30,30) == 1; 3195 UInt imm5 = INSN(20,16); 3196 UInt cond = INSN(15,12); 3197 UInt nn = INSN(9,5); 3198 UInt nzcv = INSN(3,0); 3199 3200 IRTemp condT = newTemp(Ity_I1); 3201 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3202 3203 IRType ty = is64 ? 
Ity_I64 : Ity_I32; 3204 IRTemp argL = newTemp(ty); 3205 IRTemp argR = newTemp(ty); 3206 3207 if (is64) { 3208 assign(argL, getIReg64orZR(nn)); 3209 assign(argR, mkU64(imm5)); 3210 } else { 3211 assign(argL, getIReg32orZR(nn)); 3212 assign(argR, mkU32(imm5)); 3213 } 3214 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3215 3216 DIP("ccm%c %s, #%u, #%u, %s\n", 3217 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn), 3218 imm5, nzcv, nameCC(cond)); 3219 return True; 3220 } 3221 3222 /* ---------------- CCMP/CCMN(reg) ---------------- */ 3223 /* 31 29 20 15 11 9 3 3224 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond 3225 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond 3226 Operation is: 3227 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv 3228 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv 3229 */ 3230 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0) 3231 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) { 3232 Bool is64 = INSN(31,31) == 1; 3233 Bool isSUB = INSN(30,30) == 1; 3234 UInt mm = INSN(20,16); 3235 UInt cond = INSN(15,12); 3236 UInt nn = INSN(9,5); 3237 UInt nzcv = INSN(3,0); 3238 3239 IRTemp condT = newTemp(Ity_I1); 3240 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond))); 3241 3242 IRType ty = is64 ? Ity_I64 : Ity_I32; 3243 IRTemp argL = newTemp(ty); 3244 IRTemp argR = newTemp(ty); 3245 3246 if (is64) { 3247 assign(argL, getIReg64orZR(nn)); 3248 assign(argR, getIReg64orZR(mm)); 3249 } else { 3250 assign(argL, getIReg32orZR(nn)); 3251 assign(argR, getIReg32orZR(mm)); 3252 } 3253 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv); 3254 3255 DIP("ccm%c %s, %s, #%u, %s\n", 3256 isSUB ? 
'p' : 'n', nameIRegOrZR(is64, nn), 3257 nameIRegOrZR(is64, mm), nzcv, nameCC(cond)); 3258 return True; 3259 } 3260 3261 3262 /* -------------- REV/REV16/REV32/RBIT -------------- */ 3263 /* 31 30 28 20 15 11 9 4 3264 3265 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn 3266 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn 3267 3268 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn 3269 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn 3270 3271 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn 3272 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn 3273 3274 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn 3275 */ 3276 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3277 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) { 3278 UInt b31 = INSN(31,31); 3279 UInt opc = INSN(11,10); 3280 3281 UInt ix = 0; 3282 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1; 3283 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2; 3284 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3; 3285 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4; 3286 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5; 3287 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6; 3288 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7; 3289 if (ix >= 1 && ix <= 7) { 3290 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7; 3291 UInt nn = INSN(9,5); 3292 UInt dd = INSN(4,0); 3293 IRTemp src = newTemp(Ity_I64); 3294 IRTemp dst = IRTemp_INVALID; 3295 IRTemp (*math)(IRTemp) = NULL; 3296 switch (ix) { 3297 case 1: case 2: math = math_BYTESWAP64; break; 3298 case 3: case 4: math = math_BITSWAP64; break; 3299 case 5: case 6: math = math_USHORTSWAP64; break; 3300 case 7: math = math_UINTSWAP64; break; 3301 default: vassert(0); 3302 } 3303 const HChar* names[7] 3304 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" }; 3305 const HChar* nm = names[ix-1]; 3306 vassert(math); 3307 if (ix == 6) { 3308 /* This has to be special cased, since the logic below doesn't 3309 handle it correctly. 
*/ 3310 assign(src, getIReg64orZR(nn)); 3311 dst = math(src); 3312 putIReg64orZR(dd, 3313 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst)))); 3314 } else if (is64) { 3315 assign(src, getIReg64orZR(nn)); 3316 dst = math(src); 3317 putIReg64orZR(dd, mkexpr(dst)); 3318 } else { 3319 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32))); 3320 dst = math(src); 3321 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3322 } 3323 DIP("%s %s, %s\n", nm, 3324 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn)); 3325 return True; 3326 } 3327 /* else fall through */ 3328 } 3329 3330 /* -------------------- CLZ/CLS -------------------- */ 3331 /* 30 28 24 20 15 9 4 3332 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn 3333 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn 3334 */ 3335 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0) 3336 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) { 3337 Bool is64 = INSN(31,31) == 1; 3338 Bool isCLS = INSN(10,10) == 1; 3339 UInt nn = INSN(9,5); 3340 UInt dd = INSN(4,0); 3341 IRTemp src = newTemp(Ity_I64); 3342 IRTemp srcZ = newTemp(Ity_I64); 3343 IRTemp dst = newTemp(Ity_I64); 3344 /* Get the argument, widened out to 64 bit */ 3345 if (is64) { 3346 assign(src, getIReg64orZR(nn)); 3347 } else { 3348 assign(src, binop(Iop_Shl64, 3349 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32))); 3350 } 3351 /* If this is CLS, mash the arg around accordingly */ 3352 if (isCLS) { 3353 IRExpr* one = mkU8(1); 3354 assign(srcZ, 3355 binop(Iop_Xor64, 3356 binop(Iop_Shl64, mkexpr(src), one), 3357 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one))); 3358 } else { 3359 assign(srcZ, mkexpr(src)); 3360 } 3361 /* And compute CLZ. */ 3362 if (is64) { 3363 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3364 mkU64(isCLS ? 63 : 64), 3365 unop(Iop_Clz64, mkexpr(srcZ)))); 3366 putIReg64orZR(dd, mkexpr(dst)); 3367 } else { 3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)), 3369 mkU64(isCLS ? 
31 : 32), 3370 unop(Iop_Clz64, mkexpr(srcZ)))); 3371 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst))); 3372 } 3373 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z', 3374 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn)); 3375 return True; 3376 } 3377 3378 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */ 3379 /* 30 28 20 15 11 9 4 3380 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm 3381 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm 3382 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm 3383 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm 3384 */ 3385 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3386 && INSN(15,12) == BITS4(0,0,1,0)) { 3387 Bool is64 = INSN(31,31) == 1; 3388 UInt mm = INSN(20,16); 3389 UInt op = INSN(11,10); 3390 UInt nn = INSN(9,5); 3391 UInt dd = INSN(4,0); 3392 IRType ty = is64 ? Ity_I64 : Ity_I32; 3393 IRTemp srcL = newTemp(ty); 3394 IRTemp srcR = newTemp(Ity_I64); 3395 IRTemp res = newTemp(ty); 3396 IROp iop = Iop_INVALID; 3397 assign(srcL, getIRegOrZR(is64, nn)); 3398 assign(srcR, binop(Iop_And64, getIReg64orZR(mm), 3399 mkU64(is64 ? 63 : 31))); 3400 if (op < 3) { 3401 // LSLV, LSRV, ASRV 3402 switch (op) { 3403 case BITS2(0,0): iop = mkSHL(ty); break; 3404 case BITS2(0,1): iop = mkSHR(ty); break; 3405 case BITS2(1,0): iop = mkSAR(ty); break; 3406 default: vassert(0); 3407 } 3408 assign(res, binop(iop, mkexpr(srcL), 3409 unop(Iop_64to8, mkexpr(srcR)))); 3410 } else { 3411 // RORV 3412 IROp opSHL = mkSHL(ty); 3413 IROp opSHR = mkSHR(ty); 3414 IROp opOR = mkOR(ty); 3415 IRExpr* width = mkU64(is64 ? 
64: 32); 3416 assign( 3417 res, 3418 IRExpr_ITE( 3419 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)), 3420 mkexpr(srcL), 3421 binop(opOR, 3422 binop(opSHL, 3423 mkexpr(srcL), 3424 unop(Iop_64to8, binop(Iop_Sub64, width, 3425 mkexpr(srcR)))), 3426 binop(opSHR, 3427 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR)))) 3428 )); 3429 } 3430 putIRegOrZR(is64, dd, mkexpr(res)); 3431 vassert(op < 4); 3432 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" }; 3433 DIP("%s %s, %s, %s\n", 3434 names[op], nameIRegOrZR(is64,dd), 3435 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm)); 3436 return True; 3437 } 3438 3439 /* -------------------- SDIV/UDIV -------------------- */ 3440 /* 30 28 20 15 10 9 4 3441 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm 3442 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm 3443 */ 3444 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0) 3445 && INSN(15,11) == BITS5(0,0,0,0,1)) { 3446 Bool is64 = INSN(31,31) == 1; 3447 UInt mm = INSN(20,16); 3448 Bool isS = INSN(10,10) == 1; 3449 UInt nn = INSN(9,5); 3450 UInt dd = INSN(4,0); 3451 if (isS) { 3452 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32, 3453 getIRegOrZR(is64, nn), 3454 getIRegOrZR(is64, mm))); 3455 } else { 3456 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32, 3457 getIRegOrZR(is64, nn), 3458 getIRegOrZR(is64, mm))); 3459 } 3460 DIP("%cdiv %s, %s, %s\n", isS ? 
's' : 'u', 3461 nameIRegOrZR(is64, dd), 3462 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm)); 3463 return True; 3464 } 3465 3466 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */ 3467 /* 31 23 20 15 14 9 4 3468 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa 3469 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa 3470 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa 3471 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa 3472 with operation 3473 Xd = Xa +/- (Wn *u/s Wm) 3474 */ 3475 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) { 3476 Bool isU = INSN(23,23) == 1; 3477 UInt mm = INSN(20,16); 3478 Bool isAdd = INSN(15,15) == 0; 3479 UInt aa = INSN(14,10); 3480 UInt nn = INSN(9,5); 3481 UInt dd = INSN(4,0); 3482 IRTemp wN = newTemp(Ity_I32); 3483 IRTemp wM = newTemp(Ity_I32); 3484 IRTemp xA = newTemp(Ity_I64); 3485 IRTemp muld = newTemp(Ity_I64); 3486 IRTemp res = newTemp(Ity_I64); 3487 assign(wN, getIReg32orZR(nn)); 3488 assign(wM, getIReg32orZR(mm)); 3489 assign(xA, getIReg64orZR(aa)); 3490 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32, 3491 mkexpr(wN), mkexpr(wM))); 3492 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64, 3493 mkexpr(xA), mkexpr(muld))); 3494 putIReg64orZR(dd, mkexpr(res)); 3495 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub", 3496 nameIReg64orZR(dd), nameIReg32orZR(nn), 3497 nameIReg32orZR(mm), nameIReg64orZR(aa)); 3498 return True; 3499 } 3500 vex_printf("ARM64 front end: data_processing_register\n"); 3501 return False; 3502 # undef INSN 3503 } 3504 3505 3506 /*------------------------------------------------------------*/ 3507 /*--- Math helpers for vector interleave/deinterleave ---*/ 3508 /*------------------------------------------------------------*/ 3509 3510 #define EX(_tmp) \ 3511 mkexpr(_tmp) 3512 #define SL(_hi128,_lo128,_nbytes) \ 3513 ( (_nbytes) == 0 \ 3514 ? 
(_lo128) \ 3515 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) ) 3516 #define ROR(_v128,_nbytes) \ 3517 SL((_v128),(_v128),(_nbytes)) 3518 #define ROL(_v128,_nbytes) \ 3519 SL((_v128),(_v128),16-(_nbytes)) 3520 #define SHR(_v128,_nbytes) \ 3521 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes))) 3522 #define SHL(_v128,_nbytes) \ 3523 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes))) 3524 #define ILO64x2(_argL,_argR) \ 3525 binop(Iop_InterleaveLO64x2,(_argL),(_argR)) 3526 #define IHI64x2(_argL,_argR) \ 3527 binop(Iop_InterleaveHI64x2,(_argL),(_argR)) 3528 #define ILO32x4(_argL,_argR) \ 3529 binop(Iop_InterleaveLO32x4,(_argL),(_argR)) 3530 #define IHI32x4(_argL,_argR) \ 3531 binop(Iop_InterleaveHI32x4,(_argL),(_argR)) 3532 #define ILO16x8(_argL,_argR) \ 3533 binop(Iop_InterleaveLO16x8,(_argL),(_argR)) 3534 #define IHI16x8(_argL,_argR) \ 3535 binop(Iop_InterleaveHI16x8,(_argL),(_argR)) 3536 #define ILO8x16(_argL,_argR) \ 3537 binop(Iop_InterleaveLO8x16,(_argL),(_argR)) 3538 #define IHI8x16(_argL,_argR) \ 3539 binop(Iop_InterleaveHI8x16,(_argL),(_argR)) 3540 #define CEV32x4(_argL,_argR) \ 3541 binop(Iop_CatEvenLanes32x4,(_argL),(_argR)) 3542 #define COD32x4(_argL,_argR) \ 3543 binop(Iop_CatOddLanes32x4,(_argL),(_argR)) 3544 #define COD16x8(_argL,_argR) \ 3545 binop(Iop_CatOddLanes16x8,(_argL),(_argR)) 3546 #define COD8x16(_argL,_argR) \ 3547 binop(Iop_CatOddLanes8x16,(_argL),(_argR)) 3548 #define CEV8x16(_argL,_argR) \ 3549 binop(Iop_CatEvenLanes8x16,(_argL),(_argR)) 3550 #define AND(_arg1,_arg2) \ 3551 binop(Iop_AndV128,(_arg1),(_arg2)) 3552 #define OR2(_arg1,_arg2) \ 3553 binop(Iop_OrV128,(_arg1),(_arg2)) 3554 #define OR3(_arg1,_arg2,_arg3) \ 3555 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3))) 3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \ 3557 binop(Iop_OrV128, \ 3558 binop(Iop_OrV128,(_arg1),(_arg2)), \ 3559 binop(Iop_OrV128,(_arg3),(_arg4))) 3560 3561 3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. 
*/
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   /* Trivial case: a single vector's "interleaved" form is itself.
      laneSzBlg2 is accepted only for signature uniformity with the
      2/3/4-vector variants and is unused here. */
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 128 bit vectors, for ST2 insns.
   laneSzBlg2 is log2 of the lane size in bytes: 3=64x2, 2=32x4,
   1=16x8, 0=8x16.  *i0 and *i1 must be fresh V128 temps supplied by
   the caller; they receive the lane-interleaved combination of u1:u0. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand: one InterleaveLO/InterleaveHI pair per lane size. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do interleaving for 3 128 bit vectors, for ST3 insns.
*/
/* laneSzBlg2 is log2 of the lane size in bytes: 3=64x2, 2=32x4,
   1=16x8, 0=8x16.  *i0..*i2 must be fresh V128 temps from the caller.
   The 32x4 and 16x8 cases recurse on the next lane size up to build
   partial ("p") values, then mask-and-merge them into the final
   3-way-interleaved layout shown in the lane diagrams below. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      // Masks selecting 32-bit lane groups (each hex digit of the
      // mkV128 immediate covers one byte of the 128-bit constant).
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      // Masks selecting one 16-bit-lane quarter of the vector each.
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
             ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
             ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
             ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do interleaving for 4 128 bit vectors, for ST4 insns.
   Same conventions as the 2- and 3-vector variants: laneSzBlg2 is
   log2 of lane size in bytes; *i0..*i3 must be fresh V128 temps. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 1 128 bit vector, for LD1 insns.
   Trivial inverse of math_INTERLEAVE1_128; laneSzBlg2 is unused. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 128 bit vectors, for LD2 insns.
   Exact inverse of math_INTERLEAVE2_128: recovers the per-register
   lane sequences u1:u0 from the memory-order interleaved pair i1:i0. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand: CatEven/CatOdd undo InterleaveLO/InterleaveHI. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 3 128 bit vectors, for LD3 insns.
*/ 3933 static 3934 void math_DEINTERLEAVE3_128( 3935 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 3936 UInt laneSzBlg2, 3937 IRTemp i0, IRTemp i1, IRTemp i2 ) 3938 { 3939 if (laneSzBlg2 == 3) { 3940 // 64x2 3941 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0, 3942 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0 3943 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) )); 3944 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) )); 3945 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) )); 3946 return; 3947 } 3948 3949 if (laneSzBlg2 == 2) { 3950 // 32x4 3951 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0 3952 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0 3953 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0 3954 IRTemp t_a1c0b0a0 = newTempV128(); 3955 IRTemp t_a2c1b1a1 = newTempV128(); 3956 IRTemp t_a3c2b2a2 = newTempV128(); 3957 IRTemp t_a0c3b3a3 = newTempV128(); 3958 IRTemp p0 = newTempV128(); 3959 IRTemp p1 = newTempV128(); 3960 IRTemp p2 = newTempV128(); 3961 // Compute some intermediate values. 3962 assign(t_a1c0b0a0, EX(i0)); 3963 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4)); 3964 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4)); 3965 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4)); 3966 // First deinterleave into lane-pairs 3967 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0))); 3968 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)), 3969 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0)))); 3970 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4))); 3971 // Then deinterleave at 64x2 granularity. 
3972 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2); 3973 return; 3974 } 3975 3976 if (laneSzBlg2 == 1) { 3977 // 16x8 3978 // u2 == C7 C6 C5 C4 C3 C2 C1 C0 3979 // u1 == B7 B6 B5 B4 B3 B2 B1 B0 3980 // u0 == A7 A6 A5 A4 A3 A2 A1 A0 3981 // 3982 // i2 == C7 B7 A7 C6 B6 A6 C5 B5 3983 // i1 == A5 C4 B4 A4 C4 B3 A3 C2 3984 // i0 == B2 A2 C1 B1 A1 C0 B0 A0 3985 // 3986 // p2 == C7 C6 B7 B6 A7 A6 C5 C4 3987 // p1 == B5 B4 A5 A4 C3 C2 B3 B2 3988 // p0 == A3 A2 C1 C0 B1 B0 A1 A0 3989 3990 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111; 3991 s0 = s1 = s2 = s3 3992 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID; 3993 newTempsV128_4(&s0, &s1, &s2, &s3); 3994 newTempsV128_4(&t0, &t1, &t2, &t3); 3995 newTempsV128_4(&p0, &p1, &p2, &c00111111); 3996 3997 // s0 == b2a2 c1b1a1 c0b0a0 3998 // s1 == b4a4 c3b3c3 c2b2a2 3999 // s2 == b6a6 c5b5a5 c4b4a4 4000 // s3 == b0a0 c7b7a7 c6b6a6 4001 assign(s0, EX(i0)); 4002 assign(s1, SL(EX(i1),EX(i0),6*2)); 4003 assign(s2, SL(EX(i2),EX(i1),4*2)); 4004 assign(s3, SL(EX(i0),EX(i2),2*2)); 4005 4006 // t0 == 0 0 c1c0 b1b0 a1a0 4007 // t1 == 0 0 c3c2 b3b2 a3a2 4008 // t2 == 0 0 c5c4 b5b4 a5a4 4009 // t3 == 0 0 c7c6 b7b6 a7a6 4010 assign(c00111111, mkV128(0x0FFF)); 4011 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111))); 4012 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111))); 4013 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111))); 4014 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111))); 4015 4016 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2))); 4017 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2))); 4018 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2))); 4019 4020 // Then deinterleave at 32x4 granularity. 4021 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2); 4022 return; 4023 } 4024 4025 if (laneSzBlg2 == 0) { 4026 // 8x16. This is the same scheme as for 16x8, with twice the 4027 // number of intermediate values. 
4028 // 4029 // u2 == C{f..0} 4030 // u1 == B{f..0} 4031 // u0 == A{f..0} 4032 // 4033 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a} 4034 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5} 4035 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4036 // 4037 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba} 4038 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54} 4039 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10} 4040 // 4041 IRTemp s0, s1, s2, s3, s4, s5, s6, s7, 4042 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK; 4043 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7 4044 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK 4045 = IRTemp_INVALID; 4046 newTempsV128_4(&s0, &s1, &s2, &s3); 4047 newTempsV128_4(&s4, &s5, &s6, &s7); 4048 newTempsV128_4(&t0, &t1, &t2, &t3); 4049 newTempsV128_4(&t4, &t5, &t6, &t7); 4050 newTempsV128_4(&p0, &p1, &p2, &cMASK); 4051 4052 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0} 4053 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2} 4054 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4} 4055 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6} 4056 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8} 4057 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a} 4058 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c} 4059 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e} 4060 assign(s0, SL(EX(i1),EX(i0), 0)); 4061 assign(s1, SL(EX(i1),EX(i0), 6)); 4062 assign(s2, SL(EX(i1),EX(i0),12)); 4063 assign(s3, SL(EX(i2),EX(i1), 2)); 4064 assign(s4, SL(EX(i2),EX(i1), 8)); 4065 assign(s5, SL(EX(i2),EX(i1),14)); 4066 assign(s6, SL(EX(i0),EX(i2), 4)); 4067 assign(s7, SL(EX(i0),EX(i2),10)); 4068 4069 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0 4070 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2 4071 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4 4072 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6 4073 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8 4074 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa 4075 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac 4076 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae 4077 
      // Keep only the low 6 bits of each 16-bit lane: pair each s-vector
      // with itself rotated right by 3 bytes, so adjacent byte pairs land
      // in the same 16-bit lane, then mask.
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      // Repack the 6-bit groups into the three vectors p0 .. p2
      // described in the comment block above.
      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns.

   Reads the interleaved data in i0 .. i3 and writes the de-interleaved
   register images to *u0 .. *u3.  laneSzBlg2 is log2 of the lane size
   in bytes: 3 = 64-bit lanes down to 0 = 8-bit lanes.  The 64-bit case
   is done directly with interleave-lo/hi ops; each smaller lane size
   first merges lanes pairwise and then recurses at the next lane size
   up. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */

/* Helper function -- get doubling and narrowing operations.
*/ 4178 static 4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler, 4180 /*OUT*/IROp* halver, 4181 UInt laneSzBlg2 ) 4182 { 4183 switch (laneSzBlg2) { 4184 case 2: 4185 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4; 4186 break; 4187 case 1: 4188 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8; 4189 break; 4190 case 0: 4191 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16; 4192 break; 4193 default: 4194 vassert(0); 4195 } 4196 } 4197 4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */ 4199 static 4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0, 4201 UInt laneSzBlg2, IRTemp u0 ) 4202 { 4203 assign(*i0, mkexpr(u0)); 4204 } 4205 4206 4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */ 4208 static 4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1, 4210 UInt laneSzBlg2, IRTemp u0, IRTemp u1 ) 4211 { 4212 if (laneSzBlg2 == 3) { 4213 // 1x64, degenerate case 4214 assign(*i0, EX(u0)); 4215 assign(*i1, EX(u1)); 4216 return; 4217 } 4218 4219 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4220 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4221 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4222 4223 IRTemp du0 = newTempV128(); 4224 IRTemp du1 = newTempV128(); 4225 assign(du0, binop(doubler, EX(u0), EX(u0))); 4226 assign(du1, binop(doubler, EX(u1), EX(u1))); 4227 IRTemp di0 = newTempV128(); 4228 IRTemp di1 = newTempV128(); 4229 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1); 4230 assign(*i0, binop(halver, EX(di0), EX(di0))); 4231 assign(*i1, binop(halver, EX(di1), EX(di1))); 4232 } 4233 4234 4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. 
*/
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   // Clone each lane into the low half of each input ...
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   // ... interleave at the next lane size up, then drop the odd lanes.
   // The upper 64 bits of *i0 .. *i2 are junk; callers must ignore them.
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   // Same clone / full-width-interleave / drop-odd-lanes scheme as the
   // 2- and 3-register cases above.
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   // A single register needs no deinterleaving; copy it through.
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns.
*/ 4321 static 4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1, 4323 UInt laneSzBlg2, IRTemp i0, IRTemp i1 ) 4324 { 4325 if (laneSzBlg2 == 3) { 4326 // 1x64, degenerate case 4327 assign(*u0, EX(i0)); 4328 assign(*u1, EX(i1)); 4329 return; 4330 } 4331 4332 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4333 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4334 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4335 4336 IRTemp di0 = newTempV128(); 4337 IRTemp di1 = newTempV128(); 4338 assign(di0, binop(doubler, EX(i0), EX(i0))); 4339 assign(di1, binop(doubler, EX(i1), EX(i1))); 4340 4341 IRTemp du0 = newTempV128(); 4342 IRTemp du1 = newTempV128(); 4343 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1); 4344 assign(*u0, binop(halver, EX(du0), EX(du0))); 4345 assign(*u1, binop(halver, EX(du1), EX(du1))); 4346 } 4347 4348 4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */ 4350 static 4351 void math_DEINTERLEAVE3_64( 4352 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, 4353 UInt laneSzBlg2, 4354 IRTemp i0, IRTemp i1, IRTemp i2 ) 4355 { 4356 if (laneSzBlg2 == 3) { 4357 // 1x64, degenerate case 4358 assign(*u0, EX(i0)); 4359 assign(*u1, EX(i1)); 4360 assign(*u2, EX(i2)); 4361 return; 4362 } 4363 4364 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4365 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4366 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4367 4368 IRTemp di0 = newTempV128(); 4369 IRTemp di1 = newTempV128(); 4370 IRTemp di2 = newTempV128(); 4371 assign(di0, binop(doubler, EX(i0), EX(i0))); 4372 assign(di1, binop(doubler, EX(i1), EX(i1))); 4373 assign(di2, binop(doubler, EX(i2), EX(i2))); 4374 IRTemp du0 = newTempV128(); 4375 IRTemp du1 = newTempV128(); 4376 IRTemp du2 = newTempV128(); 4377 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2); 4378 assign(*u0, binop(halver, EX(du0), EX(du0))); 4379 assign(*u1, binop(halver, EX(du1), EX(du1))); 4380 assign(*u2, binop(halver, 
EX(du2), EX(du2))); 4381 } 4382 4383 4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */ 4385 static 4386 void math_DEINTERLEAVE4_64( 4387 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3, 4388 UInt laneSzBlg2, 4389 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 ) 4390 { 4391 if (laneSzBlg2 == 3) { 4392 // 1x64, degenerate case 4393 assign(*u0, EX(i0)); 4394 assign(*u1, EX(i1)); 4395 assign(*u2, EX(i2)); 4396 assign(*u3, EX(i3)); 4397 return; 4398 } 4399 4400 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2); 4401 IROp doubler = Iop_INVALID, halver = Iop_INVALID; 4402 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2); 4403 4404 IRTemp di0 = newTempV128(); 4405 IRTemp di1 = newTempV128(); 4406 IRTemp di2 = newTempV128(); 4407 IRTemp di3 = newTempV128(); 4408 assign(di0, binop(doubler, EX(i0), EX(i0))); 4409 assign(di1, binop(doubler, EX(i1), EX(i1))); 4410 assign(di2, binop(doubler, EX(i2), EX(i2))); 4411 assign(di3, binop(doubler, EX(i3), EX(i3))); 4412 IRTemp du0 = newTempV128(); 4413 IRTemp du1 = newTempV128(); 4414 IRTemp du2 = newTempV128(); 4415 IRTemp du3 = newTempV128(); 4416 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3, 4417 laneSzBlg2 + 1, di0, di1, di2, di3); 4418 assign(*u0, binop(halver, EX(du0), EX(du0))); 4419 assign(*u1, binop(halver, EX(du1), EX(du1))); 4420 assign(*u2, binop(halver, EX(du2), EX(du2))); 4421 assign(*u3, binop(halver, EX(du3), EX(du3))); 4422 } 4423 4424 4425 #undef EX 4426 #undef SL 4427 #undef ROR 4428 #undef ROL 4429 #undef SHR 4430 #undef SHL 4431 #undef ILO64x2 4432 #undef IHI64x2 4433 #undef ILO32x4 4434 #undef IHI32x4 4435 #undef ILO16x8 4436 #undef IHI16x8 4437 #undef ILO16x8 4438 #undef IHI16x8 4439 #undef CEV32x4 4440 #undef COD32x4 4441 #undef COD16x8 4442 #undef COD8x16 4443 #undef CEV8x16 4444 #undef AND 4445 #undef OR2 4446 #undef OR3 4447 #undef OR4 4448 4449 4450 /*------------------------------------------------------------*/ 4451 /*--- Load and Store instructions ---*/ 4452 
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.

   On success, *buf receives a disassembly-style rendering of the
   amode, and the returned Ity_I64 temp holds the effective address.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   /* Build the index expression (rhs) per the opt:S table above. */
   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64.  szB is the transfer size in bytes;
   values narrower than 8 bytes store the low-order truncation of
   DATAE. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary.
*/
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   // szB is the transfer size in bytes; sub-8-byte loads are
   // zero-extended to 64 bits.
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful.
   bitQ selects 64- vs 128-bit arrangements; size is log2 of the
   lane size in bytes. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}


static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21   9  4
      |  |   |     |    |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt =
INSN(4,0); 4663 IRTemp ta = newTemp(Ity_I64); 4664 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs))); 4665 if (nn == 31) { /* FIXME generate stack alignment check */ } 4666 vassert(szLg2 < 4); 4667 if (isLD) { 4668 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta))); 4669 } else { 4670 gen_narrowing_store(szB, ta, getIReg64orZR(tt)); 4671 } 4672 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" }; 4673 const HChar* st_name[4] = { "strb", "strh", "str", "str" }; 4674 DIP("%s %s, [%s, #%u]\n", 4675 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt), 4676 nameIReg64orSP(nn), offs); 4677 return True; 4678 } 4679 4680 /* ------------ LDUR,STUR (immediate, simm9) ----------- */ 4681 /* 4682 31 29 26 20 11 9 4 4683 | | | | | | | 4684 (at-Rn-then-Rn=EA) | | | 4685 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9 4686 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9 4687 4688 (at-EA-then-Rn=EA) 4689 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]! 4690 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]! 4691 4692 (at-EA) 4693 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9] 4694 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9] 4695 4696 simm9 is unscaled. 4697 4698 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the 4699 load case this is because would create two competing values for 4700 Rt. In the store case the reason is unclear, but the spec 4701 disallows it anyway. 4702 4703 Stores are narrowing, loads are unsigned widening. sz encodes 4704 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8. 
4705 */ 4706 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1)) 4707 == BITS9(1,1,1, 0,0,0,0,0, 0)) { 4708 UInt szLg2 = INSN(31,30); 4709 UInt szB = 1 << szLg2; 4710 Bool isLoad = INSN(22,22) == 1; 4711 UInt imm9 = INSN(20,12); 4712 UInt nn = INSN(9,5); 4713 UInt tt = INSN(4,0); 4714 Bool wBack = INSN(10,10) == 1; 4715 UInt how = INSN(11,10); 4716 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) { 4717 /* undecodable; fall through */ 4718 } else { 4719 if (nn == 31) { /* FIXME generate stack alignment check */ } 4720 4721 // Compute the transfer address TA and the writeback address WA. 4722 IRTemp tRN = newTemp(Ity_I64); 4723 assign(tRN, getIReg64orSP(nn)); 4724 IRTemp tEA = newTemp(Ity_I64); 4725 Long simm9 = (Long)sx_to_64(imm9, 9); 4726 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 4727 4728 IRTemp tTA = newTemp(Ity_I64); 4729 IRTemp tWA = newTemp(Ity_I64); 4730 switch (how) { 4731 case BITS2(0,1): 4732 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4733 case BITS2(1,1): 4734 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4735 case BITS2(0,0): 4736 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4737 default: 4738 vassert(0); /* NOTREACHED */ 4739 } 4740 4741 /* Normally rN would be updated after the transfer. However, in 4742 the special case typifed by 4743 str x30, [sp,#-16]! 4744 it is necessary to update SP before the transfer, (1) 4745 because Memcheck will otherwise complain about a write 4746 below the stack pointer, and (2) because the segfault 4747 stack extension mechanism will otherwise extend the stack 4748 only down to SP before the instruction, which might not be 4749 far enough, if the -16 bit takes the actual access 4750 address to the next page. 
4751 */ 4752 Bool earlyWBack 4753 = wBack && simm9 < 0 && szB == 8 4754 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn; 4755 4756 if (wBack && earlyWBack) 4757 putIReg64orSP(nn, mkexpr(tEA)); 4758 4759 if (isLoad) { 4760 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA))); 4761 } else { 4762 gen_narrowing_store(szB, tTA, getIReg64orZR(tt)); 4763 } 4764 4765 if (wBack && !earlyWBack) 4766 putIReg64orSP(nn, mkexpr(tEA)); 4767 4768 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" }; 4769 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" }; 4770 const HChar* fmt_str = NULL; 4771 switch (how) { 4772 case BITS2(0,1): 4773 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4774 break; 4775 case BITS2(1,1): 4776 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4777 break; 4778 case BITS2(0,0): 4779 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n"; 4780 break; 4781 default: 4782 vassert(0); 4783 } 4784 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2], 4785 nameIRegOrZR(szB == 8, tt), 4786 nameIReg64orSP(nn), simm9); 4787 return True; 4788 } 4789 } 4790 4791 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */ 4792 /* L==1 => mm==LD 4793 L==0 => mm==ST 4794 x==0 => 32 bit transfers, and zero extended loads 4795 x==1 => 64 bit transfers 4796 simm7 is scaled by the (single-register) transfer size 4797 4798 (at-Rn-then-Rn=EA) 4799 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm 4800 4801 (at-EA-then-Rn=EA) 4802 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]! 
4803 4804 (at-EA) 4805 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm] 4806 */ 4807 4808 UInt insn_30_23 = INSN(30,23); 4809 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1) 4810 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1) 4811 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) { 4812 UInt bL = INSN(22,22); 4813 UInt bX = INSN(31,31); 4814 UInt bWBack = INSN(23,23); 4815 UInt rT1 = INSN(4,0); 4816 UInt rN = INSN(9,5); 4817 UInt rT2 = INSN(14,10); 4818 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 4819 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31) 4820 || (bL && rT1 == rT2)) { 4821 /* undecodable; fall through */ 4822 } else { 4823 if (rN == 31) { /* FIXME generate stack alignment check */ } 4824 4825 // Compute the transfer address TA and the writeback address WA. 4826 IRTemp tRN = newTemp(Ity_I64); 4827 assign(tRN, getIReg64orSP(rN)); 4828 IRTemp tEA = newTemp(Ity_I64); 4829 simm7 = (bX ? 8 : 4) * simm7; 4830 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 4831 4832 IRTemp tTA = newTemp(Ity_I64); 4833 IRTemp tWA = newTemp(Ity_I64); 4834 switch (INSN(24,23)) { 4835 case BITS2(0,1): 4836 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 4837 case BITS2(1,1): 4838 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 4839 case BITS2(1,0): 4840 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 4841 default: 4842 vassert(0); /* NOTREACHED */ 4843 } 4844 4845 /* Normally rN would be updated after the transfer. However, in 4846 the special case typifed by 4847 stp x29, x30, [sp,#-112]! 4848 it is necessary to update SP before the transfer, (1) 4849 because Memcheck will otherwise complain about a write 4850 below the stack pointer, and (2) because the segfault 4851 stack extension mechanism will otherwise extend the stack 4852 only down to SP before the instruction, which might not be 4853 far enough, if the -112 bit takes the actual access 4854 address to the next page. 
4855 */ 4856 Bool earlyWBack 4857 = bWBack && simm7 < 0 4858 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0; 4859 4860 if (bWBack && earlyWBack) 4861 putIReg64orSP(rN, mkexpr(tEA)); 4862 4863 /**/ if (bL == 1 && bX == 1) { 4864 // 64 bit load 4865 putIReg64orZR(rT1, loadLE(Ity_I64, 4866 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4867 putIReg64orZR(rT2, loadLE(Ity_I64, 4868 binop(Iop_Add64,mkexpr(tTA),mkU64(8)))); 4869 } else if (bL == 1 && bX == 0) { 4870 // 32 bit load 4871 putIReg32orZR(rT1, loadLE(Ity_I32, 4872 binop(Iop_Add64,mkexpr(tTA),mkU64(0)))); 4873 putIReg32orZR(rT2, loadLE(Ity_I32, 4874 binop(Iop_Add64,mkexpr(tTA),mkU64(4)))); 4875 } else if (bL == 0 && bX == 1) { 4876 // 64 bit store 4877 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4878 getIReg64orZR(rT1)); 4879 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)), 4880 getIReg64orZR(rT2)); 4881 } else { 4882 vassert(bL == 0 && bX == 0); 4883 // 32 bit store 4884 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)), 4885 getIReg32orZR(rT1)); 4886 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)), 4887 getIReg32orZR(rT2)); 4888 } 4889 4890 if (bWBack && !earlyWBack) 4891 putIReg64orSP(rN, mkexpr(tEA)); 4892 4893 const HChar* fmt_str = NULL; 4894 switch (INSN(24,23)) { 4895 case BITS2(0,1): 4896 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 4897 break; 4898 case BITS2(1,1): 4899 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 4900 break; 4901 case BITS2(1,0): 4902 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 4903 break; 4904 default: 4905 vassert(0); 4906 } 4907 DIP(fmt_str, bL == 0 ? 
"st" : "ld", 4908 nameIRegOrZR(bX == 1, rT1), 4909 nameIRegOrZR(bX == 1, rT2), 4910 nameIReg64orSP(rN), simm7); 4911 return True; 4912 } 4913 } 4914 4915 /* ---------------- LDR (literal, int reg) ---------------- */ 4916 /* 31 29 23 4 4917 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)] 4918 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)] 4919 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)] 4920 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)] 4921 Just handles the first two cases for now. 4922 */ 4923 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) { 4924 UInt imm19 = INSN(23,5); 4925 UInt rT = INSN(4,0); 4926 UInt bX = INSN(30,30); 4927 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 4928 if (bX) { 4929 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea))); 4930 } else { 4931 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea))); 4932 } 4933 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea); 4934 return True; 4935 } 4936 4937 /* -------------- {LD,ST}R (integer register) --------------- */ 4938 /* 31 29 20 15 12 11 9 4 4939 | | | | | | | | 4940 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}] 4941 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}] 4942 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}] 4943 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}] 4944 4945 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}] 4946 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}] 4947 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}] 4948 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}] 4949 */ 4950 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0) 4951 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 4952 HChar dis_buf[64]; 4953 UInt szLg2 = INSN(31,30); 4954 Bool isLD = INSN(22,22) == 1; 4955 UInt tt = INSN(4,0); 4956 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 4957 if (ea != IRTemp_INVALID) { 
4958 switch (szLg2) { 4959 case 3: /* 64 bit */ 4960 if (isLD) { 4961 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea))); 4962 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf); 4963 } else { 4964 storeLE(mkexpr(ea), getIReg64orZR(tt)); 4965 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf); 4966 } 4967 break; 4968 case 2: /* 32 bit */ 4969 if (isLD) { 4970 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea))); 4971 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf); 4972 } else { 4973 storeLE(mkexpr(ea), getIReg32orZR(tt)); 4974 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf); 4975 } 4976 break; 4977 case 1: /* 16 bit */ 4978 if (isLD) { 4979 putIReg64orZR(tt, unop(Iop_16Uto64, 4980 loadLE(Ity_I16, mkexpr(ea)))); 4981 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4982 } else { 4983 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt))); 4984 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf); 4985 } 4986 break; 4987 case 0: /* 8 bit */ 4988 if (isLD) { 4989 putIReg64orZR(tt, unop(Iop_8Uto64, 4990 loadLE(Ity_I8, mkexpr(ea)))); 4991 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf); 4992 } else { 4993 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt))); 4994 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf); 4995 } 4996 break; 4997 default: 4998 vassert(0); 4999 } 5000 return True; 5001 } 5002 } 5003 5004 /* -------------- LDRS{B,H,W} (uimm12) -------------- */ 5005 /* 31 29 26 23 21 9 4 5006 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4] 5007 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2] 5008 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1] 5009 where 5010 Rt is Wt when x==1, Xt when x==0 5011 */ 5012 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) { 5013 /* Further checks on bits 31:30 and 22 */ 5014 Bool valid = False; 5015 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5016 case BITS3(1,0,0): 5017 case BITS3(0,1,0): case BITS3(0,1,1): 5018 case BITS3(0,0,0): case BITS3(0,0,1): 5019 valid = True; 5020 break; 5021 } 5022 if (valid) { 5023 UInt szLg2 = 
INSN(31,30); 5024 UInt bitX = INSN(22,22); 5025 UInt imm12 = INSN(21,10); 5026 UInt nn = INSN(9,5); 5027 UInt tt = INSN(4,0); 5028 UInt szB = 1 << szLg2; 5029 IRExpr* ea = binop(Iop_Add64, 5030 getIReg64orSP(nn), mkU64(imm12 * szB)); 5031 switch (szB) { 5032 case 4: 5033 vassert(bitX == 0); 5034 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea))); 5035 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt), 5036 nameIReg64orSP(nn), imm12 * szB); 5037 break; 5038 case 2: 5039 if (bitX == 1) { 5040 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea))); 5041 } else { 5042 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea))); 5043 } 5044 DIP("ldrsh %s, [%s, #%u]\n", 5045 nameIRegOrZR(bitX == 0, tt), 5046 nameIReg64orSP(nn), imm12 * szB); 5047 break; 5048 case 1: 5049 if (bitX == 1) { 5050 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea))); 5051 } else { 5052 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea))); 5053 } 5054 DIP("ldrsb %s, [%s, #%u]\n", 5055 nameIRegOrZR(bitX == 0, tt), 5056 nameIReg64orSP(nn), imm12 * szB); 5057 break; 5058 default: 5059 vassert(0); 5060 } 5061 return True; 5062 } 5063 /* else fall through */ 5064 } 5065 5066 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */ 5067 /* (at-Rn-then-Rn=EA) 5068 31 29 23 21 20 11 9 4 5069 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9 5070 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9 5071 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9 5072 5073 (at-EA-then-Rn=EA) 5074 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]! 5075 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]! 5076 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]! 
5077 where 5078 Rt is Wt when x==1, Xt when x==0 5079 transfer-at-Rn when [11]==0, at EA when [11]==1 5080 */ 5081 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5082 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5083 /* Further checks on bits 31:30 and 22 */ 5084 Bool valid = False; 5085 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5086 case BITS3(1,0,0): // LDRSW Xt 5087 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt 5088 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt 5089 valid = True; 5090 break; 5091 } 5092 if (valid) { 5093 UInt szLg2 = INSN(31,30); 5094 UInt imm9 = INSN(20,12); 5095 Bool atRN = INSN(11,11) == 0; 5096 UInt nn = INSN(9,5); 5097 UInt tt = INSN(4,0); 5098 IRTemp tRN = newTemp(Ity_I64); 5099 IRTemp tEA = newTemp(Ity_I64); 5100 IRTemp tTA = IRTemp_INVALID; 5101 ULong simm9 = sx_to_64(imm9, 9); 5102 Bool is64 = INSN(22,22) == 0; 5103 assign(tRN, getIReg64orSP(nn)); 5104 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5105 tTA = atRN ? tRN : tEA; 5106 HChar ch = '?'; 5107 /* There are 5 cases: 5108 byte load, SX to 64 5109 byte load, SX to 32, ZX to 64 5110 halfword load, SX to 64 5111 halfword load, SX to 32, ZX to 64 5112 word load, SX to 64 5113 The ifs below handle them in the listed order. 5114 */ 5115 if (szLg2 == 0) { 5116 ch = 'b'; 5117 if (is64) { 5118 putIReg64orZR(tt, unop(Iop_8Sto64, 5119 loadLE(Ity_I8, mkexpr(tTA)))); 5120 } else { 5121 putIReg32orZR(tt, unop(Iop_8Sto32, 5122 loadLE(Ity_I8, mkexpr(tTA)))); 5123 } 5124 } 5125 else if (szLg2 == 1) { 5126 ch = 'h'; 5127 if (is64) { 5128 putIReg64orZR(tt, unop(Iop_16Sto64, 5129 loadLE(Ity_I16, mkexpr(tTA)))); 5130 } else { 5131 putIReg32orZR(tt, unop(Iop_16Sto32, 5132 loadLE(Ity_I16, mkexpr(tTA)))); 5133 } 5134 } 5135 else if (szLg2 == 2 && is64) { 5136 ch = 'w'; 5137 putIReg64orZR(tt, unop(Iop_32Sto64, 5138 loadLE(Ity_I32, mkexpr(tTA)))); 5139 } 5140 else { 5141 vassert(0); 5142 } 5143 putIReg64orSP(nn, mkexpr(tEA)); 5144 DIP(atRN ? 
"ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!", 5145 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 5146 return True; 5147 } 5148 /* else fall through */ 5149 } 5150 5151 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */ 5152 /* 31 29 23 21 20 11 9 4 5153 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9] 5154 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9] 5155 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9] 5156 where 5157 Rt is Wt when x==1, Xt when x==0 5158 */ 5159 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5160 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5161 /* Further checks on bits 31:30 and 22 */ 5162 Bool valid = False; 5163 switch ((INSN(31,30) << 1) | INSN(22,22)) { 5164 case BITS3(1,0,0): // LDURSW Xt 5165 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt 5166 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt 5167 valid = True; 5168 break; 5169 } 5170 if (valid) { 5171 UInt szLg2 = INSN(31,30); 5172 UInt imm9 = INSN(20,12); 5173 UInt nn = INSN(9,5); 5174 UInt tt = INSN(4,0); 5175 IRTemp tRN = newTemp(Ity_I64); 5176 IRTemp tEA = newTemp(Ity_I64); 5177 ULong simm9 = sx_to_64(imm9, 9); 5178 Bool is64 = INSN(22,22) == 0; 5179 assign(tRN, getIReg64orSP(nn)); 5180 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5181 HChar ch = '?'; 5182 /* There are 5 cases: 5183 byte load, SX to 64 5184 byte load, SX to 32, ZX to 64 5185 halfword load, SX to 64 5186 halfword load, SX to 32, ZX to 64 5187 word load, SX to 64 5188 The ifs below handle them in the listed order. 
5189 */ 5190 if (szLg2 == 0) { 5191 ch = 'b'; 5192 if (is64) { 5193 putIReg64orZR(tt, unop(Iop_8Sto64, 5194 loadLE(Ity_I8, mkexpr(tEA)))); 5195 } else { 5196 putIReg32orZR(tt, unop(Iop_8Sto32, 5197 loadLE(Ity_I8, mkexpr(tEA)))); 5198 } 5199 } 5200 else if (szLg2 == 1) { 5201 ch = 'h'; 5202 if (is64) { 5203 putIReg64orZR(tt, unop(Iop_16Sto64, 5204 loadLE(Ity_I16, mkexpr(tEA)))); 5205 } else { 5206 putIReg32orZR(tt, unop(Iop_16Sto32, 5207 loadLE(Ity_I16, mkexpr(tEA)))); 5208 } 5209 } 5210 else if (szLg2 == 2 && is64) { 5211 ch = 'w'; 5212 putIReg64orZR(tt, unop(Iop_32Sto64, 5213 loadLE(Ity_I32, mkexpr(tEA)))); 5214 } 5215 else { 5216 vassert(0); 5217 } 5218 DIP("ldurs%c %s, [%s, #%lld]", 5219 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9); 5220 return True; 5221 } 5222 /* else fall through */ 5223 } 5224 5225 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */ 5226 /* L==1 => mm==LD 5227 L==0 => mm==ST 5228 sz==00 => 32 bit (S) transfers 5229 sz==01 => 64 bit (D) transfers 5230 sz==10 => 128 bit (Q) transfers 5231 sz==11 isn't allowed 5232 simm7 is scaled by the (single-register) transfer size 5233 5234 31 29 26 22 21 14 9 4 5235 5236 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm] 5237 (at-EA, with nontemporal hint) 5238 5239 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm 5240 (at-Rn-then-Rn=EA) 5241 5242 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm] 5243 (at-EA) 5244 5245 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]! 
5246 (at-EA-then-Rn=EA) 5247 */ 5248 if (INSN(29,25) == BITS5(1,0,1,1,0)) { 5249 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units 5250 Bool isLD = INSN(22,22) == 1; 5251 Bool wBack = INSN(23,23) == 1; 5252 Long simm7 = (Long)sx_to_64(INSN(21,15), 7); 5253 UInt tt2 = INSN(14,10); 5254 UInt nn = INSN(9,5); 5255 UInt tt1 = INSN(4,0); 5256 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) { 5257 /* undecodable; fall through */ 5258 } else { 5259 if (nn == 31) { /* FIXME generate stack alignment check */ } 5260 5261 // Compute the transfer address TA and the writeback address WA. 5262 UInt szB = 4 << szSlg2; /* szB is the per-register size */ 5263 IRTemp tRN = newTemp(Ity_I64); 5264 assign(tRN, getIReg64orSP(nn)); 5265 IRTemp tEA = newTemp(Ity_I64); 5266 simm7 = szB * simm7; 5267 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7))); 5268 5269 IRTemp tTA = newTemp(Ity_I64); 5270 IRTemp tWA = newTemp(Ity_I64); 5271 switch (INSN(24,23)) { 5272 case BITS2(0,1): 5273 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break; 5274 case BITS2(1,1): 5275 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break; 5276 case BITS2(1,0): 5277 case BITS2(0,0): 5278 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break; 5279 default: 5280 vassert(0); /* NOTREACHED */ 5281 } 5282 5283 IRType ty = Ity_INVALID; 5284 switch (szB) { 5285 case 4: ty = Ity_F32; break; 5286 case 8: ty = Ity_F64; break; 5287 case 16: ty = Ity_V128; break; 5288 default: vassert(0); 5289 } 5290 5291 /* Normally rN would be updated after the transfer. However, in 5292 the special cases typifed by 5293 stp q0, q1, [sp,#-512]! 5294 stp d0, d1, [sp,#-512]! 5295 stp s0, s1, [sp,#-512]! 
5296 it is necessary to update SP before the transfer, (1) 5297 because Memcheck will otherwise complain about a write 5298 below the stack pointer, and (2) because the segfault 5299 stack extension mechanism will otherwise extend the stack 5300 only down to SP before the instruction, which might not be 5301 far enough, if the -512 bit takes the actual access 5302 address to the next page. 5303 */ 5304 Bool earlyWBack 5305 = wBack && simm7 < 0 5306 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD; 5307 5308 if (wBack && earlyWBack) 5309 putIReg64orSP(nn, mkexpr(tEA)); 5310 5311 if (isLD) { 5312 if (szB < 16) { 5313 putQReg128(tt1, mkV128(0x0000)); 5314 } 5315 putQRegLO(tt1, 5316 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0)))); 5317 if (szB < 16) { 5318 putQReg128(tt2, mkV128(0x0000)); 5319 } 5320 putQRegLO(tt2, 5321 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB)))); 5322 } else { 5323 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)), 5324 getQRegLO(tt1, ty)); 5325 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)), 5326 getQRegLO(tt2, ty)); 5327 } 5328 5329 if (wBack && !earlyWBack) 5330 putIReg64orSP(nn, mkexpr(tEA)); 5331 5332 const HChar* fmt_str = NULL; 5333 switch (INSN(24,23)) { 5334 case BITS2(0,1): 5335 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n"; 5336 break; 5337 case BITS2(1,1): 5338 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n"; 5339 break; 5340 case BITS2(1,0): 5341 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n"; 5342 break; 5343 case BITS2(0,0): 5344 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n"; 5345 break; 5346 default: 5347 vassert(0); 5348 } 5349 DIP(fmt_str, isLD ? 
"ld" : "st", 5350 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty), 5351 nameIReg64orSP(nn), simm7); 5352 return True; 5353 } 5354 } 5355 5356 /* -------------- {LD,ST}R (vector register) --------------- */ 5357 /* 31 29 23 20 15 12 11 9 4 5358 | | | | | | | | | 5359 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}] 5360 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}] 5361 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}] 5362 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}] 5363 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}] 5364 5365 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}] 5366 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}] 5367 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}] 5368 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}] 5369 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}] 5370 */ 5371 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5372 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5373 HChar dis_buf[64]; 5374 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5375 Bool isLD = INSN(22,22) == 1; 5376 UInt tt = INSN(4,0); 5377 if (szLg2 > 4) goto after_LDR_STR_vector_register; 5378 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/); 5379 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register; 5380 switch (szLg2) { 5381 case 0: /* 8 bit */ 5382 if (isLD) { 5383 putQReg128(tt, mkV128(0x0000)); 5384 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea))); 5385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5386 } else { 5387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8)); 5388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf); 5389 } 5390 break; 5391 case 1: 5392 if (isLD) { 5393 putQReg128(tt, mkV128(0x0000)); 5394 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea))); 5395 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 5396 } else { 5397 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16)); 5398 
DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf); 5399 } 5400 break; 5401 case 2: /* 32 bit */ 5402 if (isLD) { 5403 putQReg128(tt, mkV128(0x0000)); 5404 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea))); 5405 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5406 } else { 5407 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32)); 5408 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf); 5409 } 5410 break; 5411 case 3: /* 64 bit */ 5412 if (isLD) { 5413 putQReg128(tt, mkV128(0x0000)); 5414 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea))); 5415 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5416 } else { 5417 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64)); 5418 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf); 5419 } 5420 break; 5421 case 4: 5422 if (isLD) { 5423 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea))); 5424 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf); 5425 } else { 5426 storeLE(mkexpr(ea), getQReg128(tt)); 5427 DIP("str %s, %s\n", nameQReg128(tt), dis_buf); 5428 } 5429 break; 5430 default: 5431 vassert(0); 5432 } 5433 return True; 5434 } 5435 after_LDR_STR_vector_register: 5436 5437 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */ 5438 /* 31 29 22 20 15 12 11 9 4 5439 | | | | | | | | | 5440 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}] 5441 5442 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}] 5443 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}] 5444 5445 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}] 5446 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}] 5447 */ 5448 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1) 5449 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) { 5450 HChar dis_buf[64]; 5451 UInt szLg2 = INSN(31,30); 5452 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64 5453 UInt tt = INSN(4,0); 5454 if (szLg2 == 3) goto after_LDRS_integer_register; 5455 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/); 5456 if (ea == IRTemp_INVALID) 
goto after_LDRS_integer_register; 5457 /* Enumerate the 5 variants explicitly. */ 5458 if (szLg2 == 2/*32 bit*/ && sxTo64) { 5459 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea)))); 5460 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf); 5461 return True; 5462 } 5463 else 5464 if (szLg2 == 1/*16 bit*/) { 5465 if (sxTo64) { 5466 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea)))); 5467 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf); 5468 } else { 5469 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea)))); 5470 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf); 5471 } 5472 return True; 5473 } 5474 else 5475 if (szLg2 == 0/*8 bit*/) { 5476 if (sxTo64) { 5477 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea)))); 5478 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf); 5479 } else { 5480 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea)))); 5481 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf); 5482 } 5483 return True; 5484 } 5485 /* else it's an invalid combination */ 5486 } 5487 after_LDRS_integer_register: 5488 5489 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */ 5490 /* This is the Unsigned offset variant only. The Post-Index and 5491 Pre-Index variants are below. 
5492 5493 31 29 23 21 9 4 5494 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1] 5495 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2] 5496 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4] 5497 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8] 5498 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16] 5499 5500 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1] 5501 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2] 5502 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4] 5503 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8] 5504 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16] 5505 */ 5506 if (INSN(29,24) == BITS6(1,1,1,1,0,1) 5507 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) { 5508 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5509 Bool isLD = INSN(22,22) == 1; 5510 UInt pimm12 = INSN(21,10) << szLg2; 5511 UInt nn = INSN(9,5); 5512 UInt tt = INSN(4,0); 5513 IRTemp tEA = newTemp(Ity_I64); 5514 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5515 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12))); 5516 if (isLD) { 5517 if (szLg2 < 4) { 5518 putQReg128(tt, mkV128(0x0000)); 5519 } 5520 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5521 } else { 5522 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5523 } 5524 DIP("%s %s, [%s, #%u]\n", 5525 isLD ? "ldr" : "str", 5526 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12); 5527 return True; 5528 } 5529 5530 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */ 5531 /* These are the Post-Index and Pre-Index variants. 5532 5533 31 29 23 20 11 9 4 5534 (at-Rn-then-Rn=EA) 5535 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm 5536 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm 5537 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm 5538 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm 5539 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm 5540 5541 (at-EA-then-Rn=EA) 5542 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]! 5543 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]! 
5544 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]! 5545 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]! 5546 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]! 5547 5548 Stores are the same except with bit 22 set to 0. 5549 */ 5550 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5551 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5552 && INSN(21,21) == 0 && INSN(10,10) == 1) { 5553 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5554 Bool isLD = INSN(22,22) == 1; 5555 UInt imm9 = INSN(20,12); 5556 Bool atRN = INSN(11,11) == 0; 5557 UInt nn = INSN(9,5); 5558 UInt tt = INSN(4,0); 5559 IRTemp tRN = newTemp(Ity_I64); 5560 IRTemp tEA = newTemp(Ity_I64); 5561 IRTemp tTA = IRTemp_INVALID; 5562 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5563 ULong simm9 = sx_to_64(imm9, 9); 5564 assign(tRN, getIReg64orSP(nn)); 5565 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9))); 5566 tTA = atRN ? tRN : tEA; 5567 if (isLD) { 5568 if (szLg2 < 4) { 5569 putQReg128(tt, mkV128(0x0000)); 5570 } 5571 putQRegLO(tt, loadLE(ty, mkexpr(tTA))); 5572 } else { 5573 storeLE(mkexpr(tTA), getQRegLO(tt, ty)); 5574 } 5575 putIReg64orSP(nn, mkexpr(tEA)); 5576 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n", 5577 isLD ? 
"ldr" : "str", 5578 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9); 5579 return True; 5580 } 5581 5582 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */ 5583 /* 31 29 23 20 11 9 4 5584 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm] 5585 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm] 5586 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm] 5587 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm] 5588 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm] 5589 5590 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm] 5591 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm] 5592 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm] 5593 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm] 5594 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm] 5595 */ 5596 if (INSN(29,24) == BITS6(1,1,1,1,0,0) 5597 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4 5598 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) { 5599 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30); 5600 Bool isLD = INSN(22,22) == 1; 5601 UInt imm9 = INSN(20,12); 5602 UInt nn = INSN(9,5); 5603 UInt tt = INSN(4,0); 5604 ULong simm9 = sx_to_64(imm9, 9); 5605 IRTemp tEA = newTemp(Ity_I64); 5606 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2); 5607 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9))); 5608 if (isLD) { 5609 if (szLg2 < 4) { 5610 putQReg128(tt, mkV128(0x0000)); 5611 } 5612 putQRegLO(tt, loadLE(ty, mkexpr(tEA))); 5613 } else { 5614 storeLE(mkexpr(tEA), getQRegLO(tt, ty)); 5615 } 5616 DIP("%s %s, [%s, #%lld]\n", 5617 isLD ? 
"ldur" : "stur", 5618 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9); 5619 return True; 5620 } 5621 5622 /* ---------------- LDR (literal, SIMD&FP) ---------------- */ 5623 /* 31 29 23 4 5624 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)] 5625 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)] 5626 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)] 5627 */ 5628 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) { 5629 UInt szB = 4 << INSN(31,30); 5630 UInt imm19 = INSN(23,5); 5631 UInt tt = INSN(4,0); 5632 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21); 5633 IRType ty = preferredVectorSubTypeFromSize(szB); 5634 putQReg128(tt, mkV128(0x0000)); 5635 putQRegLO(tt, loadLE(ty, mkU64(ea))); 5636 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea); 5637 return True; 5638 } 5639 5640 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */ 5641 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */ 5642 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */ 5643 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */ 5644 /* 31 29 26 22 21 20 15 11 9 4 5645 5646 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP] 5647 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step 5648 5649 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP] 5650 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step 5651 5652 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP] 5653 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step 5654 5655 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP] 5656 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step 5657 5658 T = defined by Q and sz in the normal way 5659 step = if m == 11111 then transfer-size else Xm 5660 xx = case L of 1 -> LD ; 0 -> ST 5661 */ 5662 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5663 && INSN(21,21) == 0) { 5664 Bool bitQ = INSN(30,30); 5665 Bool isPX = INSN(23,23) == 1; 5666 
Bool isLD = INSN(22,22) == 1; 5667 UInt mm = INSN(20,16); 5668 UInt opc = INSN(15,12); 5669 UInt sz = INSN(11,10); 5670 UInt nn = INSN(9,5); 5671 UInt tt = INSN(4,0); 5672 Bool isQ = bitQ == 1; 5673 Bool is1d = sz == BITS2(1,1) && !isQ; 5674 UInt nRegs = 0; 5675 switch (opc) { 5676 case BITS4(0,0,0,0): nRegs = 4; break; 5677 case BITS4(0,1,0,0): nRegs = 3; break; 5678 case BITS4(1,0,0,0): nRegs = 2; break; 5679 case BITS4(0,1,1,1): nRegs = 1; break; 5680 default: break; 5681 } 5682 5683 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5684 If we see it, set nRegs to 0 so as to cause the next conditional 5685 to fail. */ 5686 if (!isPX && mm != 0) 5687 nRegs = 0; 5688 5689 if (nRegs == 1 /* .1d is allowed */ 5690 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) { 5691 5692 UInt xferSzB = (isQ ? 16 : 8) * nRegs; 5693 5694 /* Generate the transfer address (TA) and if necessary the 5695 writeback address (WB) */ 5696 IRTemp tTA = newTemp(Ity_I64); 5697 assign(tTA, getIReg64orSP(nn)); 5698 if (nn == 31) { /* FIXME generate stack alignment check */ } 5699 IRTemp tWB = IRTemp_INVALID; 5700 if (isPX) { 5701 tWB = newTemp(Ity_I64); 5702 assign(tWB, binop(Iop_Add64, 5703 mkexpr(tTA), 5704 mm == BITS5(1,1,1,1,1) ? 
mkU64(xferSzB) 5705 : getIReg64orZR(mm))); 5706 } 5707 5708 /* -- BEGIN generate the transfers -- */ 5709 5710 IRTemp u0, u1, u2, u3, i0, i1, i2, i3; 5711 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID; 5712 switch (nRegs) { 5713 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */ 5714 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */ 5715 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */ 5716 case 1: u0 = newTempV128(); i0 = newTempV128(); break; 5717 default: vassert(0); 5718 } 5719 5720 /* -- Multiple 128 or 64 bit stores -- */ 5721 if (!isLD) { 5722 switch (nRegs) { 5723 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5724 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5725 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */ 5726 case 1: assign(u0, getQReg128((tt+0) % 32)); break; 5727 default: vassert(0); 5728 } 5729 switch (nRegs) { 5730 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64) 5731 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3); 5732 break; 5733 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64) 5734 (&i0, &i1, &i2, sz, u0, u1, u2); 5735 break; 5736 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64) 5737 (&i0, &i1, sz, u0, u1); 5738 break; 5739 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64) 5740 (&i0, sz, u0); 5741 break; 5742 default: vassert(0); 5743 } 5744 # define MAYBE_NARROW_TO_64(_expr) \ 5745 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5746 UInt step = isQ ? 
16 : 8; 5747 switch (nRegs) { 5748 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5749 MAYBE_NARROW_TO_64(mkexpr(i3)) ); 5750 /* fallthru */ 5751 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5752 MAYBE_NARROW_TO_64(mkexpr(i2)) ); 5753 /* fallthru */ 5754 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5755 MAYBE_NARROW_TO_64(mkexpr(i1)) ); 5756 /* fallthru */ 5757 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5758 MAYBE_NARROW_TO_64(mkexpr(i0)) ); 5759 break; 5760 default: vassert(0); 5761 } 5762 # undef MAYBE_NARROW_TO_64 5763 } 5764 5765 /* -- Multiple 128 or 64 bit loads -- */ 5766 else /* isLD */ { 5767 UInt step = isQ ? 16 : 8; 5768 IRType loadTy = isQ ? Ity_V128 : Ity_I64; 5769 # define MAYBE_WIDEN_FROM_64(_expr) \ 5770 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr))) 5771 switch (nRegs) { 5772 case 4: 5773 assign(i3, MAYBE_WIDEN_FROM_64( 5774 loadLE(loadTy, 5775 binop(Iop_Add64, mkexpr(tTA), 5776 mkU64(3 * step))))); 5777 /* fallthru */ 5778 case 3: 5779 assign(i2, MAYBE_WIDEN_FROM_64( 5780 loadLE(loadTy, 5781 binop(Iop_Add64, mkexpr(tTA), 5782 mkU64(2 * step))))); 5783 /* fallthru */ 5784 case 2: 5785 assign(i1, MAYBE_WIDEN_FROM_64( 5786 loadLE(loadTy, 5787 binop(Iop_Add64, mkexpr(tTA), 5788 mkU64(1 * step))))); 5789 /* fallthru */ 5790 case 1: 5791 assign(i0, MAYBE_WIDEN_FROM_64( 5792 loadLE(loadTy, 5793 binop(Iop_Add64, mkexpr(tTA), 5794 mkU64(0 * step))))); 5795 break; 5796 default: 5797 vassert(0); 5798 } 5799 # undef MAYBE_WIDEN_FROM_64 5800 switch (nRegs) { 5801 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64) 5802 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3); 5803 break; 5804 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64) 5805 (&u0, &u1, &u2, sz, i0, i1, i2); 5806 break; 5807 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64) 5808 (&u0, &u1, sz, i0, i1); 5809 break; 5810 case 1: (isQ ? 
math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64) 5811 (&u0, sz, i0); 5812 break; 5813 default: vassert(0); 5814 } 5815 switch (nRegs) { 5816 case 4: putQReg128( (tt+3) % 32, 5817 math_MAYBE_ZERO_HI64(bitQ, u3)); 5818 /* fallthru */ 5819 case 3: putQReg128( (tt+2) % 32, 5820 math_MAYBE_ZERO_HI64(bitQ, u2)); 5821 /* fallthru */ 5822 case 2: putQReg128( (tt+1) % 32, 5823 math_MAYBE_ZERO_HI64(bitQ, u1)); 5824 /* fallthru */ 5825 case 1: putQReg128( (tt+0) % 32, 5826 math_MAYBE_ZERO_HI64(bitQ, u0)); 5827 break; 5828 default: vassert(0); 5829 } 5830 } 5831 5832 /* -- END generate the transfers -- */ 5833 5834 /* Do the writeback, if necessary */ 5835 if (isPX) { 5836 putIReg64orSP(nn, mkexpr(tWB)); 5837 } 5838 5839 HChar pxStr[20]; 5840 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 5841 if (isPX) { 5842 if (mm == BITS5(1,1,1,1,1)) 5843 vex_sprintf(pxStr, ", #%u", xferSzB); 5844 else 5845 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 5846 } 5847 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 5848 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n", 5849 isLD ? 
"ld" : "st", nRegs, 5850 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 5851 pxStr); 5852 5853 return True; 5854 } 5855 /* else fall through */ 5856 } 5857 5858 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */ 5859 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */ 5860 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */ 5861 /* 31 29 26 22 21 20 15 11 9 4 5862 5863 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP] 5864 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step 5865 5866 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP] 5867 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step 5868 5869 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP] 5870 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step 5871 5872 T = defined by Q and sz in the normal way 5873 step = if m == 11111 then transfer-size else Xm 5874 xx = case L of 1 -> LD ; 0 -> ST 5875 */ 5876 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0) 5877 && INSN(21,21) == 0) { 5878 Bool bitQ = INSN(30,30); 5879 Bool isPX = INSN(23,23) == 1; 5880 Bool isLD = INSN(22,22) == 1; 5881 UInt mm = INSN(20,16); 5882 UInt opc = INSN(15,12); 5883 UInt sz = INSN(11,10); 5884 UInt nn = INSN(9,5); 5885 UInt tt = INSN(4,0); 5886 Bool isQ = bitQ == 1; 5887 UInt nRegs = 0; 5888 switch (opc) { 5889 case BITS4(0,0,1,0): nRegs = 4; break; 5890 case BITS4(0,1,1,0): nRegs = 3; break; 5891 case BITS4(1,0,1,0): nRegs = 2; break; 5892 default: break; 5893 } 5894 5895 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 5896 If we see it, set nRegs to 0 so as to cause the next conditional 5897 to fail. */ 5898 if (!isPX && mm != 0) 5899 nRegs = 0; 5900 5901 if (nRegs >= 2 && nRegs <= 4) { 5902 5903 UInt xferSzB = (isQ ? 
16 : 8) * nRegs; 5904 5905 /* Generate the transfer address (TA) and if necessary the 5906 writeback address (WB) */ 5907 IRTemp tTA = newTemp(Ity_I64); 5908 assign(tTA, getIReg64orSP(nn)); 5909 if (nn == 31) { /* FIXME generate stack alignment check */ } 5910 IRTemp tWB = IRTemp_INVALID; 5911 if (isPX) { 5912 tWB = newTemp(Ity_I64); 5913 assign(tWB, binop(Iop_Add64, 5914 mkexpr(tTA), 5915 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 5916 : getIReg64orZR(mm))); 5917 } 5918 5919 /* -- BEGIN generate the transfers -- */ 5920 5921 IRTemp u0, u1, u2, u3; 5922 u0 = u1 = u2 = u3 = IRTemp_INVALID; 5923 switch (nRegs) { 5924 case 4: u3 = newTempV128(); /* fallthru */ 5925 case 3: u2 = newTempV128(); /* fallthru */ 5926 case 2: u1 = newTempV128(); 5927 u0 = newTempV128(); break; 5928 default: vassert(0); 5929 } 5930 5931 /* -- Multiple 128 or 64 bit stores -- */ 5932 if (!isLD) { 5933 switch (nRegs) { 5934 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */ 5935 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */ 5936 case 2: assign(u1, getQReg128((tt+1) % 32)); 5937 assign(u0, getQReg128((tt+0) % 32)); break; 5938 default: vassert(0); 5939 } 5940 # define MAYBE_NARROW_TO_64(_expr) \ 5941 (isQ ? (_expr) : unop(Iop_V128to64,(_expr))) 5942 UInt step = isQ ? 16 : 8; 5943 switch (nRegs) { 5944 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)), 5945 MAYBE_NARROW_TO_64(mkexpr(u3)) ); 5946 /* fallthru */ 5947 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)), 5948 MAYBE_NARROW_TO_64(mkexpr(u2)) ); 5949 /* fallthru */ 5950 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)), 5951 MAYBE_NARROW_TO_64(mkexpr(u1)) ); 5952 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)), 5953 MAYBE_NARROW_TO_64(mkexpr(u0)) ); 5954 break; 5955 default: vassert(0); 5956 } 5957 # undef MAYBE_NARROW_TO_64 5958 } 5959 5960 /* -- Multiple 128 or 64 bit loads -- */ 5961 else /* isLD */ { 5962 UInt step = isQ ? 16 : 8; 5963 IRType loadTy = isQ ? 
Ity_V128 : Ity_I64; 5964 # define MAYBE_WIDEN_FROM_64(_expr) \ 5965 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr))) 5966 switch (nRegs) { 5967 case 4: 5968 assign(u3, MAYBE_WIDEN_FROM_64( 5969 loadLE(loadTy, 5970 binop(Iop_Add64, mkexpr(tTA), 5971 mkU64(3 * step))))); 5972 /* fallthru */ 5973 case 3: 5974 assign(u2, MAYBE_WIDEN_FROM_64( 5975 loadLE(loadTy, 5976 binop(Iop_Add64, mkexpr(tTA), 5977 mkU64(2 * step))))); 5978 /* fallthru */ 5979 case 2: 5980 assign(u1, MAYBE_WIDEN_FROM_64( 5981 loadLE(loadTy, 5982 binop(Iop_Add64, mkexpr(tTA), 5983 mkU64(1 * step))))); 5984 assign(u0, MAYBE_WIDEN_FROM_64( 5985 loadLE(loadTy, 5986 binop(Iop_Add64, mkexpr(tTA), 5987 mkU64(0 * step))))); 5988 break; 5989 default: 5990 vassert(0); 5991 } 5992 # undef MAYBE_WIDEN_FROM_64 5993 switch (nRegs) { 5994 case 4: putQReg128( (tt+3) % 32, 5995 math_MAYBE_ZERO_HI64(bitQ, u3)); 5996 /* fallthru */ 5997 case 3: putQReg128( (tt+2) % 32, 5998 math_MAYBE_ZERO_HI64(bitQ, u2)); 5999 /* fallthru */ 6000 case 2: putQReg128( (tt+1) % 32, 6001 math_MAYBE_ZERO_HI64(bitQ, u1)); 6002 putQReg128( (tt+0) % 32, 6003 math_MAYBE_ZERO_HI64(bitQ, u0)); 6004 break; 6005 default: vassert(0); 6006 } 6007 } 6008 6009 /* -- END generate the transfers -- */ 6010 6011 /* Do the writeback, if necessary */ 6012 if (isPX) { 6013 putIReg64orSP(nn, mkexpr(tWB)); 6014 } 6015 6016 HChar pxStr[20]; 6017 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6018 if (isPX) { 6019 if (mm == BITS5(1,1,1,1,1)) 6020 vex_sprintf(pxStr, ", #%u", xferSzB); 6021 else 6022 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6023 } 6024 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6025 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n", 6026 isLD ? 
"ld" : "st", 6027 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6028 pxStr); 6029 6030 return True; 6031 } 6032 /* else fall through */ 6033 } 6034 6035 /* ---------- LD1R (single structure, replicate) ---------- */ 6036 /* ---------- LD2R (single structure, replicate) ---------- */ 6037 /* ---------- LD3R (single structure, replicate) ---------- */ 6038 /* ---------- LD4R (single structure, replicate) ---------- */ 6039 /* 31 29 22 20 15 11 9 4 6040 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP] 6041 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step 6042 6043 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP] 6044 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step 6045 6046 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP] 6047 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step 6048 6049 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP] 6050 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step 6051 6052 step = if m == 11111 then transfer-size else Xm 6053 */ 6054 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1) 6055 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1) 6056 && INSN(12,12) == 0) { 6057 UInt bitQ = INSN(30,30); 6058 Bool isPX = INSN(23,23) == 1; 6059 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6060 UInt mm = INSN(20,16); 6061 UInt sz = INSN(11,10); 6062 UInt nn = INSN(9,5); 6063 UInt tt = INSN(4,0); 6064 6065 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 
*/ 6066 if (isPX || mm == 0) { 6067 6068 IRType ty = integerIRTypeOfSize(1 << sz); 6069 6070 UInt laneSzB = 1 << sz; 6071 UInt xferSzB = laneSzB * nRegs; 6072 6073 /* Generate the transfer address (TA) and if necessary the 6074 writeback address (WB) */ 6075 IRTemp tTA = newTemp(Ity_I64); 6076 assign(tTA, getIReg64orSP(nn)); 6077 if (nn == 31) { /* FIXME generate stack alignment check */ } 6078 IRTemp tWB = IRTemp_INVALID; 6079 if (isPX) { 6080 tWB = newTemp(Ity_I64); 6081 assign(tWB, binop(Iop_Add64, 6082 mkexpr(tTA), 6083 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB) 6084 : getIReg64orZR(mm))); 6085 } 6086 6087 /* Do the writeback, if necessary */ 6088 if (isPX) { 6089 putIReg64orSP(nn, mkexpr(tWB)); 6090 } 6091 6092 IRTemp e0, e1, e2, e3, v0, v1, v2, v3; 6093 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID; 6094 switch (nRegs) { 6095 case 4: 6096 e3 = newTemp(ty); 6097 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6098 mkU64(3 * laneSzB)))); 6099 v3 = math_DUP_TO_V128(e3, ty); 6100 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3)); 6101 /* fallthrough */ 6102 case 3: 6103 e2 = newTemp(ty); 6104 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6105 mkU64(2 * laneSzB)))); 6106 v2 = math_DUP_TO_V128(e2, ty); 6107 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2)); 6108 /* fallthrough */ 6109 case 2: 6110 e1 = newTemp(ty); 6111 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6112 mkU64(1 * laneSzB)))); 6113 v1 = math_DUP_TO_V128(e1, ty); 6114 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1)); 6115 /* fallthrough */ 6116 case 1: 6117 e0 = newTemp(ty); 6118 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA), 6119 mkU64(0 * laneSzB)))); 6120 v0 = math_DUP_TO_V128(e0, ty); 6121 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0)); 6122 break; 6123 default: 6124 vassert(0); 6125 } 6126 6127 HChar pxStr[20]; 6128 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6129 if (isPX) { 6130 if (mm == BITS5(1,1,1,1,1)) 6131 vex_sprintf(pxStr, ", #%u", 
xferSzB); 6132 else 6133 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6134 } 6135 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6136 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n", 6137 nRegs, 6138 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn), 6139 pxStr); 6140 6141 return True; 6142 } 6143 /* else fall through */ 6144 } 6145 6146 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */ 6147 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */ 6148 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */ 6149 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */ 6150 /* 31 29 22 21 20 15 11 9 4 6151 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP] 6152 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step 6153 6154 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP] 6155 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step 6156 6157 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP] 6158 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step 6159 6160 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP] 6161 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step 6162 6163 step = if m == 11111 then transfer-size else Xm 6164 op = case L of 1 -> LD ; 0 -> ST 6165 6166 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb 6167 01:b:b:b0 -> 2, bbb 6168 10:b:b:00 -> 4, bb 6169 10:b:0:01 -> 8, b 6170 */ 6171 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) { 6172 UInt bitQ = INSN(30,30); 6173 Bool isPX = INSN(23,23) == 1; 6174 Bool isLD = INSN(22,22) == 1; 6175 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1; 6176 UInt mm = INSN(20,16); 6177 UInt xx = INSN(15,14); 6178 UInt bitS = INSN(12,12); 6179 UInt sz = INSN(11,10); 6180 UInt nn = INSN(9,5); 6181 UInt tt = INSN(4,0); 6182 6183 Bool valid = True; 6184 6185 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. 
*/ 6186 if (!isPX && mm != 0) 6187 valid = False; 6188 6189 UInt laneSzB = 0; /* invalid */ 6190 UInt ix = 16; /* invalid */ 6191 6192 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz; 6193 switch (xx_q_S_sz) { 6194 case 0x00: case 0x01: case 0x02: case 0x03: 6195 case 0x04: case 0x05: case 0x06: case 0x07: 6196 case 0x08: case 0x09: case 0x0A: case 0x0B: 6197 case 0x0C: case 0x0D: case 0x0E: case 0x0F: 6198 laneSzB = 1; ix = xx_q_S_sz & 0xF; 6199 break; 6200 case 0x10: case 0x12: case 0x14: case 0x16: 6201 case 0x18: case 0x1A: case 0x1C: case 0x1E: 6202 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7; 6203 break; 6204 case 0x20: case 0x24: case 0x28: case 0x2C: 6205 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3; 6206 break; 6207 case 0x21: case 0x29: 6208 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1; 6209 break; 6210 default: 6211 break; 6212 } 6213 6214 if (valid && laneSzB != 0) { 6215 6216 IRType ty = integerIRTypeOfSize(laneSzB); 6217 UInt xferSzB = laneSzB * nRegs; 6218 6219 /* Generate the transfer address (TA) and if necessary the 6220 writeback address (WB) */ 6221 IRTemp tTA = newTemp(Ity_I64); 6222 assign(tTA, getIReg64orSP(nn)); 6223 if (nn == 31) { /* FIXME generate stack alignment check */ } 6224 IRTemp tWB = IRTemp_INVALID; 6225 if (isPX) { 6226 tWB = newTemp(Ity_I64); 6227 assign(tWB, binop(Iop_Add64, 6228 mkexpr(tTA), 6229 mm == BITS5(1,1,1,1,1) ? 
mkU64(xferSzB) 6230 : getIReg64orZR(mm))); 6231 } 6232 6233 /* Do the writeback, if necessary */ 6234 if (isPX) { 6235 putIReg64orSP(nn, mkexpr(tWB)); 6236 } 6237 6238 switch (nRegs) { 6239 case 4: { 6240 IRExpr* addr 6241 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB)); 6242 if (isLD) { 6243 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr)); 6244 } else { 6245 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty)); 6246 } 6247 /* fallthrough */ 6248 } 6249 case 3: { 6250 IRExpr* addr 6251 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB)); 6252 if (isLD) { 6253 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr)); 6254 } else { 6255 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty)); 6256 } 6257 /* fallthrough */ 6258 } 6259 case 2: { 6260 IRExpr* addr 6261 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB)); 6262 if (isLD) { 6263 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr)); 6264 } else { 6265 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty)); 6266 } 6267 /* fallthrough */ 6268 } 6269 case 1: { 6270 IRExpr* addr 6271 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB)); 6272 if (isLD) { 6273 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr)); 6274 } else { 6275 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty)); 6276 } 6277 break; 6278 } 6279 default: 6280 vassert(0); 6281 } 6282 6283 HChar pxStr[20]; 6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0; 6285 if (isPX) { 6286 if (mm == BITS5(1,1,1,1,1)) 6287 vex_sprintf(pxStr, ", #%u", xferSzB); 6288 else 6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm)); 6290 } 6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz); 6292 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n", 6293 isLD ? 
"ld" : "st", nRegs, 6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, 6295 ix, nameIReg64orSP(nn), pxStr); 6296 6297 return True; 6298 } 6299 /* else fall through */ 6300 } 6301 6302 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */ 6303 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */ 6304 /* 31 29 23 20 14 9 4 6305 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP] 6306 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP] 6307 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP] 6308 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP] 6309 */ 6310 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) 6311 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) 6312 && INSN(14,10) == BITS5(1,1,1,1,1)) { 6313 UInt szBlg2 = INSN(31,30); 6314 Bool isLD = INSN(22,22) == 1; 6315 Bool isAcqOrRel = INSN(15,15) == 1; 6316 UInt ss = INSN(20,16); 6317 UInt nn = INSN(9,5); 6318 UInt tt = INSN(4,0); 6319 6320 vassert(szBlg2 < 4); 6321 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6322 IRType ty = integerIRTypeOfSize(szB); 6323 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6324 6325 IRTemp ea = newTemp(Ity_I64); 6326 assign(ea, getIReg64orSP(nn)); 6327 /* FIXME generate check that ea is szB-aligned */ 6328 6329 if (isLD && ss == BITS5(1,1,1,1,1)) { 6330 IRTemp res = newTemp(ty); 6331 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); 6332 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6333 if (isAcqOrRel) { 6334 stmt(IRStmt_MBE(Imbe_Fence)); 6335 } 6336 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 6337 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6338 return True; 6339 } 6340 if (!isLD) { 6341 if (isAcqOrRel) { 6342 stmt(IRStmt_MBE(Imbe_Fence)); 6343 } 6344 IRTemp res = newTemp(Ity_I1); 6345 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6346 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); 6347 /* IR semantics: res is 1 if store succeeds, 0 if it fails. 6348 Need to set rS to 1 on failure, 0 on success. 
*/ 6349 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), 6350 mkU64(1))); 6351 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], 6352 nameIRegOrZR(False, ss), 6353 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6354 return True; 6355 } 6356 /* else fall through */ 6357 } 6358 6359 /* ------------------ LDA{R,RH,RB} ------------------ */ 6360 /* ------------------ STL{R,RH,RB} ------------------ */ 6361 /* 31 29 23 20 14 9 4 6362 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP] 6363 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP] 6364 */ 6365 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) 6366 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) { 6367 UInt szBlg2 = INSN(31,30); 6368 Bool isLD = INSN(22,22) == 1; 6369 UInt nn = INSN(9,5); 6370 UInt tt = INSN(4,0); 6371 6372 vassert(szBlg2 < 4); 6373 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ 6374 IRType ty = integerIRTypeOfSize(szB); 6375 const HChar* suffix[4] = { "rb", "rh", "r", "r" }; 6376 6377 IRTemp ea = newTemp(Ity_I64); 6378 assign(ea, getIReg64orSP(nn)); 6379 /* FIXME generate check that ea is szB-aligned */ 6380 6381 if (isLD) { 6382 IRTemp res = newTemp(ty); 6383 assign(res, loadLE(ty, mkexpr(ea))); 6384 putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); 6385 stmt(IRStmt_MBE(Imbe_Fence)); 6386 DIP("lda%s %s, [%s]\n", suffix[szBlg2], 6387 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6388 } else { 6389 stmt(IRStmt_MBE(Imbe_Fence)); 6390 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); 6391 storeLE(mkexpr(ea), data); 6392 DIP("stl%s %s, [%s]\n", suffix[szBlg2], 6393 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); 6394 } 6395 return True; 6396 } 6397 6398 /* ------------------ PRFM (immediate) ------------------ */ 6399 /* 31 21 9 4 6400 11 111 00110 imm12 n t PRFM pfrop=Rt, [Xn|SP, #pimm] 6401 */ 6402 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) { 6403 UInt imm12 = INSN(21,10); 6404 UInt nn = INSN(9,5); 6405 UInt tt = INSN(4,0); 6406 /* Generating any IR here is 
pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

/* Decode one ARM64 branch / system / barrier instruction held in
   |insn|, emitting IR for it.  On success, updates |dres| (length,
   continuation kind) as needed and returns True; returns False if the
   instruction is not recognised by this decoder. */
static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      /* Conditional side-exit to the target; fall through to the next
         instruction when the condition is false. */
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         /* BL: save the return address in X30 (LR). */
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext = Dis_StopHere;
      /* NOTE(review): Ijk_Call is used even for plain B (bLink == 0);
         Ijk_Boring might be expected there -- confirm intended. */
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         /* Read the target into a temp before clobbering X30, in case
            nn == 30. */
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      /* Compare the full 64-bit or the low 32-bit register against
         zero, depending on the sf bit. */
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;  /* tested bit index, 0..63 */
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      /* Isolate bit |bitNo| of Xt and branch on it being 0 (TBZ) or
         nonzero (TBNZ). */
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }

   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* All 32 bits of the W register are written to the guest
            FPCR slot. */
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         /* OR the two 64-bit halves together: nonzero iff any bit of
            the QC shadow is set. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         /* NOTE(review): operands look swapped -- expected
            "msr nzcv, %s".  Debug-print only; no behavioural effect. */
         DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 31          21            11  7 6    4
      11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      /* All three barriers are modelled with a single full fence. */
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   /* -------------------- BRK -------------------- */
   /* 31        23  20    4
      1101 0100 001 imm16 00000  BRK #imm16
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
       && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt imm16 = INSN(20,5);
      /* Request SIGTRAP and then restart of this insn. */
      putPC(mkU64(guest_PC_curr_instr + 0));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_SigTRAP;
      DIP("brk #%u\n", imm16);
      return True;
   }

   //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return
binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

/* Generate N copies of |bit| in the bottom of a ULong.
   That is, return a mask of N ones if |bit| is 1, else zero. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

/* Duplicate the low 32 bits of |bits32| into both halves of a ULong. */
static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

/* Duplicate the low 16 bits of |bits16| into all four 16-bit lanes. */
static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

/* Duplicate the low 8 bits of |bits8| into all eight byte lanes. */
static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero.
   Layout assembled here: 1 sign bit, then E bits built from imm8<6>
   (NOT(b6) followed by E-1 copies of b6), then F bits holding
   imm8<5:0> at the top with zeros below. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.
*/
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   /* For some cmode encodings, imm8 == 0 makes the encoding invalid;
      those cases set testimm8 and are rejected at the end. */
   Bool  testimm8 = False;

   /* Dispatch on cmode<3:1>; cmode<0> is consulted inside the cases
      that need it.  The patterns follow the AdvSIMDExpandImm
      pseudocode in the manual. */
   switch (cmode >> 1) {
      case 0: /* 32-bit splat of imm8 in byte 0 */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1: /* 32-bit splat of imm8 in byte 1 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2: /* 32-bit splat of imm8 in byte 2 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3: /* 32-bit splat of imm8 in byte 3 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4: /* 16-bit splat of imm8 in byte 0 */
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5: /* 16-bit splat of imm8 in byte 1 */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* 32-bit splat of imm8 shifted up, with ones shifted in
            below ("MSL" style). */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            /* 8-bit splat of imm8 */
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            /* Each bit of imm8 expands to a whole byte (0x00 or
               0xFF), imm8<7> giving the most significant byte. */
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            /* 32-bit FP immediate (VFPExpandImm-style), splatted to
               both halves: sign : NOT(imm8<6>) : Replicate(imm8<6>,5)
               : imm8<5:0> : Zeros(19). */
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            /* 64-bit FP immediate:
               imm64 = imm8<7>:NOT(imm8<6>)
                       :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48); */
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   /* imm8 == 0 is not a valid encoding for the flagged cases. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point.
*/ 7076 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF, 7077 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper, 7078 /*OUT*/const HChar** arrSpec, 7079 Bool bitQ, Bool bitSZ ) 7080 { 7081 vassert(bitQ == True || bitQ == False); 7082 vassert(bitSZ == True || bitSZ == False); 7083 if (bitQ && bitSZ) { // 2x64 7084 if (tyI) *tyI = Ity_I64; 7085 if (tyF) *tyF = Ity_F64; 7086 if (nLanes) *nLanes = 2; 7087 if (zeroUpper) *zeroUpper = False; 7088 if (arrSpec) *arrSpec = "2d"; 7089 return True; 7090 } 7091 if (bitQ && !bitSZ) { // 4x32 7092 if (tyI) *tyI = Ity_I32; 7093 if (tyF) *tyF = Ity_F32; 7094 if (nLanes) *nLanes = 4; 7095 if (zeroUpper) *zeroUpper = False; 7096 if (arrSpec) *arrSpec = "4s"; 7097 return True; 7098 } 7099 if (!bitQ && !bitSZ) { // 2x32 7100 if (tyI) *tyI = Ity_I32; 7101 if (tyF) *tyF = Ity_F32; 7102 if (nLanes) *nLanes = 2; 7103 if (zeroUpper) *zeroUpper = True; 7104 if (arrSpec) *arrSpec = "2s"; 7105 return True; 7106 } 7107 // Else impliedly 1x64, which isn't allowed. 7108 return False; 7109 } 7110 7111 /* Helper for decoding laneage for shift-style vector operations 7112 that involve an immediate shift amount. 
*/
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   /* The position of the leading 1 in immh selects the lane size
      (log2: 3=64-bit .. 0=8-bit); the remaining immh:immb bits encode
      the shift amount as (2 * laneBits) - immhb.  immh == 0 is an
      invalid encoding. */
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: split into two vectors, each holding one 64-bit
            half duplicated. */
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: four vectors, each a duplicated 32-bit quarter. */
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: eight vectors, each a duplicated 16-bit chunk. */
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here.  Stage 4: sixteen
            vectors, each one byte lane of 'src' cloned 16 times. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Now fold the 16 cloned vectors pairwise with 'op'. */
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the bottom 8 bits. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         /* Same scheme as the 8x16 case, but with one fewer cloning
            stage since there are only 8 lanes. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the bottom 16 bits. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Max32Fx4: case Iop_Min32Fx4:
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         /* 4 lanes of 32 bits: two cloning stages. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the bottom 32 bits. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         /* 2 lanes of 64 bits: a single interleave stage suffices. */
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only.  |tab| holds the |len|+1 table vectors; |src| the index
   bytes; |oor_values| supplies the bytes used for out-of-range
   indices (zeros for TBL, the destination's old value for TBX). */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      /* Rebase the indices so that table vector |tabent| covers
         index values 0 .. 15. */
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      /* Lane is valid for this table vector iff its biased index
         is < 16 (unsigned). */
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      /* Mask the indices to 0..15 so Perm8x16 can't misbehave. */
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.
   Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   /* Select which 64-bit half of each operand feeds the widening op. */
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}


/* Generate signed/unsigned absolute difference vector IR.  |size| is
   the usual lane-size indication (log2 of lane bytes, 0..3).  The
   result lane is (argL > argR) ? argL-argR : argR-argL, with the
   comparison signed or unsigned per |isU|. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   /* msk lanes are all-ones where argL > argR. */
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   /* Select argL-argR where the mask is set, else argR-argL. */
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value.  The trick: interleave the source
   with itself, then shift each double-width lane right (logically or
   arithmetically per |zWiden|) by the narrow lane width. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10: /* 32 -> 64 bit lanes */
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01: /* 16 -> 32 bit lanes */
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00: /* 8 -> 16 bit lanes */
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value.  Odd lanes only need a right shift
   of each wide lane; even lanes are first shifted left to place the
   narrow value at the top of the wide lane. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   /* Logical shift for zero-widening, arithmetic for sign-widening. */
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value, with |argHi|'s lanes
   in the upper half of the result. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo].
*/
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   /* Build a chain of up to four cat/interleave ops, one per level of
      the lane hierarchy, each one chosen so that the wanted lane is
      propagated; the fallthroughs accumulate the ops for all levels
      at and above |size|. */
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   /* Apply the ops, widest level first, stopping at the first unused
      slot. */
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
   otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   /* Defaults, also returned for the invalid-selector case. */
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   /* The position of the lowest set bit of imm5 selects the lane
      size; the bits above it give the lane number. */
   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}


/* Clone |imm| to every lane of a V128, with lane size log2 of |size|.
   Only 16-, 32- and 64-bit lane sizes are handled; |imm| must fit in
   the lane. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.
   If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero the upper half, and replace
         it with 'x'. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      /* Move new64's low half into the upper half of a vector whose
         lower half is zero. */
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|.  The two
   differ only in whether the negation saturates. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src, srcE);
   /* mask lanes are all-ones where src is negative. */
   assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   /* Plain and saturating negations of src. */
   assign(nsub, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   /* Select the negated value for negative lanes, src otherwise. */
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src, srcE);
   /* 0 - src, without and with signed saturation. */
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}


/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.
   |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp, which is
   returned in *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   /* Widening multiply at the narrow size; accumulate (if any) at the
      wide size. */
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                  : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}


/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
      sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
      sat1n = vecN.D[is2] *s  vecM.d[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   /* QDMULLS doubles and saturates internally; the unsaturated
      reference value is formed by doubling the plain MULLS result. */
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
      sat2q  = vecD +sq/-sq sat1q
      sat2n  = vecD +/-     sat1n
      result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps.
*/ 7861 static 7862 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO, 7863 UInt sizeNarrow, IRTemp argL, IRTemp argR ) 7864 { 7865 vassert(sizeNarrow <= 2); 7866 newTempsV128_2(resHI, resLO); 7867 IRTemp argLhi = newTemp(Ity_I64); 7868 IRTemp argLlo = newTemp(Ity_I64); 7869 IRTemp argRhi = newTemp(Ity_I64); 7870 IRTemp argRlo = newTemp(Ity_I64); 7871 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL))); 7872 assign(argLlo, unop(Iop_V128to64, mkexpr(argL))); 7873 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR))); 7874 assign(argRlo, unop(Iop_V128to64, mkexpr(argR))); 7875 IROp opMulls = mkVecMULLS(sizeNarrow); 7876 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi))); 7877 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo))); 7878 } 7879 7880 7881 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply, 7882 double that, possibly add a rounding constant (R variants), and take 7883 the high half. */ 7884 static 7885 void math_SQDMULH ( /*OUT*/IRTemp* res, 7886 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n, 7887 Bool isR, UInt size, IRTemp vN, IRTemp vM ) 7888 { 7889 vassert(size == X01 || size == X10); /* s or h only */ 7890 7891 newTempsV128_3(res, sat1q, sat1n); 7892 7893 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID; 7894 math_MULLS(&mullsHI, &mullsLO, size, vN, vM); 7895 7896 IRTemp addWide = mkVecADD(size+1); 7897 7898 if (isR) { 7899 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7900 7901 Int rcShift = size == X01 ? 
15 : 31; 7902 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift); 7903 assign(*sat1n, 7904 binop(mkVecCATODDLANES(size), 7905 binop(addWide, 7906 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7907 mkexpr(roundConst)), 7908 binop(addWide, 7909 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)), 7910 mkexpr(roundConst)))); 7911 } else { 7912 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM))); 7913 7914 assign(*sat1n, 7915 binop(mkVecCATODDLANES(size), 7916 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)), 7917 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)))); 7918 } 7919 7920 assign(*res, mkexpr(*sat1q)); 7921 } 7922 7923 7924 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in 7925 a new temp in *res, and the Q difference pair in new temps in 7926 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the 7927 three operations it is. */ 7928 static 7929 void math_QSHL_IMM ( /*OUT*/IRTemp* res, 7930 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2, 7931 IRTemp src, UInt size, UInt shift, const HChar* nm ) 7932 { 7933 vassert(size <= 3); 7934 UInt laneBits = 8 << size; 7935 vassert(shift < laneBits); 7936 newTempsV128_3(res, qDiff1, qDiff2); 7937 IRTemp z128 = newTempV128(); 7938 assign(z128, mkV128(0x0000)); 7939 7940 /* UQSHL */ 7941 if (vex_streq(nm, "uqshl")) { 7942 IROp qop = mkVecQSHLNSATUU(size); 7943 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7944 if (shift == 0) { 7945 /* No shift means no saturation. */ 7946 assign(*qDiff1, mkexpr(z128)); 7947 assign(*qDiff2, mkexpr(z128)); 7948 } else { 7949 /* Saturation has occurred if any of the shifted-out bits are 7950 nonzero. We get the shifted-out bits by right-shifting the 7951 original value. 
*/ 7952 UInt rshift = laneBits - shift; 7953 vassert(rshift >= 1 && rshift < laneBits); 7954 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 7955 assign(*qDiff2, mkexpr(z128)); 7956 } 7957 return; 7958 } 7959 7960 /* SQSHL */ 7961 if (vex_streq(nm, "sqshl")) { 7962 IROp qop = mkVecQSHLNSATSS(size); 7963 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7964 if (shift == 0) { 7965 /* No shift means no saturation. */ 7966 assign(*qDiff1, mkexpr(z128)); 7967 assign(*qDiff2, mkexpr(z128)); 7968 } else { 7969 /* Saturation has occurred if any of the shifted-out bits are 7970 different from the top bit of the original value. */ 7971 UInt rshift = laneBits - 1 - shift; 7972 vassert(rshift >= 0 && rshift < laneBits-1); 7973 /* qDiff1 is the shifted out bits, and the top bit of the original 7974 value, preceded by zeroes. */ 7975 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 7976 /* qDiff2 is the top bit of the original value, cloned the 7977 correct number of times. */ 7978 assign(*qDiff2, binop(mkVecSHRN(size), 7979 binop(mkVecSARN(size), mkexpr(src), 7980 mkU8(laneBits-1)), 7981 mkU8(rshift))); 7982 /* This also succeeds in comparing the top bit of the original 7983 value to itself, which is a bit stupid, but not wrong. */ 7984 } 7985 return; 7986 } 7987 7988 /* SQSHLU */ 7989 if (vex_streq(nm, "sqshlu")) { 7990 IROp qop = mkVecQSHLNSATSU(size); 7991 assign(*res, binop(qop, mkexpr(src), mkU8(shift))); 7992 if (shift == 0) { 7993 /* If there's no shift, saturation depends on the top bit 7994 of the source. */ 7995 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1))); 7996 assign(*qDiff2, mkexpr(z128)); 7997 } else { 7998 /* Saturation has occurred if any of the shifted-out bits are 7999 nonzero. We get the shifted-out bits by right-shifting the 8000 original value. 
*/ 8001 UInt rshift = laneBits - shift; 8002 vassert(rshift >= 1 && rshift < laneBits); 8003 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift))); 8004 assign(*qDiff2, mkexpr(z128)); 8005 } 8006 return; 8007 } 8008 8009 vassert(0); 8010 } 8011 8012 8013 /* Generate IR to do SRHADD and URHADD. */ 8014 static 8015 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb ) 8016 { 8017 /* Generate this: 8018 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) 8019 */ 8020 vassert(size <= 3); 8021 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size); 8022 IROp opADD = mkVecADD(size); 8023 /* The only tricky bit is to generate the correct vector 1 constant. */ 8024 const ULong ones64[4] 8025 = { 0x0101010101010101ULL, 0x0001000100010001ULL, 8026 0x0000000100000001ULL, 0x0000000000000001ULL }; 8027 IRTemp imm64 = newTemp(Ity_I64); 8028 assign(imm64, mkU64(ones64[size])); 8029 IRTemp vecOne = newTempV128(); 8030 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64))); 8031 IRTemp scaOne = newTemp(Ity_I8); 8032 assign(scaOne, mkU8(1)); 8033 IRTemp res = newTempV128(); 8034 assign(res, 8035 binop(opADD, 8036 binop(opSHR, mkexpr(aa), mkexpr(scaOne)), 8037 binop(opADD, 8038 binop(opSHR, mkexpr(bb), mkexpr(scaOne)), 8039 binop(opSHR, 8040 binop(opADD, 8041 binop(opADD, 8042 binop(Iop_AndV128, mkexpr(aa), 8043 mkexpr(vecOne)), 8044 binop(Iop_AndV128, mkexpr(bb), 8045 mkexpr(vecOne)) 8046 ), 8047 mkexpr(vecOne) 8048 ), 8049 mkexpr(scaOne) 8050 ) 8051 ) 8052 ) 8053 ); 8054 return res; 8055 } 8056 8057 8058 /* QCFLAG tracks the SIMD sticky saturation status. Update the status 8059 thusly: if, after application of |opZHI| to both |qres| and |nres|, 8060 they have the same value, leave QCFLAG unchanged. Otherwise, set it 8061 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128 8062 operators, or Iop_INVALID, in which case |qres| and |nres| are used 8063 unmodified. 
   The presence |opZHI| means this function can be used to
   generate QCFLAG update code for both scalar and vector SIMD operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   /* diff = (qres XOR nres), optionally with the irrelevant high part
      of the vector zeroed out first (scalar cases). */
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   /* OR the difference into the existing QCFLAG state, so any nonzero
      difference sticks ("sticky" saturation flag). */
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      // Interleave the low halves, then rotate by 64 bits (SliceV128 at
      // byte offset 8) to get the other ordering; finally zero the
      // unused upper halves of both results.
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}


/* Returns 2.0 ^ (-n) for n in 1 .. 64, computed exactly by recursive
   exponent splitting. */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   // 2^-n == 2^-(n/2) * 2^-(n - n/2)
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}


/* Returns 2.0 ^ n for n in 1 ..
64 */ 8155 static Double two_to_the_plus ( Int n ) 8156 { 8157 if (n == 1) return 2.0; 8158 vassert(n >= 2 && n <= 64); 8159 Int half = n / 2; 8160 return two_to_the_plus(half) * two_to_the_plus(n - half); 8161 } 8162 8163 8164 /*------------------------------------------------------------*/ 8165 /*--- SIMD and FP instructions ---*/ 8166 /*------------------------------------------------------------*/ 8167 8168 static 8169 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn) 8170 { 8171 /* 31 29 23 21 20 15 14 10 9 4 8172 0 q 101110 op2 0 m 0 imm4 0 n d 8173 Decode fields: op2 8174 */ 8175 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8176 if (INSN(31,31) != 0 8177 || INSN(29,24) != BITS6(1,0,1,1,1,0) 8178 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) { 8179 return False; 8180 } 8181 UInt bitQ = INSN(30,30); 8182 UInt op2 = INSN(23,22); 8183 UInt mm = INSN(20,16); 8184 UInt imm4 = INSN(14,11); 8185 UInt nn = INSN(9,5); 8186 UInt dd = INSN(4,0); 8187 8188 if (op2 == BITS2(0,0)) { 8189 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */ 8190 IRTemp sHi = newTempV128(); 8191 IRTemp sLo = newTempV128(); 8192 IRTemp res = newTempV128(); 8193 assign(sHi, getQReg128(mm)); 8194 assign(sLo, getQReg128(nn)); 8195 if (bitQ == 1) { 8196 if (imm4 == 0) { 8197 assign(res, mkexpr(sLo)); 8198 } else { 8199 vassert(imm4 >= 1 && imm4 <= 15); 8200 assign(res, triop(Iop_SliceV128, 8201 mkexpr(sHi), mkexpr(sLo), mkU8(imm4))); 8202 } 8203 putQReg128(dd, mkexpr(res)); 8204 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4); 8205 } else { 8206 if (imm4 >= 8) return False; 8207 if (imm4 == 0) { 8208 assign(res, mkexpr(sLo)); 8209 } else { 8210 vassert(imm4 >= 1 && imm4 <= 7); 8211 IRTemp hi64lo64 = newTempV128(); 8212 assign(hi64lo64, binop(Iop_InterleaveLO64x2, 8213 mkexpr(sHi), mkexpr(sLo))); 8214 assign(res, triop(Iop_SliceV128, 8215 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4))); 8216 } 8217 putQReg128(dd, unop(Iop_ZeroHI64ofV128, 
mkexpr(res))); 8218 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4); 8219 } 8220 return True; 8221 } 8222 8223 return False; 8224 # undef INSN 8225 } 8226 8227 8228 static 8229 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn) 8230 { 8231 /* 31 29 23 21 20 15 14 12 11 9 4 8232 0 q 001110 op2 0 m 0 len op 00 n d 8233 Decode fields: op2,len,op 8234 */ 8235 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8236 if (INSN(31,31) != 0 8237 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8238 || INSN(21,21) != 0 8239 || INSN(15,15) != 0 8240 || INSN(11,10) != BITS2(0,0)) { 8241 return False; 8242 } 8243 UInt bitQ = INSN(30,30); 8244 UInt op2 = INSN(23,22); 8245 UInt mm = INSN(20,16); 8246 UInt len = INSN(14,13); 8247 UInt bitOP = INSN(12,12); 8248 UInt nn = INSN(9,5); 8249 UInt dd = INSN(4,0); 8250 8251 if (op2 == X00) { 8252 /* -------- 00,xx,0 TBL, xx register table -------- */ 8253 /* -------- 00,xx,1 TBX, xx register table -------- */ 8254 /* 31 28 20 15 14 12 9 4 8255 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8256 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta 8257 where Ta = 16b(q=1) or 8b(q=0) 8258 */ 8259 Bool isTBX = bitOP == 1; 8260 /* The out-of-range values to use. */ 8261 IRTemp oor_values = newTempV128(); 8262 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0)); 8263 /* src value */ 8264 IRTemp src = newTempV128(); 8265 assign(src, getQReg128(mm)); 8266 /* The table values */ 8267 IRTemp tab[4]; 8268 UInt i; 8269 for (i = 0; i <= len; i++) { 8270 vassert(i < 4); 8271 tab[i] = newTempV128(); 8272 assign(tab[i], getQReg128((nn + i) % 32)); 8273 } 8274 IRTemp res = math_TBL_TBX(tab, len, src, oor_values); 8275 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8276 const HChar* Ta = bitQ ==1 ? "16b" : "8b"; 8277 const HChar* nm = isTBX ? "tbx" : "tbl"; 8278 DIP("%s %s.%s, {v%d.16b .. 
v%d.16b}, %s.%s\n", 8279 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta); 8280 return True; 8281 } 8282 8283 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8284 return False; 8285 # undef INSN 8286 } 8287 8288 8289 static 8290 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn) 8291 { 8292 /* 31 29 23 21 20 15 14 11 9 4 8293 0 q 001110 size 0 m 0 opcode 10 n d 8294 Decode fields: opcode 8295 */ 8296 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8297 if (INSN(31,31) != 0 8298 || INSN(29,24) != BITS6(0,0,1,1,1,0) 8299 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) { 8300 return False; 8301 } 8302 UInt bitQ = INSN(30,30); 8303 UInt size = INSN(23,22); 8304 UInt mm = INSN(20,16); 8305 UInt opcode = INSN(14,12); 8306 UInt nn = INSN(9,5); 8307 UInt dd = INSN(4,0); 8308 8309 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) { 8310 /* -------- 001 UZP1 std7_std7_std7 -------- */ 8311 /* -------- 101 UZP2 std7_std7_std7 -------- */ 8312 if (bitQ == 0 && size == X11) return False; // implied 1d case 8313 Bool isUZP1 = opcode == BITS3(0,0,1); 8314 IROp op = isUZP1 ? mkVecCATEVENLANES(size) 8315 : mkVecCATODDLANES(size); 8316 IRTemp preL = newTempV128(); 8317 IRTemp preR = newTempV128(); 8318 IRTemp res = newTempV128(); 8319 if (bitQ == 0) { 8320 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm), 8321 getQReg128(nn))); 8322 assign(preR, mkexpr(preL)); 8323 } else { 8324 assign(preL, getQReg128(mm)); 8325 assign(preR, getQReg128(nn)); 8326 } 8327 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8328 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8329 const HChar* nm = isUZP1 ? 
"uzp1" : "uzp2"; 8330 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8331 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8332 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8333 return True; 8334 } 8335 8336 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) { 8337 /* -------- 010 TRN1 std7_std7_std7 -------- */ 8338 /* -------- 110 TRN2 std7_std7_std7 -------- */ 8339 if (bitQ == 0 && size == X11) return False; // implied 1d case 8340 Bool isTRN1 = opcode == BITS3(0,1,0); 8341 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size) 8342 : mkVecCATODDLANES(size); 8343 IROp op2 = mkVecINTERLEAVEHI(size); 8344 IRTemp srcM = newTempV128(); 8345 IRTemp srcN = newTempV128(); 8346 IRTemp res = newTempV128(); 8347 assign(srcM, getQReg128(mm)); 8348 assign(srcN, getQReg128(nn)); 8349 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)), 8350 binop(op1, mkexpr(srcN), mkexpr(srcN)))); 8351 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8352 const HChar* nm = isTRN1 ? "trn1" : "trn2"; 8353 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8354 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8355 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8356 return True; 8357 } 8358 8359 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) { 8360 /* -------- 011 ZIP1 std7_std7_std7 -------- */ 8361 /* -------- 111 ZIP2 std7_std7_std7 -------- */ 8362 if (bitQ == 0 && size == X11) return False; // implied 1d case 8363 Bool isZIP1 = opcode == BITS3(0,1,1); 8364 IROp op = isZIP1 ? 
mkVecINTERLEAVELO(size) 8365 : mkVecINTERLEAVEHI(size); 8366 IRTemp preL = newTempV128(); 8367 IRTemp preR = newTempV128(); 8368 IRTemp res = newTempV128(); 8369 if (bitQ == 0 && !isZIP1) { 8370 IRTemp z128 = newTempV128(); 8371 assign(z128, mkV128(0x0000)); 8372 // preL = Vm shifted left 32 bits 8373 // preR = Vn shifted left 32 bits 8374 assign(preL, triop(Iop_SliceV128, 8375 getQReg128(mm), mkexpr(z128), mkU8(12))); 8376 assign(preR, triop(Iop_SliceV128, 8377 getQReg128(nn), mkexpr(z128), mkU8(12))); 8378 8379 } else { 8380 assign(preL, getQReg128(mm)); 8381 assign(preR, getQReg128(nn)); 8382 } 8383 assign(res, binop(op, mkexpr(preL), mkexpr(preR))); 8384 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res)); 8385 const HChar* nm = isZIP1 ? "zip1" : "zip2"; 8386 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8387 DIP("%s %s.%s, %s.%s, %s.%s\n", nm, 8388 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); 8389 return True; 8390 } 8391 8392 return False; 8393 # undef INSN 8394 } 8395 8396 8397 static 8398 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn) 8399 { 8400 /* 31 28 23 21 16 11 9 4 8401 0 q u 01110 size 11000 opcode 10 n d 8402 Decode fields: u,size,opcode 8403 */ 8404 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin)) 8405 if (INSN(31,31) != 0 8406 || INSN(28,24) != BITS5(0,1,1,1,0) 8407 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) { 8408 return False; 8409 } 8410 UInt bitQ = INSN(30,30); 8411 UInt bitU = INSN(29,29); 8412 UInt size = INSN(23,22); 8413 UInt opcode = INSN(16,12); 8414 UInt nn = INSN(9,5); 8415 UInt dd = INSN(4,0); 8416 8417 if (opcode == BITS5(0,0,0,1,1)) { 8418 /* -------- 0,xx,00011 SADDLV -------- */ 8419 /* -------- 1,xx,00011 UADDLV -------- */ 8420 /* size is the narrow size */ 8421 if (size == X11 || (size == X10 && bitQ == 0)) return False; 8422 Bool isU = bitU == 1; 8423 IRTemp src = newTempV128(); 8424 assign(src, getQReg128(nn)); 8425 /* The basic plan is to widen the 
lower half, and if Q = 1, 8426 the upper half too. Add them together (if Q = 1), and in 8427 either case fold with add at twice the lane width. 8428 */ 8429 IRExpr* widened 8430 = mkexpr(math_WIDEN_LO_OR_HI_LANES( 8431 isU, False/*!fromUpperHalf*/, size, mkexpr(src))); 8432 if (bitQ == 1) { 8433 widened 8434 = binop(mkVecADD(size+1), 8435 widened, 8436 mkexpr(math_WIDEN_LO_OR_HI_LANES( 8437 isU, True/*fromUpperHalf*/, size, mkexpr(src))) 8438 ); 8439 } 8440 /* Now fold. */ 8441 IRTemp tWi = newTempV128(); 8442 assign(tWi, widened); 8443 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1)); 8444 putQReg128(dd, mkexpr(res)); 8445 const HChar* arr = nameArr_Q_SZ(bitQ, size); 8446 const HChar ch = "bhsd"[size]; 8447 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv", 8448 nameQReg128(dd), ch, nameQReg128(nn), arr); 8449 return True; 8450 } 8451 8452 UInt ix = 0; 8453 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; } 8454 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; } 8455 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; } 8456 /**/ 8457 if (ix != 0) { 8458 /* -------- 0,xx,01010: SMAXV -------- (1) */ 8459 /* -------- 1,xx,01010: UMAXV -------- (2) */ 8460 /* -------- 0,xx,11010: SMINV -------- (3) */ 8461 /* -------- 1,xx,11010: UMINV -------- (4) */ 8462 /* -------- 0,xx,11011: ADDV -------- (5) */ 8463 vassert(ix >= 1 && ix <= 5); 8464 if (size == X11) return False; // 1d,2d cases not allowed 8465 if (size == X10 && bitQ == 0) return False; // 2s case not allowed 8466 const IROp opMAXS[3] 8467 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 }; 8468 const IROp opMAXU[3] 8469 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 }; 8470 const IROp opMINS[3] 8471 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 }; 8472 const IROp opMINU[3] 8473 = { Iop_Min8Ux16, Iop_Min16Ux8,