1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_arm64_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2013-2017 OpenWorks 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 #include "libvex_basictypes.h" 32 #include "libvex_ir.h" 33 #include "libvex.h" 34 #include "ir_match.h" 35 36 #include "main_util.h" 37 #include "main_globals.h" 38 #include "host_generic_regs.h" 39 #include "host_generic_simd64.h" // for 32-bit SIMD helpers 40 #include "host_arm64_defs.h" 41 42 43 /*---------------------------------------------------------*/ 44 /*--- ISelEnv ---*/ 45 /*---------------------------------------------------------*/ 46 47 /* This carries around: 48 49 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 50 might encounter. This is computed before insn selection starts, 51 and does not change. 52 53 - A mapping from IRTemp to HReg. This tells the insn selector 54 which virtual register is associated with each IRTemp temporary. 55 This is computed before insn selection starts, and does not 56 change. 
     We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

        |vregmap|   holds the primary register for the IRTemp.
        |vregmapHI| is only used for 128-bit integer-typed
                    IRTemps.  It holds the identity of a second
                    64-bit virtual HReg, which holds the high half
                    of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host hardware capabilities word.  This is set at the start
     and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   - An IRExpr*, which may be NULL, holding the IR expression (an
     IRRoundingMode-encoded value) to which the FPU's rounding mode
     was most recently set.  Setting to NULL is always safe.  Used to
     avoid redundant settings of the FPU's rounding mode, as
     described in set_FPCR_rounding_mode below.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;     /* primary vreg for each IRTemp */
      HReg*        vregmapHI;   /* high-half vreg, 128-bit temps only */
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;        /* insns selected so far */
      Int          vreg_ctr;    /* counter for new virtual registers */

      IRExpr*      previous_rm; /* last rounding-mode expr set, or NULL */
   }
   ISelEnv;

/* Return the (primary) virtual register assigned to IRTemp |tmp|. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   /* NOTE(review): if IRTemp is an unsigned type this first check is
      vacuous -- harmless belt-and-braces; confirm against libvex_ir.h. */
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

/* Return, via |vrHI|/|vrLO|, the pair of virtual registers holding the
   high and low halves of the 128-bit integer IRTemp |tmp|.  The temp
   must actually have a high-half register assigned. */
static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

/* Append |instr| to the code generated so far, printing it first if
   vcode tracing is enabled. */
static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}

/* Allocate a new 64-bit integer-class virtual register. */
static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 64-bit float-class virtual register. */
static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 128-bit vector-class virtual register. */
static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.

   Because some forms of ARM64 memory amodes are implicitly scaled by
   the access size, iselIntExpr_AMode takes an IRType which tells it
   the type of the access for which the amode is to be used.  This
   type needs to be correct, else you'll get incorrect code.
*/
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselF16Expr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselF16Expr           ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr          ( ISelEnv* env, IRExpr* e );

static void        iselV256Expr_wrk      ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselV256Expr          ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

/* Return a bitfield-immediate (RIL) encoding of |imm64| if one
   exists, else NULL -- hence the "mb" (maybe) prefix. */
static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/ 226 227 /* Generate an amode suitable for a 64-bit sized access relative to 228 the baseblock register (X21). This generates an RI12 amode, which 229 means its scaled by the access size, which is why the access size 230 -- 64 bit -- is stated explicitly here. Consequently |off| needs 231 to be divisible by 8. */ 232 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off ) 233 { 234 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */ 235 vassert((off & 7) == 0); /* ditto */ 236 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/); 237 } 238 239 /* Ditto, for 32 bit accesses. */ 240 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off ) 241 { 242 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */ 243 vassert((off & 3) == 0); /* ditto */ 244 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/); 245 } 246 247 /* Ditto, for 16 bit accesses. */ 248 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off ) 249 { 250 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */ 251 vassert((off & 1) == 0); /* ditto */ 252 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/); 253 } 254 255 /* Ditto, for 8 bit accesses. */ 256 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off ) 257 { 258 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */ 259 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/); 260 } 261 262 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off ) 263 { 264 vassert(off < (1<<12)); 265 HReg r = newVRegI(env); 266 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(), 267 ARM64RIA_I12(off,0), True/*isAdd*/)); 268 return r; 269 } 270 271 static HReg get_baseblock_register ( void ) 272 { 273 return hregARM64_X21(); 274 } 275 276 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in 277 a new register, and return the new register. 
*/ 278 static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src ) 279 { 280 HReg dst = newVRegI(env); 281 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */ 282 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); 283 return dst; 284 } 285 286 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in 287 a new register, and return the new register. */ 288 static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src ) 289 { 290 HReg dst = newVRegI(env); 291 ARM64RI6* n48 = ARM64RI6_I6(48); 292 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL)); 293 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR)); 294 return dst; 295 } 296 297 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in 298 a new register, and return the new register. */ 299 static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src ) 300 { 301 HReg dst = newVRegI(env); 302 ARM64RIL* mask = ARM64RIL_I13(1, 0, 15); /* encodes 0xFFFF */ 303 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); 304 return dst; 305 } 306 307 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in 308 a new register, and return the new register. */ 309 static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src ) 310 { 311 HReg dst = newVRegI(env); 312 ARM64RI6* n32 = ARM64RI6_I6(32); 313 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL)); 314 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR)); 315 return dst; 316 } 317 318 /* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in 319 a new register, and return the new register. 
*/ 320 static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src ) 321 { 322 HReg dst = newVRegI(env); 323 ARM64RI6* n56 = ARM64RI6_I6(56); 324 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL)); 325 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR)); 326 return dst; 327 } 328 329 static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src ) 330 { 331 HReg dst = newVRegI(env); 332 ARM64RIL* mask = ARM64RIL_I13(1, 0, 7); /* encodes 0xFF */ 333 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND)); 334 return dst; 335 } 336 337 /* Is this IRExpr_Const(IRConst_U64(0)) ? */ 338 static Bool isZeroU64 ( IRExpr* e ) { 339 if (e->tag != Iex_Const) return False; 340 IRConst* con = e->Iex.Const.con; 341 vassert(con->tag == Ico_U64); 342 return con->Ico.U64 == 0; 343 } 344 345 346 /*---------------------------------------------------------*/ 347 /*--- ISEL: FP rounding mode helpers ---*/ 348 /*---------------------------------------------------------*/ 349 350 /* Set the FP rounding mode: 'mode' is an I32-typed expression 351 denoting a value in the range 0 .. 3, indicating a round mode 352 encoded as per type IRRoundingMode -- the first four values only 353 (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64 354 FSCR to have the same rounding. 355 356 For speed & simplicity, we're setting the *entire* FPCR here. 357 358 Setting the rounding mode is expensive. So this function tries to 359 avoid repeatedly setting the rounding mode to the same thing by 360 first comparing 'mode' to the 'mode' tree supplied in the previous 361 call to this function, if any. (The previous value is stored in 362 env->previous_rm.) If 'mode' is a single IR temporary 't' and 363 env->previous_rm is also just 't', then the setting is skipped. 364 365 This is safe because of the SSA property of IR: an IR temporary can 366 only be defined once and so will have the same value regardless of 367 where it appears in the block. Cool stuff, SSA. 
   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps if blocks where the RM is set more than
   once and it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;    t3 now holds the bit-swapped mode
      t3 <<= 22;       shift the 2-bit mode into the FPCR RMode field
      fmxr fpscr, t3
      (NOTE(review): "fmxr fpscr" is ARM32-era notation; the insn
       actually emitted below is an ARM64 write to FPCR.) */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}


/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                       ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      /* These three never need a fixed register to evaluate. */
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.
   |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   Addr64        target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   /* Count the args, noting any VECRET/GSPTR special nodes. */
   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   /* The eight AArch64 integer argument registers. */
   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   /* A non-trivially-true guard forces the slow scheme, since the
      condition must be computed after the args. */
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   /* Any arg whose evaluation might touch a fixed register also
      forces the slow scheme. */
   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* Vector returns force the slow scheme too. */
   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_GSPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         /* Vector result comes back via the stack slot allocated
            above; caller must pop it (hence the 16). */
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (Addr)cee->addr;
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32 bit)             ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.
   The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64- and 32-bit type.  All results
   are returned in a 64-bit register.  For 32-bit expressions, the
   upper 32 bits are arbitrary, so you should mask or sign extend
   partial values if necessary.
*/

/* --------------------- AMode --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

/* True iff |scale| is a transfer size usable in a scaled RI12 amode. */
static Bool isValidScale ( UChar scale )
{
   switch (scale) {
      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
      default: return False;
   }
}

/* Sanity-check an amode: registers must be 64-bit-int-class virtual
   regs, and immediates must be within the encodable ranges for the
   RI9 (unscaled) and RI12 (scaled) forms. */
static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}

/* Public entry point: selects an amode for |e| (for an access of type
   |dty|) and sanity-checks the result. */
static
ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
{
   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY -- worker for iselIntExpr_AMode.  Tries
   progressively more general patterns: reg+simm9, reg+scaled-uimm12,
   reg+reg, and finally plain reg. */
static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* log2 of the access size, used to scale RI12 immediates. */
   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets.  */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}


/* --------------------- RIA --------------------- */

/* Select instructions to generate 'e' into a RIA. */

static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}

/* DO NOT CALL THIS DIRECTLY !
*/ 936 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ) 937 { 938 IRType ty = typeOfIRExpr(env->type_env,e); 939 vassert(ty == Ity_I64 || ty == Ity_I32); 940 941 /* special case: immediate */ 942 if (e->tag == Iex_Const) { 943 ULong u = 0xF000000ULL; /* invalid */ 944 switch (e->Iex.Const.con->tag) { 945 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; 946 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 947 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)"); 948 } 949 if (0 == (u & ~(0xFFFULL << 0))) 950 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0); 951 if (0 == (u & ~(0xFFFULL << 12))) 952 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12); 953 /* else fail, fall through to default case */ 954 } 955 956 /* default case: calculate into a register and return that */ 957 { 958 HReg r = iselIntExpr_R ( env, e ); 959 return ARM64RIA_R(r); 960 } 961 } 962 963 964 /* --------------------- RIL --------------------- */ 965 966 /* Select instructions to generate 'e' into a RIL. At this point we 967 have to deal with the strange bitfield-immediate encoding for logic 968 instructions. */ 969 970 971 // The following four functions 972 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical 973 // are copied, with modifications, from 974 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc 975 // which has the following copyright notice: 976 /* 977 Copyright 2013, ARM Limited 978 All rights reserved. 979 980 Redistribution and use in source and binary forms, with or without 981 modification, are permitted provided that the following conditions are met: 982 983 * Redistributions of source code must retain the above copyright notice, 984 this list of conditions and the following disclaimer. 985 * Redistributions in binary form must reproduce the above copyright notice, 986 this list of conditions and the following disclaimer in the documentation 987 and/or other materials provided with the distribution. 
988 * Neither the name of ARM Limited nor the names of its contributors may be 989 used to endorse or promote products derived from this software without 990 specific prior written permission. 991 992 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 993 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 994 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 995 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 996 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 997 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 998 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 999 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 1000 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1001 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1002 */ 1003 1004 static Int CountLeadingZeros(ULong value, Int width) 1005 { 1006 vassert(width == 32 || width == 64); 1007 Int count = 0; 1008 ULong bit_test = 1ULL << (width - 1); 1009 while ((count < width) && ((bit_test & value) == 0)) { 1010 count++; 1011 bit_test >>= 1; 1012 } 1013 return count; 1014 } 1015 1016 static Int CountTrailingZeros(ULong value, Int width) 1017 { 1018 vassert(width == 32 || width == 64); 1019 Int count = 0; 1020 while ((count < width) && (((value >> count) & 1) == 0)) { 1021 count++; 1022 } 1023 return count; 1024 } 1025 1026 static Int CountSetBits(ULong value, Int width) 1027 { 1028 // TODO: Other widths could be added here, as the implementation already 1029 // supports them. 1030 vassert(width == 32 || width == 64); 1031 1032 // Mask out unused bits to ensure that they are not counted. 1033 value &= (0xffffffffffffffffULL >> (64-width)); 1034 1035 // Add up the set bits. 
1036 // The algorithm works by adding pairs of bit fields together iteratively, 1037 // where the size of each bit field doubles each time. 1038 // An example for an 8-bit value: 1039 // Bits: h g f e d c b a 1040 // \ | \ | \ | \ | 1041 // value = h+g f+e d+c b+a 1042 // \ | \ | 1043 // value = h+g+f+e d+c+b+a 1044 // \ | 1045 // value = h+g+f+e+d+c+b+a 1046 value = ((value >> 1) & 0x5555555555555555ULL) 1047 + (value & 0x5555555555555555ULL); 1048 value = ((value >> 2) & 0x3333333333333333ULL) 1049 + (value & 0x3333333333333333ULL); 1050 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) 1051 + (value & 0x0f0f0f0f0f0f0f0fULL); 1052 value = ((value >> 8) & 0x00ff00ff00ff00ffULL) 1053 + (value & 0x00ff00ff00ff00ffULL); 1054 value = ((value >> 16) & 0x0000ffff0000ffffULL) 1055 + (value & 0x0000ffff0000ffffULL); 1056 value = ((value >> 32) & 0x00000000ffffffffULL) 1057 + (value & 0x00000000ffffffffULL); 1058 1059 return value; 1060 } 1061 1062 static Bool isImmLogical ( /*OUT*/UInt* n, 1063 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r, 1064 ULong value, UInt width ) 1065 { 1066 // Test if a given value can be encoded in the immediate field of a 1067 // logical instruction. 1068 1069 // If it can be encoded, the function returns true, and values 1070 // pointed to by n, imm_s and imm_r are updated with immediates 1071 // encoded in the format required by the corresponding fields in the 1072 // logical instruction. If it can not be encoded, the function 1073 // returns false, and the values pointed to by n, imm_s and imm_r 1074 // are undefined. 
1075 vassert(n != NULL && imm_s != NULL && imm_r != NULL); 1076 vassert(width == 32 || width == 64); 1077 1078 // Logical immediates are encoded using parameters n, imm_s and imm_r using 1079 // the following table: 1080 // 1081 // N imms immr size S R 1082 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) 1083 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) 1084 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) 1085 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) 1086 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) 1087 // 0 11110s xxxxxr 2 UInt(s) UInt(r) 1088 // (s bits must not be all set) 1089 // 1090 // A pattern is constructed of size bits, where the least significant S+1 1091 // bits are set. The pattern is rotated right by R, and repeated across a 1092 // 32 or 64-bit value, depending on destination register width. 1093 // 1094 // To test if an arbitrary immediate can be encoded using this scheme, an 1095 // iterative algorithm is used. 1096 // 1097 // TODO: This code does not consider using X/W register overlap to support 1098 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits 1099 // are an encodable logical immediate. 1100 1101 // 1. If the value has all set or all clear bits, it can't be encoded. 1102 if ((value == 0) || (value == 0xffffffffffffffffULL) || 1103 ((width == 32) && (value == 0xffffffff))) { 1104 return False; 1105 } 1106 1107 UInt lead_zero = CountLeadingZeros(value, width); 1108 UInt lead_one = CountLeadingZeros(~value, width); 1109 UInt trail_zero = CountTrailingZeros(value, width); 1110 UInt trail_one = CountTrailingZeros(~value, width); 1111 UInt set_bits = CountSetBits(value, width); 1112 1113 // The fixed bits in the immediate s field. 1114 // If width == 64 (X reg), start at 0xFFFFFF80. 1115 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit 1116 // widths won't be executed. 1117 Int imm_s_fixed = (width == 64) ? -128 : -64; 1118 Int imm_s_mask = 0x3F; 1119 1120 for (;;) { 1121 // 2. 
If the value is two bits wide, it can be encoded. 1122 if (width == 2) { 1123 *n = 0; 1124 *imm_s = 0x3C; 1125 *imm_r = (value & 3) - 1; 1126 return True; 1127 } 1128 1129 *n = (width == 64) ? 1 : 0; 1130 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask); 1131 if ((lead_zero + set_bits) == width) { 1132 *imm_r = 0; 1133 } else { 1134 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one; 1135 } 1136 1137 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to 1138 // the bit width of the value, it can be encoded. 1139 if (lead_zero + trail_zero + set_bits == width) { 1140 return True; 1141 } 1142 1143 // 4. If the sum of leading ones, trailing ones and unset bits in the 1144 // value is equal to the bit width of the value, it can be encoded. 1145 if (lead_one + trail_one + (width - set_bits) == width) { 1146 return True; 1147 } 1148 1149 // 5. If the most-significant half of the bitwise value is equal to the 1150 // least-significant half, return to step 2 using the least-significant 1151 // half of the value. 1152 ULong mask = (1ULL << (width >> 1)) - 1; 1153 if ((value & mask) == ((value >> (width >> 1)) & mask)) { 1154 width >>= 1; 1155 set_bits >>= 1; 1156 imm_s_fixed >>= 1; 1157 continue; 1158 } 1159 1160 // 6. Otherwise, the value can't be encoded. 1161 return False; 1162 } 1163 } 1164 1165 1166 /* Create a RIL for the given immediate, if it is representable, or 1167 return NULL if not. */ 1168 1169 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ) 1170 { 1171 UInt n = 0, imm_s = 0, imm_r = 0; 1172 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64); 1173 if (!ok) return NULL; 1174 vassert(n < 2 && imm_s < 64 && imm_r < 64); 1175 return ARM64RIL_I13(n, imm_r, imm_s); 1176 } 1177 1178 /* So, finally .. */ 1179 1180 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ) 1181 { 1182 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e); 1183 /* sanity checks ... 
*/ 1184 switch (ri->tag) { 1185 case ARM64riL_I13: 1186 vassert(ri->ARM64riL.I13.bitN < 2); 1187 vassert(ri->ARM64riL.I13.immR < 64); 1188 vassert(ri->ARM64riL.I13.immS < 64); 1189 return ri; 1190 case ARM64riL_R: 1191 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64); 1192 vassert(hregIsVirtual(ri->ARM64riL.R.reg)); 1193 return ri; 1194 default: 1195 vpanic("iselIntExpr_RIL: unknown arm RIL tag"); 1196 } 1197 } 1198 1199 /* DO NOT CALL THIS DIRECTLY ! */ 1200 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e ) 1201 { 1202 IRType ty = typeOfIRExpr(env->type_env,e); 1203 vassert(ty == Ity_I64 || ty == Ity_I32); 1204 1205 /* special case: immediate */ 1206 if (e->tag == Iex_Const) { 1207 ARM64RIL* maybe = NULL; 1208 if (ty == Ity_I64) { 1209 vassert(e->Iex.Const.con->tag == Ico_U64); 1210 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64); 1211 } else { 1212 vassert(ty == Ity_I32); 1213 vassert(e->Iex.Const.con->tag == Ico_U32); 1214 UInt u32 = e->Iex.Const.con->Ico.U32; 1215 ULong u64 = (ULong)u32; 1216 /* First try with 32 leading zeroes. */ 1217 maybe = mb_mkARM64RIL_I(u64); 1218 /* If that doesn't work, try with 2 copies, since it doesn't 1219 matter what winds up in the upper 32 bits. */ 1220 if (!maybe) { 1221 maybe = mb_mkARM64RIL_I((u64 << 32) | u64); 1222 } 1223 } 1224 if (maybe) return maybe; 1225 /* else fail, fall through to default case */ 1226 } 1227 1228 /* default case: calculate into a register and return that */ 1229 { 1230 HReg r = iselIntExpr_R ( env, e ); 1231 return ARM64RIL_R(r); 1232 } 1233 } 1234 1235 1236 /* --------------------- RI6 --------------------- */ 1237 1238 /* Select instructions to generate 'e' into a RI6. */ 1239 1240 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e ) 1241 { 1242 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e); 1243 /* sanity checks ... 
*/ 1244 switch (ri->tag) { 1245 case ARM64ri6_I6: 1246 vassert(ri->ARM64ri6.I6.imm6 < 64); 1247 vassert(ri->ARM64ri6.I6.imm6 > 0); 1248 return ri; 1249 case ARM64ri6_R: 1250 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64); 1251 vassert(hregIsVirtual(ri->ARM64ri6.R.reg)); 1252 return ri; 1253 default: 1254 vpanic("iselIntExpr_RI6: unknown arm RI6 tag"); 1255 } 1256 } 1257 1258 /* DO NOT CALL THIS DIRECTLY ! */ 1259 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e ) 1260 { 1261 IRType ty = typeOfIRExpr(env->type_env,e); 1262 vassert(ty == Ity_I64 || ty == Ity_I8); 1263 1264 /* special case: immediate */ 1265 if (e->tag == Iex_Const) { 1266 switch (e->Iex.Const.con->tag) { 1267 case Ico_U8: { 1268 UInt u = e->Iex.Const.con->Ico.U8; 1269 if (u > 0 && u < 64) 1270 return ARM64RI6_I6(u); 1271 break; 1272 default: 1273 break; 1274 } 1275 } 1276 /* else fail, fall through to default case */ 1277 } 1278 1279 /* default case: calculate into a register and return that */ 1280 { 1281 HReg r = iselIntExpr_R ( env, e ); 1282 return ARM64RI6_R(r); 1283 } 1284 } 1285 1286 1287 /* ------------------- CondCode ------------------- */ 1288 1289 /* Generate code to evaluated a bit-typed expression, returning the 1290 condition code which would correspond when the expression would 1291 notionally have returned 1. */ 1292 1293 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 1294 { 1295 ARM64CondCode cc = iselCondCode_wrk(env,e); 1296 vassert(cc != ARM64cc_NV); 1297 return cc; 1298 } 1299 1300 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 1301 { 1302 vassert(e); 1303 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 1304 1305 /* var */ 1306 if (e->tag == Iex_RdTmp) { 1307 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp); 1308 /* Cmp doesn't modify rTmp; so this is OK. 
*/ 1309 ARM64RIL* one = mb_mkARM64RIL_I(1); 1310 vassert(one); 1311 addInstr(env, ARM64Instr_Test(rTmp, one)); 1312 return ARM64cc_NE; 1313 } 1314 1315 /* Not1(e) */ 1316 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1317 /* Generate code for the arg, and negate the test condition */ 1318 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1319 if (cc == ARM64cc_AL || cc == ARM64cc_NV) { 1320 return ARM64cc_AL; 1321 } else { 1322 return 1 ^ cc; 1323 } 1324 } 1325 1326 /* --- patterns rooted at: 64to1 --- */ 1327 1328 if (e->tag == Iex_Unop 1329 && e->Iex.Unop.op == Iop_64to1) { 1330 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg); 1331 ARM64RIL* one = mb_mkARM64RIL_I(1); 1332 vassert(one); /* '1' must be representable */ 1333 addInstr(env, ARM64Instr_Test(rTmp, one)); 1334 return ARM64cc_NE; 1335 } 1336 1337 /* --- patterns rooted at: CmpNEZ8 --- */ 1338 1339 if (e->tag == Iex_Unop 1340 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1341 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1342 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF); 1343 addInstr(env, ARM64Instr_Test(r1, xFF)); 1344 return ARM64cc_NE; 1345 } 1346 1347 /* --- patterns rooted at: CmpNEZ16 --- */ 1348 1349 if (e->tag == Iex_Unop 1350 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1351 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1352 ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF); 1353 addInstr(env, ARM64Instr_Test(r1, xFFFF)); 1354 return ARM64cc_NE; 1355 } 1356 1357 /* --- patterns rooted at: CmpNEZ64 --- */ 1358 1359 if (e->tag == Iex_Unop 1360 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1361 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1362 ARM64RIA* zero = ARM64RIA_I12(0,0); 1363 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/)); 1364 return ARM64cc_NE; 1365 } 1366 1367 /* --- patterns rooted at: CmpNEZ32 --- */ 1368 1369 if (e->tag == Iex_Unop 1370 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1371 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1372 ARM64RIA* zero = ARM64RIA_I12(0,0); 1373 addInstr(env, ARM64Instr_Cmp(r1, 
zero, False/*!is64*/)); 1374 return ARM64cc_NE; 1375 } 1376 1377 /* --- Cmp*64*(x,y) --- */ 1378 if (e->tag == Iex_Binop 1379 && (e->Iex.Binop.op == Iop_CmpEQ64 1380 || e->Iex.Binop.op == Iop_CmpNE64 1381 || e->Iex.Binop.op == Iop_CmpLT64S 1382 || e->Iex.Binop.op == Iop_CmpLT64U 1383 || e->Iex.Binop.op == Iop_CmpLE64S 1384 || e->Iex.Binop.op == Iop_CmpLE64U 1385 || e->Iex.Binop.op == Iop_CasCmpEQ64)) { 1386 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1387 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1388 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/)); 1389 switch (e->Iex.Binop.op) { 1390 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ; 1391 case Iop_CmpNE64: return ARM64cc_NE; 1392 case Iop_CmpLT64S: return ARM64cc_LT; 1393 case Iop_CmpLT64U: return ARM64cc_CC; 1394 case Iop_CmpLE64S: return ARM64cc_LE; 1395 case Iop_CmpLE64U: return ARM64cc_LS; 1396 default: vpanic("iselCondCode(arm64): CmpXX64"); 1397 } 1398 } 1399 1400 /* --- Cmp*32*(x,y) --- */ 1401 if (e->tag == Iex_Binop 1402 && (e->Iex.Binop.op == Iop_CmpEQ32 1403 || e->Iex.Binop.op == Iop_CmpNE32 1404 || e->Iex.Binop.op == Iop_CmpLT32S 1405 || e->Iex.Binop.op == Iop_CmpLT32U 1406 || e->Iex.Binop.op == Iop_CmpLE32S 1407 || e->Iex.Binop.op == Iop_CmpLE32U 1408 || e->Iex.Binop.op == Iop_CasCmpEQ32)) { 1409 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1410 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1411 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/)); 1412 switch (e->Iex.Binop.op) { 1413 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ; 1414 case Iop_CmpNE32: return ARM64cc_NE; 1415 case Iop_CmpLT32S: return ARM64cc_LT; 1416 case Iop_CmpLT32U: return ARM64cc_CC; 1417 case Iop_CmpLE32S: return ARM64cc_LE; 1418 case Iop_CmpLE32U: return ARM64cc_LS; 1419 default: vpanic("iselCondCode(arm64): CmpXX32"); 1420 } 1421 } 1422 1423 /* --- Cmp*16*(x,y) --- */ 1424 if (e->tag == Iex_Binop 1425 && (e->Iex.Binop.op == Iop_CasCmpEQ16)) { 1426 HReg 
argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1427 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1428 HReg argL2 = widen_z_16_to_64(env, argL); 1429 HReg argR2 = widen_z_16_to_64(env, argR); 1430 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/)); 1431 switch (e->Iex.Binop.op) { 1432 case Iop_CasCmpEQ16: return ARM64cc_EQ; 1433 default: vpanic("iselCondCode(arm64): CmpXX16"); 1434 } 1435 } 1436 1437 /* --- Cmp*8*(x,y) --- */ 1438 if (e->tag == Iex_Binop 1439 && (e->Iex.Binop.op == Iop_CasCmpEQ8)) { 1440 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1441 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1442 HReg argL2 = widen_z_8_to_64(env, argL); 1443 HReg argR2 = widen_z_8_to_64(env, argR); 1444 addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/)); 1445 switch (e->Iex.Binop.op) { 1446 case Iop_CasCmpEQ8: return ARM64cc_EQ; 1447 default: vpanic("iselCondCode(arm64): CmpXX8"); 1448 } 1449 } 1450 1451 ppIRExpr(e); 1452 vpanic("iselCondCode"); 1453 } 1454 1455 1456 /* --------------------- Reg --------------------- */ 1457 1458 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 1459 { 1460 HReg r = iselIntExpr_R_wrk(env, e); 1461 /* sanity checks ... */ 1462 # if 0 1463 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1464 # endif 1465 vassert(hregClass(r) == HRcInt64); 1466 vassert(hregIsVirtual(r)); 1467 return r; 1468 } 1469 1470 /* DO NOT CALL THIS DIRECTLY ! 
*/ 1471 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 1472 { 1473 IRType ty = typeOfIRExpr(env->type_env,e); 1474 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1475 1476 switch (e->tag) { 1477 1478 /* --------- TEMP --------- */ 1479 case Iex_RdTmp: { 1480 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 1481 } 1482 1483 /* --------- LOAD --------- */ 1484 case Iex_Load: { 1485 HReg dst = newVRegI(env); 1486 1487 if (e->Iex.Load.end != Iend_LE) 1488 goto irreducible; 1489 1490 if (ty == Ity_I64) { 1491 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1492 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode)); 1493 return dst; 1494 } 1495 if (ty == Ity_I32) { 1496 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1497 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode)); 1498 return dst; 1499 } 1500 if (ty == Ity_I16) { 1501 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1502 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode)); 1503 return dst; 1504 } 1505 if (ty == Ity_I8) { 1506 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); 1507 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode)); 1508 return dst; 1509 } 1510 break; 1511 } 1512 1513 /* --------- BINARY OP --------- */ 1514 case Iex_Binop: { 1515 1516 ARM64LogicOp lop = 0; /* invalid */ 1517 ARM64ShiftOp sop = 0; /* invalid */ 1518 1519 /* Special-case 0-x into a Neg instruction. Not because it's 1520 particularly useful but more so as to give value flow using 1521 this instruction, so as to check its assembly correctness for 1522 implementation of Left32/Left64. 
*/ 1523 switch (e->Iex.Binop.op) { 1524 case Iop_Sub64: 1525 if (isZeroU64(e->Iex.Binop.arg1)) { 1526 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1527 HReg dst = newVRegI(env); 1528 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG)); 1529 return dst; 1530 } 1531 break; 1532 default: 1533 break; 1534 } 1535 1536 /* ADD/SUB */ 1537 switch (e->Iex.Binop.op) { 1538 case Iop_Add64: case Iop_Add32: 1539 case Iop_Sub64: case Iop_Sub32: { 1540 Bool isAdd = e->Iex.Binop.op == Iop_Add64 1541 || e->Iex.Binop.op == Iop_Add32; 1542 HReg dst = newVRegI(env); 1543 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1544 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); 1545 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd)); 1546 return dst; 1547 } 1548 default: 1549 break; 1550 } 1551 1552 /* AND/OR/XOR */ 1553 switch (e->Iex.Binop.op) { 1554 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; 1555 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; 1556 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; 1557 log_binop: { 1558 HReg dst = newVRegI(env); 1559 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1560 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2); 1561 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop)); 1562 return dst; 1563 } 1564 default: 1565 break; 1566 } 1567 1568 /* SHL/SHR/SAR */ 1569 switch (e->Iex.Binop.op) { 1570 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop; 1571 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop; 1572 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop; 1573 sh_binop: { 1574 HReg dst = newVRegI(env); 1575 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1576 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1577 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop)); 1578 return dst; 1579 } 1580 case Iop_Shr32: 1581 case Iop_Sar32: { 1582 Bool zx = e->Iex.Binop.op == Iop_Shr32; 1583 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1584 
ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); 1585 HReg dst = zx ? widen_z_32_to_64(env, argL) 1586 : widen_s_32_to_64(env, argL); 1587 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR)); 1588 return dst; 1589 } 1590 default: break; 1591 } 1592 1593 /* MUL */ 1594 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) { 1595 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1596 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1597 HReg dst = newVRegI(env); 1598 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN)); 1599 return dst; 1600 } 1601 1602 /* MULL */ 1603 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { 1604 Bool isS = e->Iex.Binop.op == Iop_MullS32; 1605 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1606 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL); 1607 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1608 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR); 1609 HReg dst = newVRegI(env); 1610 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN)); 1611 return dst; 1612 } 1613 1614 /* Handle misc other ops. 
*/ 1615 1616 if (e->Iex.Binop.op == Iop_Max32U) { 1617 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1618 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1619 HReg dst = newVRegI(env); 1620 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/)); 1621 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS)); 1622 return dst; 1623 } 1624 1625 if (e->Iex.Binop.op == Iop_32HLto64) { 1626 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1627 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1628 HReg lo32 = widen_z_32_to_64(env, lo32s); 1629 HReg hi32 = newVRegI(env); 1630 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32), 1631 ARM64sh_SHL)); 1632 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32), 1633 ARM64lo_OR)); 1634 return hi32; 1635 } 1636 1637 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) { 1638 Bool isD = e->Iex.Binop.op == Iop_CmpF64; 1639 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1); 1640 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2); 1641 HReg dst = newVRegI(env); 1642 HReg imm = newVRegI(env); 1643 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then 1644 create in dst, the IRCmpF64Result encoded result. */ 1645 addInstr(env, (isD ? 
ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR)); 1646 addInstr(env, ARM64Instr_Imm64(dst, 0)); 1647 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ 1648 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ)); 1649 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT 1650 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI)); 1651 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT 1652 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT)); 1653 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN 1654 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS)); 1655 return dst; 1656 } 1657 1658 { /* local scope */ 1659 ARM64CvtOp cvt_op = ARM64cvt_INVALID; 1660 Bool srcIsD = False; 1661 switch (e->Iex.Binop.op) { 1662 case Iop_F64toI64S: 1663 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break; 1664 case Iop_F64toI64U: 1665 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break; 1666 case Iop_F64toI32S: 1667 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break; 1668 case Iop_F64toI32U: 1669 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break; 1670 case Iop_F32toI32S: 1671 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break; 1672 case Iop_F32toI32U: 1673 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break; 1674 case Iop_F32toI64S: 1675 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break; 1676 case Iop_F32toI64U: 1677 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break; 1678 default: 1679 break; 1680 } 1681 if (cvt_op != ARM64cvt_INVALID) { 1682 /* This is all a bit dodgy, because we can't handle a 1683 non-constant (not-known-at-JIT-time) rounding mode 1684 indication. That's because there's no instruction 1685 AFAICS that does this conversion but rounds according to 1686 FPCR.RM, so we have to bake the rounding mode into the 1687 instruction right now. But that should be OK because 1688 (1) the front end attaches a literal Irrm_ value to the 1689 conversion binop, and (2) iropt will never float that 1690 off via CSE, into a literal. 
Hence we should always 1691 have an Irrm_ value as the first arg. */ 1692 IRExpr* arg1 = e->Iex.Binop.arg1; 1693 if (arg1->tag != Iex_Const) goto irreducible; 1694 IRConst* arg1con = arg1->Iex.Const.con; 1695 vassert(arg1con->tag == Ico_U32); // else ill-typed IR 1696 UInt irrm = arg1con->Ico.U32; 1697 /* Find the ARM-encoded equivalent for |irrm|. */ 1698 UInt armrm = 4; /* impossible */ 1699 switch (irrm) { 1700 case Irrm_NEAREST: armrm = 0; break; 1701 case Irrm_NegINF: armrm = 2; break; 1702 case Irrm_PosINF: armrm = 1; break; 1703 case Irrm_ZERO: armrm = 3; break; 1704 default: goto irreducible; 1705 } 1706 HReg src = (srcIsD ? iselDblExpr : iselFltExpr) 1707 (env, e->Iex.Binop.arg2); 1708 HReg dst = newVRegI(env); 1709 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); 1710 return dst; 1711 } 1712 } /* local scope */ 1713 1714 /* All cases involving host-side helper calls. */ 1715 void* fn = NULL; 1716 switch (e->Iex.Binop.op) { 1717 case Iop_DivU32: 1718 fn = &h_calc_udiv32_w_arm_semantics; break; 1719 case Iop_DivS32: 1720 fn = &h_calc_sdiv32_w_arm_semantics; break; 1721 case Iop_DivU64: 1722 fn = &h_calc_udiv64_w_arm_semantics; break; 1723 case Iop_DivS64: 1724 fn = &h_calc_sdiv64_w_arm_semantics; break; 1725 default: 1726 break; 1727 } 1728 1729 if (fn) { 1730 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1731 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1732 HReg res = newVRegI(env); 1733 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL)); 1734 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR)); 1735 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn, 1736 2, mk_RetLoc_simple(RLPri_Int) )); 1737 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0())); 1738 return res; 1739 } 1740 1741 break; 1742 } 1743 1744 /* --------- UNARY OP --------- */ 1745 case Iex_Unop: { 1746 1747 switch (e->Iex.Unop.op) { 1748 case Iop_16Uto64: { 1749 /* This probably doesn't occur often enough to be worth 1750 rolling the extension into the load. 
*/ 1751 IRExpr* arg = e->Iex.Unop.arg; 1752 HReg src = iselIntExpr_R(env, arg); 1753 HReg dst = widen_z_16_to_64(env, src); 1754 return dst; 1755 } 1756 case Iop_32Uto64: { 1757 IRExpr* arg = e->Iex.Unop.arg; 1758 if (arg->tag == Iex_Load) { 1759 /* This correctly zero extends because _LdSt32 is 1760 defined to do a zero extending load. */ 1761 HReg dst = newVRegI(env); 1762 ARM64AMode* am 1763 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32); 1764 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 1765 return dst; 1766 } 1767 /* else be lame and mask it */ 1768 HReg src = iselIntExpr_R(env, arg); 1769 HReg dst = widen_z_32_to_64(env, src); 1770 return dst; 1771 } 1772 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */ 1773 case Iop_8Uto64: { 1774 IRExpr* arg = e->Iex.Unop.arg; 1775 if (arg->tag == Iex_Load) { 1776 /* This correctly zero extends because _LdSt8 is 1777 defined to do a zero extending load. */ 1778 HReg dst = newVRegI(env); 1779 ARM64AMode* am 1780 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8); 1781 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 1782 return dst; 1783 } 1784 /* else be lame and mask it */ 1785 HReg src = iselIntExpr_R(env, arg); 1786 HReg dst = widen_z_8_to_64(env, src); 1787 return dst; 1788 } 1789 case Iop_128HIto64: { 1790 HReg rHi, rLo; 1791 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1792 return rHi; /* and abandon rLo */ 1793 } 1794 case Iop_8Sto32: case Iop_8Sto64: { 1795 IRExpr* arg = e->Iex.Unop.arg; 1796 HReg src = iselIntExpr_R(env, arg); 1797 HReg dst = widen_s_8_to_64(env, src); 1798 return dst; 1799 } 1800 case Iop_16Sto32: case Iop_16Sto64: { 1801 IRExpr* arg = e->Iex.Unop.arg; 1802 HReg src = iselIntExpr_R(env, arg); 1803 HReg dst = widen_s_16_to_64(env, src); 1804 return dst; 1805 } 1806 case Iop_32Sto64: { 1807 IRExpr* arg = e->Iex.Unop.arg; 1808 HReg src = iselIntExpr_R(env, arg); 1809 HReg dst = widen_s_32_to_64(env, src); 1810 return dst; 1811 } 1812 case Iop_Not32: 1813 
case Iop_Not64: { 1814 HReg dst = newVRegI(env); 1815 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1816 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT)); 1817 return dst; 1818 } 1819 case Iop_Clz64: { 1820 HReg dst = newVRegI(env); 1821 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1822 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ)); 1823 return dst; 1824 } 1825 case Iop_Left32: 1826 case Iop_Left64: { 1827 /* Left64(src) = src | -src. Left32 can use the same 1828 implementation since in that case we don't care what 1829 the upper 32 bits become. */ 1830 HReg dst = newVRegI(env); 1831 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1832 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1833 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1834 ARM64lo_OR)); 1835 return dst; 1836 } 1837 case Iop_CmpwNEZ64: { 1838 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1 1839 = Left64(src) >>s 63 */ 1840 HReg dst = newVRegI(env); 1841 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1842 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1843 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1844 ARM64lo_OR)); 1845 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1846 ARM64sh_SAR)); 1847 return dst; 1848 } 1849 case Iop_CmpwNEZ32: { 1850 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF) 1851 = Left64(src & 0xFFFFFFFF) >>s 63 */ 1852 HReg dst = newVRegI(env); 1853 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); 1854 HReg src = widen_z_32_to_64(env, pre); 1855 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); 1856 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), 1857 ARM64lo_OR)); 1858 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1859 ARM64sh_SAR)); 1860 return dst; 1861 } 1862 case Iop_V128to64: case Iop_V128HIto64: { 1863 HReg dst = newVRegI(env); 1864 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 1865 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 
1 : 0; 1866 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo)); 1867 return dst; 1868 } 1869 case Iop_ReinterpF64asI64: { 1870 HReg dst = newVRegI(env); 1871 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 1872 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/)); 1873 return dst; 1874 } 1875 case Iop_ReinterpF32asI32: { 1876 HReg dst = newVRegI(env); 1877 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 1878 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/)); 1879 return dst; 1880 } 1881 case Iop_1Sto16: 1882 case Iop_1Sto32: 1883 case Iop_1Sto64: { 1884 /* As with the iselStmt case for 'tmp:I1 = expr', we could 1885 do a lot better here if it ever became necessary. */ 1886 HReg zero = newVRegI(env); 1887 HReg one = newVRegI(env); 1888 HReg dst = newVRegI(env); 1889 addInstr(env, ARM64Instr_Imm64(zero, 0)); 1890 addInstr(env, ARM64Instr_Imm64(one, 1)); 1891 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1892 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 1893 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1894 ARM64sh_SHL)); 1895 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), 1896 ARM64sh_SAR)); 1897 return dst; 1898 } 1899 case Iop_NarrowUn16to8x8: 1900 case Iop_NarrowUn32to16x4: 1901 case Iop_NarrowUn64to32x2: 1902 case Iop_QNarrowUn16Sto8Sx8: 1903 case Iop_QNarrowUn32Sto16Sx4: 1904 case Iop_QNarrowUn64Sto32Sx2: 1905 case Iop_QNarrowUn16Uto8Ux8: 1906 case Iop_QNarrowUn32Uto16Ux4: 1907 case Iop_QNarrowUn64Uto32Ux2: 1908 case Iop_QNarrowUn16Sto8Ux8: 1909 case Iop_QNarrowUn32Sto16Ux4: 1910 case Iop_QNarrowUn64Sto32Ux2: 1911 { 1912 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 1913 HReg tmp = newVRegV(env); 1914 HReg dst = newVRegI(env); 1915 UInt dszBlg2 = 3; /* illegal */ 1916 ARM64VecNarrowOp op = ARM64vecna_INVALID; 1917 switch (e->Iex.Unop.op) { 1918 case Iop_NarrowUn16to8x8: 1919 dszBlg2 = 0; op = ARM64vecna_XTN; break; 1920 case Iop_NarrowUn32to16x4: 1921 dszBlg2 = 1; op = ARM64vecna_XTN; break; 1922 case 
Iop_NarrowUn64to32x2: 1923 dszBlg2 = 2; op = ARM64vecna_XTN; break; 1924 case Iop_QNarrowUn16Sto8Sx8: 1925 dszBlg2 = 0; op = ARM64vecna_SQXTN; break; 1926 case Iop_QNarrowUn32Sto16Sx4: 1927 dszBlg2 = 1; op = ARM64vecna_SQXTN; break; 1928 case Iop_QNarrowUn64Sto32Sx2: 1929 dszBlg2 = 2; op = ARM64vecna_SQXTN; break; 1930 case Iop_QNarrowUn16Uto8Ux8: 1931 dszBlg2 = 0; op = ARM64vecna_UQXTN; break; 1932 case Iop_QNarrowUn32Uto16Ux4: 1933 dszBlg2 = 1; op = ARM64vecna_UQXTN; break; 1934 case Iop_QNarrowUn64Uto32Ux2: 1935 dszBlg2 = 2; op = ARM64vecna_UQXTN; break; 1936 case Iop_QNarrowUn16Sto8Ux8: 1937 dszBlg2 = 0; op = ARM64vecna_SQXTUN; break; 1938 case Iop_QNarrowUn32Sto16Ux4: 1939 dszBlg2 = 1; op = ARM64vecna_SQXTUN; break; 1940 case Iop_QNarrowUn64Sto32Ux2: 1941 dszBlg2 = 2; op = ARM64vecna_SQXTUN; break; 1942 default: 1943 vassert(0); 1944 } 1945 addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src)); 1946 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/)); 1947 return dst; 1948 } 1949 case Iop_1Uto64: { 1950 /* 1Uto64(tmp). */ 1951 HReg dst = newVRegI(env); 1952 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 1953 ARM64RIL* one = mb_mkARM64RIL_I(1); 1954 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); 1955 vassert(one); 1956 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND)); 1957 } else { 1958 /* CLONE-01 */ 1959 HReg zero = newVRegI(env); 1960 HReg one = newVRegI(env); 1961 addInstr(env, ARM64Instr_Imm64(zero, 0)); 1962 addInstr(env, ARM64Instr_Imm64(one, 1)); 1963 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); 1964 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 1965 } 1966 return dst; 1967 } 1968 case Iop_64to32: 1969 case Iop_64to16: 1970 case Iop_64to8: 1971 /* These are no-ops. 
*/ 1972 return iselIntExpr_R(env, e->Iex.Unop.arg); 1973 1974 default: 1975 break; 1976 } 1977 1978 break; 1979 } 1980 1981 /* --------- GET --------- */ 1982 case Iex_Get: { 1983 if (ty == Ity_I64 1984 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) { 1985 HReg dst = newVRegI(env); 1986 ARM64AMode* am 1987 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset); 1988 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am)); 1989 return dst; 1990 } 1991 if (ty == Ity_I32 1992 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) { 1993 HReg dst = newVRegI(env); 1994 ARM64AMode* am 1995 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset); 1996 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); 1997 return dst; 1998 } 1999 if (ty == Ity_I16 2000 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) { 2001 HReg dst = newVRegI(env); 2002 ARM64AMode* am 2003 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset); 2004 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am)); 2005 return dst; 2006 } 2007 if (ty == Ity_I8 2008 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) { 2009 HReg dst = newVRegI(env); 2010 ARM64AMode* am 2011 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset); 2012 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); 2013 return dst; 2014 } 2015 break; 2016 } 2017 2018 /* --------- CCALL --------- */ 2019 case Iex_CCall: { 2020 HReg dst = newVRegI(env); 2021 vassert(ty == e->Iex.CCall.retty); 2022 2023 /* be very restrictive for now. Only 64-bit ints allowed for 2024 args, and 64 bits for return type. Don't forget to change 2025 the RetLoc if more types are allowed in future. */ 2026 if (e->Iex.CCall.retty != Ity_I64) 2027 goto irreducible; 2028 2029 /* Marshal args, do the call, clear stack. 
*/ 2030 UInt addToSp = 0; 2031 RetLoc rloc = mk_RetLoc_INVALID(); 2032 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2033 e->Iex.CCall.cee, e->Iex.CCall.retty, 2034 e->Iex.CCall.args ); 2035 /* */ 2036 if (ok) { 2037 vassert(is_sane_RetLoc(rloc)); 2038 vassert(rloc.pri == RLPri_Int); 2039 vassert(addToSp == 0); 2040 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0())); 2041 return dst; 2042 } 2043 /* else fall through; will hit the irreducible: label */ 2044 } 2045 2046 /* --------- LITERAL --------- */ 2047 /* 64-bit literals */ 2048 case Iex_Const: { 2049 ULong u = 0; 2050 HReg dst = newVRegI(env); 2051 switch (e->Iex.Const.con->tag) { 2052 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; 2053 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 2054 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break; 2055 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break; 2056 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)"); 2057 } 2058 addInstr(env, ARM64Instr_Imm64(dst, u)); 2059 return dst; 2060 } 2061 2062 /* --------- MULTIPLEX --------- */ 2063 case Iex_ITE: { 2064 /* ITE(ccexpr, iftrue, iffalse) */ 2065 if (ty == Ity_I64 || ty == Ity_I32) { 2066 ARM64CondCode cc; 2067 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 2068 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); 2069 HReg dst = newVRegI(env); 2070 cc = iselCondCode(env, e->Iex.ITE.cond); 2071 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc)); 2072 return dst; 2073 } 2074 break; 2075 } 2076 2077 default: 2078 break; 2079 } /* switch (e->tag) */ 2080 2081 /* We get here if no pattern matched. */ 2082 irreducible: 2083 ppIRExpr(e); 2084 vpanic("iselIntExpr_R: cannot reduce tree"); 2085 } 2086 2087 2088 /*---------------------------------------------------------*/ 2089 /*--- ISEL: Integer expressions (128 bit) ---*/ 2090 /*---------------------------------------------------------*/ 2091 2092 /* Compute a 128-bit value into a register pair, which is returned as 2093 the first two parameters. 
As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

/* Compute a 128-bit integer value into the register pair (*rHi,*rLo).
   This is the checked entry point: it delegates all real work to
   iselInt128Expr_wrk and then asserts that both halves came back as
   64-bit integer virtual registers. */
static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselInt128Expr.  Only the I128-producing expressions
   that actually occur are handled (full 64x64->128 multiplies and
   64HLto128 pair construction); anything else panics. */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            Bool     syned = toBool(e->Iex.Binop.op == Iop_MullS64);
            HReg     argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg     argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg     dstLo = newVRegI(env);
            HReg     dstHi = newVRegI(env);
            /* Low half: plain 64x64->64 multiply.  High half: the
               multiply producing the upper 64 bits of the full
               product, signed (_SX) or unsigned (_ZX) as required. */
            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
                                         ARM64mul_PLAIN));
            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
                                         syned ? ARM64mul_SX : ARM64mul_ZX));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }
         /* 64HLto128(e1,e2) -- no computation needed; just select the
            two halves into the result pair. */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr(arm64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (128 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit vector value into a vector register.  Checked
   entry point: delegates to iselV128Expr_wrk, then asserts the
   result is a V128-class virtual register. */
static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselV128Expr_wrk( env, e );
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(e);
   vassert(ty == Ity_V128);

   /* Temporaries: just return the register already bound to them. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Only a very limited range of constants is handled.
*/ 2176 vassert(e->Iex.Const.con->tag == Ico_V128); 2177 UShort con = e->Iex.Const.con->Ico.V128; 2178 HReg res = newVRegV(env); 2179 switch (con) { 2180 case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF: 2181 addInstr(env, ARM64Instr_VImmQ(res, con)); 2182 return res; 2183 case 0x00F0: 2184 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2185 addInstr(env, ARM64Instr_VExtV(res, res, res, 12)); 2186 return res; 2187 case 0x0F00: 2188 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2189 addInstr(env, ARM64Instr_VExtV(res, res, res, 8)); 2190 return res; 2191 case 0x0FF0: 2192 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF)); 2193 addInstr(env, ARM64Instr_VExtV(res, res, res, 12)); 2194 return res; 2195 case 0x0FFF: 2196 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2197 addInstr(env, ARM64Instr_VExtV(res, res, res, 4)); 2198 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); 2199 return res; 2200 case 0xF000: 2201 addInstr(env, ARM64Instr_VImmQ(res, 0x000F)); 2202 addInstr(env, ARM64Instr_VExtV(res, res, res, 4)); 2203 return res; 2204 case 0xFF00: 2205 addInstr(env, ARM64Instr_VImmQ(res, 0x00FF)); 2206 addInstr(env, ARM64Instr_VExtV(res, res, res, 8)); 2207 return res; 2208 default: 2209 break; 2210 } 2211 /* Unhandled */ 2212 goto v128_expr_bad; 2213 } 2214 2215 if (e->tag == Iex_Load) { 2216 HReg res = newVRegV(env); 2217 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr); 2218 vassert(ty == Ity_V128); 2219 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN)); 2220 return res; 2221 } 2222 2223 if (e->tag == Iex_Get) { 2224 UInt offs = (UInt)e->Iex.Get.offset; 2225 if (offs < (1<<12)) { 2226 HReg addr = mk_baseblock_128bit_access_addr(env, offs); 2227 HReg res = newVRegV(env); 2228 vassert(ty == Ity_V128); 2229 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr)); 2230 return res; 2231 } 2232 goto v128_expr_bad; 2233 } 2234 2235 if (e->tag == Iex_Unop) { 2236 2237 /* Iop_ZeroHIXXofV128 cases */ 2238 UShort imm16 = 0; 2239 switch 
(e->Iex.Unop.op) { 2240 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break; 2241 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break; 2242 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break; 2243 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break; 2244 default: break; 2245 } 2246 if (imm16 != 0) { 2247 HReg src = iselV128Expr(env, e->Iex.Unop.arg); 2248 HReg imm = newVRegV(env); 2249 HReg res = newVRegV(env); 2250 addInstr(env, ARM64Instr_VImmQ(imm, imm16)); 2251 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm)); 2252 return res; 2253 } 2254 2255 /* Other cases */ 2256 switch (e->Iex.Unop.op) { 2257 case Iop_NotV128: 2258 case Iop_Abs64Fx2: case Iop_Abs32Fx4: 2259 case Iop_Neg64Fx2: case Iop_Neg32Fx4: 2260 case Iop_Abs64x2: case Iop_Abs32x4: 2261 case Iop_Abs16x8: case Iop_Abs8x16: 2262 case Iop_Cls32x4: case Iop_Cls16x8: case Iop_Cls8x16: 2263 case Iop_Clz32x4: case Iop_Clz16x8: case Iop_Clz8x16: 2264 case Iop_Cnt8x16: 2265 case Iop_Reverse1sIn8_x16: 2266 case Iop_Reverse8sIn16_x8: 2267 case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4: 2268 case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2: 2269 case Iop_Reverse32sIn64_x2: 2270 case Iop_RecipEst32Ux4: 2271 case Iop_RSqrtEst32Ux4: 2272 case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4: 2273 case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4: 2274 { 2275 HReg res = newVRegV(env); 2276 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 2277 Bool setRM = False; 2278 ARM64VecUnaryOp op = ARM64vecu_INVALID; 2279 switch (e->Iex.Unop.op) { 2280 case Iop_NotV128: op = ARM64vecu_NOT; break; 2281 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; 2282 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; 2283 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; 2284 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break; 2285 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break; 2286 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break; 2287 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break; 2288 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break; 2289 
case Iop_Cls32x4: op = ARM64vecu_CLS32x4; break; 2290 case Iop_Cls16x8: op = ARM64vecu_CLS16x8; break; 2291 case Iop_Cls8x16: op = ARM64vecu_CLS8x16; break; 2292 case Iop_Clz32x4: op = ARM64vecu_CLZ32x4; break; 2293 case Iop_Clz16x8: op = ARM64vecu_CLZ16x8; break; 2294 case Iop_Clz8x16: op = ARM64vecu_CLZ8x16; break; 2295 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break; 2296 case Iop_Reverse1sIn8_x16: op = ARM64vecu_RBIT; break; 2297 case Iop_Reverse8sIn16_x8: op = ARM64vecu_REV1616B; break; 2298 case Iop_Reverse8sIn32_x4: op = ARM64vecu_REV3216B; break; 2299 case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H; break; 2300 case Iop_Reverse8sIn64_x2: op = ARM64vecu_REV6416B; break; 2301 case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H; break; 2302 case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break; 2303 case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break; 2304 case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break; 2305 case Iop_RecipEst64Fx2: setRM = True; 2306 op = ARM64vecu_FRECPE64x2; break; 2307 case Iop_RecipEst32Fx4: setRM = True; 2308 op = ARM64vecu_FRECPE32x4; break; 2309 case Iop_RSqrtEst64Fx2: setRM = True; 2310 op = ARM64vecu_FRSQRTE64x2; break; 2311 case Iop_RSqrtEst32Fx4: setRM = True; 2312 op = ARM64vecu_FRSQRTE32x4; break; 2313 default: vassert(0); 2314 } 2315 if (setRM) { 2316 // This is a bit of a kludge. We should do rm properly for 2317 // these recip-est insns, but that would require changing the 2318 // primop's type to take an rmode. 
2319 set_FPCR_rounding_mode(env, IRExpr_Const( 2320 IRConst_U32(Irrm_NEAREST))); 2321 } 2322 addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); 2323 return res; 2324 } 2325 case Iop_CmpNEZ8x16: 2326 case Iop_CmpNEZ16x8: 2327 case Iop_CmpNEZ32x4: 2328 case Iop_CmpNEZ64x2: { 2329 HReg arg = iselV128Expr(env, e->Iex.Unop.arg); 2330 HReg zero = newVRegV(env); 2331 HReg res = newVRegV(env); 2332 ARM64VecBinOp cmp = ARM64vecb_INVALID; 2333 switch (e->Iex.Unop.op) { 2334 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break; 2335 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break; 2336 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break; 2337 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break; 2338 default: vassert(0); 2339 } 2340 // This is pretty feeble. Better: use CMP against zero 2341 // and avoid the extra instruction and extra register. 2342 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000)); 2343 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero)); 2344 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); 2345 return res; 2346 } 2347 case Iop_V256toV128_0: 2348 case Iop_V256toV128_1: { 2349 HReg vHi, vLo; 2350 iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg); 2351 return (e->Iex.Unop.op == Iop_V256toV128_1) ? 
vHi : vLo; 2352 } 2353 case Iop_64UtoV128: { 2354 HReg res = newVRegV(env); 2355 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2356 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2357 return res; 2358 } 2359 case Iop_Widen8Sto16x8: { 2360 HReg res = newVRegV(env); 2361 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2362 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2363 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res)); 2364 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8, 2365 res, res, 8)); 2366 return res; 2367 } 2368 case Iop_Widen16Sto32x4: { 2369 HReg res = newVRegV(env); 2370 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2371 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2372 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res)); 2373 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4, 2374 res, res, 16)); 2375 return res; 2376 } 2377 case Iop_Widen32Sto64x2: { 2378 HReg res = newVRegV(env); 2379 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 2380 addInstr(env, ARM64Instr_VQfromX(res, arg)); 2381 addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res)); 2382 addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2, 2383 res, res, 32)); 2384 return res; 2385 } 2386 /* ... */ 2387 default: 2388 break; 2389 } /* switch on the unop */ 2390 } /* if (e->tag == Iex_Unop) */ 2391 2392 if (e->tag == Iex_Binop) { 2393 switch (e->Iex.Binop.op) { 2394 case Iop_Sqrt32Fx4: 2395 case Iop_Sqrt64Fx2: { 2396 HReg arg = iselV128Expr(env, e->Iex.Binop.arg2); 2397 HReg res = newVRegV(env); 2398 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 2399 ARM64VecUnaryOp op 2400 = e->Iex.Binop.op == Iop_Sqrt32Fx4 2401 ? 
ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2; 2402 addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); 2403 return res; 2404 } 2405 case Iop_64HLtoV128: { 2406 HReg res = newVRegV(env); 2407 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2408 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2409 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR)); 2410 return res; 2411 } 2412 /* -- Cases where we can generate a simple three-reg instruction. -- */ 2413 case Iop_AndV128: 2414 case Iop_OrV128: 2415 case Iop_XorV128: 2416 case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16: 2417 case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16: 2418 case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16: 2419 case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16: 2420 case Iop_Add64x2: case Iop_Add32x4: 2421 case Iop_Add16x8: case Iop_Add8x16: 2422 case Iop_Sub64x2: case Iop_Sub32x4: 2423 case Iop_Sub16x8: case Iop_Sub8x16: 2424 case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16: 2425 case Iop_CmpEQ64x2: case Iop_CmpEQ32x4: 2426 case Iop_CmpEQ16x8: case Iop_CmpEQ8x16: 2427 case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4: 2428 case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16: 2429 case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4: 2430 case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16: 2431 case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4: 2432 case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4: 2433 case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4: 2434 case Iop_Perm8x16: 2435 case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4: 2436 case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16: 2437 case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4: 2438 case Iop_CatOddLanes16x8: case Iop_CatOddLanes8x16: 2439 case Iop_InterleaveHI32x4: 2440 case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16: 2441 case Iop_InterleaveLO32x4: 2442 case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16: 2443 case Iop_PolynomialMul8x16: 2444 case Iop_QAdd64Sx2: case Iop_QAdd32Sx4: 2445 case Iop_QAdd16Sx8: case Iop_QAdd8Sx16: 2446 case Iop_QAdd64Ux2: 
case Iop_QAdd32Ux4: 2447 case Iop_QAdd16Ux8: case Iop_QAdd8Ux16: 2448 case Iop_QSub64Sx2: case Iop_QSub32Sx4: 2449 case Iop_QSub16Sx8: case Iop_QSub8Sx16: 2450 case Iop_QSub64Ux2: case Iop_QSub32Ux4: 2451 case Iop_QSub16Ux8: case Iop_QSub8Ux16: 2452 case Iop_QDMulHi32Sx4: case Iop_QDMulHi16Sx8: 2453 case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8: 2454 case Iop_Sh8Sx16: case Iop_Sh16Sx8: 2455 case Iop_Sh32Sx4: case Iop_Sh64Sx2: 2456 case Iop_Sh8Ux16: case Iop_Sh16Ux8: 2457 case Iop_Sh32Ux4: case Iop_Sh64Ux2: 2458 case Iop_Rsh8Sx16: case Iop_Rsh16Sx8: 2459 case Iop_Rsh32Sx4: case Iop_Rsh64Sx2: 2460 case Iop_Rsh8Ux16: case Iop_Rsh16Ux8: 2461 case Iop_Rsh32Ux4: case Iop_Rsh64Ux2: 2462 case Iop_Max64Fx2: case Iop_Max32Fx4: 2463 case Iop_Min64Fx2: case Iop_Min32Fx4: 2464 case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4: 2465 case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4: 2466 { 2467 HReg res = newVRegV(env); 2468 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 2469 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 2470 Bool sw = False; 2471 Bool setRM = False; 2472 ARM64VecBinOp op = ARM64vecb_INVALID; 2473 switch (e->Iex.Binop.op) { 2474 case Iop_AndV128: op = ARM64vecb_AND; break; 2475 case Iop_OrV128: op = ARM64vecb_ORR; break; 2476 case Iop_XorV128: op = ARM64vecb_XOR; break; 2477 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; 2478 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; 2479 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; 2480 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; 2481 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; 2482 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; 2483 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; 2484 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; 2485 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; 2486 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; 2487 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; 2488 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; 2489 case Iop_Add64x2: op = 
ARM64vecb_ADD64x2; break; 2490 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; 2491 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; 2492 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break; 2493 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; 2494 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; 2495 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; 2496 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break; 2497 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; 2498 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; 2499 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break; 2500 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; 2501 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break; 2502 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break; 2503 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break; 2504 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break; 2505 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break; 2506 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break; 2507 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break; 2508 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break; 2509 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break; 2510 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break; 2511 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break; 2512 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; 2513 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break; 2514 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; 2515 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; 2516 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; 2517 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; 2518 case Iop_Perm8x16: op = ARM64vecb_TBL1; break; 2519 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True; 2520 break; 2521 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True; 2522 break; 2523 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True; 2524 break; 2525 case Iop_CatEvenLanes8x16: op = 
ARM64vecb_UZP18x16; sw = True; 2526 break; 2527 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True; 2528 break; 2529 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True; 2530 break; 2531 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True; 2532 break; 2533 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True; 2534 break; 2535 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True; 2536 break; 2537 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True; 2538 break; 2539 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True; 2540 break; 2541 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True; 2542 break; 2543 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True; 2544 break; 2545 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True; 2546 break; 2547 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break; 2548 case Iop_QAdd64Sx2: op = ARM64vecb_SQADD64x2; break; 2549 case Iop_QAdd32Sx4: op = ARM64vecb_SQADD32x4; break; 2550 case Iop_QAdd16Sx8: op = ARM64vecb_SQADD16x8; break; 2551 case Iop_QAdd8Sx16: op = ARM64vecb_SQADD8x16; break; 2552 case Iop_QAdd64Ux2: op = ARM64vecb_UQADD64x2; break; 2553 case Iop_QAdd32Ux4: op = ARM64vecb_UQADD32x4; break; 2554 case Iop_QAdd16Ux8: op = ARM64vecb_UQADD16x8; break; 2555 case Iop_QAdd8Ux16: op = ARM64vecb_UQADD8x16; break; 2556 case Iop_QSub64Sx2: op = ARM64vecb_SQSUB64x2; break; 2557 case Iop_QSub32Sx4: op = ARM64vecb_SQSUB32x4; break; 2558 case Iop_QSub16Sx8: op = ARM64vecb_SQSUB16x8; break; 2559 case Iop_QSub8Sx16: op = ARM64vecb_SQSUB8x16; break; 2560 case Iop_QSub64Ux2: op = ARM64vecb_UQSUB64x2; break; 2561 case Iop_QSub32Ux4: op = ARM64vecb_UQSUB32x4; break; 2562 case Iop_QSub16Ux8: op = ARM64vecb_UQSUB16x8; break; 2563 case Iop_QSub8Ux16: op = ARM64vecb_UQSUB8x16; break; 2564 case Iop_QDMulHi32Sx4: op = ARM64vecb_SQDMULH32x4; break; 2565 case Iop_QDMulHi16Sx8: op = ARM64vecb_SQDMULH16x8; break; 2566 case Iop_QRDMulHi32Sx4: op = 
ARM64vecb_SQRDMULH32x4; break; 2567 case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break; 2568 case Iop_Sh8Sx16: op = ARM64vecb_SSHL8x16; break; 2569 case Iop_Sh16Sx8: op = ARM64vecb_SSHL16x8; break; 2570 case Iop_Sh32Sx4: op = ARM64vecb_SSHL32x4; break; 2571 case Iop_Sh64Sx2: op = ARM64vecb_SSHL64x2; break; 2572 case Iop_Sh8Ux16: op = ARM64vecb_USHL8x16; break; 2573 case Iop_Sh16Ux8: op = ARM64vecb_USHL16x8; break; 2574 case Iop_Sh32Ux4: op = ARM64vecb_USHL32x4; break; 2575 case Iop_Sh64Ux2: op = ARM64vecb_USHL64x2; break; 2576 case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break; 2577 case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break; 2578 case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break; 2579 case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break; 2580 case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break; 2581 case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break; 2582 case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break; 2583 case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break; 2584 case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2; break; 2585 case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break; 2586 case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break; 2587 case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break; 2588 case Iop_RecipStep64Fx2: setRM = True; 2589 op = ARM64vecb_FRECPS64x2; break; 2590 case Iop_RecipStep32Fx4: setRM = True; 2591 op = ARM64vecb_FRECPS32x4; break; 2592 case Iop_RSqrtStep64Fx2: setRM = True; 2593 op = ARM64vecb_FRSQRTS64x2; break; 2594 case Iop_RSqrtStep32Fx4: setRM = True; 2595 op = ARM64vecb_FRSQRTS32x4; break; 2596 default: vassert(0); 2597 } 2598 if (setRM) { 2599 // This is a bit of a kludge. We should do rm properly for 2600 // these recip-step insns, but that would require changing the 2601 // primop's type to take an rmode. 
2602 set_FPCR_rounding_mode(env, IRExpr_Const( 2603 IRConst_U32(Irrm_NEAREST))); 2604 } 2605 if (sw) { 2606 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); 2607 } else { 2608 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); 2609 } 2610 return res; 2611 } 2612 /* -- These only have 2 operand instructions, so we have to first move 2613 the first argument into a new register, for modification. -- */ 2614 case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8: 2615 case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2: 2616 case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8: 2617 case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2: 2618 { 2619 HReg res = newVRegV(env); 2620 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 2621 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 2622 ARM64VecModifyOp op = ARM64vecmo_INVALID; 2623 switch (e->Iex.Binop.op) { 2624 /* In the following 8 cases, the US - SU switching is intended. 2625 See comments on the libvex_ir.h for details. Also in the 2626 ARM64 front end, where used these primops are generated. */ 2627 case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break; 2628 case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break; 2629 case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break; 2630 case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break; 2631 case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break; 2632 case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break; 2633 case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break; 2634 case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break; 2635 default: vassert(0); 2636 } 2637 /* The order of the operands is important. Although this is 2638 basically addition, the two operands are extended differently, 2639 making it important to get them into the correct registers in 2640 the instruction. 
*/ 2641 addInstr(env, ARM64Instr_VMov(16, res, argR)); 2642 addInstr(env, ARM64Instr_VModifyV(op, res, argL)); 2643 return res; 2644 } 2645 /* -- Shifts by an immediate. -- */ 2646 case Iop_ShrN64x2: case Iop_ShrN32x4: 2647 case Iop_ShrN16x8: case Iop_ShrN8x16: 2648 case Iop_SarN64x2: case Iop_SarN32x4: 2649 case Iop_SarN16x8: case Iop_SarN8x16: 2650 case Iop_ShlN64x2: case Iop_ShlN32x4: 2651 case Iop_ShlN16x8: case Iop_ShlN8x16: 2652 case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4: 2653 case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16: 2654 case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4: 2655 case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16: 2656 case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4: 2657 case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16: 2658 { 2659 IRExpr* argL = e->Iex.Binop.arg1; 2660 IRExpr* argR = e->Iex.Binop.arg2; 2661 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2662 UInt amt = argR->Iex.Const.con->Ico.U8; 2663 UInt limLo = 0; 2664 UInt limHi = 0; 2665 ARM64VecShiftImmOp op = ARM64vecshi_INVALID; 2666 /* Establish the instruction to use. 
*/ 2667 switch (e->Iex.Binop.op) { 2668 case Iop_ShrN64x2: op = ARM64vecshi_USHR64x2; break; 2669 case Iop_ShrN32x4: op = ARM64vecshi_USHR32x4; break; 2670 case Iop_ShrN16x8: op = ARM64vecshi_USHR16x8; break; 2671 case Iop_ShrN8x16: op = ARM64vecshi_USHR8x16; break; 2672 case Iop_SarN64x2: op = ARM64vecshi_SSHR64x2; break; 2673 case Iop_SarN32x4: op = ARM64vecshi_SSHR32x4; break; 2674 case Iop_SarN16x8: op = ARM64vecshi_SSHR16x8; break; 2675 case Iop_SarN8x16: op = ARM64vecshi_SSHR8x16; break; 2676 case Iop_ShlN64x2: op = ARM64vecshi_SHL64x2; break; 2677 case Iop_ShlN32x4: op = ARM64vecshi_SHL32x4; break; 2678 case Iop_ShlN16x8: op = ARM64vecshi_SHL16x8; break; 2679 case Iop_ShlN8x16: op = ARM64vecshi_SHL8x16; break; 2680 case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2; break; 2681 case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4; break; 2682 case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8; break; 2683 case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16; break; 2684 case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2; break; 2685 case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4; break; 2686 case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8; break; 2687 case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16; break; 2688 case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break; 2689 case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break; 2690 case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break; 2691 case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break; 2692 default: vassert(0); 2693 } 2694 /* Establish the shift limits, for sanity check purposes only. 
*/ 2695 switch (e->Iex.Binop.op) { 2696 case Iop_ShrN64x2: limLo = 1; limHi = 64; break; 2697 case Iop_ShrN32x4: limLo = 1; limHi = 32; break; 2698 case Iop_ShrN16x8: limLo = 1; limHi = 16; break; 2699 case Iop_ShrN8x16: limLo = 1; limHi = 8; break; 2700 case Iop_SarN64x2: limLo = 1; limHi = 64; break; 2701 case Iop_SarN32x4: limLo = 1; limHi = 32; break; 2702 case Iop_SarN16x8: limLo = 1; limHi = 16; break; 2703 case Iop_SarN8x16: limLo = 1; limHi = 8; break; 2704 case Iop_ShlN64x2: limLo = 0; limHi = 63; break; 2705 case Iop_ShlN32x4: limLo = 0; limHi = 31; break; 2706 case Iop_ShlN16x8: limLo = 0; limHi = 15; break; 2707 case Iop_ShlN8x16: limLo = 0; limHi = 7; break; 2708 case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break; 2709 case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break; 2710 case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break; 2711 case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7; break; 2712 case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break; 2713 case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break; 2714 case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break; 2715 case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7; break; 2716 case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break; 2717 case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break; 2718 case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break; 2719 case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7; break; 2720 default: vassert(0); 2721 } 2722 /* For left shifts, the allowable amt values are 2723 0 .. lane_bits-1. For right shifts the allowable 2724 values are 1 .. lane_bits. */ 2725 if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) { 2726 HReg src = iselV128Expr(env, argL); 2727 HReg dst = newVRegV(env); 2728 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); 2729 return dst; 2730 } 2731 /* Special case some no-op shifts that the arm64 front end 2732 throws at us. We can't generate any instructions for these, 2733 but we don't need to either. 
*/ 2734 switch (e->Iex.Binop.op) { 2735 case Iop_ShrN64x2: case Iop_ShrN32x4: 2736 case Iop_ShrN16x8: case Iop_ShrN8x16: 2737 if (amt == 0) { 2738 return iselV128Expr(env, argL); 2739 } 2740 break; 2741 default: 2742 break; 2743 } 2744 /* otherwise unhandled */ 2745 } 2746 /* else fall out; this is unhandled */ 2747 break; 2748 } 2749 /* -- Saturating narrowing by an immediate -- */ 2750 /* uu */ 2751 case Iop_QandQShrNnarrow16Uto8Ux8: 2752 case Iop_QandQShrNnarrow32Uto16Ux4: 2753 case Iop_QandQShrNnarrow64Uto32Ux2: 2754 /* ss */ 2755 case Iop_QandQSarNnarrow16Sto8Sx8: 2756 case Iop_QandQSarNnarrow32Sto16Sx4: 2757 case Iop_QandQSarNnarrow64Sto32Sx2: 2758 /* su */ 2759 case Iop_QandQSarNnarrow16Sto8Ux8: 2760 case Iop_QandQSarNnarrow32Sto16Ux4: 2761 case Iop_QandQSarNnarrow64Sto32Ux2: 2762 /* ruu */ 2763 case Iop_QandQRShrNnarrow16Uto8Ux8: 2764 case Iop_QandQRShrNnarrow32Uto16Ux4: 2765 case Iop_QandQRShrNnarrow64Uto32Ux2: 2766 /* rss */ 2767 case Iop_QandQRSarNnarrow16Sto8Sx8: 2768 case Iop_QandQRSarNnarrow32Sto16Sx4: 2769 case Iop_QandQRSarNnarrow64Sto32Sx2: 2770 /* rsu */ 2771 case Iop_QandQRSarNnarrow16Sto8Ux8: 2772 case Iop_QandQRSarNnarrow32Sto16Ux4: 2773 case Iop_QandQRSarNnarrow64Sto32Ux2: 2774 { 2775 IRExpr* argL = e->Iex.Binop.arg1; 2776 IRExpr* argR = e->Iex.Binop.arg2; 2777 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2778 UInt amt = argR->Iex.Const.con->Ico.U8; 2779 UInt limit = 0; 2780 ARM64VecShiftImmOp op = ARM64vecshi_INVALID; 2781 switch (e->Iex.Binop.op) { 2782 /* uu */ 2783 case Iop_QandQShrNnarrow64Uto32Ux2: 2784 op = ARM64vecshi_UQSHRN2SD; limit = 64; break; 2785 case Iop_QandQShrNnarrow32Uto16Ux4: 2786 op = ARM64vecshi_UQSHRN4HS; limit = 32; break; 2787 case Iop_QandQShrNnarrow16Uto8Ux8: 2788 op = ARM64vecshi_UQSHRN8BH; limit = 16; break; 2789 /* ss */ 2790 case Iop_QandQSarNnarrow64Sto32Sx2: 2791 op = ARM64vecshi_SQSHRN2SD; limit = 64; break; 2792 case Iop_QandQSarNnarrow32Sto16Sx4: 2793 op = ARM64vecshi_SQSHRN4HS; limit 
= 32; break; 2794 case Iop_QandQSarNnarrow16Sto8Sx8: 2795 op = ARM64vecshi_SQSHRN8BH; limit = 16; break; 2796 /* su */ 2797 case Iop_QandQSarNnarrow64Sto32Ux2: 2798 op = ARM64vecshi_SQSHRUN2SD; limit = 64; break; 2799 case Iop_QandQSarNnarrow32Sto16Ux4: 2800 op = ARM64vecshi_SQSHRUN4HS; limit = 32; break; 2801 case Iop_QandQSarNnarrow16Sto8Ux8: 2802 op = ARM64vecshi_SQSHRUN8BH; limit = 16; break; 2803 /* ruu */ 2804 case Iop_QandQRShrNnarrow64Uto32Ux2: 2805 op = ARM64vecshi_UQRSHRN2SD; limit = 64; break; 2806 case Iop_QandQRShrNnarrow32Uto16Ux4: 2807 op = ARM64vecshi_UQRSHRN4HS; limit = 32; break; 2808 case Iop_QandQRShrNnarrow16Uto8Ux8: 2809 op = ARM64vecshi_UQRSHRN8BH; limit = 16; break; 2810 /* rss */ 2811 case Iop_QandQRSarNnarrow64Sto32Sx2: 2812 op = ARM64vecshi_SQRSHRN2SD; limit = 64; break; 2813 case Iop_QandQRSarNnarrow32Sto16Sx4: 2814 op = ARM64vecshi_SQRSHRN4HS; limit = 32; break; 2815 case Iop_QandQRSarNnarrow16Sto8Sx8: 2816 op = ARM64vecshi_SQRSHRN8BH; limit = 16; break; 2817 /* rsu */ 2818 case Iop_QandQRSarNnarrow64Sto32Ux2: 2819 op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break; 2820 case Iop_QandQRSarNnarrow32Sto16Ux4: 2821 op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break; 2822 case Iop_QandQRSarNnarrow16Sto8Ux8: 2823 op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break; 2824 /**/ 2825 default: 2826 vassert(0); 2827 } 2828 if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) { 2829 HReg src = iselV128Expr(env, argL); 2830 HReg dst = newVRegV(env); 2831 HReg fpsr = newVRegI(env); 2832 /* Clear FPSR.Q, do the operation, and return both its 2833 result and the new value of FPSR.Q. We can simply 2834 zero out FPSR since all the other bits have no relevance 2835 in VEX generated code. 
*/ 2836 addInstr(env, ARM64Instr_Imm64(fpsr, 0)); 2837 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr)); 2838 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); 2839 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr)); 2840 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27), 2841 ARM64sh_SHR)); 2842 ARM64RIL* ril_one = mb_mkARM64RIL_I(1); 2843 vassert(ril_one); 2844 addInstr(env, ARM64Instr_Logic(fpsr, 2845 fpsr, ril_one, ARM64lo_AND)); 2846 /* Now we have: the main (shift) result in the bottom half 2847 of |dst|, and the Q bit at the bottom of |fpsr|. 2848 Combining them with a "InterleaveLO64x2" style operation 2849 produces a 128 bit value, dst[63:0]:fpsr[63:0], 2850 which is what we want. */ 2851 HReg scratch = newVRegV(env); 2852 addInstr(env, ARM64Instr_VQfromX(scratch, fpsr)); 2853 addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2, 2854 dst, dst, scratch)); 2855 return dst; 2856 } 2857 } 2858 /* else fall out; this is unhandled */ 2859 break; 2860 } 2861 2862 // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128, 2863 // as it is in some ways more general and often leads to better 2864 // code overall. 2865 case Iop_ShlV128: 2866 case Iop_ShrV128: { 2867 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128; 2868 /* This is tricky. Generate an EXT instruction with zeroes in 2869 the high operand (shift right) or low operand (shift left). 2870 Note that we can only slice in the EXT instruction at a byte 2871 level of granularity, so the shift amount needs careful 2872 checking. 
*/ 2873 IRExpr* argL = e->Iex.Binop.arg1; 2874 IRExpr* argR = e->Iex.Binop.arg2; 2875 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { 2876 UInt amt = argR->Iex.Const.con->Ico.U8; 2877 Bool amtOK = False; 2878 switch (amt) { 2879 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28: 2880 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50: 2881 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78: 2882 amtOK = True; break; 2883 } 2884 /* We could also deal with amt==0 by copying the source to 2885 the destination, but there's no need for that so far. */ 2886 if (amtOK) { 2887 HReg src = iselV128Expr(env, argL); 2888 HReg srcZ = newVRegV(env); 2889 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000)); 2890 UInt immB = amt / 8; 2891 vassert(immB >= 1 && immB <= 15); 2892 HReg dst = newVRegV(env); 2893 if (isSHR) { 2894 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/, 2895 immB)); 2896 } else { 2897 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/, 2898 16 - immB)); 2899 } 2900 return dst; 2901 } 2902 } 2903 /* else fall out; this is unhandled */ 2904 break; 2905 } 2906 2907 case Iop_PolynomialMull8x8: 2908 case Iop_Mull32Ux2: 2909 case Iop_Mull16Ux4: 2910 case Iop_Mull8Ux8: 2911 case Iop_Mull32Sx2: 2912 case Iop_Mull16Sx4: 2913 case Iop_Mull8Sx8: 2914 case Iop_QDMull32Sx2: 2915 case Iop_QDMull16Sx4: 2916 { 2917 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1); 2918 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2919 HReg vSrcL = newVRegV(env); 2920 HReg vSrcR = newVRegV(env); 2921 HReg dst = newVRegV(env); 2922 ARM64VecBinOp op = ARM64vecb_INVALID; 2923 switch (e->Iex.Binop.op) { 2924 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break; 2925 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break; 2926 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break; 2927 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break; 2928 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break; 2929 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; 
break; 2930 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break; 2931 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break; 2932 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break; 2933 default: vassert(0); 2934 } 2935 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL)); 2936 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR)); 2937 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR)); 2938 return dst; 2939 } 2940 2941 /* ... */ 2942 default: 2943 break; 2944 } /* switch on the binop */ 2945 } /* if (e->tag == Iex_Binop) */ 2946 2947 if (e->tag == Iex_Triop) { 2948 IRTriop* triop = e->Iex.Triop.details; 2949 ARM64VecBinOp vecbop = ARM64vecb_INVALID; 2950 switch (triop->op) { 2951 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break; 2952 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break; 2953 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break; 2954 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break; 2955 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break; 2956 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break; 2957 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break; 2958 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break; 2959 default: break; 2960 } 2961 if (vecbop != ARM64vecb_INVALID) { 2962 HReg argL = iselV128Expr(env, triop->arg2); 2963 HReg argR = iselV128Expr(env, triop->arg3); 2964 HReg dst = newVRegV(env); 2965 set_FPCR_rounding_mode(env, triop->arg1); 2966 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); 2967 return dst; 2968 } 2969 2970 if (triop->op == Iop_SliceV128) { 2971 /* Note that, compared to ShlV128/ShrV128 just above, the shift 2972 amount here is in bytes, not bits. 
*/ 2973 IRExpr* argHi = triop->arg1; 2974 IRExpr* argLo = triop->arg2; 2975 IRExpr* argAmt = triop->arg3; 2976 if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) { 2977 UInt amt = argAmt->Iex.Const.con->Ico.U8; 2978 Bool amtOK = amt >= 1 && amt <= 15; 2979 /* We could also deal with amt==0 by copying argLO to 2980 the destination, but there's no need for that so far. */ 2981 if (amtOK) { 2982 HReg srcHi = iselV128Expr(env, argHi); 2983 HReg srcLo = iselV128Expr(env, argLo); 2984 HReg dst = newVRegV(env); 2985 addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt)); 2986 return dst; 2987 } 2988 } 2989 /* else fall out; this is unhandled */ 2990 } 2991 2992 } /* if (e->tag == Iex_Triop) */ 2993 2994 v128_expr_bad: 2995 ppIRExpr(e); 2996 vpanic("iselV128Expr_wrk"); 2997 } 2998 2999 3000 /*---------------------------------------------------------*/ 3001 /*--- ISEL: Floating point expressions (64 bit) ---*/ 3002 /*---------------------------------------------------------*/ 3003 3004 /* Compute a 64-bit floating point value into a register, the identity 3005 of which is returned. As with iselIntExpr_R, the reg may be either 3006 real or virtual; in any case it must not be changed by subsequent 3007 code emitted by the caller. 
*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity check the worker's result: F64 values always live in
      virtual 64-bit FP-class registers at this stage. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* --- Temporary: just hand back the vreg already bound to it. --- */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --- Literal F64 constants: materialise the 64-bit image in an
      integer register, then move it across to an FP register. --- */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F64) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         /* Type-pun the double through a union to get its bit image;
            avoids strict-aliasing problems with a pointer cast. */
         union { Double d64; ULong u64; } u;
         vassert(sizeof(u) == 8);
         u.d64 = con->Ico.F64;
         addInstr(env, ARM64Instr_Imm64(src, u.u64));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* --- 64-bit little-endian FP load. --- */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* --- Read of guest state, only for 8-aligned offsets that fit the
      scaled-immediate form of the load instruction. --- */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   /* --- Unary FP operations. --- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion; no rounding mode needed since every
               F32 is exactly representable as an F64. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_F16toF64: {
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   /* --- Binary cases: arg1 is an IRRoundingMode-encoded rounding
      mode, arg2 the actual operand. --- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF64toInt:
         case Iop_SqrtF64:
         case Iop_RecpExpF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            /* Must set FPCR before emitting the rounding-sensitive op. */
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64FpUnaryOp op = ARM64fpu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_RoundF64toInt: op = ARM64fpu_RINT;  break;
               case Iop_SqrtF64:       op = ARM64fpu_SQRT;  break;
               case Iop_RecpExpF64:    op = ARM64fpu_RECPX; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            /* 64-bit int -> F64 can lose precision, hence the rounding
               mode argument, unlike the 32-bit cases above. */
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* --- Ternary cases: rounded dyadic arithmetic, with the rounding
      mode in arg1 and the operands in arg2/arg3. --- */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

   if (e->tag == Iex_ITE) {
      /* ITE(ccexpr, iftrue, iffalse) -> FP conditional select. */
      ARM64CondCode cc;
      HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)        ---*/
/*---------------------------------------------------------*/

/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  Values are generated into HRcFlt64
   registers despite the values themselves being Ity_F32s.
*/

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity check the worker's result: per the comment above, F32
      values are nevertheless carried in virtual HRcFlt64 registers. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* --- Temporary: just hand back the vreg already bound to it. --- */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F32) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         /* Type-pun the float through a union to get its bit image;
            avoids strict-aliasing problems with a pointer cast.  The
            32-bit image lands zero-extended in the low half of |src|. */
         union { Float f32; UInt u32; } u;
         vassert(sizeof(u) == 4);
         u.f32 = con->Ico.F32;
         addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* --- 32-bit little-endian FP load. --- */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F32);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* --- Read of guest state, only for 4-aligned offsets that fit the
      scaled-immediate form of the load instruction. --- */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   /* --- Unary FP operations. --- */
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F16toF32: {
            /* Widening conversion; no rounding mode needed since every
               F16 is exactly representable as an F32. */
            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   /* --- Binary cases: arg1 is an IRRoundingMode-encoded rounding
      mode, arg2 the actual operand. --- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt:
         case Iop_SqrtF32:
         case Iop_RecpExpF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            /* Must set FPCR before emitting the rounding-sensitive op. */
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            ARM64FpUnaryOp op = ARM64fpu_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
               case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
               case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion, hence rounding-mode dependent. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* int -> F32 can lose precision, so all four variants take
               a rounding mode, unlike the I32 -> F64 cases. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* --- Ternary cases: rounded dyadic arithmetic, with the rounding
      mode in arg1 and the operands in arg2/arg3. --- */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

   if (e->tag == Iex_ITE) {
      /* ITE(ccexpr, iftrue, iffalse) -> FP conditional select. */
      ARM64CondCode cc;
      HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (16 bit)        ---*/
/*---------------------------------------------------------*/

/* Compute a 16-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.
Values are generated into HRcFlt64 3374 registers despite the values themselves being Ity_F16s. */ 3375 3376 static HReg iselF16Expr ( ISelEnv* env, IRExpr* e ) 3377 { 3378 HReg r = iselF16Expr_wrk( env, e ); 3379 # if 0 3380 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3381 # endif 3382 vassert(hregClass(r) == HRcFlt64); 3383 vassert(hregIsVirtual(r)); 3384 return r; 3385 } 3386 3387 /* DO NOT CALL THIS DIRECTLY */ 3388 static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e ) 3389 { 3390 IRType ty = typeOfIRExpr(env->type_env,e); 3391 vassert(e); 3392 vassert(ty == Ity_F16); 3393 3394 if (e->tag == Iex_Get) { 3395 Int offs = e->Iex.Get.offset; 3396 if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) { 3397 HReg rD = newVRegD(env); 3398 HReg rN = get_baseblock_register(); 3399 addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs)); 3400 return rD; 3401 } 3402 } 3403 3404 if (e->tag == Iex_Binop) { 3405 switch (e->Iex.Binop.op) { 3406 case Iop_F32toF16: { 3407 HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2); 3408 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3409 HReg dstH = newVRegD(env); 3410 addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS)); 3411 return dstH; 3412 } 3413 case Iop_F64toF16: { 3414 HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2); 3415 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); 3416 HReg dstH = newVRegD(env); 3417 addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD)); 3418 return dstH; 3419 } 3420 default: 3421 break; 3422 } 3423 } 3424 3425 ppIRExpr(e); 3426 vpanic("iselF16Expr_wrk"); 3427 } 3428 3429 3430 /*---------------------------------------------------------*/ 3431 /*--- ISEL: Vector expressions (256 bit) ---*/ 3432 /*---------------------------------------------------------*/ 3433 3434 static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo, 3435 ISelEnv* env, IRExpr* e ) 3436 { 3437 iselV256Expr_wrk( rHi, rLo, env, e ); 3438 vassert(hregClass(*rHi) == HRcVec128); 3439 vassert(hregClass(*rLo) == HRcVec128); 
3440 vassert(hregIsVirtual(*rHi)); 3441 vassert(hregIsVirtual(*rLo)); 3442 } 3443 3444 /* DO NOT CALL THIS DIRECTLY */ 3445 static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3446 ISelEnv* env, IRExpr* e ) 3447 { 3448 vassert(e); 3449 IRType ty = typeOfIRExpr(env->type_env,e); 3450 vassert(ty == Ity_V256); 3451 3452 /* read 256-bit IRTemp */ 3453 if (e->tag == Iex_RdTmp) { 3454 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 3455 return; 3456 } 3457 3458 if (e->tag == Iex_Binop) { 3459 switch (e->Iex.Binop.op) { 3460 case Iop_V128HLtoV256: { 3461 *rHi = iselV128Expr(env, e->Iex.Binop.arg1); 3462 *rLo = iselV128Expr(env, e->Iex.Binop.arg2); 3463 return; 3464 } 3465 case Iop_QandSQsh64x2: 3466 case Iop_QandSQsh32x4: 3467 case Iop_QandSQsh16x8: 3468 case Iop_QandSQsh8x16: 3469 case Iop_QandUQsh64x2: 3470 case Iop_QandUQsh32x4: 3471 case Iop_QandUQsh16x8: 3472 case Iop_QandUQsh8x16: 3473 case Iop_QandSQRsh64x2: 3474 case Iop_QandSQRsh32x4: 3475 case Iop_QandSQRsh16x8: 3476 case Iop_QandSQRsh8x16: 3477 case Iop_QandUQRsh64x2: 3478 case Iop_QandUQRsh32x4: 3479 case Iop_QandUQRsh16x8: 3480 case Iop_QandUQRsh8x16: 3481 { 3482 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); 3483 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); 3484 HReg fpsr = newVRegI(env); 3485 HReg resHi = newVRegV(env); 3486 HReg resLo = newVRegV(env); 3487 ARM64VecBinOp op = ARM64vecb_INVALID; 3488 switch (e->Iex.Binop.op) { 3489 case Iop_QandSQsh64x2: op = ARM64vecb_SQSHL64x2; break; 3490 case Iop_QandSQsh32x4: op = ARM64vecb_SQSHL32x4; break; 3491 case Iop_QandSQsh16x8: op = ARM64vecb_SQSHL16x8; break; 3492 case Iop_QandSQsh8x16: op = ARM64vecb_SQSHL8x16; break; 3493 case Iop_QandUQsh64x2: op = ARM64vecb_UQSHL64x2; break; 3494 case Iop_QandUQsh32x4: op = ARM64vecb_UQSHL32x4; break; 3495 case Iop_QandUQsh16x8: op = ARM64vecb_UQSHL16x8; break; 3496 case Iop_QandUQsh8x16: op = ARM64vecb_UQSHL8x16; break; 3497 case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break; 3498 case 
Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break; 3499 case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break; 3500 case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break; 3501 case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break; 3502 case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break; 3503 case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break; 3504 case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break; 3505 default: vassert(0); 3506 } 3507 /* Clear FPSR.Q, do the operation, and return both its result 3508 and the new value of FPSR.Q. We can simply zero out FPSR 3509 since all the other bits have no relevance in VEX generated 3510 code. */ 3511 addInstr(env, ARM64Instr_Imm64(fpsr, 0)); 3512 addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr)); 3513 addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR)); 3514 addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr)); 3515 addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27), 3516 ARM64sh_SHR)); 3517 ARM64RIL* ril_one = mb_mkARM64RIL_I(1); 3518 vassert(ril_one); 3519 addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND)); 3520 /* Now we have: the main (shift) result in |resLo|, and the 3521 Q bit at the bottom of |fpsr|. */ 3522 addInstr(env, ARM64Instr_VQfromX(resHi, fpsr)); 3523 *rHi = resHi; 3524 *rLo = resLo; 3525 return; 3526 } 3527 3528 /* ... 
*/ 3529 default: 3530 break; 3531 } /* switch on the binop */ 3532 } /* if (e->tag == Iex_Binop) */ 3533 3534 ppIRExpr(e); 3535 vpanic("iselV256Expr_wrk"); 3536 } 3537 3538 3539 /*---------------------------------------------------------*/ 3540 /*--- ISEL: Statements ---*/ 3541 /*---------------------------------------------------------*/ 3542 3543 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3544 { 3545 if (vex_traceflags & VEX_TRACE_VCODE) { 3546 vex_printf("\n-- "); 3547 ppIRStmt(stmt); 3548 vex_printf("\n"); 3549 } 3550 switch (stmt->tag) { 3551 3552 /* --------- STORE --------- */ 3553 /* little-endian write to memory */ 3554 case Ist_Store: { 3555 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3556 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3557 IREndness end = stmt->Ist.Store.end; 3558 3559 if (tya != Ity_I64 || end != Iend_LE) 3560 goto stmt_fail; 3561 3562 if (tyd == Ity_I64) { 3563 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3564 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3565 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); 3566 return; 3567 } 3568 if (tyd == Ity_I32) { 3569 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3570 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3571 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); 3572 return; 3573 } 3574 if (tyd == Ity_I16) { 3575 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3576 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3577 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); 3578 return; 3579 } 3580 if (tyd == Ity_I8) { 3581 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 3582 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); 3583 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 3584 return; 3585 } 3586 if (tyd == Ity_V128) { 3587 HReg qD = iselV128Expr(env, stmt->Ist.Store.data); 3588 HReg addr = 
iselIntExpr_R(env, stmt->Ist.Store.addr); 3589 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 3590 return; 3591 } 3592 if (tyd == Ity_F64) { 3593 HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 3594 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 3595 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0)); 3596 return; 3597 } 3598 if (tyd == Ity_F32) { 3599 HReg sD = iselFltExpr(env, stmt->Ist.Store.data); 3600 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); 3601 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0)); 3602 return; 3603 } 3604 break; 3605 } 3606 3607 /* --------- PUT --------- */ 3608 /* write guest state, fixed offset */ 3609 case Ist_Put: { 3610 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3611 UInt offs = (UInt)stmt->Ist.Put.offset; 3612 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { 3613 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3614 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); 3615 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); 3616 return; 3617 } 3618 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) { 3619 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3620 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs); 3621 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); 3622 return; 3623 } 3624 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) { 3625 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3626 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs); 3627 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); 3628 return; 3629 } 3630 if (tyd == Ity_I8 && offs < (1<<12)) { 3631 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 3632 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs); 3633 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); 3634 return; 3635 } 3636 if (tyd == Ity_V128 && offs < (1<<12)) { 3637 HReg qD = iselV128Expr(env, stmt->Ist.Put.data); 3638 HReg addr = 
mk_baseblock_128bit_access_addr(env, offs); 3639 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); 3640 return; 3641 } 3642 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) { 3643 HReg dD = iselDblExpr(env, stmt->Ist.Put.data); 3644 HReg bbp = get_baseblock_register(); 3645 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs)); 3646 return; 3647 } 3648 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) { 3649 HReg sD = iselFltExpr(env, stmt->Ist.Put.data); 3650 HReg bbp = get_baseblock_register(); 3651 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs)); 3652 return; 3653 } 3654 if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) { 3655 HReg hD = iselF16Expr(env, stmt->Ist.Put.data); 3656 HReg bbp = get_baseblock_register(); 3657 addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs)); 3658 return; 3659 } 3660 3661 break; 3662 } 3663 3664 /* --------- TMP --------- */ 3665 /* assign value to temporary */ 3666 case Ist_WrTmp: { 3667 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3668 IRType ty = typeOfIRTemp(env->type_env, tmp); 3669 3670 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 3671 /* We could do a lot better here. But for the time being: */ 3672 HReg dst = lookupIRTemp(env, tmp); 3673 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); 3674 addInstr(env, ARM64Instr_MovI(dst, rD)); 3675 return; 3676 } 3677 if (ty == Ity_I1) { 3678 /* Here, we are generating a I1 value into a 64 bit register. 3679 Make sure the value in the register is only zero or one, 3680 but no other. This allows optimisation of the 3681 1Uto64(tmp:I1) case, by making it simply a copy of the 3682 register holding 'tmp'. The point being that the value in 3683 the register holding 'tmp' can only have been created 3684 here. LATER: that seems dangerous; safer to do 'tmp & 1' 3685 in that case. Also, could do this just with a single CINC 3686 insn. 
*/ 3687 /* CLONE-01 */ 3688 HReg zero = newVRegI(env); 3689 HReg one = newVRegI(env); 3690 HReg dst = lookupIRTemp(env, tmp); 3691 addInstr(env, ARM64Instr_Imm64(zero, 0)); 3692 addInstr(env, ARM64Instr_Imm64(one, 1)); 3693 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); 3694 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); 3695 return; 3696 } 3697 if (ty == Ity_F64) { 3698 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3699 HReg dst = lookupIRTemp(env, tmp); 3700 addInstr(env, ARM64Instr_VMov(8, dst, src)); 3701 return; 3702 } 3703 if (ty == Ity_F32) { 3704 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3705 HReg dst = lookupIRTemp(env, tmp); 3706 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); 3707 return; 3708 } 3709 if (ty == Ity_V128) { 3710 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); 3711 HReg dst = lookupIRTemp(env, tmp); 3712 addInstr(env, ARM64Instr_VMov(16, dst, src)); 3713 return; 3714 } 3715 if (ty == Ity_V256) { 3716 HReg srcHi, srcLo, dstHi, dstLo; 3717 iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data); 3718 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 3719 addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi)); 3720 addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo)); 3721 return; 3722 } 3723 break; 3724 } 3725 3726 /* --------- Call to DIRTY helper --------- */ 3727 /* call complex ("dirty") helper function */ 3728 case Ist_Dirty: { 3729 IRDirty* d = stmt->Ist.Dirty.details; 3730 3731 /* Figure out the return type, if any. 
*/ 3732 IRType retty = Ity_INVALID; 3733 if (d->tmp != IRTemp_INVALID) 3734 retty = typeOfIRTemp(env->type_env, d->tmp); 3735 3736 Bool retty_ok = False; 3737 switch (retty) { 3738 case Ity_INVALID: /* function doesn't return anything */ 3739 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 3740 case Ity_V128: 3741 retty_ok = True; break; 3742 default: 3743 break; 3744 } 3745 if (!retty_ok) 3746 break; /* will go to stmt_fail: */ 3747 3748 /* Marshal args, do the call, and set the return value to 0x555..555 3749 if this is a conditional call that returns a value and the 3750 call is skipped. */ 3751 UInt addToSp = 0; 3752 RetLoc rloc = mk_RetLoc_INVALID(); 3753 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 3754 vassert(is_sane_RetLoc(rloc)); 3755 3756 /* Now figure out what to do with the returned value, if any. */ 3757 switch (retty) { 3758 case Ity_INVALID: { 3759 /* No return value. Nothing to do. */ 3760 vassert(d->tmp == IRTemp_INVALID); 3761 vassert(rloc.pri == RLPri_None); 3762 vassert(addToSp == 0); 3763 return; 3764 } 3765 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { 3766 vassert(rloc.pri == RLPri_Int); 3767 vassert(addToSp == 0); 3768 /* The returned value is in x0. Park it in the register 3769 associated with tmp. */ 3770 HReg dst = lookupIRTemp(env, d->tmp); 3771 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) ); 3772 return; 3773 } 3774 case Ity_V128: { 3775 /* The returned value is on the stack, and *retloc tells 3776 us where. Fish it off the stack and then move the 3777 stack pointer upwards to clear it, as directed by 3778 doHelperCall. 
*/ 3779 vassert(rloc.pri == RLPri_V128SpRel); 3780 vassert(rloc.spOff < 256); // stay sane 3781 vassert(addToSp >= 16); // ditto 3782 vassert(addToSp < 256); // ditto 3783 HReg dst = lookupIRTemp(env, d->tmp); 3784 HReg tmp = newVRegI(env); // the address of the returned value 3785 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP 3786 addInstr(env, ARM64Instr_Arith(tmp, tmp, 3787 ARM64RIA_I12((UShort)rloc.spOff, 0), 3788 True/*isAdd*/ )); 3789 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp)); 3790 addInstr(env, ARM64Instr_AddToSP(addToSp)); 3791 return; 3792 } 3793 default: 3794 /*NOTREACHED*/ 3795 vassert(0); 3796 } 3797 break; 3798 } 3799 3800 /* --------- Load Linked and Store Conditional --------- */ 3801 case Ist_LLSC: { 3802 if (stmt->Ist.LLSC.storedata == NULL) { 3803 /* LL */ 3804 IRTemp res = stmt->Ist.LLSC.result; 3805 IRType ty = typeOfIRTemp(env->type_env, res); 3806 if (ty == Ity_I64 || ty == Ity_I32 3807 || ty == Ity_I16 || ty == Ity_I8) { 3808 Int szB = 0; 3809 HReg r_dst = lookupIRTemp(env, res); 3810 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 3811 switch (ty) { 3812 case Ity_I8: szB = 1; break; 3813 case Ity_I16: szB = 2; break; 3814 case Ity_I32: szB = 4; break; 3815 case Ity_I64: szB = 8; break; 3816 default: vassert(0); 3817 } 3818 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); 3819 addInstr(env, ARM64Instr_LdrEX(szB)); 3820 addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); 3821 return; 3822 } 3823 goto stmt_fail; 3824 } else { 3825 /* SC */ 3826 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); 3827 if (tyd == Ity_I64 || tyd == Ity_I32 3828 || tyd == Ity_I16 || tyd == Ity_I8) { 3829 Int szB = 0; 3830 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); 3831 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 3832 switch (tyd) { 3833 case Ity_I8: szB = 1; break; 3834 case Ity_I16: szB = 2; break; 3835 case Ity_I32: szB = 4; break; 3836 case Ity_I64: szB = 8; break; 3837 default: vassert(0); 
3838 } 3839 addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); 3840 addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); 3841 addInstr(env, ARM64Instr_StrEX(szB)); 3842 } else { 3843 goto stmt_fail; 3844 } 3845 /* now r0 is 1 if failed, 0 if success. Change to IR 3846 conventions (0 is fail, 1 is success). Also transfer 3847 result to r_res. */ 3848 IRTemp res = stmt->Ist.LLSC.result; 3849 IRType ty = typeOfIRTemp(env->type_env, res); 3850 HReg r_res = lookupIRTemp(env, res); 3851 ARM64RIL* one = mb_mkARM64RIL_I(1); 3852 vassert(ty == Ity_I1); 3853 vassert(one); 3854 addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one, 3855 ARM64lo_XOR)); 3856 /* And be conservative -- mask off all but the lowest bit. */ 3857 addInstr(env, ARM64Instr_Logic(r_res, r_res, one, 3858 ARM64lo_AND)); 3859 return; 3860 } 3861 break; 3862 } 3863 3864 /* --------- ACAS --------- */ 3865 case Ist_CAS: { 3866 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 3867 /* "normal" singleton CAS */ 3868 UChar sz; 3869 IRCAS* cas = stmt->Ist.CAS.details; 3870 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3871 switch (ty) { 3872 case Ity_I64: sz = 8; break; 3873 case Ity_I32: sz = 4; break; 3874 case Ity_I16: sz = 2; break; 3875 case Ity_I8: sz = 1; break; 3876 default: goto unhandled_cas; 3877 } 3878 HReg rAddr = iselIntExpr_R(env, cas->addr); 3879 HReg rExpd = iselIntExpr_R(env, cas->expdLo); 3880 HReg rData = iselIntExpr_R(env, cas->dataLo); 3881 vassert(cas->expdHi == NULL); 3882 vassert(cas->dataHi == NULL); 3883 addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr)); 3884 addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd)); 3885 addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData)); 3886 addInstr(env, ARM64Instr_CAS(sz)); 3887 /* Now we have the lowest szB bytes of x1 are either equal to 3888 the lowest szB bytes of x5, indicating success, or they 3889 aren't, indicating failure. 
The IR semantics actually 3890 require us to return the old value at the location, 3891 regardless of success or failure, but in the case of 3892 failure it's not clear how to do this, since 3893 ARM64Instr_CAS can't provide that. Instead we'll just 3894 return the relevant bit of x1, since that's at least 3895 guaranteed to be different from the lowest bits of x5 on 3896 failure. */ 3897 HReg rResult = hregARM64_X1(); 3898 switch (sz) { 3899 case 8: break; 3900 case 4: rResult = widen_z_32_to_64(env, rResult); break; 3901 case 2: rResult = widen_z_16_to_64(env, rResult); break; 3902 case 1: rResult = widen_z_8_to_64(env, rResult); break; 3903 default: vassert(0); 3904 } 3905 // "old" in this case is interpreted somewhat liberally, per 3906 // the previous comment. 3907 HReg rOld = lookupIRTemp(env, cas->oldLo); 3908 addInstr(env, ARM64Instr_MovI(rOld, rResult)); 3909 return; 3910 } 3911 unhandled_cas: 3912 break; 3913 } 3914 3915 /* --------- MEM FENCE --------- */ 3916 case Ist_MBE: 3917 switch (stmt->Ist.MBE.event) { 3918 case Imbe_Fence: 3919 addInstr(env, ARM64Instr_MFence()); 3920 return; 3921 case Imbe_CancelReservation: 3922 addInstr(env, ARM64Instr_ClrEX()); 3923 return; 3924 default: 3925 break; 3926 } 3927 break; 3928 3929 /* --------- INSTR MARK --------- */ 3930 /* Doesn't generate any executable code ... */ 3931 case Ist_IMark: 3932 return; 3933 3934 /* --------- ABI HINT --------- */ 3935 /* These have no meaning (denotation in the IR) and so we ignore 3936 them ... if any actually made it this far. 
*/ 3937 case Ist_AbiHint: 3938 return; 3939 3940 /* --------- NO-OP --------- */ 3941 case Ist_NoOp: 3942 return; 3943 3944 /* --------- EXIT --------- */ 3945 case Ist_Exit: { 3946 if (stmt->Ist.Exit.dst->tag != Ico_U64) 3947 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value"); 3948 3949 ARM64CondCode cc 3950 = iselCondCode(env, stmt->Ist.Exit.guard); 3951 ARM64AMode* amPC 3952 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP); 3953 3954 /* Case: boring transfer to known address */ 3955 if (stmt->Ist.Exit.jk == Ijk_Boring) { 3956 if (env->chainingAllowed) { 3957 /* .. almost always true .. */ 3958 /* Skip the event check at the dst if this is a forwards 3959 edge. */ 3960 Bool toFastEP 3961 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 3962 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 3963 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 3964 amPC, cc, toFastEP)); 3965 } else { 3966 /* .. very occasionally .. */ 3967 /* We can't use chaining, so ask for an assisted transfer, 3968 as that's the only alternative that is allowable. */ 3969 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 3970 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring)); 3971 } 3972 return; 3973 } 3974 3975 /* Case: assisted transfer to arbitrary address */ 3976 switch (stmt->Ist.Exit.jk) { 3977 /* Keep this list in sync with that for iselNext below */ 3978 case Ijk_ClientReq: 3979 case Ijk_NoDecode: 3980 case Ijk_NoRedir: 3981 case Ijk_Sys_syscall: 3982 case Ijk_InvalICache: 3983 case Ijk_FlushDCache: 3984 case Ijk_SigTRAP: 3985 case Ijk_Yield: { 3986 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 3987 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, 3988 stmt->Ist.Exit.jk)); 3989 return; 3990 } 3991 default: 3992 break; 3993 } 3994 3995 /* Do we ever expect to see any other kind? 
*/ 3996 goto stmt_fail; 3997 } 3998 3999 default: break; 4000 } 4001 stmt_fail: 4002 ppIRStmt(stmt); 4003 vpanic("iselStmt"); 4004 } 4005 4006 4007 /*---------------------------------------------------------*/ 4008 /*--- ISEL: Basic block terminators (Nexts) ---*/ 4009 /*---------------------------------------------------------*/ 4010 4011 static void iselNext ( ISelEnv* env, 4012 IRExpr* next, IRJumpKind jk, Int offsIP ) 4013 { 4014 if (vex_traceflags & VEX_TRACE_VCODE) { 4015 vex_printf( "\n-- PUT(%d) = ", offsIP); 4016 ppIRExpr( next ); 4017 vex_printf( "; exit-"); 4018 ppIRJumpKind(jk); 4019 vex_printf( "\n"); 4020 } 4021 4022 /* Case: boring transfer to known address */ 4023 if (next->tag == Iex_Const) { 4024 IRConst* cdst = next->Iex.Const.con; 4025 vassert(cdst->tag == Ico_U64); 4026 if (jk == Ijk_Boring || jk == Ijk_Call) { 4027 /* Boring transfer to known address */ 4028 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 4029 if (env->chainingAllowed) { 4030 /* .. almost always true .. */ 4031 /* Skip the event check at the dst if this is a forwards 4032 edge. */ 4033 Bool toFastEP 4034 = ((Addr64)cdst->Ico.U64) > env->max_ga; 4035 if (0) vex_printf("%s", toFastEP ? "X" : "."); 4036 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64, 4037 amPC, ARM64cc_AL, 4038 toFastEP)); 4039 } else { 4040 /* .. very occasionally .. */ 4041 /* We can't use chaining, so ask for an assisted transfer, 4042 as that's the only alternative that is allowable. 
*/ 4043 HReg r = iselIntExpr_R(env, next); 4044 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, 4045 Ijk_Boring)); 4046 } 4047 return; 4048 } 4049 } 4050 4051 /* Case: call/return (==boring) transfer to any address */ 4052 switch (jk) { 4053 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 4054 HReg r = iselIntExpr_R(env, next); 4055 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 4056 if (env->chainingAllowed) { 4057 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL)); 4058 } else { 4059 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, 4060 Ijk_Boring)); 4061 } 4062 return; 4063 } 4064 default: 4065 break; 4066 } 4067 4068 /* Case: assisted transfer to arbitrary address */ 4069 switch (jk) { 4070 /* Keep this list in sync with that for Ist_Exit above */ 4071 case Ijk_ClientReq: 4072 case Ijk_NoDecode: 4073 case Ijk_NoRedir: 4074 case Ijk_Sys_syscall: 4075 case Ijk_InvalICache: 4076 case Ijk_FlushDCache: 4077 case Ijk_SigTRAP: 4078 case Ijk_Yield: 4079 { 4080 HReg r = iselIntExpr_R(env, next); 4081 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); 4082 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); 4083 return; 4084 } 4085 default: 4086 break; 4087 } 4088 4089 vex_printf( "\n-- PUT(%d) = ", offsIP); 4090 ppIRExpr( next ); 4091 vex_printf( "; exit-"); 4092 ppIRJumpKind(jk); 4093 vex_printf( "\n"); 4094 vassert(0); // are we expecting any other kind? 4095 } 4096 4097 4098 /*---------------------------------------------------------*/ 4099 /*--- Insn selector top-level ---*/ 4100 /*---------------------------------------------------------*/ 4101 4102 /* Translate an entire SB to arm64 code. 
 */

HInstrArray* iselSB_ARM64 ( const IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   ARM64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchARM64);

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* guard against unexpected space regressions */
   vassert(sizeof(ARM64Instr) <= 32);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            /* 128-bit ints need a pair of 64-bit vregs: low half in
               |hreg|, high half in |hregHI|. */
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F16: // we'll use HRcFlt64 regs for F16 too
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            hreg = mkHReg(True, HRcFlt64, 0, j++);
            break;
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            /* V256 likewise needs a pair, of V128 vregs. */
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(arm64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARM64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/