1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_arm_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 NEON support is 14 Copyright (C) 2010-2013 Samsung Electronics 15 contributed by Dmitry Zhurikhin <zhur (at) ispras.ru> 16 and Kirill Batuzov <batuzovk (at) ispras.ru> 17 18 This program is free software; you can redistribute it and/or 19 modify it under the terms of the GNU General Public License as 20 published by the Free Software Foundation; either version 2 of the 21 License, or (at your option) any later version. 22 23 This program is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of 25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 26 General Public License for more details. 27 28 You should have received a copy of the GNU General Public License 29 along with this program; if not, write to the Free Software 30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 31 02110-1301, USA. 32 33 The GNU General Public License is contained in the file COPYING. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_ir.h" 38 #include "libvex.h" 39 #include "ir_match.h" 40 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "host_generic_regs.h" 44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers 45 #include "host_arm_defs.h" 46 47 48 /*---------------------------------------------------------*/ 49 /*--- ARMvfp control word stuff ---*/ 50 /*---------------------------------------------------------*/ 51 52 /* Vex-generated code expects to run with the FPU set as follows: all 53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV 54 flags cleared, and FZ (flush to zero) disabled. Curiously enough, 55 this corresponds to a FPSCR value of zero. 56 57 fpscr should therefore be zero on entry to Vex-generated code, and 58 should be unchanged at exit. (Or at least the bottom 28 bits 59 should be zero). 60 */ 61 62 #define DEFAULT_FPSCR 0 63 64 65 /*---------------------------------------------------------*/ 66 /*--- ISelEnv ---*/ 67 /*---------------------------------------------------------*/ 68 69 /* This carries around: 70 71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 72 might encounter. This is computed before insn selection starts, 73 and does not change. 74 75 - A mapping from IRTemp to HReg. This tells the insn selector 76 which virtual register(s) are associated with each IRTemp 77 temporary. This is computed before insn selection starts, and 78 does not change. We expect this mapping to map precisely the 79 same set of IRTemps as the type mapping does. 80 81 - vregmap holds the primary register for the IRTemp. 82 - vregmapHI is only used for 64-bit integer-typed 83 IRTemps. It holds the identity of a second 84 32-bit virtual HReg, which holds the high half 85 of the value. 86 87 - The code array, that is, the insns selected so far. 88 89 - A counter, for generating new virtual registers. 90 91 - The host hardware capabilities word. This is set at the start 92 and does not change. 93 94 - A Bool for indicating whether we may generate chain-me 95 instructions for control flow transfers, or whether we must use 96 XAssisted. 97 98 - The maximum guest address of any guest insn in this block. 99 Actually, the address of the highest-addressed byte from any insn 100 in this block. Is set at the start and does not change. This is 101 used for detecting jumps which are definitely forward-edges from 102 this block, and therefore can be made (chained) to the fast entry 103 point of the destination, thereby avoiding the destination's 104 event check. 105 106 Note, this is all (well, mostly) host-independent. 107 */ 108 109 typedef 110 struct { 111 /* Constant -- are set at the start and do not change. */ 112 IRTypeEnv* type_env; 113 114 HReg* vregmap; 115 HReg* vregmapHI; 116 Int n_vregmap; 117 118 UInt hwcaps; 119 120 Bool chainingAllowed; 121 Addr64 max_ga; 122 123 /* These are modified as we go along. */ 124 HInstrArray* code; 125 Int vreg_ctr; 126 } 127 ISelEnv; 128 129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 130 { 131 vassert(tmp >= 0); 132 vassert(tmp < env->n_vregmap); 133 return env->vregmap[tmp]; 134 } 135 136 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp ) 137 { 138 vassert(tmp >= 0); 139 vassert(tmp < env->n_vregmap); 140 vassert(! hregIsInvalid(env->vregmapHI[tmp])); 141 *vrLO = env->vregmap[tmp]; 142 *vrHI = env->vregmapHI[tmp]; 143 } 144 145 static void addInstr ( ISelEnv* env, ARMInstr* instr ) 146 { 147 addHInstr(env->code, instr); 148 if (vex_traceflags & VEX_TRACE_VCODE) { 149 ppARMInstr(instr); 150 vex_printf("\n"); 151 } 152 #if 0 153 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary 154 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS 155 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) { 156 ppARMInstr(instr); 157 vex_printf("\n"); 158 } 159 #endif 160 } 161 162 static HReg newVRegI ( ISelEnv* env ) 163 { 164 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/); 165 env->vreg_ctr++; 166 return reg; 167 } 168 169 static HReg newVRegD ( ISelEnv* env ) 170 { 171 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); 172 env->vreg_ctr++; 173 return reg; 174 } 175 176 static HReg newVRegF ( ISelEnv* env ) 177 { 178 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/); 179 env->vreg_ctr++; 180 return reg; 181 } 182 183 static HReg newVRegV ( ISelEnv* env ) 184 { 185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); 186 env->vreg_ctr++; 187 return reg; 188 } 189 190 /* These are duplicated in guest_arm_toIR.c */ 191 static IRExpr* unop ( IROp op, IRExpr* a ) 192 { 193 return IRExpr_Unop(op, a); 194 } 195 196 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 197 { 198 return IRExpr_Binop(op, a1, a2); 199 } 200 201 static IRExpr* bind ( Int binder ) 202 { 203 return IRExpr_Binder(binder); 204 } 205 206 207 /*---------------------------------------------------------*/ 208 /*--- ISEL: Forward declarations ---*/ 209 /*---------------------------------------------------------*/ 210 211 /* These are organised as iselXXX and iselXXX_wrk pairs. The 212 iselXXX_wrk do the real work, but are not to be called directly. 213 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 214 checks that all returned registers are virtual. You should not 215 call the _wrk version directly. 216 */ 217 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e ); 218 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e ); 219 220 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e ); 221 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e ); 222 223 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e ); 224 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e ); 225 226 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e ); 227 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e ); 228 229 static ARMRI84* iselIntExpr_RI84_wrk 230 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e ); 231 static ARMRI84* iselIntExpr_RI84 232 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e ); 233 234 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e ); 235 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e ); 236 237 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 238 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 239 240 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 241 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 242 243 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 244 ISelEnv* env, IRExpr* e ); 245 static void iselInt64Expr ( HReg* rHi, HReg* rLo, 246 ISelEnv* env, IRExpr* e ); 247 248 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 249 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 250 251 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 252 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 253 254 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ); 255 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ); 256 257 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e ); 258 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e ); 259 260 /*---------------------------------------------------------*/ 261 /*--- ISEL: Misc helpers ---*/ 262 /*---------------------------------------------------------*/ 263 264 static UInt ROR32 ( UInt x, UInt sh ) { 265 vassert(sh >= 0 && sh < 32); 266 if (sh == 0) 267 return x; 268 else 269 return (x << (32-sh)) | (x >> sh); 270 } 271 272 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate 273 form, and if so return the components. */ 274 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u ) 275 { 276 UInt i; 277 for (i = 0; i < 16; i++) { 278 if (0 == (u & 0xFFFFFF00)) { 279 *u8 = u; 280 *u4 = i; 281 return True; 282 } 283 u = ROR32(u, 30); 284 } 285 vassert(i == 16); 286 return False; 287 } 288 289 /* Make a int reg-reg move. */ 290 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src ) 291 { 292 vassert(hregClass(src) == HRcInt32); 293 vassert(hregClass(dst) == HRcInt32); 294 return ARMInstr_Mov(dst, ARMRI84_R(src)); 295 } 296 297 /* Set the VFP unit's rounding mode to default (round to nearest). */ 298 static void set_VFP_rounding_default ( ISelEnv* env ) 299 { 300 /* mov rTmp, #DEFAULT_FPSCR 301 fmxr fpscr, rTmp 302 */ 303 HReg rTmp = newVRegI(env); 304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR)); 305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp)); 306 } 307 308 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed 309 expression denoting a value in the range 0 .. 3, indicating a round 310 mode encoded as per type IRRoundingMode. Set FPSCR to have the 311 same rounding. 312 */ 313 static 314 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode ) 315 { 316 /* This isn't simple, because 'mode' carries an IR rounding 317 encoding, and we need to translate that to an ARMvfp one: 318 The IR encoding: 319 00 to nearest (the default) 320 10 to +infinity 321 01 to -infinity 322 11 to zero 323 The ARMvfp encoding: 324 00 to nearest 325 01 to +infinity 326 10 to -infinity 327 11 to zero 328 Easy enough to do; just swap the two bits. 329 */ 330 HReg irrm = iselIntExpr_R(env, mode); 331 HReg tL = newVRegI(env); 332 HReg tR = newVRegI(env); 333 HReg t3 = newVRegI(env); 334 /* tL = irrm << 1; 335 tR = irrm >> 1; if we're lucky, these will issue together 336 tL &= 2; 337 tR &= 1; ditto 338 t3 = tL | tR; 339 t3 <<= 22; 340 fmxr fpscr, t3 341 */ 342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1))); 343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1))); 344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0))); 345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0))); 346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR))); 347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22))); 348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3)); 349 } 350 351 352 /*---------------------------------------------------------*/ 353 /*--- ISEL: Function call helpers ---*/ 354 /*---------------------------------------------------------*/ 355 356 /* Used only in doHelperCall. See big comment in doHelperCall re 357 handling of register-parameter args. This function figures out 358 whether evaluation of an expression might require use of a fixed 359 register. If in doubt return True (safe but suboptimal). 360 */ 361 static 362 Bool mightRequireFixedRegs ( IRExpr* e ) 363 { 364 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) { 365 // These are always "safe" -- either a copy of r13(sp) in some 366 // arbitrary vreg, or a copy of r8, respectively. 367 return False; 368 } 369 /* Else it's a "normal" expression. */ 370 switch (e->tag) { 371 case Iex_RdTmp: case Iex_Const: case Iex_Get: 372 return False; 373 default: 374 return True; 375 } 376 } 377 378 379 /* Do a complete function call. |guard| is a Ity_Bit expression 380 indicating whether or not the call happens. If guard==NULL, the 381 call is unconditional. |retloc| is set to indicate where the 382 return value is after the call. The caller (of this fn) must 383 generate code to add |stackAdjustAfterCall| to the stack pointer 384 after the call is done. Returns True iff it managed to handle this 385 combination of arg/return types, else returns False. */ 386 387 static 388 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, 389 /*OUT*/RetLoc* retloc, 390 ISelEnv* env, 391 IRExpr* guard, 392 IRCallee* cee, IRType retTy, IRExpr** args ) 393 { 394 ARMCondCode cc; 395 HReg argregs[ARM_N_ARGREGS]; 396 HReg tmpregs[ARM_N_ARGREGS]; 397 Bool go_fast; 398 Int n_args, i, nextArgReg; 399 ULong target; 400 401 vassert(ARM_N_ARGREGS == 4); 402 403 /* Set default returns. We'll update them later if needed. */ 404 *stackAdjustAfterCall = 0; 405 *retloc = mk_RetLoc_INVALID(); 406 407 /* These are used for cross-checking that IR-level constraints on 408 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */ 409 UInt nVECRETs = 0; 410 UInt nBBPTRs = 0; 411 412 /* Marshal args for a call and do the call. 413 414 This function only deals with a tiny set of possibilities, which 415 cover all helpers in practice. The restrictions are that only 416 arguments in registers are supported, hence only ARM_N_REGPARMS 417 x 32 integer bits in total can be passed. In fact the only 418 supported arg types are I32 and I64. 419 420 The return type can be I{64,32} or V128. In the V128 case, it 421 is expected that |args| will contain the special node 422 IRExpr_VECRET(), in which case this routine generates code to 423 allocate space on the stack for the vector return value. Since 424 we are not passing any scalars on the stack, it is enough to 425 preallocate the return space before marshalling any arguments, 426 in this case. 427 428 |args| may also contain IRExpr_BBPTR(), in which case the 429 value in r8 is passed as the corresponding argument. 430 431 Generating code which is both efficient and correct when 432 parameters are to be passed in registers is difficult, for the 433 reasons elaborated in detail in comments attached to 434 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant 435 of the method described in those comments. 436 437 The problem is split into two cases: the fast scheme and the 438 slow scheme. In the fast scheme, arguments are computed 439 directly into the target (real) registers. This is only safe 440 when we can be sure that computation of each argument will not 441 trash any real registers set by computation of any other 442 argument. 443 444 In the slow scheme, all args are first computed into vregs, and 445 once they are all done, they are moved to the relevant real 446 regs. This always gives correct code, but it also gives a bunch 447 of vreg-to-rreg moves which are usually redundant but are hard 448 for the register allocator to get rid of. 449 450 To decide which scheme to use, all argument expressions are 451 first examined. If they are all so simple that it is clear they 452 will be evaluated without use of any fixed registers, use the 453 fast scheme, else use the slow scheme. Note also that only 454 unconditional calls may use the fast scheme, since having to 455 compute a condition expression could itself trash real 456 registers. 457 458 Note this requires being able to examine an expression and 459 determine whether or not evaluation of it might use a fixed 460 register. That requires knowledge of how the rest of this insn 461 selector works. Currently just the following 3 are regarded as 462 safe -- hopefully they cover the majority of arguments in 463 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 464 */ 465 466 /* Note that the cee->regparms field is meaningless on ARM hosts 467 (since there is only one calling convention) and so we always 468 ignore it. */ 469 470 n_args = 0; 471 for (i = 0; args[i]; i++) { 472 IRExpr* arg = args[i]; 473 if (UNLIKELY(arg->tag == Iex_VECRET)) { 474 nVECRETs++; 475 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 476 nBBPTRs++; 477 } 478 n_args++; 479 } 480 481 argregs[0] = hregARM_R0(); 482 argregs[1] = hregARM_R1(); 483 argregs[2] = hregARM_R2(); 484 argregs[3] = hregARM_R3(); 485 486 tmpregs[0] = tmpregs[1] = tmpregs[2] = 487 tmpregs[3] = INVALID_HREG; 488 489 /* First decide which scheme (slow or fast) is to be used. First 490 assume the fast scheme, and select slow if any contraindications 491 (wow) appear. */ 492 493 go_fast = True; 494 495 if (guard) { 496 if (guard->tag == Iex_Const 497 && guard->Iex.Const.con->tag == Ico_U1 498 && guard->Iex.Const.con->Ico.U1 == True) { 499 /* unconditional */ 500 } else { 501 /* Not manifestly unconditional -- be conservative. */ 502 go_fast = False; 503 } 504 } 505 506 if (go_fast) { 507 for (i = 0; i < n_args; i++) { 508 if (mightRequireFixedRegs(args[i])) { 509 go_fast = False; 510 break; 511 } 512 } 513 } 514 515 if (go_fast) { 516 if (retTy == Ity_V128 || retTy == Ity_V256) 517 go_fast = False; 518 } 519 520 /* At this point the scheme to use has been established. Generate 521 code to get the arg values into the argument rregs. If we run 522 out of arg regs, give up. */ 523 524 if (go_fast) { 525 526 /* FAST SCHEME */ 527 nextArgReg = 0; 528 529 for (i = 0; i < n_args; i++) { 530 IRExpr* arg = args[i]; 531 532 IRType aTy = Ity_INVALID; 533 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 534 aTy = typeOfIRExpr(env->type_env, arg); 535 536 if (nextArgReg >= ARM_N_ARGREGS) 537 return False; /* out of argregs */ 538 539 if (aTy == Ity_I32) { 540 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], 541 iselIntExpr_R(env, arg) )); 542 nextArgReg++; 543 } 544 else if (aTy == Ity_I64) { 545 /* 64-bit args must be passed in an a reg-pair of the form 546 n:n+1, where n is even. Hence either r0:r1 or r2:r3. 547 On a little-endian host, the less significant word is 548 passed in the lower-numbered register. */ 549 if (nextArgReg & 1) { 550 if (nextArgReg >= ARM_N_ARGREGS) 551 return False; /* out of argregs */ 552 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA )); 553 nextArgReg++; 554 } 555 if (nextArgReg >= ARM_N_ARGREGS) 556 return False; /* out of argregs */ 557 HReg raHi, raLo; 558 iselInt64Expr(&raHi, &raLo, env, arg); 559 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo )); 560 nextArgReg++; 561 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi )); 562 nextArgReg++; 563 } 564 else if (arg->tag == Iex_BBPTR) { 565 vassert(0); //ATC 566 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], 567 hregARM_R8() )); 568 nextArgReg++; 569 } 570 else if (arg->tag == Iex_VECRET) { 571 // If this happens, it denotes ill-formed IR 572 vassert(0); 573 } 574 else 575 return False; /* unhandled arg type */ 576 } 577 578 /* Fast scheme only applies for unconditional calls. Hence: */ 579 cc = ARMcc_AL; 580 581 } else { 582 583 /* SLOW SCHEME; move via temporaries */ 584 nextArgReg = 0; 585 586 for (i = 0; i < n_args; i++) { 587 IRExpr* arg = args[i]; 588 589 IRType aTy = Ity_INVALID; 590 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 591 aTy = typeOfIRExpr(env->type_env, arg); 592 593 if (nextArgReg >= ARM_N_ARGREGS) 594 return False; /* out of argregs */ 595 596 if (aTy == Ity_I32) { 597 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); 598 nextArgReg++; 599 } 600 else if (aTy == Ity_I64) { 601 /* Same comment applies as in the Fast-scheme case. */ 602 if (nextArgReg & 1) 603 nextArgReg++; 604 if (nextArgReg + 1 >= ARM_N_ARGREGS) 605 return False; /* out of argregs */ 606 HReg raHi, raLo; 607 iselInt64Expr(&raHi, &raLo, env, args[i]); 608 tmpregs[nextArgReg] = raLo; 609 nextArgReg++; 610 tmpregs[nextArgReg] = raHi; 611 nextArgReg++; 612 } 613 else if (arg->tag == Iex_BBPTR) { 614 vassert(0); //ATC 615 tmpregs[nextArgReg] = hregARM_R8(); 616 nextArgReg++; 617 } 618 else if (arg->tag == Iex_VECRET) { 619 // If this happens, it denotes ill-formed IR 620 vassert(0); 621 } 622 else 623 return False; /* unhandled arg type */ 624 } 625 626 /* Now we can compute the condition. We can't do it earlier 627 because the argument computations could trash the condition 628 codes. Be a bit clever to handle the common case where the 629 guard is 1:Bit. */ 630 cc = ARMcc_AL; 631 if (guard) { 632 if (guard->tag == Iex_Const 633 && guard->Iex.Const.con->tag == Ico_U1 634 && guard->Iex.Const.con->Ico.U1 == True) { 635 /* unconditional -- do nothing */ 636 } else { 637 cc = iselCondCode( env, guard ); 638 } 639 } 640 641 /* Move the args to their final destinations. */ 642 for (i = 0; i < nextArgReg; i++) { 643 if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs 644 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA )); 645 continue; 646 } 647 /* None of these insns, including any spill code that might 648 be generated, may alter the condition codes. */ 649 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) ); 650 } 651 652 } 653 654 /* Should be assured by checks above */ 655 vassert(nextArgReg <= ARM_N_ARGREGS); 656 657 /* Do final checks, set the return values, and generate the call 658 instruction proper. */ 659 vassert(nBBPTRs == 0 || nBBPTRs == 1); 660 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0); 661 vassert(*stackAdjustAfterCall == 0); 662 vassert(is_RetLoc_INVALID(*retloc)); 663 switch (retTy) { 664 case Ity_INVALID: 665 /* Function doesn't return a value. */ 666 *retloc = mk_RetLoc_simple(RLPri_None); 667 break; 668 case Ity_I64: 669 *retloc = mk_RetLoc_simple(RLPri_2Int); 670 break; 671 case Ity_I32: case Ity_I16: case Ity_I8: 672 *retloc = mk_RetLoc_simple(RLPri_Int); 673 break; 674 case Ity_V128: 675 vassert(0); // ATC 676 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); 677 *stackAdjustAfterCall = 16; 678 break; 679 case Ity_V256: 680 vassert(0); // ATC 681 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); 682 *stackAdjustAfterCall = 32; 683 break; 684 default: 685 /* IR can denote other possible return types, but we don't 686 handle those here. */ 687 vassert(0); 688 } 689 690 /* Finally, generate the call itself. This needs the *retloc value 691 set in the switch above, which is why it's at the end. */ 692 693 /* nextArgReg doles out argument registers. Since these are 694 assigned in the order r0, r1, r2, r3, its numeric value at this 695 point, which must be between 0 and 4 inclusive, is going to be 696 equal to the number of arg regs in use for the call. Hence bake 697 that number into the call (we'll need to know it when doing 698 register allocation, to know what regs the call reads.) 699 700 There is a bit of a twist -- harmless but worth recording. 701 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have 702 the first arg in r0 and the second in r3:r2, but r1 isn't used. 703 We nevertheless have nextArgReg==4 and bake that into the call 704 instruction. This will mean the register allocator wil believe 705 this insn reads r1 when in fact it doesn't. But that's 706 harmless; it just artificially extends the live range of r1 707 unnecessarily. The best fix would be to put into the 708 instruction, a bitmask indicating which of r0/1/2/3 carry live 709 values. But that's too much hassle. */ 710 711 target = (HWord)Ptr_to_ULong(cee->addr); 712 addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc )); 713 714 return True; /* success */ 715 } 716 717 718 /*---------------------------------------------------------*/ 719 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 720 /*---------------------------------------------------------*/ 721 722 /* Select insns for an integer-typed expression, and add them to the 723 code list. Return a reg holding the result. This reg will be a 724 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 725 want to modify it, ask for a new vreg, copy it in there, and modify 726 the copy. The register allocator will do its best to map both 727 vregs to the same real register, so the copies will often disappear 728 later in the game. 729 730 This should handle expressions of 32, 16 and 8-bit type. All 731 results are returned in a 32-bit register. For 16- and 8-bit 732 expressions, the upper 16/24 bits are arbitrary, so you should mask 733 or sign extend partial values if necessary. 734 */ 735 736 /* --------------------- AMode1 --------------------- */ 737 738 /* Return an AMode1 which computes the value of the specified 739 expression, possibly also adding insns to the code list as a 740 result. The expression may only be a 32-bit one. 741 */ 742 743 static Bool sane_AMode1 ( ARMAMode1* am ) 744 { 745 switch (am->tag) { 746 case ARMam1_RI: 747 return 748 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32 749 && (hregIsVirtual(am->ARMam1.RI.reg) 750 || sameHReg(am->ARMam1.RI.reg, hregARM_R8())) 751 && am->ARMam1.RI.simm13 >= -4095 752 && am->ARMam1.RI.simm13 <= 4095 ); 753 case ARMam1_RRS: 754 return 755 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32 756 && hregIsVirtual(am->ARMam1.RRS.base) 757 && hregClass(am->ARMam1.RRS.index) == HRcInt32 758 && hregIsVirtual(am->ARMam1.RRS.index) 759 && am->ARMam1.RRS.shift >= 0 760 && am->ARMam1.RRS.shift <= 3 ); 761 default: 762 vpanic("sane_AMode: unknown ARM AMode1 tag"); 763 } 764 } 765 766 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e ) 767 { 768 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e); 769 vassert(sane_AMode1(am)); 770 return am; 771 } 772 773 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e ) 774 { 775 IRType ty = typeOfIRExpr(env->type_env,e); 776 vassert(ty == Ity_I32); 777 778 /* FIXME: add RRS matching */ 779 780 /* {Add32,Sub32}(expr,simm13) */ 781 if (e->tag == Iex_Binop 782 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 783 && e->Iex.Binop.arg2->tag == Iex_Const 784 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 785 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 786 if (simm >= -4095 && simm <= 4095) { 787 HReg reg; 788 if (e->Iex.Binop.op == Iop_Sub32) 789 simm = -simm; 790 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 791 return ARMAMode1_RI(reg, simm); 792 } 793 } 794 795 /* Doesn't match anything in particular. Generate it into 796 a register and use that. */ 797 { 798 HReg reg = iselIntExpr_R(env, e); 799 return ARMAMode1_RI(reg, 0); 800 } 801 802 } 803 804 805 /* --------------------- AMode2 --------------------- */ 806 807 /* Return an AMode2 which computes the value of the specified 808 expression, possibly also adding insns to the code list as a 809 result. The expression may only be a 32-bit one. 810 */ 811 812 static Bool sane_AMode2 ( ARMAMode2* am ) 813 { 814 switch (am->tag) { 815 case ARMam2_RI: 816 return 817 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32 818 && hregIsVirtual(am->ARMam2.RI.reg) 819 && am->ARMam2.RI.simm9 >= -255 820 && am->ARMam2.RI.simm9 <= 255 ); 821 case ARMam2_RR: 822 return 823 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32 824 && hregIsVirtual(am->ARMam2.RR.base) 825 && hregClass(am->ARMam2.RR.index) == HRcInt32 826 && hregIsVirtual(am->ARMam2.RR.index) ); 827 default: 828 vpanic("sane_AMode: unknown ARM AMode2 tag"); 829 } 830 } 831 832 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e ) 833 { 834 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e); 835 vassert(sane_AMode2(am)); 836 return am; 837 } 838 839 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e ) 840 { 841 IRType ty = typeOfIRExpr(env->type_env,e); 842 vassert(ty == Ity_I32); 843 844 /* FIXME: add RR matching */ 845 846 /* {Add32,Sub32}(expr,simm8) */ 847 if (e->tag == Iex_Binop 848 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 849 && e->Iex.Binop.arg2->tag == Iex_Const 850 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 851 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 852 if (simm >= -255 && simm <= 255) { 853 HReg reg; 854 if (e->Iex.Binop.op == Iop_Sub32) 855 simm = -simm; 856 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 857 return ARMAMode2_RI(reg, simm); 858 } 859 } 860 861 /* Doesn't match anything in particular. Generate it into 862 a register and use that. */ 863 { 864 HReg reg = iselIntExpr_R(env, e); 865 return ARMAMode2_RI(reg, 0); 866 } 867 868 } 869 870 871 /* --------------------- AModeV --------------------- */ 872 873 /* Return an AModeV which computes the value of the specified 874 expression, possibly also adding insns to the code list as a 875 result. The expression may only be a 32-bit one. 876 */ 877 878 static Bool sane_AModeV ( ARMAModeV* am ) 879 { 880 return toBool( hregClass(am->reg) == HRcInt32 881 && hregIsVirtual(am->reg) 882 && am->simm11 >= -1020 && am->simm11 <= 1020 883 && 0 == (am->simm11 & 3) ); 884 } 885 886 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e ) 887 { 888 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e); 889 vassert(sane_AModeV(am)); 890 return am; 891 } 892 893 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e ) 894 { 895 IRType ty = typeOfIRExpr(env->type_env,e); 896 vassert(ty == Ity_I32); 897 898 /* {Add32,Sub32}(expr, simm8 << 2) */ 899 if (e->tag == Iex_Binop 900 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 901 && e->Iex.Binop.arg2->tag == Iex_Const 902 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 903 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 904 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) { 905 HReg reg; 906 if (e->Iex.Binop.op == Iop_Sub32) 907 simm = -simm; 908 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 909 return mkARMAModeV(reg, simm); 910 } 911 } 912 913 /* Doesn't match anything in particular. Generate it into 914 a register and use that. */ 915 { 916 HReg reg = iselIntExpr_R(env, e); 917 return mkARMAModeV(reg, 0); 918 } 919 920 } 921 922 /* -------------------- AModeN -------------------- */ 923 924 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e ) 925 { 926 return iselIntExpr_AModeN_wrk(env, e); 927 } 928 929 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e ) 930 { 931 HReg reg = iselIntExpr_R(env, e); 932 return mkARMAModeN_R(reg); 933 } 934 935 936 /* --------------------- RI84 --------------------- */ 937 938 /* Select instructions to generate 'e' into a RI84. If mayInv is 939 true, then the caller will also accept an I84 form that denotes 940 'not e'. In this case didInv may not be NULL, and *didInv is set 941 to True. This complication is so as to allow generation of an RI84 942 which is suitable for use in either an AND or BIC instruction, 943 without knowing (before this call) which one. 944 */ 945 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv, 946 ISelEnv* env, IRExpr* e ) 947 { 948 ARMRI84* ri; 949 if (mayInv) 950 vassert(didInv != NULL); 951 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e); 952 /* sanity checks ... */ 953 switch (ri->tag) { 954 case ARMri84_I84: 955 return ri; 956 case ARMri84_R: 957 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32); 958 vassert(hregIsVirtual(ri->ARMri84.R.reg)); 959 return ri; 960 default: 961 vpanic("iselIntExpr_RI84: unknown arm RI84 tag"); 962 } 963 } 964 965 /* DO NOT CALL THIS DIRECTLY ! */ 966 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv, 967 ISelEnv* env, IRExpr* e ) 968 { 969 IRType ty = typeOfIRExpr(env->type_env,e); 970 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 971 972 if (didInv) *didInv = False; 973 974 /* special case: immediate */ 975 if (e->tag == Iex_Const) { 976 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */ 977 switch (e->Iex.Const.con->tag) { 978 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 979 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 980 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 981 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)"); 982 } 983 if (fitsIn8x4(&u8, &u4, u)) { 984 return ARMRI84_I84( (UShort)u8, (UShort)u4 ); 985 } 986 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) { 987 vassert(didInv); 988 *didInv = True; 989 return ARMRI84_I84( (UShort)u8, (UShort)u4 ); 990 } 991 /* else fail, fall through to default case */ 992 } 993 994 /* default case: calculate into a register and return that */ 995 { 996 HReg r = iselIntExpr_R ( env, e ); 997 return ARMRI84_R(r); 998 } 999 } 1000 1001 1002 /* --------------------- RI5 --------------------- */ 1003 1004 /* Select instructions to generate 'e' into a RI5. */ 1005 1006 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e ) 1007 { 1008 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e); 1009 /* sanity checks ... */ 1010 switch (ri->tag) { 1011 case ARMri5_I5: 1012 return ri; 1013 case ARMri5_R: 1014 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32); 1015 vassert(hregIsVirtual(ri->ARMri5.R.reg)); 1016 return ri; 1017 default: 1018 vpanic("iselIntExpr_RI5: unknown arm RI5 tag"); 1019 } 1020 } 1021 1022 /* DO NOT CALL THIS DIRECTLY ! */ 1023 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e ) 1024 { 1025 IRType ty = typeOfIRExpr(env->type_env,e); 1026 vassert(ty == Ity_I32 || ty == Ity_I8); 1027 1028 /* special case: immediate */ 1029 if (e->tag == Iex_Const) { 1030 UInt u; /* both invalid */ 1031 switch (e->Iex.Const.con->tag) { 1032 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1033 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1034 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1035 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)"); 1036 } 1037 if (u >= 1 && u <= 31) { 1038 return ARMRI5_I5(u); 1039 } 1040 /* else fail, fall through to default case */ 1041 } 1042 1043 /* default case: calculate into a register and return that */ 1044 { 1045 HReg r = iselIntExpr_R ( env, e ); 1046 return ARMRI5_R(r); 1047 } 1048 } 1049 1050 1051 /* ------------------- CondCode ------------------- */ 1052 1053 /* Generate code to evaluated a bit-typed expression, returning the 1054 condition code which would correspond when the expression would 1055 notionally have returned 1. */ 1056 1057 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 1058 { 1059 ARMCondCode cc = iselCondCode_wrk(env,e); 1060 vassert(cc != ARMcc_NV); 1061 return cc; 1062 } 1063 1064 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 1065 { 1066 vassert(e); 1067 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 1068 1069 /* var */ 1070 if (e->tag == Iex_RdTmp) { 1071 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp); 1072 /* CmpOrTst doesn't modify rTmp; so this is OK. */ 1073 ARMRI84* one = ARMRI84_I84(1,0); 1074 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one)); 1075 return ARMcc_NE; 1076 } 1077 1078 /* Not1(e) */ 1079 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1080 /* Generate code for the arg, and negate the test condition */ 1081 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 1082 } 1083 1084 /* --- patterns rooted at: 32to1 --- */ 1085 1086 if (e->tag == Iex_Unop 1087 && e->Iex.Unop.op == Iop_32to1) { 1088 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg); 1089 ARMRI84* one = ARMRI84_I84(1,0); 1090 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one)); 1091 return ARMcc_NE; 1092 } 1093 1094 /* --- patterns rooted at: CmpNEZ8 --- */ 1095 1096 if (e->tag == Iex_Unop 1097 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1098 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1099 ARMRI84* xFF = ARMRI84_I84(0xFF,0); 1100 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF)); 1101 return ARMcc_NE; 1102 } 1103 1104 /* --- patterns rooted at: CmpNEZ32 --- */ 1105 1106 if (e->tag == Iex_Unop 1107 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1108 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1109 ARMRI84* zero = ARMRI84_I84(0,0); 1110 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero)); 1111 return ARMcc_NE; 1112 } 1113 1114 /* --- patterns rooted at: CmpNEZ64 --- */ 1115 1116 if (e->tag == Iex_Unop 1117 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1118 HReg tHi, tLo; 1119 HReg tmp = newVRegI(env); 1120 ARMRI84* zero = ARMRI84_I84(0,0); 1121 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg); 1122 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo))); 1123 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero)); 1124 return ARMcc_NE; 1125 } 1126 1127 /* --- Cmp*32*(x,y) --- */ 1128 if (e->tag == Iex_Binop 1129 && (e->Iex.Binop.op == Iop_CmpEQ32 1130 || e->Iex.Binop.op == Iop_CmpNE32 1131 || e->Iex.Binop.op == Iop_CmpLT32S 1132 || e->Iex.Binop.op == Iop_CmpLT32U 1133 || e->Iex.Binop.op == Iop_CmpLE32S 1134 || e->Iex.Binop.op == Iop_CmpLE32U)) { 1135 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1136 ARMRI84* argR = iselIntExpr_RI84(NULL,False, 1137 env, e->Iex.Binop.arg2); 1138 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR)); 1139 switch (e->Iex.Binop.op) { 1140 case Iop_CmpEQ32: return ARMcc_EQ; 1141 case Iop_CmpNE32: return ARMcc_NE; 1142 case Iop_CmpLT32S: return ARMcc_LT; 1143 case Iop_CmpLT32U: return ARMcc_LO; 1144 case Iop_CmpLE32S: return ARMcc_LE; 1145 case Iop_CmpLE32U: return ARMcc_LS; 1146 default: vpanic("iselCondCode(arm): CmpXX32"); 1147 } 1148 } 1149 1150 /* const */ 1151 /* Constant 1:Bit */ 1152 if (e->tag == Iex_Const) { 1153 HReg r; 1154 vassert(e->Iex.Const.con->tag == Ico_U1); 1155 vassert(e->Iex.Const.con->Ico.U1 == True 1156 || e->Iex.Const.con->Ico.U1 == False); 1157 r = newVRegI(env); 1158 addInstr(env, ARMInstr_Imm32(r, 0)); 1159 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r))); 1160 return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE; 1161 } 1162 1163 // JRS 2013-Jan-03: this seems completely nonsensical 1164 /* --- CasCmpEQ* --- */ 1165 /* Ist_Cas has a dummy argument to compare with, so comparison is 1166 always true. */ 1167 //if (e->tag == Iex_Binop 1168 // && (e->Iex.Binop.op == Iop_CasCmpEQ32 1169 // || e->Iex.Binop.op == Iop_CasCmpEQ16 1170 // || e->Iex.Binop.op == Iop_CasCmpEQ8)) { 1171 // return ARMcc_AL; 1172 //} 1173 1174 ppIRExpr(e); 1175 vpanic("iselCondCode"); 1176 } 1177 1178 1179 /* --------------------- Reg --------------------- */ 1180 1181 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 1182 { 1183 HReg r = iselIntExpr_R_wrk(env, e); 1184 /* sanity checks ... */ 1185 # if 0 1186 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1187 # endif 1188 vassert(hregClass(r) == HRcInt32); 1189 vassert(hregIsVirtual(r)); 1190 return r; 1191 } 1192 1193 /* DO NOT CALL THIS DIRECTLY ! */ 1194 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 1195 { 1196 IRType ty = typeOfIRExpr(env->type_env,e); 1197 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1198 1199 switch (e->tag) { 1200 1201 /* --------- TEMP --------- */ 1202 case Iex_RdTmp: { 1203 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 1204 } 1205 1206 /* --------- LOAD --------- */ 1207 case Iex_Load: { 1208 HReg dst = newVRegI(env); 1209 1210 if (e->Iex.Load.end != Iend_LE) 1211 goto irreducible; 1212 1213 if (ty == Ity_I32) { 1214 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr ); 1215 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode)); 1216 return dst; 1217 } 1218 if (ty == Ity_I16) { 1219 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr ); 1220 addInstr(env, ARMInstr_LdSt16(ARMcc_AL, 1221 True/*isLoad*/, False/*!signedLoad*/, 1222 dst, amode)); 1223 return dst; 1224 } 1225 if (ty == Ity_I8) { 1226 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr ); 1227 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode)); 1228 return dst; 1229 } 1230 break; 1231 } 1232 1233 //zz /* --------- TERNARY OP --------- */ 1234 //zz case Iex_Triop: { 1235 //zz IRTriop *triop = e->Iex.Triop.details; 1236 //zz /* C3210 flags following FPU partial remainder (fprem), both 1237 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 1238 //zz if (triop->op == Iop_PRemC3210F64 1239 //zz || triop->op == Iop_PRem1C3210F64) { 1240 //zz HReg junk = newVRegF(env); 1241 //zz HReg dst = newVRegI(env); 1242 //zz HReg srcL = iselDblExpr(env, triop->arg2); 1243 //zz HReg srcR = iselDblExpr(env, triop->arg3); 1244 //zz /* XXXROUNDINGFIXME */ 1245 //zz /* set roundingmode here */ 1246 //zz addInstr(env, X86Instr_FpBinary( 1247 //zz e->Iex.Binop.op==Iop_PRemC3210F64 1248 //zz ? Xfp_PREM : Xfp_PREM1, 1249 //zz srcL,srcR,junk 1250 //zz )); 1251 //zz /* The previous pseudo-insn will have left the FPU's C3210 1252 //zz flags set correctly. So bag them. */ 1253 //zz addInstr(env, X86Instr_FpStSW_AX()); 1254 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1255 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst)); 1256 //zz return dst; 1257 //zz } 1258 //zz 1259 //zz break; 1260 //zz } 1261 1262 /* --------- BINARY OP --------- */ 1263 case Iex_Binop: { 1264 1265 ARMAluOp aop = 0; /* invalid */ 1266 ARMShiftOp sop = 0; /* invalid */ 1267 1268 /* ADD/SUB/AND/OR/XOR */ 1269 switch (e->Iex.Binop.op) { 1270 case Iop_And32: { 1271 Bool didInv = False; 1272 HReg dst = newVRegI(env); 1273 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1274 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/, 1275 env, e->Iex.Binop.arg2); 1276 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND, 1277 dst, argL, argR)); 1278 return dst; 1279 } 1280 case Iop_Or32: aop = ARMalu_OR; goto std_binop; 1281 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop; 1282 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop; 1283 case Iop_Add32: aop = ARMalu_ADD; goto std_binop; 1284 std_binop: { 1285 HReg dst = newVRegI(env); 1286 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1287 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/, 1288 env, e->Iex.Binop.arg2); 1289 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR)); 1290 return dst; 1291 } 1292 default: break; 1293 } 1294 1295 /* SHL/SHR/SAR */ 1296 switch (e->Iex.Binop.op) { 1297 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop; 1298 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop; 1299 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop; 1300 sh_binop: { 1301 HReg dst = newVRegI(env); 1302 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1303 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2); 1304 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR)); 1305 vassert(ty == Ity_I32); /* else the IR is ill-typed */ 1306 return dst; 1307 } 1308 default: break; 1309 } 1310 1311 /* MUL */ 1312 if (e->Iex.Binop.op == Iop_Mul32) { 1313 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1314 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1315 HReg dst = newVRegI(env); 1316 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); 1317 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); 1318 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN)); 1319 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0())); 1320 return dst; 1321 } 1322 1323 /* Handle misc other ops. */ 1324 1325 if (e->Iex.Binop.op == Iop_Max32U) { 1326 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1327 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1328 HReg dst = newVRegI(env); 1329 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, 1330 ARMRI84_R(argR))); 1331 addInstr(env, mk_iMOVds_RR(dst, argL)); 1332 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR))); 1333 return dst; 1334 } 1335 1336 if (e->Iex.Binop.op == Iop_CmpF64) { 1337 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1); 1338 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2); 1339 HReg dst = newVRegI(env); 1340 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do 1341 FMSTAT, so we can examine the results directly. */ 1342 addInstr(env, ARMInstr_VCmpD(dL, dR)); 1343 /* Create in dst, the IRCmpF64Result encoded result. */ 1344 addInstr(env, ARMInstr_Imm32(dst, 0)); 1345 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ 1346 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT 1347 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT 1348 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN 1349 return dst; 1350 } 1351 1352 if (e->Iex.Binop.op == Iop_F64toI32S 1353 || e->Iex.Binop.op == Iop_F64toI32U) { 1354 /* Wretched uglyness all round, due to having to deal 1355 with rounding modes. Oh well. */ 1356 /* FIXME: if arg1 is a constant indicating round-to-zero, 1357 then we could skip all this arsing around with FPSCR and 1358 simply emit FTO{S,U}IZD. */ 1359 Bool syned = e->Iex.Binop.op == Iop_F64toI32S; 1360 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2); 1361 set_VFP_rounding_mode(env, e->Iex.Binop.arg1); 1362 /* FTO{S,U}ID valF, valD */ 1363 HReg valF = newVRegF(env); 1364 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned, 1365 valF, valD)); 1366 set_VFP_rounding_default(env); 1367 /* VMOV dst, valF */ 1368 HReg dst = newVRegI(env); 1369 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst)); 1370 return dst; 1371 } 1372 1373 if (e->Iex.Binop.op == Iop_GetElem8x8 1374 || e->Iex.Binop.op == Iop_GetElem16x4 1375 || e->Iex.Binop.op == Iop_GetElem32x2) { 1376 HReg res = newVRegI(env); 1377 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); 1378 UInt index, size; 1379 if (e->Iex.Binop.arg2->tag != Iex_Const || 1380 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 1381 vpanic("ARM target supports GetElem with constant " 1382 "second argument only\n"); 1383 } 1384 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1385 switch (e->Iex.Binop.op) { 1386 case Iop_GetElem8x8: vassert(index < 8); size = 0; break; 1387 case Iop_GetElem16x4: vassert(index < 4); size = 1; break; 1388 case Iop_GetElem32x2: vassert(index < 2); size = 2; break; 1389 default: vassert(0); 1390 } 1391 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, 1392 mkARMNRS(ARMNRS_Reg, res, 0), 1393 mkARMNRS(ARMNRS_Scalar, arg, index), 1394 size, False)); 1395 return res; 1396 } 1397 1398 if (e->Iex.Binop.op == Iop_GetElem8x16 1399 || e->Iex.Binop.op == Iop_GetElem16x8 1400 || e->Iex.Binop.op == Iop_GetElem32x4) { 1401 HReg res = newVRegI(env); 1402 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); 1403 UInt index, size; 1404 if (e->Iex.Binop.arg2->tag != Iex_Const || 1405 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 1406 vpanic("ARM target supports GetElem with constant " 1407 "second argument only\n"); 1408 } 1409 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1410 switch (e->Iex.Binop.op) { 1411 case Iop_GetElem8x16: vassert(index < 16); size = 0; break; 1412 case Iop_GetElem16x8: vassert(index < 8); size = 1; break; 1413 case Iop_GetElem32x4: vassert(index < 4); size = 2; break; 1414 default: vassert(0); 1415 } 1416 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, 1417 mkARMNRS(ARMNRS_Reg, res, 0), 1418 mkARMNRS(ARMNRS_Scalar, arg, index), 1419 size, True)); 1420 return res; 1421 } 1422 1423 /* All cases involving host-side helper calls. */ 1424 void* fn = NULL; 1425 switch (e->Iex.Binop.op) { 1426 case Iop_Add16x2: 1427 fn = &h_generic_calc_Add16x2; break; 1428 case Iop_Sub16x2: 1429 fn = &h_generic_calc_Sub16x2; break; 1430 case Iop_HAdd16Ux2: 1431 fn = &h_generic_calc_HAdd16Ux2; break; 1432 case Iop_HAdd16Sx2: 1433 fn = &h_generic_calc_HAdd16Sx2; break; 1434 case Iop_HSub16Ux2: 1435 fn = &h_generic_calc_HSub16Ux2; break; 1436 case Iop_HSub16Sx2: 1437 fn = &h_generic_calc_HSub16Sx2; break; 1438 case Iop_QAdd16Sx2: 1439 fn = &h_generic_calc_QAdd16Sx2; break; 1440 case Iop_QAdd16Ux2: 1441 fn = &h_generic_calc_QAdd16Ux2; break; 1442 case Iop_QSub16Sx2: 1443 fn = &h_generic_calc_QSub16Sx2; break; 1444 case Iop_Add8x4: 1445 fn = &h_generic_calc_Add8x4; break; 1446 case Iop_Sub8x4: 1447 fn = &h_generic_calc_Sub8x4; break; 1448 case Iop_HAdd8Ux4: 1449 fn = &h_generic_calc_HAdd8Ux4; break; 1450 case Iop_HAdd8Sx4: 1451 fn = &h_generic_calc_HAdd8Sx4; break; 1452 case Iop_HSub8Ux4: 1453 fn = &h_generic_calc_HSub8Ux4; break; 1454 case Iop_HSub8Sx4: 1455 fn = &h_generic_calc_HSub8Sx4; break; 1456 case Iop_QAdd8Sx4: 1457 fn = &h_generic_calc_QAdd8Sx4; break; 1458 case Iop_QAdd8Ux4: 1459 fn = &h_generic_calc_QAdd8Ux4; break; 1460 case Iop_QSub8Sx4: 1461 fn = &h_generic_calc_QSub8Sx4; break; 1462 case Iop_QSub8Ux4: 1463 fn = &h_generic_calc_QSub8Ux4; break; 1464 case Iop_Sad8Ux4: 1465 fn = &h_generic_calc_Sad8Ux4; break; 1466 case Iop_QAdd32S: 1467 fn = &h_generic_calc_QAdd32S; break; 1468 case Iop_QSub32S: 1469 fn = &h_generic_calc_QSub32S; break; 1470 case Iop_QSub16Ux2: 1471 fn = &h_generic_calc_QSub16Ux2; break; 1472 case Iop_DivU32: 1473 fn = &h_calc_udiv32_w_arm_semantics; break; 1474 case Iop_DivS32: 1475 fn = &h_calc_sdiv32_w_arm_semantics; break; 1476 default: 1477 break; 1478 } 1479 1480 if (fn) { 1481 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1482 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1483 HReg res = newVRegI(env); 1484 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL)); 1485 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR)); 1486 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1487 2, mk_RetLoc_simple(RLPri_Int) )); 1488 addInstr(env, mk_iMOVds_RR(res, hregARM_R0())); 1489 return res; 1490 } 1491 1492 break; 1493 } 1494 1495 /* --------- UNARY OP --------- */ 1496 case Iex_Unop: { 1497 1498 //zz /* 1Uto8(32to1(expr32)) */ 1499 //zz if (e->Iex.Unop.op == Iop_1Uto8) { 1500 //zz DECLARE_PATTERN(p_32to1_then_1Uto8); 1501 //zz DEFINE_PATTERN(p_32to1_then_1Uto8, 1502 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1503 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1504 //zz IRExpr* expr32 = mi.bindee[0]; 1505 //zz HReg dst = newVRegI(env); 1506 //zz HReg src = iselIntExpr_R(env, expr32); 1507 //zz addInstr(env, mk_iMOVsd_RR(src,dst) ); 1508 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, 1509 //zz X86RMI_Imm(1), dst)); 1510 //zz return dst; 1511 //zz } 1512 //zz } 1513 //zz 1514 //zz /* 8Uto32(LDle(expr32)) */ 1515 //zz if (e->Iex.Unop.op == Iop_8Uto32) { 1516 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32); 1517 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32, 1518 //zz unop(Iop_8Uto32, 1519 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1520 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1521 //zz HReg dst = newVRegI(env); 1522 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1523 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1524 //zz return dst; 1525 //zz } 1526 //zz } 1527 //zz 1528 //zz /* 8Sto32(LDle(expr32)) */ 1529 //zz if (e->Iex.Unop.op == Iop_8Sto32) { 1530 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32); 1531 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32, 1532 //zz unop(Iop_8Sto32, 1533 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1534 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1535 //zz HReg dst = newVRegI(env); 1536 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1537 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1538 //zz return dst; 1539 //zz } 1540 //zz } 1541 //zz 1542 //zz /* 16Uto32(LDle(expr32)) */ 1543 //zz if (e->Iex.Unop.op == Iop_16Uto32) { 1544 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32); 1545 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32, 1546 //zz unop(Iop_16Uto32, 1547 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1548 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1549 //zz HReg dst = newVRegI(env); 1550 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1551 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1552 //zz return dst; 1553 //zz } 1554 //zz } 1555 //zz 1556 //zz /* 8Uto32(GET:I8) */ 1557 //zz if (e->Iex.Unop.op == Iop_8Uto32) { 1558 //zz if (e->Iex.Unop.arg->tag == Iex_Get) { 1559 //zz HReg dst; 1560 //zz X86AMode* amode; 1561 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1562 //zz dst = newVRegI(env); 1563 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1564 //zz hregX86_EBP()); 1565 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1566 //zz return dst; 1567 //zz } 1568 //zz } 1569 //zz 1570 //zz /* 16to32(GET:I16) */ 1571 //zz if (e->Iex.Unop.op == Iop_16Uto32) { 1572 //zz if (e->Iex.Unop.arg->tag == Iex_Get) { 1573 //zz HReg dst; 1574 //zz X86AMode* amode; 1575 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1576 //zz dst = newVRegI(env); 1577 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1578 //zz hregX86_EBP()); 1579 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1580 //zz return dst; 1581 //zz } 1582 //zz } 1583 1584 switch (e->Iex.Unop.op) { 1585 case Iop_8Uto32: { 1586 HReg dst = newVRegI(env); 1587 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1588 addInstr(env, ARMInstr_Alu(ARMalu_AND, 1589 dst, src, ARMRI84_I84(0xFF,0))); 1590 return dst; 1591 } 1592 //zz case Iop_8Uto16: 1593 //zz case Iop_8Uto32: 1594 //zz case Iop_16Uto32: { 1595 //zz HReg dst = newVRegI(env); 1596 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1597 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF; 1598 //zz addInstr(env, mk_iMOVsd_RR(src,dst) ); 1599 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, 1600 //zz X86RMI_Imm(mask), dst)); 1601 //zz return dst; 1602 //zz } 1603 //zz case Iop_8Sto16: 1604 //zz case Iop_8Sto32: 1605 case Iop_16Uto32: { 1606 HReg dst = newVRegI(env); 1607 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1608 ARMRI5* amt = ARMRI5_I5(16); 1609 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt)); 1610 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt)); 1611 return dst; 1612 } 1613 case Iop_8Sto32: 1614 case Iop_16Sto32: { 1615 HReg dst = newVRegI(env); 1616 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1617 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24); 1618 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt)); 1619 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 1620 return dst; 1621 } 1622 //zz case Iop_Not8: 1623 //zz case Iop_Not16: 1624 case Iop_Not32: { 1625 HReg dst = newVRegI(env); 1626 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1627 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src)); 1628 return dst; 1629 } 1630 case Iop_64HIto32: { 1631 HReg rHi, rLo; 1632 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1633 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1634 } 1635 case Iop_64to32: { 1636 HReg rHi, rLo; 1637 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1638 return rLo; /* similar stupid comment to the above ... */ 1639 } 1640 case Iop_64to8: { 1641 HReg rHi, rLo; 1642 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 1643 HReg tHi = newVRegI(env); 1644 HReg tLo = newVRegI(env); 1645 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg); 1646 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 1647 rHi = tHi; 1648 rLo = tLo; 1649 } else { 1650 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1651 } 1652 return rLo; 1653 } 1654 1655 case Iop_1Uto32: 1656 /* 1Uto32(tmp). Since I1 values generated into registers 1657 are guaranteed to have value either only zero or one, 1658 we can simply return the value of the register in this 1659 case. */ 1660 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 1661 HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); 1662 return dst; 1663 } 1664 /* else fall through */ 1665 case Iop_1Uto8: { 1666 HReg dst = newVRegI(env); 1667 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1668 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 1669 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 1670 return dst; 1671 } 1672 1673 case Iop_1Sto32: { 1674 HReg dst = newVRegI(env); 1675 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1676 ARMRI5* amt = ARMRI5_I5(31); 1677 /* This is really rough. We could do much better here; 1678 perhaps mvn{cond} dst, #0 as the second insn? 1679 (same applies to 1Sto64) */ 1680 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 1681 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 1682 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); 1683 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 1684 return dst; 1685 } 1686 1687 1688 //zz case Iop_1Sto8: 1689 //zz case Iop_1Sto16: 1690 //zz case Iop_1Sto32: { 1691 //zz /* could do better than this, but for now ... */ 1692 //zz HReg dst = newVRegI(env); 1693 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1694 //zz addInstr(env, X86Instr_Set32(cond,dst)); 1695 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1696 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1697 //zz return dst; 1698 //zz } 1699 //zz case Iop_Ctz32: { 1700 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */ 1701 //zz HReg dst = newVRegI(env); 1702 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1703 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1704 //zz return dst; 1705 //zz } 1706 case Iop_Clz32: { 1707 /* Count leading zeroes; easy on ARM. */ 1708 HReg dst = newVRegI(env); 1709 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1710 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src)); 1711 return dst; 1712 } 1713 1714 case Iop_CmpwNEZ32: { 1715 HReg dst = newVRegI(env); 1716 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1717 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src)); 1718 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src))); 1719 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31))); 1720 return dst; 1721 } 1722 1723 case Iop_Left32: { 1724 HReg dst = newVRegI(env); 1725 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1726 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src)); 1727 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src))); 1728 return dst; 1729 } 1730 1731 //zz case Iop_V128to32: { 1732 //zz HReg dst = newVRegI(env); 1733 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1734 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1735 //zz sub_from_esp(env, 16); 1736 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1737 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst )); 1738 //zz add_to_esp(env, 16); 1739 //zz return dst; 1740 //zz } 1741 //zz 1742 case Iop_ReinterpF32asI32: { 1743 HReg dst = newVRegI(env); 1744 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 1745 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst)); 1746 return dst; 1747 } 1748 1749 //zz 1750 //zz case Iop_16to8: 1751 case Iop_32to8: 1752 case Iop_32to16: 1753 /* These are no-ops. */ 1754 return iselIntExpr_R(env, e->Iex.Unop.arg); 1755 1756 default: 1757 break; 1758 } 1759 1760 /* All Unop cases involving host-side helper calls. */ 1761 void* fn = NULL; 1762 switch (e->Iex.Unop.op) { 1763 case Iop_CmpNEZ16x2: 1764 fn = &h_generic_calc_CmpNEZ16x2; break; 1765 case Iop_CmpNEZ8x4: 1766 fn = &h_generic_calc_CmpNEZ8x4; break; 1767 default: 1768 break; 1769 } 1770 1771 if (fn) { 1772 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); 1773 HReg res = newVRegI(env); 1774 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg)); 1775 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1776 1, mk_RetLoc_simple(RLPri_Int) )); 1777 addInstr(env, mk_iMOVds_RR(res, hregARM_R0())); 1778 return res; 1779 } 1780 1781 break; 1782 } 1783 1784 /* --------- GET --------- */ 1785 case Iex_Get: { 1786 if (ty == Ity_I32 1787 && 0 == (e->Iex.Get.offset & 3) 1788 && e->Iex.Get.offset < 4096-4) { 1789 HReg dst = newVRegI(env); 1790 addInstr(env, ARMInstr_LdSt32( 1791 ARMcc_AL, True/*isLoad*/, 1792 dst, 1793 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset))); 1794 return dst; 1795 } 1796 //zz if (ty == Ity_I8 || ty == Ity_I16) { 1797 //zz HReg dst = newVRegI(env); 1798 //zz addInstr(env, X86Instr_LoadEX( 1799 //zz toUChar(ty==Ity_I8 ? 1 : 2), 1800 //zz False, 1801 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1802 //zz dst)); 1803 //zz return dst; 1804 //zz } 1805 break; 1806 } 1807 1808 //zz case Iex_GetI: { 1809 //zz X86AMode* am 1810 //zz = genGuestArrayOffset( 1811 //zz env, e->Iex.GetI.descr, 1812 //zz e->Iex.GetI.ix, e->Iex.GetI.bias ); 1813 //zz HReg dst = newVRegI(env); 1814 //zz if (ty == Ity_I8) { 1815 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1816 //zz return dst; 1817 //zz } 1818 //zz if (ty == Ity_I32) { 1819 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1820 //zz return dst; 1821 //zz } 1822 //zz break; 1823 //zz } 1824 1825 /* --------- CCALL --------- */ 1826 case Iex_CCall: { 1827 HReg dst = newVRegI(env); 1828 vassert(ty == e->Iex.CCall.retty); 1829 1830 /* be very restrictive for now. Only 32/64-bit ints allowed for 1831 args, and 32 bits for return type. Don't forget to change 1832 the RetLoc if more types are allowed in future. */ 1833 if (e->Iex.CCall.retty != Ity_I32) 1834 goto irreducible; 1835 1836 /* Marshal args, do the call, clear stack. */ 1837 UInt addToSp = 0; 1838 RetLoc rloc = mk_RetLoc_INVALID(); 1839 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1840 e->Iex.CCall.cee, e->Iex.CCall.retty, 1841 e->Iex.CCall.args ); 1842 /* */ 1843 if (ok) { 1844 vassert(is_sane_RetLoc(rloc)); 1845 vassert(rloc.pri == RLPri_Int); 1846 vassert(addToSp == 0); 1847 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0())); 1848 return dst; 1849 } 1850 /* else fall through; will hit the irreducible: label */ 1851 } 1852 1853 /* --------- LITERAL --------- */ 1854 /* 32 literals */ 1855 case Iex_Const: { 1856 UInt u = 0; 1857 HReg dst = newVRegI(env); 1858 switch (e->Iex.Const.con->tag) { 1859 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1860 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1861 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1862 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)"); 1863 } 1864 addInstr(env, ARMInstr_Imm32(dst, u)); 1865 return dst; 1866 } 1867 1868 /* --------- MULTIPLEX --------- */ 1869 case Iex_ITE: { // VFD 1870 /* ITE(ccexpr, iftrue, iffalse) */ 1871 if (ty == Ity_I32) { 1872 ARMCondCode cc; 1873 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 1874 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse); 1875 HReg dst = newVRegI(env); 1876 addInstr(env, mk_iMOVds_RR(dst, r1)); 1877 cc = iselCondCode(env, e->Iex.ITE.cond); 1878 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0)); 1879 return dst; 1880 } 1881 break; 1882 } 1883 1884 default: 1885 break; 1886 } /* switch (e->tag) */ 1887 1888 /* We get here if no pattern matched. */ 1889 irreducible: 1890 ppIRExpr(e); 1891 vpanic("iselIntExpr_R: cannot reduce tree"); 1892 } 1893 1894 1895 /* -------------------- 64-bit -------------------- */ 1896 1897 /* Compute a 64-bit value into a register pair, which is returned as 1898 the first two parameters. As with iselIntExpr_R, these may be 1899 either real or virtual regs; in any case they must not be changed 1900 by subsequent code emitted by the caller. */ 1901 1902 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1903 { 1904 iselInt64Expr_wrk(rHi, rLo, env, e); 1905 # if 0 1906 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1907 # endif 1908 vassert(hregClass(*rHi) == HRcInt32); 1909 vassert(hregIsVirtual(*rHi)); 1910 vassert(hregClass(*rLo) == HRcInt32); 1911 vassert(hregIsVirtual(*rLo)); 1912 } 1913 1914 /* DO NOT CALL THIS DIRECTLY ! */ 1915 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1916 { 1917 vassert(e); 1918 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 1919 1920 /* 64-bit literal */ 1921 if (e->tag == Iex_Const) { 1922 ULong w64 = e->Iex.Const.con->Ico.U64; 1923 UInt wHi = toUInt(w64 >> 32); 1924 UInt wLo = toUInt(w64); 1925 HReg tHi = newVRegI(env); 1926 HReg tLo = newVRegI(env); 1927 vassert(e->Iex.Const.con->tag == Ico_U64); 1928 addInstr(env, ARMInstr_Imm32(tHi, wHi)); 1929 addInstr(env, ARMInstr_Imm32(tLo, wLo)); 1930 *rHi = tHi; 1931 *rLo = tLo; 1932 return; 1933 } 1934 1935 /* read 64-bit IRTemp */ 1936 if (e->tag == Iex_RdTmp) { 1937 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 1938 HReg tHi = newVRegI(env); 1939 HReg tLo = newVRegI(env); 1940 HReg tmp = iselNeon64Expr(env, e); 1941 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 1942 *rHi = tHi; 1943 *rLo = tLo; 1944 } else { 1945 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 1946 } 1947 return; 1948 } 1949 1950 /* 64-bit load */ 1951 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 1952 HReg tLo, tHi, rA; 1953 vassert(e->Iex.Load.ty == Ity_I64); 1954 rA = iselIntExpr_R(env, e->Iex.Load.addr); 1955 tHi = newVRegI(env); 1956 tLo = newVRegI(env); 1957 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, 1958 tHi, ARMAMode1_RI(rA, 4))); 1959 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, 1960 tLo, ARMAMode1_RI(rA, 0))); 1961 *rHi = tHi; 1962 *rLo = tLo; 1963 return; 1964 } 1965 1966 /* 64-bit GET */ 1967 if (e->tag == Iex_Get) { 1968 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0); 1969 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4); 1970 HReg tHi = newVRegI(env); 1971 HReg tLo = newVRegI(env); 1972 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4)); 1973 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0)); 1974 *rHi = tHi; 1975 *rLo = tLo; 1976 return; 1977 } 1978 1979 /* --------- BINARY ops --------- */ 1980 if (e->tag == Iex_Binop) { 1981 switch (e->Iex.Binop.op) { 1982 1983 /* 32 x 32 -> 64 multiply */ 1984 case Iop_MullS32: 1985 case Iop_MullU32: { 1986 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1987 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1988 HReg tHi = newVRegI(env); 1989 HReg tLo = newVRegI(env); 1990 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32 1991 ? ARMmul_SX : ARMmul_ZX; 1992 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); 1993 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); 1994 addInstr(env, ARMInstr_Mul(mop)); 1995 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1())); 1996 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0())); 1997 *rHi = tHi; 1998 *rLo = tLo; 1999 return; 2000 } 2001 2002 case Iop_Or64: { 2003 HReg xLo, xHi, yLo, yHi; 2004 HReg tHi = newVRegI(env); 2005 HReg tLo = newVRegI(env); 2006 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2007 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2008 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi))); 2009 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo))); 2010 *rHi = tHi; 2011 *rLo = tLo; 2012 return; 2013 } 2014 2015 case Iop_Add64: { 2016 HReg xLo, xHi, yLo, yHi; 2017 HReg tHi = newVRegI(env); 2018 HReg tLo = newVRegI(env); 2019 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2020 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2021 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo))); 2022 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi))); 2023 *rHi = tHi; 2024 *rLo = tLo; 2025 return; 2026 } 2027 2028 /* 32HLto64(e1,e2) */ 2029 case Iop_32HLto64: { 2030 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2031 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2032 return; 2033 } 2034 2035 default: 2036 break; 2037 } 2038 } 2039 2040 /* --------- UNARY ops --------- */ 2041 if (e->tag == Iex_Unop) { 2042 switch (e->Iex.Unop.op) { 2043 2044 /* ReinterpF64asI64 */ 2045 case Iop_ReinterpF64asI64: { 2046 HReg dstHi = newVRegI(env); 2047 HReg dstLo = newVRegI(env); 2048 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 2049 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo)); 2050 *rHi = dstHi; 2051 *rLo = dstLo; 2052 return; 2053 } 2054 2055 /* Left64(e) */ 2056 case Iop_Left64: { 2057 HReg yLo, yHi; 2058 HReg tHi = newVRegI(env); 2059 HReg tLo = newVRegI(env); 2060 HReg zero = newVRegI(env); 2061 /* yHi:yLo = arg */ 2062 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2063 /* zero = 0 */ 2064 addInstr(env, ARMInstr_Imm32(zero, 0)); 2065 /* tLo = 0 - yLo, and set carry */ 2066 addInstr(env, ARMInstr_Alu(ARMalu_SUBS, 2067 tLo, zero, ARMRI84_R(yLo))); 2068 /* tHi = 0 - yHi - carry */ 2069 addInstr(env, ARMInstr_Alu(ARMalu_SBC, 2070 tHi, zero, ARMRI84_R(yHi))); 2071 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2072 back in, so as to give the final result 2073 tHi:tLo = arg | -arg. */ 2074 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi))); 2075 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo))); 2076 *rHi = tHi; 2077 *rLo = tLo; 2078 return; 2079 } 2080 2081 /* CmpwNEZ64(e) */ 2082 case Iop_CmpwNEZ64: { 2083 HReg srcLo, srcHi; 2084 HReg tmp1 = newVRegI(env); 2085 HReg tmp2 = newVRegI(env); 2086 /* srcHi:srcLo = arg */ 2087 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2088 /* tmp1 = srcHi | srcLo */ 2089 addInstr(env, ARMInstr_Alu(ARMalu_OR, 2090 tmp1, srcHi, ARMRI84_R(srcLo))); 2091 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2092 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1)); 2093 addInstr(env, ARMInstr_Alu(ARMalu_OR, 2094 tmp2, tmp2, ARMRI84_R(tmp1))); 2095 addInstr(env, ARMInstr_Shift(ARMsh_SAR, 2096 tmp2, tmp2, ARMRI5_I5(31))); 2097 *rHi = tmp2; 2098 *rLo = tmp2; 2099 return; 2100 } 2101 2102 case Iop_1Sto64: { 2103 HReg dst = newVRegI(env); 2104 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2105 ARMRI5* amt = ARMRI5_I5(31); 2106 /* This is really rough. We could do much better here; 2107 perhaps mvn{cond} dst, #0 as the second insn? 2108 (same applies to 1Sto32) */ 2109 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 2110 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 2111 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); 2112 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 2113 *rHi = dst; 2114 *rLo = dst; 2115 return; 2116 } 2117 2118 default: 2119 break; 2120 } 2121 } /* if (e->tag == Iex_Unop) */ 2122 2123 /* --------- MULTIPLEX --------- */ 2124 if (e->tag == Iex_ITE) { // VFD 2125 IRType tyC; 2126 HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo; 2127 ARMCondCode cc; 2128 tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond); 2129 vassert(tyC == Ity_I1); 2130 iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue); 2131 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse); 2132 dstHi = newVRegI(env); 2133 dstLo = newVRegI(env); 2134 addInstr(env, mk_iMOVds_RR(dstHi, r1hi)); 2135 addInstr(env, mk_iMOVds_RR(dstLo, r1lo)); 2136 cc = iselCondCode(env, e->Iex.ITE.cond); 2137 addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi))); 2138 addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo))); 2139 *rHi = dstHi; 2140 *rLo = dstLo; 2141 return; 2142 } 2143 2144 /* It is convenient sometimes to call iselInt64Expr even when we 2145 have NEON support (e.g. in do_helper_call we need 64-bit 2146 arguments as 2 x 32 regs). */ 2147 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 2148 HReg tHi = newVRegI(env); 2149 HReg tLo = newVRegI(env); 2150 HReg tmp = iselNeon64Expr(env, e); 2151 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); 2152 *rHi = tHi; 2153 *rLo = tLo; 2154 return ; 2155 } 2156 2157 ppIRExpr(e); 2158 vpanic("iselInt64Expr"); 2159 } 2160 2161 2162 /*---------------------------------------------------------*/ 2163 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/ 2164 /*---------------------------------------------------------*/ 2165 2166 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ) 2167 { 2168 HReg r = iselNeon64Expr_wrk( env, e ); 2169 vassert(hregClass(r) == HRcFlt64); 2170 vassert(hregIsVirtual(r)); 2171 return r; 2172 } 2173 2174 /* DO NOT CALL THIS DIRECTLY */ 2175 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ) 2176 { 2177 IRType ty = typeOfIRExpr(env->type_env, e); 2178 MatchInfo mi; 2179 vassert(e); 2180 vassert(ty == Ity_I64); 2181 2182 if (e->tag == Iex_RdTmp) { 2183 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2184 } 2185 2186 if (e->tag == Iex_Const) { 2187 HReg rLo, rHi; 2188 HReg res = newVRegD(env); 2189 iselInt64Expr(&rHi, &rLo, env, e); 2190 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2191 return res; 2192 } 2193 2194 /* 64-bit load */ 2195 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2196 HReg res = newVRegD(env); 2197 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); 2198 vassert(ty == Ity_I64); 2199 addInstr(env, ARMInstr_NLdStD(True, res, am)); 2200 return res; 2201 } 2202 2203 /* 64-bit GET */ 2204 if (e->tag == Iex_Get) { 2205 HReg addr = newVRegI(env); 2206 HReg res = newVRegD(env); 2207 vassert(ty == Ity_I64); 2208 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset)); 2209 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr))); 2210 return res; 2211 } 2212 2213 /* --------- BINARY ops --------- */ 2214 if (e->tag == Iex_Binop) { 2215 switch (e->Iex.Binop.op) { 2216 2217 /* 32 x 32 -> 64 multiply */ 2218 case Iop_MullS32: 2219 case Iop_MullU32: { 2220 HReg rLo, rHi; 2221 HReg res = newVRegD(env); 2222 iselInt64Expr(&rHi, &rLo, env, e); 2223 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2224 return res; 2225 } 2226 2227 case Iop_And64: { 2228 HReg res = newVRegD(env); 2229 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2230 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2231 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 2232 res, argL, argR, 4, False)); 2233 return res; 2234 } 2235 case Iop_Or64: { 2236 HReg res = newVRegD(env); 2237 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2238 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2239 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 2240 res, argL, argR, 4, False)); 2241 return res; 2242 } 2243 case Iop_Xor64: { 2244 HReg res = newVRegD(env); 2245 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2246 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2247 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 2248 res, argL, argR, 4, False)); 2249 return res; 2250 } 2251 2252 /* 32HLto64(e1,e2) */ 2253 case Iop_32HLto64: { 2254 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2255 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2256 HReg res = newVRegD(env); 2257 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2258 return res; 2259 } 2260 2261 case Iop_Add8x8: 2262 case Iop_Add16x4: 2263 case Iop_Add32x2: 2264 case Iop_Add64: { 2265 HReg res = newVRegD(env); 2266 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2267 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2268 UInt size; 2269 switch (e->Iex.Binop.op) { 2270 case Iop_Add8x8: size = 0; break; 2271 case Iop_Add16x4: size = 1; break; 2272 case Iop_Add32x2: size = 2; break; 2273 case Iop_Add64: size = 3; break; 2274 default: vassert(0); 2275 } 2276 addInstr(env, ARMInstr_NBinary(ARMneon_VADD, 2277 res, argL, argR, size, False)); 2278 return res; 2279 } 2280 case Iop_Add32Fx2: { 2281 HReg res = newVRegD(env); 2282 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2283 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2284 UInt size = 0; 2285 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, 2286 res, argL, argR, size, False)); 2287 return res; 2288 } 2289 case Iop_Recps32Fx2: { 2290 HReg res = newVRegD(env); 2291 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2292 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2293 UInt size = 0; 2294 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 2295 res, argL, argR, size, False)); 2296 return res; 2297 } 2298 case Iop_Rsqrts32Fx2: { 2299 HReg res = newVRegD(env); 2300 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2301 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2302 UInt size = 0; 2303 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 2304 res, argL, argR, size, False)); 2305 return res; 2306 } 2307 2308 // These 6 verified 18 Apr 2013 2309 case Iop_InterleaveHI32x2: 2310 case Iop_InterleaveLO32x2: 2311 case Iop_InterleaveOddLanes8x8: 2312 case Iop_InterleaveEvenLanes8x8: 2313 case Iop_InterleaveOddLanes16x4: 2314 case Iop_InterleaveEvenLanes16x4: { 2315 HReg rD = newVRegD(env); 2316 HReg rM = newVRegD(env); 2317 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2318 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2319 UInt size; 2320 Bool resRd; // is the result in rD or rM ? 2321 switch (e->Iex.Binop.op) { 2322 case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break; 2323 case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break; 2324 case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break; 2325 case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break; 2326 case Iop_InterleaveHI32x2: resRd = False; size = 2; break; 2327 case Iop_InterleaveLO32x2: resRd = True; size = 2; break; 2328 default: vassert(0); 2329 } 2330 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2331 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2332 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False)); 2333 return resRd ? rD : rM; 2334 } 2335 2336 // These 4 verified 18 Apr 2013 2337 case Iop_InterleaveHI8x8: 2338 case Iop_InterleaveLO8x8: 2339 case Iop_InterleaveHI16x4: 2340 case Iop_InterleaveLO16x4: { 2341 HReg rD = newVRegD(env); 2342 HReg rM = newVRegD(env); 2343 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2344 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2345 UInt size; 2346 Bool resRd; // is the result in rD or rM ? 2347 switch (e->Iex.Binop.op) { 2348 case Iop_InterleaveHI8x8: resRd = False; size = 0; break; 2349 case Iop_InterleaveLO8x8: resRd = True; size = 0; break; 2350 case Iop_InterleaveHI16x4: resRd = False; size = 1; break; 2351 case Iop_InterleaveLO16x4: resRd = True; size = 1; break; 2352 default: vassert(0); 2353 } 2354 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2355 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2356 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False)); 2357 return resRd ? rD : rM; 2358 } 2359 2360 // These 4 verified 18 Apr 2013 2361 case Iop_CatOddLanes8x8: 2362 case Iop_CatEvenLanes8x8: 2363 case Iop_CatOddLanes16x4: 2364 case Iop_CatEvenLanes16x4: { 2365 HReg rD = newVRegD(env); 2366 HReg rM = newVRegD(env); 2367 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2368 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2369 UInt size; 2370 Bool resRd; // is the result in rD or rM ? 2371 switch (e->Iex.Binop.op) { 2372 case Iop_CatOddLanes8x8: resRd = False; size = 0; break; 2373 case Iop_CatEvenLanes8x8: resRd = True; size = 0; break; 2374 case Iop_CatOddLanes16x4: resRd = False; size = 1; break; 2375 case Iop_CatEvenLanes16x4: resRd = True; size = 1; break; 2376 default: vassert(0); 2377 } 2378 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); 2379 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); 2380 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False)); 2381 return resRd ? rD : rM; 2382 } 2383 2384 case Iop_QAdd8Ux8: 2385 case Iop_QAdd16Ux4: 2386 case Iop_QAdd32Ux2: 2387 case Iop_QAdd64Ux1: { 2388 HReg res = newVRegD(env); 2389 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2390 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2391 UInt size; 2392 switch (e->Iex.Binop.op) { 2393 case Iop_QAdd8Ux8: size = 0; break; 2394 case Iop_QAdd16Ux4: size = 1; break; 2395 case Iop_QAdd32Ux2: size = 2; break; 2396 case Iop_QAdd64Ux1: size = 3; break; 2397 default: vassert(0); 2398 } 2399 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 2400 res, argL, argR, size, False)); 2401 return res; 2402 } 2403 case Iop_QAdd8Sx8: 2404 case Iop_QAdd16Sx4: 2405 case Iop_QAdd32Sx2: 2406 case Iop_QAdd64Sx1: { 2407 HReg res = newVRegD(env); 2408 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2409 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2410 UInt size; 2411 switch (e->Iex.Binop.op) { 2412 case Iop_QAdd8Sx8: size = 0; break; 2413 case Iop_QAdd16Sx4: size = 1; break; 2414 case Iop_QAdd32Sx2: size = 2; break; 2415 case Iop_QAdd64Sx1: size = 3; break; 2416 default: vassert(0); 2417 } 2418 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 2419 res, argL, argR, size, False)); 2420 return res; 2421 } 2422 case Iop_Sub8x8: 2423 case Iop_Sub16x4: 2424 case Iop_Sub32x2: 2425 case Iop_Sub64: { 2426 HReg res = newVRegD(env); 2427 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2428 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2429 UInt size; 2430 switch (e->Iex.Binop.op) { 2431 case Iop_Sub8x8: size = 0; break; 2432 case Iop_Sub16x4: size = 1; break; 2433 case Iop_Sub32x2: size = 2; break; 2434 case Iop_Sub64: size = 3; break; 2435 default: vassert(0); 2436 } 2437 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2438 res, argL, argR, size, False)); 2439 return res; 2440 } 2441 case Iop_Sub32Fx2: { 2442 HReg res = newVRegD(env); 2443 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2444 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2445 UInt size = 0; 2446 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, 2447 res, argL, argR, size, False)); 2448 return res; 2449 } 2450 case Iop_QSub8Ux8: 2451 case Iop_QSub16Ux4: 2452 case Iop_QSub32Ux2: 2453 case Iop_QSub64Ux1: { 2454 HReg res = newVRegD(env); 2455 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2456 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2457 UInt size; 2458 switch (e->Iex.Binop.op) { 2459 case Iop_QSub8Ux8: size = 0; break; 2460 case Iop_QSub16Ux4: size = 1; break; 2461 case Iop_QSub32Ux2: size = 2; break; 2462 case Iop_QSub64Ux1: size = 3; break; 2463 default: vassert(0); 2464 } 2465 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 2466 res, argL, argR, size, False)); 2467 return res; 2468 } 2469 case Iop_QSub8Sx8: 2470 case Iop_QSub16Sx4: 2471 case Iop_QSub32Sx2: 2472 case Iop_QSub64Sx1: { 2473 HReg res = newVRegD(env); 2474 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2475 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2476 UInt size; 2477 switch (e->Iex.Binop.op) { 2478 case Iop_QSub8Sx8: size = 0; break; 2479 case Iop_QSub16Sx4: size = 1; break; 2480 case Iop_QSub32Sx2: size = 2; break; 2481 case Iop_QSub64Sx1: size = 3; break; 2482 default: vassert(0); 2483 } 2484 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 2485 res, argL, argR, size, False)); 2486 return res; 2487 } 2488 case Iop_Max8Ux8: 2489 case Iop_Max16Ux4: 2490 case Iop_Max32Ux2: { 2491 HReg res = newVRegD(env); 2492 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2493 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2494 UInt size; 2495 switch (e->Iex.Binop.op) { 2496 case Iop_Max8Ux8: size = 0; break; 2497 case Iop_Max16Ux4: size = 1; break; 2498 case Iop_Max32Ux2: size = 2; break; 2499 default: vassert(0); 2500 } 2501 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 2502 res, argL, argR, size, False)); 2503 return res; 2504 } 2505 case Iop_Max8Sx8: 2506 case Iop_Max16Sx4: 2507 case Iop_Max32Sx2: { 2508 HReg res = newVRegD(env); 2509 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2510 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2511 UInt size; 2512 switch (e->Iex.Binop.op) { 2513 case Iop_Max8Sx8: size = 0; break; 2514 case Iop_Max16Sx4: size = 1; break; 2515 case Iop_Max32Sx2: size = 2; break; 2516 default: vassert(0); 2517 } 2518 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 2519 res, argL, argR, size, False)); 2520 return res; 2521 } 2522 case Iop_Min8Ux8: 2523 case Iop_Min16Ux4: 2524 case Iop_Min32Ux2: { 2525 HReg res = newVRegD(env); 2526 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2527 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2528 UInt size; 2529 switch (e->Iex.Binop.op) { 2530 case Iop_Min8Ux8: size = 0; break; 2531 case Iop_Min16Ux4: size = 1; break; 2532 case Iop_Min32Ux2: size = 2; break; 2533 default: vassert(0); 2534 } 2535 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 2536 res, argL, argR, size, False)); 2537 return res; 2538 } 2539 case Iop_Min8Sx8: 2540 case Iop_Min16Sx4: 2541 case Iop_Min32Sx2: { 2542 HReg res = newVRegD(env); 2543 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2544 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2545 UInt size; 2546 switch (e->Iex.Binop.op) { 2547 case Iop_Min8Sx8: size = 0; break; 2548 case Iop_Min16Sx4: size = 1; break; 2549 case Iop_Min32Sx2: size = 2; break; 2550 default: vassert(0); 2551 } 2552 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 2553 res, argL, argR, size, False)); 2554 return res; 2555 } 2556 case Iop_Sar8x8: 2557 case Iop_Sar16x4: 2558 case Iop_Sar32x2: { 2559 HReg res = newVRegD(env); 2560 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2561 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2562 HReg argR2 = newVRegD(env); 2563 HReg zero = newVRegD(env); 2564 UInt size; 2565 switch (e->Iex.Binop.op) { 2566 case Iop_Sar8x8: size = 0; break; 2567 case Iop_Sar16x4: size = 1; break; 2568 case Iop_Sar32x2: size = 2; break; 2569 case Iop_Sar64: size = 3; break; 2570 default: vassert(0); 2571 } 2572 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 2573 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2574 argR2, zero, argR, size, False)); 2575 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2576 res, argL, argR2, size, False)); 2577 return res; 2578 } 2579 case Iop_Sal8x8: 2580 case Iop_Sal16x4: 2581 case Iop_Sal32x2: 2582 case Iop_Sal64x1: { 2583 HReg res = newVRegD(env); 2584 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2585 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2586 UInt size; 2587 switch (e->Iex.Binop.op) { 2588 case Iop_Sal8x8: size = 0; break; 2589 case Iop_Sal16x4: size = 1; break; 2590 case Iop_Sal32x2: size = 2; break; 2591 case Iop_Sal64x1: size = 3; break; 2592 default: vassert(0); 2593 } 2594 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2595 res, argL, argR, size, False)); 2596 return res; 2597 } 2598 case Iop_Shr8x8: 2599 case Iop_Shr16x4: 2600 case Iop_Shr32x2: { 2601 HReg res = newVRegD(env); 2602 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2603 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2604 HReg argR2 = newVRegD(env); 2605 HReg zero = newVRegD(env); 2606 UInt size; 2607 switch (e->Iex.Binop.op) { 2608 case Iop_Shr8x8: size = 0; break; 2609 case Iop_Shr16x4: size = 1; break; 2610 case Iop_Shr32x2: size = 2; break; 2611 default: vassert(0); 2612 } 2613 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 2614 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2615 argR2, zero, argR, size, False)); 2616 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2617 res, argL, argR2, size, False)); 2618 return res; 2619 } 2620 case Iop_Shl8x8: 2621 case Iop_Shl16x4: 2622 case Iop_Shl32x2: { 2623 HReg res = newVRegD(env); 2624 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2625 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2626 UInt size; 2627 switch (e->Iex.Binop.op) { 2628 case Iop_Shl8x8: size = 0; break; 2629 case Iop_Shl16x4: size = 1; break; 2630 case Iop_Shl32x2: size = 2; break; 2631 default: vassert(0); 2632 } 2633 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2634 res, argL, argR, size, False)); 2635 return res; 2636 } 2637 case Iop_QShl8x8: 2638 case Iop_QShl16x4: 2639 case Iop_QShl32x2: 2640 case Iop_QShl64x1: { 2641 HReg res = newVRegD(env); 2642 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2643 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2644 UInt size; 2645 switch (e->Iex.Binop.op) { 2646 case Iop_QShl8x8: size = 0; break; 2647 case Iop_QShl16x4: size = 1; break; 2648 case Iop_QShl32x2: size = 2; break; 2649 case Iop_QShl64x1: size = 3; break; 2650 default: vassert(0); 2651 } 2652 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 2653 res, argL, argR, size, False)); 2654 return res; 2655 } 2656 case Iop_QSal8x8: 2657 case Iop_QSal16x4: 2658 case Iop_QSal32x2: 2659 case Iop_QSal64x1: { 2660 HReg res = newVRegD(env); 2661 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2662 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2663 UInt size; 2664 switch (e->Iex.Binop.op) { 2665 case Iop_QSal8x8: size = 0; break; 2666 case Iop_QSal16x4: size = 1; break; 2667 case Iop_QSal32x2: size = 2; break; 2668 case Iop_QSal64x1: size = 3; break; 2669 default: vassert(0); 2670 } 2671 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 2672 res, argL, argR, size, False)); 2673 return res; 2674 } 2675 case Iop_QShlN8x8: 2676 case Iop_QShlN16x4: 2677 case Iop_QShlN32x2: 2678 case Iop_QShlN64x1: { 2679 HReg res = newVRegD(env); 2680 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2681 UInt size, imm; 2682 if (e->Iex.Binop.arg2->tag != Iex_Const || 2683 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2684 vpanic("ARM taget supports Iop_QShlNAxB with constant " 2685 "second argument only\n"); 2686 } 2687 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2688 switch (e->Iex.Binop.op) { 2689 case Iop_QShlN8x8: size = 8 | imm; break; 2690 case Iop_QShlN16x4: size = 16 | imm; break; 2691 case Iop_QShlN32x2: size = 32 | imm; break; 2692 case Iop_QShlN64x1: size = 64 | imm; break; 2693 default: vassert(0); 2694 } 2695 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 2696 res, argL, size, False)); 2697 return res; 2698 } 2699 case Iop_QShlN8Sx8: 2700 case Iop_QShlN16Sx4: 2701 case Iop_QShlN32Sx2: 2702 case Iop_QShlN64Sx1: { 2703 HReg res = newVRegD(env); 2704 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2705 UInt size, imm; 2706 if (e->Iex.Binop.arg2->tag != Iex_Const || 2707 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2708 vpanic("ARM taget supports Iop_QShlNAxB with constant " 2709 "second argument only\n"); 2710 } 2711 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2712 switch (e->Iex.Binop.op) { 2713 case Iop_QShlN8Sx8: size = 8 | imm; break; 2714 case Iop_QShlN16Sx4: size = 16 | imm; break; 2715 case Iop_QShlN32Sx2: size = 32 | imm; break; 2716 case Iop_QShlN64Sx1: size = 64 | imm; break; 2717 default: vassert(0); 2718 } 2719 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 2720 res, argL, size, False)); 2721 return res; 2722 } 2723 case Iop_QSalN8x8: 2724 case Iop_QSalN16x4: 2725 case Iop_QSalN32x2: 2726 case Iop_QSalN64x1: { 2727 HReg res = newVRegD(env); 2728 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2729 UInt size, imm; 2730 if (e->Iex.Binop.arg2->tag != Iex_Const || 2731 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2732 vpanic("ARM taget supports Iop_QShlNAxB with constant " 2733 "second argument only\n"); 2734 } 2735 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2736 switch (e->Iex.Binop.op) { 2737 case Iop_QSalN8x8: size = 8 | imm; break; 2738 case Iop_QSalN16x4: size = 16 | imm; break; 2739 case Iop_QSalN32x2: size = 32 | imm; break; 2740 case Iop_QSalN64x1: size = 64 | imm; break; 2741 default: vassert(0); 2742 } 2743 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 2744 res, argL, size, False)); 2745 return res; 2746 } 2747 case Iop_ShrN8x8: 2748 case Iop_ShrN16x4: 2749 case Iop_ShrN32x2: 2750 case Iop_Shr64: { 2751 HReg res = newVRegD(env); 2752 HReg tmp = newVRegD(env); 2753 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2754 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2755 HReg argR2 = newVRegI(env); 2756 UInt size; 2757 switch (e->Iex.Binop.op) { 2758 case Iop_ShrN8x8: size = 0; break; 2759 case Iop_ShrN16x4: size = 1; break; 2760 case Iop_ShrN32x2: size = 2; break; 2761 case Iop_Shr64: size = 3; break; 2762 default: vassert(0); 2763 } 2764 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 2765 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 2766 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2767 res, argL, tmp, size, False)); 2768 return res; 2769 } 2770 case Iop_ShlN8x8: 2771 case Iop_ShlN16x4: 2772 case Iop_ShlN32x2: 2773 case Iop_Shl64: { 2774 HReg res = newVRegD(env); 2775 HReg tmp = newVRegD(env); 2776 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2777 /* special-case Shl64(x, imm8) since the Neon front 2778 end produces a lot of those for V{LD,ST}{1,2,3,4}. */ 2779 if (e->Iex.Binop.op == Iop_Shl64 2780 && e->Iex.Binop.arg2->tag == Iex_Const) { 2781 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 2782 Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2783 if (nshift >= 1 && nshift <= 63) { 2784 addInstr(env, ARMInstr_NShl64(res, argL, nshift)); 2785 return res; 2786 } 2787 /* else fall through to general case */ 2788 } 2789 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2790 UInt size; 2791 switch (e->Iex.Binop.op) { 2792 case Iop_ShlN8x8: size = 0; break; 2793 case Iop_ShlN16x4: size = 1; break; 2794 case Iop_ShlN32x2: size = 2; break; 2795 case Iop_Shl64: size = 3; break; 2796 default: vassert(0); 2797 } 2798 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, 2799 tmp, argR, 0, False)); 2800 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2801 res, argL, tmp, size, False)); 2802 return res; 2803 } 2804 case Iop_SarN8x8: 2805 case Iop_SarN16x4: 2806 case Iop_SarN32x2: 2807 case Iop_Sar64: { 2808 HReg res = newVRegD(env); 2809 HReg tmp = newVRegD(env); 2810 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2811 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2812 HReg argR2 = newVRegI(env); 2813 UInt size; 2814 switch (e->Iex.Binop.op) { 2815 case Iop_SarN8x8: size = 0; break; 2816 case Iop_SarN16x4: size = 1; break; 2817 case Iop_SarN32x2: size = 2; break; 2818 case Iop_Sar64: size = 3; break; 2819 default: vassert(0); 2820 } 2821 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 2822 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 2823 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2824 res, argL, tmp, size, False)); 2825 return res; 2826 } 2827 case Iop_CmpGT8Ux8: 2828 case Iop_CmpGT16Ux4: 2829 case Iop_CmpGT32Ux2: { 2830 HReg res = newVRegD(env); 2831 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2832 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2833 UInt size; 2834 switch (e->Iex.Binop.op) { 2835 case Iop_CmpGT8Ux8: size = 0; break; 2836 case Iop_CmpGT16Ux4: size = 1; break; 2837 case Iop_CmpGT32Ux2: size = 2; break; 2838 default: vassert(0); 2839 } 2840 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 2841 res, argL, argR, size, False)); 2842 return res; 2843 } 2844 case Iop_CmpGT8Sx8: 2845 case Iop_CmpGT16Sx4: 2846 case Iop_CmpGT32Sx2: { 2847 HReg res = newVRegD(env); 2848 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2849 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2850 UInt size; 2851 switch (e->Iex.Binop.op) { 2852 case Iop_CmpGT8Sx8: size = 0; break; 2853 case Iop_CmpGT16Sx4: size = 1; break; 2854 case Iop_CmpGT32Sx2: size = 2; break; 2855 default: vassert(0); 2856 } 2857 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 2858 res, argL, argR, size, False)); 2859 return res; 2860 } 2861 case Iop_CmpEQ8x8: 2862 case Iop_CmpEQ16x4: 2863 case Iop_CmpEQ32x2: { 2864 HReg res = newVRegD(env); 2865 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2866 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2867 UInt size; 2868 switch (e->Iex.Binop.op) { 2869 case Iop_CmpEQ8x8: size = 0; break; 2870 case Iop_CmpEQ16x4: size = 1; break; 2871 case Iop_CmpEQ32x2: size = 2; break; 2872 default: vassert(0); 2873 } 2874 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 2875 res, argL, argR, size, False)); 2876 return res; 2877 } 2878 case Iop_Mul8x8: 2879 case Iop_Mul16x4: 2880 case Iop_Mul32x2: { 2881 HReg res = newVRegD(env); 2882 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2883 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2884 UInt size = 0; 2885 switch(e->Iex.Binop.op) { 2886 case Iop_Mul8x8: size = 0; break; 2887 case Iop_Mul16x4: size = 1; break; 2888 case Iop_Mul32x2: size = 2; break; 2889 default: vassert(0); 2890 } 2891 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 2892 res, argL, argR, size, False)); 2893 return res; 2894 } 2895 case Iop_Mul32Fx2: { 2896 HReg res = newVRegD(env); 2897 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2898 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2899 UInt size = 0; 2900 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, 2901 res, argL, argR, size, False)); 2902 return res; 2903 } 2904 case Iop_QDMulHi16Sx4: 2905 case Iop_QDMulHi32Sx2: { 2906 HReg res = newVRegD(env); 2907 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2908 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2909 UInt size = 0; 2910 switch(e->Iex.Binop.op) { 2911 case Iop_QDMulHi16Sx4: size = 1; break; 2912 case Iop_QDMulHi32Sx2: size = 2; break; 2913 default: vassert(0); 2914 } 2915 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 2916 res, argL, argR, size, False)); 2917 return res; 2918 } 2919 2920 case Iop_QRDMulHi16Sx4: 2921 case Iop_QRDMulHi32Sx2: { 2922 HReg res = newVRegD(env); 2923 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2924 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2925 UInt size = 0; 2926 switch(e->Iex.Binop.op) { 2927 case Iop_QRDMulHi16Sx4: size = 1; break; 2928 case Iop_QRDMulHi32Sx2: size = 2; break; 2929 default: vassert(0); 2930 } 2931 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 2932 res, argL, argR, size, False)); 2933 return res; 2934 } 2935 2936 case Iop_PwAdd8x8: 2937 case Iop_PwAdd16x4: 2938 case Iop_PwAdd32x2: { 2939 HReg res = newVRegD(env); 2940 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2941 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2942 UInt size = 0; 2943 switch(e->Iex.Binop.op) { 2944 case Iop_PwAdd8x8: size = 0; break; 2945 case Iop_PwAdd16x4: size = 1; break; 2946 case Iop_PwAdd32x2: size = 2; break; 2947 default: vassert(0); 2948 } 2949 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 2950 res, argL, argR, size, False)); 2951 return res; 2952 } 2953 case Iop_PwAdd32Fx2: { 2954 HReg res = newVRegD(env); 2955 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2956 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2957 UInt size = 0; 2958 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP, 2959 res, argL, argR, size, False)); 2960 return res; 2961 } 2962 case Iop_PwMin8Ux8: 2963 case Iop_PwMin16Ux4: 2964 case Iop_PwMin32Ux2: { 2965 HReg res = newVRegD(env); 2966 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2967 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2968 UInt size = 0; 2969 switch(e->Iex.Binop.op) { 2970 case Iop_PwMin8Ux8: size = 0; break; 2971 case Iop_PwMin16Ux4: size = 1; break; 2972 case Iop_PwMin32Ux2: size = 2; break; 2973 default: vassert(0); 2974 } 2975 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU, 2976 res, argL, argR, size, False)); 2977 return res; 2978 } 2979 case Iop_PwMin8Sx8: 2980 case Iop_PwMin16Sx4: 2981 case Iop_PwMin32Sx2: { 2982 HReg res = newVRegD(env); 2983 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2984 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2985 UInt size = 0; 2986 switch(e->Iex.Binop.op) { 2987 case Iop_PwMin8Sx8: size = 0; break; 2988 case Iop_PwMin16Sx4: size = 1; break; 2989 case Iop_PwMin32Sx2: size = 2; break; 2990 default: vassert(0); 2991 } 2992 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS, 2993 res, argL, argR, size, False)); 2994 return res; 2995 } 2996 case Iop_PwMax8Ux8: 2997 case Iop_PwMax16Ux4: 2998 case Iop_PwMax32Ux2: { 2999 HReg res = newVRegD(env); 3000 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3001 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3002 UInt size = 0; 3003 switch(e->Iex.Binop.op) { 3004 case Iop_PwMax8Ux8: size = 0; break; 3005 case Iop_PwMax16Ux4: size = 1; break; 3006 case Iop_PwMax32Ux2: size = 2; break; 3007 default: vassert(0); 3008 } 3009 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU, 3010 res, argL, argR, size, False)); 3011 return res; 3012 } 3013 case Iop_PwMax8Sx8: 3014 case Iop_PwMax16Sx4: 3015 case Iop_PwMax32Sx2: { 3016 HReg res = newVRegD(env); 3017 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3018 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3019 UInt size = 0; 3020 switch(e->Iex.Binop.op) { 3021 case Iop_PwMax8Sx8: size = 0; break; 3022 case Iop_PwMax16Sx4: size = 1; break; 3023 case Iop_PwMax32Sx2: size = 2; break; 3024 default: vassert(0); 3025 } 3026 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS, 3027 res, argL, argR, size, False)); 3028 return res; 3029 } 3030 case Iop_Perm8x8: { 3031 HReg res = newVRegD(env); 3032 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3033 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3034 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL, 3035 res, argL, argR, 0, False)); 3036 return res; 3037 } 3038 case Iop_PolynomialMul8x8: { 3039 HReg res = newVRegD(env); 3040 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3041 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3042 UInt size = 0; 3043 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 3044 res, argL, argR, size, False)); 3045 return res; 3046 } 3047 case Iop_Max32Fx2: { 3048 HReg res = newVRegD(env); 3049 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3050 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3051 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 3052 res, argL, argR, 2, False)); 3053 return res; 3054 } 3055 case Iop_Min32Fx2: { 3056 HReg res = newVRegD(env); 3057 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3058 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3059 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, 3060 res, argL, argR, 2, False)); 3061 return res; 3062 } 3063 case Iop_PwMax32Fx2: { 3064 HReg res = newVRegD(env); 3065 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3066 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3067 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 3068 res, argL, argR, 2, False)); 3069 return res; 3070 } 3071 case Iop_PwMin32Fx2: { 3072 HReg res = newVRegD(env); 3073 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3074 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3075 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 3076 res, argL, argR, 2, False)); 3077 return res; 3078 } 3079 case Iop_CmpGT32Fx2: { 3080 HReg res = newVRegD(env); 3081 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3082 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3083 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 3084 res, argL, argR, 2, False)); 3085 return res; 3086 } 3087 case Iop_CmpGE32Fx2: { 3088 HReg res = newVRegD(env); 3089 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3090 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3091 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 3092 res, argL, argR, 2, False)); 3093 return res; 3094 } 3095 case Iop_CmpEQ32Fx2: { 3096 HReg res = newVRegD(env); 3097 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3098 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3099 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 3100 res, argL, argR, 2, False)); 3101 return res; 3102 } 3103 case Iop_F32ToFixed32Ux2_RZ: 3104 case Iop_F32ToFixed32Sx2_RZ: 3105 case Iop_Fixed32UToF32x2_RN: 3106 case Iop_Fixed32SToF32x2_RN: { 3107 HReg res = newVRegD(env); 3108 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); 3109 ARMNeonUnOp op; 3110 UInt imm6; 3111 if (e->Iex.Binop.arg2->tag != Iex_Const || 3112 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3113 vpanic("ARM supports FP <-> Fixed conversion with constant " 3114 "second argument less than 33 only\n"); 3115 } 3116 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3117 vassert(imm6 <= 32 && imm6 > 0); 3118 imm6 = 64 - imm6; 3119 switch(e->Iex.Binop.op) { 3120 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break; 3121 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break; 3122 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break; 3123 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break; 3124 default: vassert(0); 3125 } 3126 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False)); 3127 return res; 3128 } 3129 /* 3130 FIXME: is this here or not? 3131 case Iop_VDup8x8: 3132 case Iop_VDup16x4: 3133 case Iop_VDup32x2: { 3134 HReg res = newVRegD(env); 3135 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3136 UInt index; 3137 UInt imm4; 3138 UInt size = 0; 3139 if (e->Iex.Binop.arg2->tag != Iex_Const || 3140 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3141 vpanic("ARM supports Iop_VDup with constant " 3142 "second argument less than 16 only\n"); 3143 } 3144 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3145 switch(e->Iex.Binop.op) { 3146 case Iop_VDup8x8: imm4 = (index << 1) + 1; break; 3147 case Iop_VDup16x4: imm4 = (index << 2) + 2; break; 3148 case Iop_VDup32x2: imm4 = (index << 3) + 4; break; 3149 default: vassert(0); 3150 } 3151 if (imm4 >= 16) { 3152 vpanic("ARM supports Iop_VDup with constant " 3153 "second argument less than 16 only\n"); 3154 } 3155 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 3156 res, argL, imm4, False)); 3157 return res; 3158 } 3159 */ 3160 default: 3161 break; 3162 } 3163 } 3164 3165 /* --------- UNARY ops --------- */ 3166 if (e->tag == Iex_Unop) { 3167 switch (e->Iex.Unop.op) { 3168 3169 /* 32Uto64 */ 3170 case Iop_32Uto64: { 3171 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); 3172 HReg rHi = newVRegI(env); 3173 HReg res = newVRegD(env); 3174 addInstr(env, ARMInstr_Imm32(rHi, 0)); 3175 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3176 return res; 3177 } 3178 3179 /* 32Sto64 */ 3180 case Iop_32Sto64: { 3181 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); 3182 HReg rHi = newVRegI(env); 3183 addInstr(env, mk_iMOVds_RR(rHi, rLo)); 3184 addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31))); 3185 HReg res = newVRegD(env); 3186 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3187 return res; 3188 } 3189 3190 /* The next 3 are pass-throughs */ 3191 /* ReinterpF64asI64 */ 3192 case Iop_ReinterpF64asI64: 3193 /* Left64(e) */ 3194 case Iop_Left64: 3195 /* CmpwNEZ64(e) */ 3196 case Iop_1Sto64: { 3197 HReg rLo, rHi; 3198 HReg res = newVRegD(env); 3199 iselInt64Expr(&rHi, &rLo, env, e); 3200 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3201 return res; 3202 } 3203 3204 case Iop_Not64: { 3205 DECLARE_PATTERN(p_veqz_8x8); 3206 DECLARE_PATTERN(p_veqz_16x4); 3207 DECLARE_PATTERN(p_veqz_32x2); 3208 DECLARE_PATTERN(p_vcge_8sx8); 3209 DECLARE_PATTERN(p_vcge_16sx4); 3210 DECLARE_PATTERN(p_vcge_32sx2); 3211 DECLARE_PATTERN(p_vcge_8ux8); 3212 DECLARE_PATTERN(p_vcge_16ux4); 3213 DECLARE_PATTERN(p_vcge_32ux2); 3214 DEFINE_PATTERN(p_veqz_8x8, 3215 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0)))); 3216 DEFINE_PATTERN(p_veqz_16x4, 3217 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0)))); 3218 DEFINE_PATTERN(p_veqz_32x2, 3219 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0)))); 3220 DEFINE_PATTERN(p_vcge_8sx8, 3221 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0)))); 3222 DEFINE_PATTERN(p_vcge_16sx4, 3223 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0)))); 3224 DEFINE_PATTERN(p_vcge_32sx2, 3225 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0)))); 3226 DEFINE_PATTERN(p_vcge_8ux8, 3227 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0)))); 3228 DEFINE_PATTERN(p_vcge_16ux4, 3229 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0)))); 3230 DEFINE_PATTERN(p_vcge_32ux2, 3231 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0)))); 3232 if (matchIRExpr(&mi, p_veqz_8x8, e)) { 3233 HReg res = newVRegD(env); 3234 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3235 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False)); 3236 return res; 3237 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) { 3238 HReg res = newVRegD(env); 3239 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3240 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False)); 3241 return res; 3242 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) { 3243 HReg res = newVRegD(env); 3244 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3245 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False)); 3246 return res; 3247 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) { 3248 HReg res = newVRegD(env); 3249 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3250 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3251 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3252 res, argL, argR, 0, False)); 3253 return res; 3254 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) { 3255 HReg res = newVRegD(env); 3256 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3257 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3258 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3259 res, argL, argR, 1, False)); 3260 return res; 3261 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) { 3262 HReg res = newVRegD(env); 3263 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3264 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3265 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3266 res, argL, argR, 2, False)); 3267 return res; 3268 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) { 3269 HReg res = newVRegD(env); 3270 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3271 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3272 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3273 res, argL, argR, 0, False)); 3274 return res; 3275 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) { 3276 HReg res = newVRegD(env); 3277 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3278 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3279 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3280 res, argL, argR, 1, False)); 3281 return res; 3282 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) { 3283 HReg res = newVRegD(env); 3284 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3285 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3286 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3287 res, argL, argR, 2, False)); 3288 return res; 3289 } else { 3290 HReg res = newVRegD(env); 3291 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3292 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False)); 3293 return res; 3294 } 3295 } 3296 case Iop_Dup8x8: 3297 case Iop_Dup16x4: 3298 case Iop_Dup32x2: { 3299 HReg res, arg; 3300 UInt size; 3301 DECLARE_PATTERN(p_vdup_8x8); 3302 DECLARE_PATTERN(p_vdup_16x4); 3303 DECLARE_PATTERN(p_vdup_32x2); 3304 DEFINE_PATTERN(p_vdup_8x8, 3305 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1)))); 3306 DEFINE_PATTERN(p_vdup_16x4, 3307 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1)))); 3308 DEFINE_PATTERN(p_vdup_32x2, 3309 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1)))); 3310 if (matchIRExpr(&mi, p_vdup_8x8, e)) { 3311 UInt index; 3312 UInt imm4; 3313 if (mi.bindee[1]->tag == Iex_Const && 3314 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3315 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3316 imm4 = (index << 1) + 1; 3317 if (index < 8) { 3318 res = newVRegD(env); 3319 arg = iselNeon64Expr(env, mi.bindee[0]); 3320 addInstr(env, ARMInstr_NUnaryS( 3321 ARMneon_VDUP, 3322 mkARMNRS(ARMNRS_Reg, res, 0), 3323 mkARMNRS(ARMNRS_Scalar, arg, index), 3324 imm4, False 3325 )); 3326 return res; 3327 } 3328 } 3329 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) { 3330 UInt index; 3331 UInt imm4; 3332 if (mi.bindee[1]->tag == Iex_Const && 3333 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3334 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3335 imm4 = (index << 2) + 2; 3336 if (index < 4) { 3337 res = newVRegD(env); 3338 arg = iselNeon64Expr(env, mi.bindee[0]); 3339 addInstr(env, ARMInstr_NUnaryS( 3340 ARMneon_VDUP, 3341 mkARMNRS(ARMNRS_Reg, res, 0), 3342 mkARMNRS(ARMNRS_Scalar, arg, index), 3343 imm4, False 3344 )); 3345 return res; 3346 } 3347 } 3348 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) { 3349 UInt index; 3350 UInt imm4; 3351 if (mi.bindee[1]->tag == Iex_Const && 3352 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3353 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3354 imm4 = (index << 3) + 4; 3355 if (index < 2) { 3356 res = newVRegD(env); 3357 arg = iselNeon64Expr(env, mi.bindee[0]); 3358 addInstr(env, ARMInstr_NUnaryS( 3359 ARMneon_VDUP, 3360 mkARMNRS(ARMNRS_Reg, res, 0), 3361 mkARMNRS(ARMNRS_Scalar, arg, index), 3362 imm4, False 3363 )); 3364 return res; 3365 } 3366 } 3367 } 3368 arg = iselIntExpr_R(env, e->Iex.Unop.arg); 3369 res = newVRegD(env); 3370 switch (e->Iex.Unop.op) { 3371 case Iop_Dup8x8: size = 0; break; 3372 case Iop_Dup16x4: size = 1; break; 3373 case Iop_Dup32x2: size = 2; break; 3374 default: vassert(0); 3375 } 3376 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False)); 3377 return res; 3378 } 3379 case Iop_Abs8x8: 3380 case Iop_Abs16x4: 3381 case Iop_Abs32x2: { 3382 HReg res = newVRegD(env); 3383 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3384 UInt size = 0; 3385 switch(e->Iex.Binop.op) { 3386 case Iop_Abs8x8: size = 0; break; 3387 case Iop_Abs16x4: size = 1; break; 3388 case Iop_Abs32x2: size = 2; break; 3389 default: vassert(0); 3390 } 3391 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False)); 3392 return res; 3393 } 3394 case Iop_Reverse64_8x8: 3395 case Iop_Reverse64_16x4: 3396 case Iop_Reverse64_32x2: { 3397 HReg res = newVRegD(env); 3398 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3399 UInt size = 0; 3400 switch(e->Iex.Binop.op) { 3401 case Iop_Reverse64_8x8: size = 0; break; 3402 case Iop_Reverse64_16x4: size = 1; break; 3403 case Iop_Reverse64_32x2: size = 2; break; 3404 default: vassert(0); 3405 } 3406 addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 3407 res, arg, size, False)); 3408 return res; 3409 } 3410 case Iop_Reverse32_8x8: 3411 case Iop_Reverse32_16x4: { 3412 HReg res = newVRegD(env); 3413 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3414 UInt size = 0; 3415 switch(e->Iex.Binop.op) { 3416 case Iop_Reverse32_8x8: size = 0; break; 3417 case Iop_Reverse32_16x4: size = 1; break; 3418 default: vassert(0); 3419 } 3420 addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 3421 res, arg, size, False)); 3422 return res; 3423 } 3424 case Iop_Reverse16_8x8: { 3425 HReg res = newVRegD(env); 3426 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3427 UInt size = 0; 3428 addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 3429 res, arg, size, False)); 3430 return res; 3431 } 3432 case Iop_CmpwNEZ64: { 3433 HReg x_lsh = newVRegD(env); 3434 HReg x_rsh = newVRegD(env); 3435 HReg lsh_amt = newVRegD(env); 3436 HReg rsh_amt = newVRegD(env); 3437 HReg zero = newVRegD(env); 3438 HReg tmp = newVRegD(env); 3439 HReg tmp2 = newVRegD(env); 3440 HReg res = newVRegD(env); 3441 HReg x = newVRegD(env); 3442 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3443 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False)); 3444 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False)); 3445 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 3446 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); 3447 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3448 rsh_amt, zero, lsh_amt, 2, False)); 3449 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3450 x_lsh, x, lsh_amt, 3, False)); 3451 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3452 x_rsh, x, rsh_amt, 3, False)); 3453 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3454 tmp, x_lsh, x_rsh, 0, False)); 3455 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3456 res, tmp, x, 0, False)); 3457 return res; 3458 } 3459 case Iop_CmpNEZ8x8: 3460 case Iop_CmpNEZ16x4: 3461 case Iop_CmpNEZ32x2: { 3462 HReg res = newVRegD(env); 3463 HReg tmp = newVRegD(env); 3464 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3465 UInt size; 3466 switch (e->Iex.Unop.op) { 3467 case Iop_CmpNEZ8x8: size = 0; break; 3468 case Iop_CmpNEZ16x4: size = 1; break; 3469 case Iop_CmpNEZ32x2: size = 2; break; 3470 default: vassert(0); 3471 } 3472 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False)); 3473 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False)); 3474 return res; 3475 } 3476 case Iop_NarrowUn16to8x8: 3477 case Iop_NarrowUn32to16x4: 3478 case Iop_NarrowUn64to32x2: { 3479 HReg res = newVRegD(env); 3480 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3481 UInt size = 0; 3482 switch(e->Iex.Binop.op) { 3483 case Iop_NarrowUn16to8x8: size = 0; break; 3484 case Iop_NarrowUn32to16x4: size = 1; break; 3485 case Iop_NarrowUn64to32x2: size = 2; break; 3486 default: vassert(0); 3487 } 3488 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN, 3489 res, arg, size, False)); 3490 return res; 3491 } 3492 case Iop_QNarrowUn16Sto8Sx8: 3493 case Iop_QNarrowUn32Sto16Sx4: 3494 case Iop_QNarrowUn64Sto32Sx2: { 3495 HReg res = newVRegD(env); 3496 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3497 UInt size = 0; 3498 switch(e->Iex.Binop.op) { 3499 case Iop_QNarrowUn16Sto8Sx8: size = 0; break; 3500 case Iop_QNarrowUn32Sto16Sx4: size = 1; break; 3501 case Iop_QNarrowUn64Sto32Sx2: size = 2; break; 3502 default: vassert(0); 3503 } 3504 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS, 3505 res, arg, size, False)); 3506 return res; 3507 } 3508 case Iop_QNarrowUn16Sto8Ux8: 3509 case Iop_QNarrowUn32Sto16Ux4: 3510 case Iop_QNarrowUn64Sto32Ux2: { 3511 HReg res = newVRegD(env); 3512 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3513 UInt size = 0; 3514 switch(e->Iex.Binop.op) { 3515 case Iop_QNarrowUn16Sto8Ux8: size = 0; break; 3516 case Iop_QNarrowUn32Sto16Ux4: size = 1; break; 3517 case Iop_QNarrowUn64Sto32Ux2: size = 2; break; 3518 default: vassert(0); 3519 } 3520 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS, 3521 res, arg, size, False)); 3522 return res; 3523 } 3524 case Iop_QNarrowUn16Uto8Ux8: 3525 case Iop_QNarrowUn32Uto16Ux4: 3526 case Iop_QNarrowUn64Uto32Ux2: { 3527 HReg res = newVRegD(env); 3528 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3529 UInt size = 0; 3530 switch(e->Iex.Binop.op) { 3531 case Iop_QNarrowUn16Uto8Ux8: size = 0; break; 3532 case Iop_QNarrowUn32Uto16Ux4: size = 1; break; 3533 case Iop_QNarrowUn64Uto32Ux2: size = 2; break; 3534 default: vassert(0); 3535 } 3536 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU, 3537 res, arg, size, False)); 3538 return res; 3539 } 3540 case Iop_PwAddL8Sx8: 3541 case Iop_PwAddL16Sx4: 3542 case Iop_PwAddL32Sx2: { 3543 HReg res = newVRegD(env); 3544 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3545 UInt size = 0; 3546 switch(e->Iex.Binop.op) { 3547 case Iop_PwAddL8Sx8: size = 0; break; 3548 case Iop_PwAddL16Sx4: size = 1; break; 3549 case Iop_PwAddL32Sx2: size = 2; break; 3550 default: vassert(0); 3551 } 3552 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 3553 res, arg, size, False)); 3554 return res; 3555 } 3556 case Iop_PwAddL8Ux8: 3557 case Iop_PwAddL16Ux4: 3558 case Iop_PwAddL32Ux2: { 3559 HReg res = newVRegD(env); 3560 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3561 UInt size = 0; 3562 switch(e->Iex.Binop.op) { 3563 case Iop_PwAddL8Ux8: size = 0; break; 3564 case Iop_PwAddL16Ux4: size = 1; break; 3565 case Iop_PwAddL32Ux2: size = 2; break; 3566 default: vassert(0); 3567 } 3568 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 3569 res, arg, size, False)); 3570 return res; 3571 } 3572 case Iop_Cnt8x8: { 3573 HReg res = newVRegD(env); 3574 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3575 UInt size = 0; 3576 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, 3577 res, arg, size, False)); 3578 return res; 3579 } 3580 case Iop_Clz8Sx8: 3581 case Iop_Clz16Sx4: 3582 case Iop_Clz32Sx2: { 3583 HReg res = newVRegD(env); 3584 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3585 UInt size = 0; 3586 switch(e->Iex.Binop.op) { 3587 case Iop_Clz8Sx8: size = 0; break; 3588 case Iop_Clz16Sx4: size = 1; break; 3589 case Iop_Clz32Sx2: size = 2; break; 3590 default: vassert(0); 3591 } 3592 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, 3593 res, arg, size, False)); 3594 return res; 3595 } 3596 case Iop_Cls8Sx8: 3597 case Iop_Cls16Sx4: 3598 case Iop_Cls32Sx2: { 3599 HReg res = newVRegD(env); 3600 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3601 UInt size = 0; 3602 switch(e->Iex.Binop.op) { 3603 case Iop_Cls8Sx8: size = 0; break; 3604 case Iop_Cls16Sx4: size = 1; break; 3605 case Iop_Cls32Sx2: size = 2; break; 3606 default: vassert(0); 3607 } 3608 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, 3609 res, arg, size, False)); 3610 return res; 3611 } 3612 case Iop_FtoI32Sx2_RZ: { 3613 HReg res = newVRegD(env); 3614 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3615 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 3616 res, arg, 2, False)); 3617 return res; 3618 } 3619 case Iop_FtoI32Ux2_RZ: { 3620 HReg res = newVRegD(env); 3621 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3622 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 3623 res, arg, 2, False)); 3624 return res; 3625 } 3626 case Iop_I32StoFx2: { 3627 HReg res = newVRegD(env); 3628 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3629 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 3630 res, arg, 2, False)); 3631 return res; 3632 } 3633 case Iop_I32UtoFx2: { 3634 HReg res = newVRegD(env); 3635 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3636 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 3637 res, arg, 2, False)); 3638 return res; 3639 } 3640 case Iop_F32toF16x4: { 3641 HReg res = newVRegD(env); 3642 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3643 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16, 3644 res, arg, 2, False)); 3645 return res; 3646 } 3647 case Iop_Recip32Fx2: { 3648 HReg res = newVRegD(env); 3649 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3650 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 3651 res, argL, 0, False)); 3652 return res; 3653 } 3654 case Iop_Recip32x2: { 3655 HReg res = newVRegD(env); 3656 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3657 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 3658 res, argL, 0, False)); 3659 return res; 3660 } 3661 case Iop_Abs32Fx2: { 3662 DECLARE_PATTERN(p_vabd_32fx2); 3663 DEFINE_PATTERN(p_vabd_32fx2, 3664 unop(Iop_Abs32Fx2, 3665 binop(Iop_Sub32Fx2, 3666 bind(0), 3667 bind(1)))); 3668 if (matchIRExpr(&mi, p_vabd_32fx2, e)) { 3669 HReg res = newVRegD(env); 3670 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3671 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3672 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, 3673 res, argL, argR, 0, False)); 3674 return res; 3675 } else { 3676 HReg res = newVRegD(env); 3677 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3678 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, 3679 res, arg, 0, False)); 3680 return res; 3681 } 3682 } 3683 case Iop_Rsqrte32Fx2: { 3684 HReg res = newVRegD(env); 3685 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3686 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, 3687 res, arg, 0, False)); 3688 return res; 3689 } 3690 case Iop_Rsqrte32x2: { 3691 HReg res = newVRegD(env); 3692 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3693 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 3694 res, arg, 0, False)); 3695 return res; 3696 } 3697 case Iop_Neg32Fx2: { 3698 HReg res = newVRegD(env); 3699 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3700 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, 3701 res, arg, 0, False)); 3702 return res; 3703 } 3704 default: 3705 break; 3706 } 3707 } /* if (e->tag == Iex_Unop) */ 3708 3709 if (e->tag == Iex_Triop) { 3710 IRTriop *triop = e->Iex.Triop.details; 3711 3712 switch (triop->op) { 3713 case Iop_Extract64: { 3714 HReg res = newVRegD(env); 3715 HReg argL = iselNeon64Expr(env, triop->arg1); 3716 HReg argR = iselNeon64Expr(env, triop->arg2); 3717 UInt imm4; 3718 if (triop->arg3->tag != Iex_Const || 3719 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 3720 vpanic("ARM target supports Iop_Extract64 with constant " 3721 "third argument less than 16 only\n"); 3722 } 3723 imm4 = triop->arg3->Iex.Const.con->Ico.U8; 3724 if (imm4 >= 8) { 3725 vpanic("ARM target supports Iop_Extract64 with constant " 3726 "third argument less than 16 only\n"); 3727 } 3728 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, 3729 res, argL, argR, imm4, False)); 3730 return res; 3731 } 3732 case Iop_SetElem8x8: 3733 case Iop_SetElem16x4: 3734 case Iop_SetElem32x2: { 3735 HReg res = newVRegD(env); 3736 HReg dreg = iselNeon64Expr(env, triop->arg1); 3737 HReg arg = iselIntExpr_R(env, triop->arg3); 3738 UInt index, size; 3739 if (triop->arg2->tag != Iex_Const || 3740 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) { 3741 vpanic("ARM target supports SetElem with constant " 3742 "second argument only\n"); 3743 } 3744 index = triop->arg2->Iex.Const.con->Ico.U8; 3745 switch (triop->op) { 3746 case Iop_SetElem8x8: vassert(index < 8); size = 0; break; 3747 case Iop_SetElem16x4: vassert(index < 4); size = 1; break; 3748 case Iop_SetElem32x2: vassert(index < 2); size = 2; break; 3749 default: vassert(0); 3750 } 3751 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False)); 3752 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM, 3753 mkARMNRS(ARMNRS_Scalar, res, index), 3754 mkARMNRS(ARMNRS_Reg, arg, 0), 3755 size, False)); 3756 return res; 3757 } 3758 default: 3759 break; 3760 } 3761 } 3762 3763 /* --------- MULTIPLEX --------- */ 3764 if (e->tag == Iex_ITE) { // VFD 3765 HReg rLo, rHi; 3766 HReg res = newVRegD(env); 3767 iselInt64Expr(&rHi, &rLo, env, e); 3768 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3769 return res; 3770 } 3771 3772 ppIRExpr(e); 3773 vpanic("iselNeon64Expr"); 3774 } 3775 3776 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e ) 3777 { 3778 HReg r = iselNeonExpr_wrk( env, e ); 3779 vassert(hregClass(r) == HRcVec128); 3780 vassert(hregIsVirtual(r)); 3781 return r; 3782 } 3783 3784 /* DO NOT CALL THIS DIRECTLY */ 3785 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e ) 3786 { 3787 IRType ty = typeOfIRExpr(env->type_env, e); 3788 MatchInfo mi; 3789 vassert(e); 3790 vassert(ty == Ity_V128); 3791 3792 if (e->tag == Iex_RdTmp) { 3793 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3794 } 3795 3796 if (e->tag == Iex_Const) { 3797 /* At the moment there should be no 128-bit constants in IR for ARM 3798 generated during disassemble. They are represented as Iop_64HLtoV128 3799 binary operation and are handled among binary ops. */ 3800 /* But zero can be created by valgrind internal optimizer */ 3801 if (e->Iex.Const.con->Ico.V128 == 0x0000) { 3802 HReg res = newVRegV(env); 3803 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0))); 3804 return res; 3805 } 3806 if (e->Iex.Const.con->Ico.V128 == 0xFFFF) { 3807 HReg res = newVRegV(env); 3808 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255))); 3809 return res; 3810 } 3811 ppIRExpr(e); 3812 vpanic("128-bit constant is not implemented"); 3813 } 3814 3815 if (e->tag == Iex_Load) { 3816 HReg res = newVRegV(env); 3817 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); 3818 vassert(ty == Ity_V128); 3819 addInstr(env, ARMInstr_NLdStQ(True, res, am)); 3820 return res; 3821 } 3822 3823 if (e->tag == Iex_Get) { 3824 HReg addr = newVRegI(env); 3825 HReg res = newVRegV(env); 3826 vassert(ty == Ity_V128); 3827 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset)); 3828 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr))); 3829 return res; 3830 } 3831 3832 if (e->tag == Iex_Unop) { 3833 switch (e->Iex.Unop.op) { 3834 case Iop_NotV128: { 3835 DECLARE_PATTERN(p_veqz_8x16); 3836 DECLARE_PATTERN(p_veqz_16x8); 3837 DECLARE_PATTERN(p_veqz_32x4); 3838 DECLARE_PATTERN(p_vcge_8sx16); 3839 DECLARE_PATTERN(p_vcge_16sx8); 3840 DECLARE_PATTERN(p_vcge_32sx4); 3841 DECLARE_PATTERN(p_vcge_8ux16); 3842 DECLARE_PATTERN(p_vcge_16ux8); 3843 DECLARE_PATTERN(p_vcge_32ux4); 3844 DEFINE_PATTERN(p_veqz_8x16, 3845 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0)))); 3846 DEFINE_PATTERN(p_veqz_16x8, 3847 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0)))); 3848 DEFINE_PATTERN(p_veqz_32x4, 3849 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0)))); 3850 DEFINE_PATTERN(p_vcge_8sx16, 3851 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0)))); 3852 DEFINE_PATTERN(p_vcge_16sx8, 3853 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0)))); 3854 DEFINE_PATTERN(p_vcge_32sx4, 3855 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0)))); 3856 DEFINE_PATTERN(p_vcge_8ux16, 3857 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0)))); 3858 DEFINE_PATTERN(p_vcge_16ux8, 3859 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0)))); 3860 DEFINE_PATTERN(p_vcge_32ux4, 3861 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0)))); 3862 if (matchIRExpr(&mi, p_veqz_8x16, e)) { 3863 HReg res = newVRegV(env); 3864 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3865 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True)); 3866 return res; 3867 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) { 3868 HReg res = newVRegV(env); 3869 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3870 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True)); 3871 return res; 3872 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) { 3873 HReg res = newVRegV(env); 3874 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3875 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True)); 3876 return res; 3877 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) { 3878 HReg res = newVRegV(env); 3879 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3880 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3881 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3882 res, argL, argR, 0, True)); 3883 return res; 3884 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) { 3885 HReg res = newVRegV(env); 3886 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3887 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3888 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3889 res, argL, argR, 1, True)); 3890 return res; 3891 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) { 3892 HReg res = newVRegV(env); 3893 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3894 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3895 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3896 res, argL, argR, 2, True)); 3897 return res; 3898 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) { 3899 HReg res = newVRegV(env); 3900 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3901 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3902 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3903 res, argL, argR, 0, True)); 3904 return res; 3905 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) { 3906 HReg res = newVRegV(env); 3907 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3908 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3909 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3910 res, argL, argR, 1, True)); 3911 return res; 3912 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) { 3913 HReg res = newVRegV(env); 3914 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3915 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3916 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3917 res, argL, argR, 2, True)); 3918 return res; 3919 } else { 3920 HReg res = newVRegV(env); 3921 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3922 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True)); 3923 return res; 3924 } 3925 } 3926 case Iop_Dup8x16: 3927 case Iop_Dup16x8: 3928 case Iop_Dup32x4: { 3929 HReg res, arg; 3930 UInt size; 3931 DECLARE_PATTERN(p_vdup_8x16); 3932 DECLARE_PATTERN(p_vdup_16x8); 3933 DECLARE_PATTERN(p_vdup_32x4); 3934 DEFINE_PATTERN(p_vdup_8x16, 3935 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1)))); 3936 DEFINE_PATTERN(p_vdup_16x8, 3937 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1)))); 3938 DEFINE_PATTERN(p_vdup_32x4, 3939 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1)))); 3940 if (matchIRExpr(&mi, p_vdup_8x16, e)) { 3941 UInt index; 3942 UInt imm4; 3943 if (mi.bindee[1]->tag == Iex_Const && 3944 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3945 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3946 imm4 = (index << 1) + 1; 3947 if (index < 8) { 3948 res = newVRegV(env); 3949 arg = iselNeon64Expr(env, mi.bindee[0]); 3950 addInstr(env, ARMInstr_NUnaryS( 3951 ARMneon_VDUP, 3952 mkARMNRS(ARMNRS_Reg, res, 0), 3953 mkARMNRS(ARMNRS_Scalar, arg, index), 3954 imm4, True 3955 )); 3956 return res; 3957 } 3958 } 3959 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) { 3960 UInt index; 3961 UInt imm4; 3962 if (mi.bindee[1]->tag == Iex_Const && 3963 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3964 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3965 imm4 = (index << 2) + 2; 3966 if (index < 4) { 3967 res = newVRegV(env); 3968 arg = iselNeon64Expr(env, mi.bindee[0]); 3969 addInstr(env, ARMInstr_NUnaryS( 3970 ARMneon_VDUP, 3971 mkARMNRS(ARMNRS_Reg, res, 0), 3972 mkARMNRS(ARMNRS_Scalar, arg, index), 3973 imm4, True 3974 )); 3975 return res; 3976 } 3977 } 3978 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) { 3979 UInt index; 3980 UInt imm4; 3981 if (mi.bindee[1]->tag == Iex_Const && 3982 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3983 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3984 imm4 = (index << 3) + 4; 3985 if (index < 2) { 3986 res = newVRegV(env); 3987 arg = iselNeon64Expr(env, mi.bindee[0]); 3988 addInstr(env, ARMInstr_NUnaryS( 3989 ARMneon_VDUP, 3990 mkARMNRS(ARMNRS_Reg, res, 0), 3991 mkARMNRS(ARMNRS_Scalar, arg, index), 3992 imm4, True 3993 )); 3994 return res; 3995 } 3996 } 3997 } 3998 arg = iselIntExpr_R(env, e->Iex.Unop.arg); 3999 res = newVRegV(env); 4000 switch (e->Iex.Unop.op) { 4001 case Iop_Dup8x16: size = 0; break; 4002 case Iop_Dup16x8: size = 1; break; 4003 case Iop_Dup32x4: size = 2; break; 4004 default: vassert(0); 4005 } 4006 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True)); 4007 return res; 4008 } 4009 case Iop_Abs8x16: 4010 case Iop_Abs16x8: 4011 case Iop_Abs32x4: { 4012 HReg res = newVRegV(env); 4013 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4014 UInt size = 0; 4015 switch(e->Iex.Binop.op) { 4016 case Iop_Abs8x16: size = 0; break; 4017 case Iop_Abs16x8: size = 1; break; 4018 case Iop_Abs32x4: size = 2; break; 4019 default: vassert(0); 4020 } 4021 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True)); 4022 return res; 4023 } 4024 case Iop_Reverse64_8x16: 4025 case Iop_Reverse64_16x8: 4026 case Iop_Reverse64_32x4: { 4027 HReg res = newVRegV(env); 4028 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4029 UInt size = 0; 4030 switch(e->Iex.Binop.op) { 4031 case Iop_Reverse64_8x16: size = 0; break; 4032 case Iop_Reverse64_16x8: size = 1; break; 4033 case Iop_Reverse64_32x4: size = 2; break; 4034 default: vassert(0); 4035 } 4036 addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 4037 res, arg, size, True)); 4038 return res; 4039 } 4040 case Iop_Reverse32_8x16: 4041 case Iop_Reverse32_16x8: { 4042 HReg res = newVRegV(env); 4043 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4044 UInt size = 0; 4045 switch(e->Iex.Binop.op) { 4046 case Iop_Reverse32_8x16: size = 0; break; 4047 case Iop_Reverse32_16x8: size = 1; break; 4048 default: vassert(0); 4049 } 4050 addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 4051 res, arg, size, True)); 4052 return res; 4053 } 4054 case Iop_Reverse16_8x16: { 4055 HReg res = newVRegV(env); 4056 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4057 UInt size = 0; 4058 addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 4059 res, arg, size, True)); 4060 return res; 4061 } 4062 case Iop_CmpNEZ64x2: { 4063 HReg x_lsh = newVRegV(env); 4064 HReg x_rsh = newVRegV(env); 4065 HReg lsh_amt = newVRegV(env); 4066 HReg rsh_amt = newVRegV(env); 4067 HReg zero = newVRegV(env); 4068 HReg tmp = newVRegV(env); 4069 HReg tmp2 = newVRegV(env); 4070 HReg res = newVRegV(env); 4071 HReg x = newVRegV(env); 4072 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4073 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True)); 4074 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True)); 4075 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 4076 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); 4077 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4078 rsh_amt, zero, lsh_amt, 2, True)); 4079 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4080 x_lsh, x, lsh_amt, 3, True)); 4081 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4082 x_rsh, x, rsh_amt, 3, True)); 4083 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4084 tmp, x_lsh, x_rsh, 0, True)); 4085 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4086 res, tmp, x, 0, True)); 4087 return res; 4088 } 4089 case Iop_CmpNEZ8x16: 4090 case Iop_CmpNEZ16x8: 4091 case Iop_CmpNEZ32x4: { 4092 HReg res = newVRegV(env); 4093 HReg tmp = newVRegV(env); 4094 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4095 UInt size; 4096 switch (e->Iex.Unop.op) { 4097 case Iop_CmpNEZ8x16: size = 0; break; 4098 case Iop_CmpNEZ16x8: size = 1; break; 4099 case Iop_CmpNEZ32x4: size = 2; break; 4100 default: vassert(0); 4101 } 4102 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True)); 4103 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True)); 4104 return res; 4105 } 4106 case Iop_Widen8Uto16x8: 4107 case Iop_Widen16Uto32x4: 4108 case Iop_Widen32Uto64x2: { 4109 HReg res = newVRegV(env); 4110 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4111 UInt size; 4112 switch (e->Iex.Unop.op) { 4113 case Iop_Widen8Uto16x8: size = 0; break; 4114 case Iop_Widen16Uto32x4: size = 1; break; 4115 case Iop_Widen32Uto64x2: size = 2; break; 4116 default: vassert(0); 4117 } 4118 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU, 4119 res, arg, size, True)); 4120 return res; 4121 } 4122 case Iop_Widen8Sto16x8: 4123 case Iop_Widen16Sto32x4: 4124 case Iop_Widen32Sto64x2: { 4125 HReg res = newVRegV(env); 4126 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4127 UInt size; 4128 switch (e->Iex.Unop.op) { 4129 case Iop_Widen8Sto16x8: size = 0; break; 4130 case Iop_Widen16Sto32x4: size = 1; break; 4131 case Iop_Widen32Sto64x2: size = 2; break; 4132 default: vassert(0); 4133 } 4134 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS, 4135 res, arg, size, True)); 4136 return res; 4137 } 4138 case Iop_PwAddL8Sx16: 4139 case Iop_PwAddL16Sx8: 4140 case Iop_PwAddL32Sx4: { 4141 HReg res = newVRegV(env); 4142 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4143 UInt size = 0; 4144 switch(e->Iex.Binop.op) { 4145 case Iop_PwAddL8Sx16: size = 0; break; 4146 case Iop_PwAddL16Sx8: size = 1; break; 4147 case Iop_PwAddL32Sx4: size = 2; break; 4148 default: vassert(0); 4149 } 4150 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 4151 res, arg, size, True)); 4152 return res; 4153 } 4154 case Iop_PwAddL8Ux16: 4155 case Iop_PwAddL16Ux8: 4156 case Iop_PwAddL32Ux4: { 4157 HReg res = newVRegV(env); 4158 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4159 UInt size = 0; 4160 switch(e->Iex.Binop.op) { 4161 case Iop_PwAddL8Ux16: size = 0; break; 4162 case Iop_PwAddL16Ux8: size = 1; break; 4163 case Iop_PwAddL32Ux4: size = 2; break; 4164 default: vassert(0); 4165 } 4166 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 4167 res, arg, size, True)); 4168 return res; 4169 } 4170 case Iop_Cnt8x16: { 4171 HReg res = newVRegV(env); 4172 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4173 UInt size = 0; 4174 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True)); 4175 return res; 4176 } 4177 case Iop_Clz8Sx16: 4178 case Iop_Clz16Sx8: 4179 case Iop_Clz32Sx4: { 4180 HReg res = newVRegV(env); 4181 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4182 UInt size = 0; 4183 switch(e->Iex.Binop.op) { 4184 case Iop_Clz8Sx16: size = 0; break; 4185 case Iop_Clz16Sx8: size = 1; break; 4186 case Iop_Clz32Sx4: size = 2; break; 4187 default: vassert(0); 4188 } 4189 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True)); 4190 return res; 4191 } 4192 case Iop_Cls8Sx16: 4193 case Iop_Cls16Sx8: 4194 case Iop_Cls32Sx4: { 4195 HReg res = newVRegV(env); 4196 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4197 UInt size = 0; 4198 switch(e->Iex.Binop.op) { 4199 case Iop_Cls8Sx16: size = 0; break; 4200 case Iop_Cls16Sx8: size = 1; break; 4201 case Iop_Cls32Sx4: size = 2; break; 4202 default: vassert(0); 4203 } 4204 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True)); 4205 return res; 4206 } 4207 case Iop_FtoI32Sx4_RZ: { 4208 HReg res = newVRegV(env); 4209 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4210 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 4211 res, arg, 2, True)); 4212 return res; 4213 } 4214 case Iop_FtoI32Ux4_RZ: { 4215 HReg res = newVRegV(env); 4216 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4217 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 4218 res, arg, 2, True)); 4219 return res; 4220 } 4221 case Iop_I32StoFx4: { 4222 HReg res = newVRegV(env); 4223 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4224 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 4225 res, arg, 2, True)); 4226 return res; 4227 } 4228 case Iop_I32UtoFx4: { 4229 HReg res = newVRegV(env); 4230 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4231 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 4232 res, arg, 2, True)); 4233 return res; 4234 } 4235 case Iop_F16toF32x4: { 4236 HReg res = newVRegV(env); 4237 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4238 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32, 4239 res, arg, 2, True)); 4240 return res; 4241 } 4242 case Iop_Recip32Fx4: { 4243 HReg res = newVRegV(env); 4244 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4245 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 4246 res, argL, 0, True)); 4247 return res; 4248 } 4249 case Iop_Recip32x4: { 4250 HReg res = newVRegV(env); 4251 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4252 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 4253 res, argL, 0, True)); 4254 return res; 4255 } 4256 case Iop_Abs32Fx4: { 4257 HReg res = newVRegV(env); 4258 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4259 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, 4260 res, argL, 0, True)); 4261 return res; 4262 } 4263 case Iop_Rsqrte32Fx4: { 4264 HReg res = newVRegV(env); 4265 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4266 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, 4267 res, argL, 0, True)); 4268 return res; 4269 } 4270 case Iop_Rsqrte32x4: { 4271 HReg res = newVRegV(env); 4272 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4273 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 4274 res, argL, 0, True)); 4275 return res; 4276 } 4277 case Iop_Neg32Fx4: { 4278 HReg res = newVRegV(env); 4279 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4280 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, 4281 res, arg, 0, True)); 4282 return res; 4283 } 4284 /* ... */ 4285 default: 4286 break; 4287 } 4288 } 4289 4290 if (e->tag == Iex_Binop) { 4291 switch (e->Iex.Binop.op) { 4292 case Iop_64HLtoV128: 4293 /* Try to match into single "VMOV reg, imm" instruction */ 4294 if (e->Iex.Binop.arg1->tag == Iex_Const && 4295 e->Iex.Binop.arg2->tag == Iex_Const && 4296 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 && 4297 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 && 4298 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 4299 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) { 4300 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 4301 ARMNImm *imm = Imm64_to_ARMNImm(imm64); 4302 if (imm) { 4303 HReg res = newVRegV(env); 4304 addInstr(env, ARMInstr_NeonImm(res, imm)); 4305 return res; 4306 } 4307 if ((imm64 >> 32) == 0LL && 4308 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) { 4309 HReg tmp1 = newVRegV(env); 4310 HReg tmp2 = newVRegV(env); 4311 HReg res = newVRegV(env); 4312 if (imm->type < 10) { 4313 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f))); 4314 addInstr(env, ARMInstr_NeonImm(tmp2, imm)); 4315 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4316 res, tmp1, tmp2, 4, True)); 4317 return res; 4318 } 4319 } 4320 if ((imm64 & 0xFFFFFFFFLL) == 0LL && 4321 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) { 4322 HReg tmp1 = newVRegV(env); 4323 HReg tmp2 = newVRegV(env); 4324 HReg res = newVRegV(env); 4325 if (imm->type < 10) { 4326 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0))); 4327 addInstr(env, ARMInstr_NeonImm(tmp2, imm)); 4328 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4329 res, tmp1, tmp2, 4, True)); 4330 return res; 4331 } 4332 } 4333 } 4334 /* Does not match "VMOV Reg, Imm" form. We'll have to do 4335 it the slow way. */ 4336 { 4337 /* local scope */ 4338 /* Done via the stack for ease of use. */ 4339 /* FIXME: assumes little endian host */ 4340 HReg w3, w2, w1, w0; 4341 HReg res = newVRegV(env); 4342 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0); 4343 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4); 4344 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8); 4345 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12); 4346 ARMRI84* c_16 = ARMRI84_I84(16,0); 4347 /* Make space for SP */ 4348 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(), 4349 hregARM_R13(), c_16)); 4350 4351 /* Store the less significant 64 bits */ 4352 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2); 4353 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, 4354 w0, sp_0)); 4355 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, 4356 w1, sp_4)); 4357 4358 /* Store the more significant 64 bits */ 4359 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1); 4360 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, 4361 w2, sp_8)); 4362 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, 4363 w3, sp_12)); 4364 4365 /* Load result back from stack. */ 4366 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res, 4367 mkARMAModeN_R(hregARM_R13()))); 4368 4369 /* Restore SP */ 4370 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(), 4371 hregARM_R13(), c_16)); 4372 return res; 4373 } /* local scope */ 4374 goto neon_expr_bad; 4375 case Iop_AndV128: { 4376 HReg res = newVRegV(env); 4377 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4378 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4379 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4380 res, argL, argR, 4, True)); 4381 return res; 4382 } 4383 case Iop_OrV128: { 4384 HReg res = newVRegV(env); 4385 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4386 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4387 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4388 res, argL, argR, 4, True)); 4389 return res; 4390 } 4391 case Iop_XorV128: { 4392 HReg res = newVRegV(env); 4393 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4394 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4395 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 4396 res, argL, argR, 4, True)); 4397 return res; 4398 } 4399 case Iop_Add8x16: 4400 case Iop_Add16x8: 4401 case Iop_Add32x4: 4402 case Iop_Add64x2: { 4403 /* 4404 FIXME: remove this if not used 4405 DECLARE_PATTERN(p_vrhadd_32sx4); 4406 ULong one = (1LL << 32) | 1LL; 4407 DEFINE_PATTERN(p_vrhadd_32sx4, 4408 binop(Iop_Add32x4, 4409 binop(Iop_Add32x4, 4410 binop(Iop_SarN32x4, 4411 bind(0), 4412 mkU8(1)), 4413 binop(Iop_SarN32x4, 4414 bind(1), 4415 mkU8(1))), 4416 binop(Iop_SarN32x4, 4417 binop(Iop_Add32x4, 4418 binop(Iop_Add32x4, 4419 binop(Iop_AndV128, 4420 bind(0), 4421 mkU128(one)), 4422 binop(Iop_AndV128, 4423 bind(1), 4424 mkU128(one))), 4425 mkU128(one)), 4426 mkU8(1)))); 4427 */ 4428 HReg res = newVRegV(env); 4429 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4430 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4431 UInt size; 4432 switch (e->Iex.Binop.op) { 4433 case Iop_Add8x16: size = 0; break; 4434 case Iop_Add16x8: size = 1; break; 4435 case Iop_Add32x4: size = 2; break; 4436 case Iop_Add64x2: size = 3; break; 4437 default: 4438 ppIROp(e->Iex.Binop.op); 4439 vpanic("Illegal element size in VADD"); 4440 } 4441 addInstr(env, ARMInstr_NBinary(ARMneon_VADD, 4442 res, argL, argR, size, True)); 4443 return res; 4444 } 4445 case Iop_Recps32Fx4: { 4446 HReg res = newVRegV(env); 4447 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4448 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4449 UInt size = 0; 4450 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 4451 res, argL, argR, size, True)); 4452 return res; 4453 } 4454 case Iop_Rsqrts32Fx4: { 4455 HReg res = newVRegV(env); 4456 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4457 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4458 UInt size = 0; 4459 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 4460 res, argL, argR, size, True)); 4461 return res; 4462 } 4463 4464 // These 6 verified 18 Apr 2013 4465 case Iop_InterleaveEvenLanes8x16: 4466 case Iop_InterleaveOddLanes8x16: 4467 case Iop_InterleaveEvenLanes16x8: 4468 case Iop_InterleaveOddLanes16x8: 4469 case Iop_InterleaveEvenLanes32x4: 4470 case Iop_InterleaveOddLanes32x4: { 4471 HReg rD = newVRegV(env); 4472 HReg rM = newVRegV(env); 4473 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4474 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4475 UInt size; 4476 Bool resRd; // is the result in rD or rM ? 4477 switch (e->Iex.Binop.op) { 4478 case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break; 4479 case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break; 4480 case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break; 4481 case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break; 4482 case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break; 4483 case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break; 4484 default: vassert(0); 4485 } 4486 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 4487 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 4488 addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True)); 4489 return resRd ? rD : rM; 4490 } 4491 4492 // These 6 verified 18 Apr 2013 4493 case Iop_InterleaveHI8x16: 4494 case Iop_InterleaveLO8x16: 4495 case Iop_InterleaveHI16x8: 4496 case Iop_InterleaveLO16x8: 4497 case Iop_InterleaveHI32x4: 4498 case Iop_InterleaveLO32x4: { 4499 HReg rD = newVRegV(env); 4500 HReg rM = newVRegV(env); 4501 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4502 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4503 UInt size; 4504 Bool resRd; // is the result in rD or rM ? 4505 switch (e->Iex.Binop.op) { 4506 case Iop_InterleaveHI8x16: resRd = False; size = 0; break; 4507 case Iop_InterleaveLO8x16: resRd = True; size = 0; break; 4508 case Iop_InterleaveHI16x8: resRd = False; size = 1; break; 4509 case Iop_InterleaveLO16x8: resRd = True; size = 1; break; 4510 case Iop_InterleaveHI32x4: resRd = False; size = 2; break; 4511 case Iop_InterleaveLO32x4: resRd = True; size = 2; break; 4512 default: vassert(0); 4513 } 4514 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 4515 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 4516 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True)); 4517 return resRd ? rD : rM; 4518 } 4519 4520 // These 6 verified 18 Apr 2013 4521 case Iop_CatOddLanes8x16: 4522 case Iop_CatEvenLanes8x16: 4523 case Iop_CatOddLanes16x8: 4524 case Iop_CatEvenLanes16x8: 4525 case Iop_CatOddLanes32x4: 4526 case Iop_CatEvenLanes32x4: { 4527 HReg rD = newVRegV(env); 4528 HReg rM = newVRegV(env); 4529 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4530 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4531 UInt size; 4532 Bool resRd; // is the result in rD or rM ? 4533 switch (e->Iex.Binop.op) { 4534 case Iop_CatOddLanes8x16: resRd = False; size = 0; break; 4535 case Iop_CatEvenLanes8x16: resRd = True; size = 0; break; 4536 case Iop_CatOddLanes16x8: resRd = False; size = 1; break; 4537 case Iop_CatEvenLanes16x8: resRd = True; size = 1; break; 4538 case Iop_CatOddLanes32x4: resRd = False; size = 2; break; 4539 case Iop_CatEvenLanes32x4: resRd = True; size = 2; break; 4540 default: vassert(0); 4541 } 4542 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); 4543 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); 4544 addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True)); 4545 return resRd ? rD : rM; 4546 } 4547 4548 case Iop_QAdd8Ux16: 4549 case Iop_QAdd16Ux8: 4550 case Iop_QAdd32Ux4: 4551 case Iop_QAdd64Ux2: { 4552 HReg res = newVRegV(env); 4553 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4554 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4555 UInt size; 4556 switch (e->Iex.Binop.op) { 4557 case Iop_QAdd8Ux16: size = 0; break; 4558 case Iop_QAdd16Ux8: size = 1; break; 4559 case Iop_QAdd32Ux4: size = 2; break; 4560 case Iop_QAdd64Ux2: size = 3; break; 4561 default: 4562 ppIROp(e->Iex.Binop.op); 4563 vpanic("Illegal element size in VQADDU"); 4564 } 4565 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 4566 res, argL, argR, size, True)); 4567 return res; 4568 } 4569 case Iop_QAdd8Sx16: 4570 case Iop_QAdd16Sx8: 4571 case Iop_QAdd32Sx4: 4572 case Iop_QAdd64Sx2: { 4573 HReg res = newVRegV(env); 4574 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4575 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4576 UInt size; 4577 switch (e->Iex.Binop.op) { 4578 case Iop_QAdd8Sx16: size = 0; break; 4579 case Iop_QAdd16Sx8: size = 1; break; 4580 case Iop_QAdd32Sx4: size = 2; break; 4581 case Iop_QAdd64Sx2: size = 3; break; 4582 default: 4583 ppIROp(e->Iex.Binop.op); 4584 vpanic("Illegal element size in VQADDS"); 4585 } 4586 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 4587 res, argL, argR, size, True)); 4588 return res; 4589 } 4590 case Iop_Sub8x16: 4591 case Iop_Sub16x8: 4592 case Iop_Sub32x4: 4593 case Iop_Sub64x2: { 4594 HReg res = newVRegV(env); 4595 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4596 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4597 UInt size; 4598 switch (e->Iex.Binop.op) { 4599 case Iop_Sub8x16: size = 0; break; 4600 case Iop_Sub16x8: size = 1; break; 4601 case Iop_Sub32x4: size = 2; break; 4602 case Iop_Sub64x2: size = 3; break; 4603 default: 4604 ppIROp(e->Iex.Binop.op); 4605 vpanic("Illegal element size in VSUB"); 4606 } 4607 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4608 res, argL, argR, size, True)); 4609 return res; 4610 } 4611 case Iop_QSub8Ux16: 4612 case Iop_QSub16Ux8: 4613 case Iop_QSub32Ux4: 4614 case Iop_QSub64Ux2: { 4615 HReg res = newVRegV(env); 4616 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4617 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4618 UInt size; 4619 switch (e->Iex.Binop.op) { 4620 case Iop_QSub8Ux16: size = 0; break; 4621 case Iop_QSub16Ux8: size = 1; break; 4622 case Iop_QSub32Ux4: size = 2; break; 4623 case Iop_QSub64Ux2: size = 3; break; 4624 default: 4625 ppIROp(e->Iex.Binop.op); 4626 vpanic("Illegal element size in VQSUBU"); 4627 } 4628 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 4629 res, argL, argR, size, True)); 4630 return res; 4631 } 4632 case Iop_QSub8Sx16: 4633 case Iop_QSub16Sx8: 4634 case Iop_QSub32Sx4: 4635 case Iop_QSub64Sx2: { 4636 HReg res = newVRegV(env); 4637 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4638 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4639 UInt size; 4640 switch (e->Iex.Binop.op) { 4641 case Iop_QSub8Sx16: size = 0; break; 4642 case Iop_QSub16Sx8: size = 1; break; 4643 case Iop_QSub32Sx4: size = 2; break; 4644 case Iop_QSub64Sx2: size = 3; break; 4645 default: 4646 ppIROp(e->Iex.Binop.op); 4647 vpanic("Illegal element size in VQSUBS"); 4648 } 4649 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 4650 res, argL, argR, size, True)); 4651 return res; 4652 } 4653 case Iop_Max8Ux16: 4654 case Iop_Max16Ux8: 4655 case Iop_Max32Ux4: { 4656 HReg res = newVRegV(env); 4657 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4658 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4659 UInt size; 4660 switch (e->Iex.Binop.op) { 4661 case Iop_Max8Ux16: size = 0; break; 4662 case Iop_Max16Ux8: size = 1; break; 4663 case Iop_Max32Ux4: size = 2; break; 4664 default: vpanic("Illegal element size in VMAXU"); 4665 } 4666 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 4667 res, argL, argR, size, True)); 4668 return res; 4669 } 4670 case Iop_Max8Sx16: 4671 case Iop_Max16Sx8: 4672 case Iop_Max32Sx4: { 4673 HReg res = newVRegV(env); 4674 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4675 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4676 UInt size; 4677 switch (e->Iex.Binop.op) { 4678 case Iop_Max8Sx16: size = 0; break; 4679 case Iop_Max16Sx8: size = 1; break; 4680 case Iop_Max32Sx4: size = 2; break; 4681 default: vpanic("Illegal element size in VMAXU"); 4682 } 4683 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 4684 res, argL, argR, size, True)); 4685 return res; 4686 } 4687 case Iop_Min8Ux16: 4688 case Iop_Min16Ux8: 4689 case Iop_Min32Ux4: { 4690 HReg res = newVRegV(env); 4691 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4692 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4693 UInt size; 4694 switch (e->Iex.Binop.op) { 4695 case Iop_Min8Ux16: size = 0; break; 4696 case Iop_Min16Ux8: size = 1; break; 4697 case Iop_Min32Ux4: size = 2; break; 4698 default: vpanic("Illegal element size in VMAXU"); 4699 } 4700 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 4701 res, argL, argR, size, True)); 4702 return res; 4703 } 4704 case Iop_Min8Sx16: 4705 case Iop_Min16Sx8: 4706 case Iop_Min32Sx4: { 4707 HReg res = newVRegV(env); 4708 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4709 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4710 UInt size; 4711 switch (e->Iex.Binop.op) { 4712 case Iop_Min8Sx16: size = 0; break; 4713 case Iop_Min16Sx8: size = 1; break; 4714 case Iop_Min32Sx4: size = 2; break; 4715 default: vpanic("Illegal element size in VMAXU"); 4716 } 4717 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 4718 res, argL, argR, size, True)); 4719 return res; 4720 } 4721 case Iop_Sar8x16: 4722 case Iop_Sar16x8: 4723 case Iop_Sar32x4: 4724 case Iop_Sar64x2: { 4725 HReg res = newVRegV(env); 4726 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4727 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4728 HReg argR2 = newVRegV(env); 4729 HReg zero = newVRegV(env); 4730 UInt size; 4731 switch (e->Iex.Binop.op) { 4732 case Iop_Sar8x16: size = 0; break; 4733 case Iop_Sar16x8: size = 1; break; 4734 case Iop_Sar32x4: size = 2; break; 4735 case Iop_Sar64x2: size = 3; break; 4736 default: vassert(0); 4737 } 4738 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 4739 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4740 argR2, zero, argR, size, True)); 4741 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4742 res, argL, argR2, size, True)); 4743 return res; 4744 } 4745 case Iop_Sal8x16: 4746 case Iop_Sal16x8: 4747 case Iop_Sal32x4: 4748 case Iop_Sal64x2: { 4749 HReg res = newVRegV(env); 4750 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4751 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4752 UInt size; 4753 switch (e->Iex.Binop.op) { 4754 case Iop_Sal8x16: size = 0; break; 4755 case Iop_Sal16x8: size = 1; break; 4756 case Iop_Sal32x4: size = 2; break; 4757 case Iop_Sal64x2: size = 3; break; 4758 default: vassert(0); 4759 } 4760 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4761 res, argL, argR, size, True)); 4762 return res; 4763 } 4764 case Iop_Shr8x16: 4765 case Iop_Shr16x8: 4766 case Iop_Shr32x4: 4767 case Iop_Shr64x2: { 4768 HReg res = newVRegV(env); 4769 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4770 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4771 HReg argR2 = newVRegV(env); 4772 HReg zero = newVRegV(env); 4773 UInt size; 4774 switch (e->Iex.Binop.op) { 4775 case Iop_Shr8x16: size = 0; break; 4776 case Iop_Shr16x8: size = 1; break; 4777 case Iop_Shr32x4: size = 2; break; 4778 case Iop_Shr64x2: size = 3; break; 4779 default: vassert(0); 4780 } 4781 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 4782 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4783 argR2, zero, argR, size, True)); 4784 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4785 res, argL, argR2, size, True)); 4786 return res; 4787 } 4788 case Iop_Shl8x16: 4789 case Iop_Shl16x8: 4790 case Iop_Shl32x4: 4791 case Iop_Shl64x2: { 4792 HReg res = newVRegV(env); 4793 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4794 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4795 UInt size; 4796 switch (e->Iex.Binop.op) { 4797 case Iop_Shl8x16: size = 0; break; 4798 case Iop_Shl16x8: size = 1; break; 4799 case Iop_Shl32x4: size = 2; break; 4800 case Iop_Shl64x2: size = 3; break; 4801 default: vassert(0); 4802 } 4803 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4804 res, argL, argR, size, True)); 4805 return res; 4806 } 4807 case Iop_QShl8x16: 4808 case Iop_QShl16x8: 4809 case Iop_QShl32x4: 4810 case Iop_QShl64x2: { 4811 HReg res = newVRegV(env); 4812 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4813 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4814 UInt size; 4815 switch (e->Iex.Binop.op) { 4816 case Iop_QShl8x16: size = 0; break; 4817 case Iop_QShl16x8: size = 1; break; 4818 case Iop_QShl32x4: size = 2; break; 4819 case Iop_QShl64x2: size = 3; break; 4820 default: vassert(0); 4821 } 4822 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 4823 res, argL, argR, size, True)); 4824 return res; 4825 } 4826 case Iop_QSal8x16: 4827 case Iop_QSal16x8: 4828 case Iop_QSal32x4: 4829 case Iop_QSal64x2: { 4830 HReg res = newVRegV(env); 4831 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4832 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4833 UInt size; 4834 switch (e->Iex.Binop.op) { 4835 case Iop_QSal8x16: size = 0; break; 4836 case Iop_QSal16x8: size = 1; break; 4837 case Iop_QSal32x4: size = 2; break; 4838 case Iop_QSal64x2: size = 3; break; 4839 default: vassert(0); 4840 } 4841 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 4842 res, argL, argR, size, True)); 4843 return res; 4844 } 4845 case Iop_QShlN8x16: 4846 case Iop_QShlN16x8: 4847 case Iop_QShlN32x4: 4848 case Iop_QShlN64x2: { 4849 HReg res = newVRegV(env); 4850 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4851 UInt size, imm; 4852 if (e->Iex.Binop.arg2->tag != Iex_Const || 4853 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4854 vpanic("ARM taget supports Iop_QShlNAxB with constant " 4855 "second argument only\n"); 4856 } 4857 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4858 switch (e->Iex.Binop.op) { 4859 case Iop_QShlN8x16: size = 8 | imm; break; 4860 case Iop_QShlN16x8: size = 16 | imm; break; 4861 case Iop_QShlN32x4: size = 32 | imm; break; 4862 case Iop_QShlN64x2: size = 64 | imm; break; 4863 default: vassert(0); 4864 } 4865 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 4866 res, argL, size, True)); 4867 return res; 4868 } 4869 case Iop_QShlN8Sx16: 4870 case Iop_QShlN16Sx8: 4871 case Iop_QShlN32Sx4: 4872 case Iop_QShlN64Sx2: { 4873 HReg res = newVRegV(env); 4874 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4875 UInt size, imm; 4876 if (e->Iex.Binop.arg2->tag != Iex_Const || 4877 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4878 vpanic("ARM taget supports Iop_QShlNASxB with constant " 4879 "second argument only\n"); 4880 } 4881 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4882 switch (e->Iex.Binop.op) { 4883 case Iop_QShlN8Sx16: size = 8 | imm; break; 4884 case Iop_QShlN16Sx8: size = 16 | imm; break; 4885 case Iop_QShlN32Sx4: size = 32 | imm; break; 4886 case Iop_QShlN64Sx2: size = 64 | imm; break; 4887 default: vassert(0); 4888 } 4889 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 4890 res, argL, size, True)); 4891 return res; 4892 } 4893 case Iop_QSalN8x16: 4894 case Iop_QSalN16x8: 4895 case Iop_QSalN32x4: 4896 case Iop_QSalN64x2: { 4897 HReg res = newVRegV(env); 4898 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4899 UInt size, imm; 4900 if (e->Iex.Binop.arg2->tag != Iex_Const || 4901 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4902 vpanic("ARM taget supports Iop_QShlNAxB with constant " 4903 "second argument only\n"); 4904 } 4905 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4906 switch (e->Iex.Binop.op) { 4907 case Iop_QSalN8x16: size = 8 | imm; break; 4908 case Iop_QSalN16x8: size = 16 | imm; break; 4909 case Iop_QSalN32x4: size = 32 | imm; break; 4910 case Iop_QSalN64x2: size = 64 | imm; break; 4911 default: vassert(0); 4912 } 4913 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 4914 res, argL, size, True)); 4915 return res; 4916 } 4917 case Iop_ShrN8x16: 4918 case Iop_ShrN16x8: 4919 case Iop_ShrN32x4: 4920 case Iop_ShrN64x2: { 4921 HReg res = newVRegV(env); 4922 HReg tmp = newVRegV(env); 4923 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4924 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4925 HReg argR2 = newVRegI(env); 4926 UInt size; 4927 switch (e->Iex.Binop.op) { 4928 case Iop_ShrN8x16: size = 0; break; 4929 case Iop_ShrN16x8: size = 1; break; 4930 case Iop_ShrN32x4: size = 2; break; 4931 case Iop_ShrN64x2: size = 3; break; 4932 default: vassert(0); 4933 } 4934 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 4935 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, 4936 tmp, argR2, 0, True)); 4937 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4938 res, argL, tmp, size, True)); 4939 return res; 4940 } 4941 case Iop_ShlN8x16: 4942 case Iop_ShlN16x8: 4943 case Iop_ShlN32x4: 4944 case Iop_ShlN64x2: { 4945 HReg res = newVRegV(env); 4946 HReg tmp = newVRegV(env); 4947 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4948 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4949 UInt size; 4950 switch (e->Iex.Binop.op) { 4951 case Iop_ShlN8x16: size = 0; break; 4952 case Iop_ShlN16x8: size = 1; break; 4953 case Iop_ShlN32x4: size = 2; break; 4954 case Iop_ShlN64x2: size = 3; break; 4955 default: vassert(0); 4956 } 4957 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True)); 4958 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4959 res, argL, tmp, size, True)); 4960 return res; 4961 } 4962 case Iop_SarN8x16: 4963 case Iop_SarN16x8: 4964 case Iop_SarN32x4: 4965 case Iop_SarN64x2: { 4966 HReg res = newVRegV(env); 4967 HReg tmp = newVRegV(env); 4968 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4969 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4970 HReg argR2 = newVRegI(env); 4971 UInt size; 4972 switch (e->Iex.Binop.op) { 4973 case Iop_SarN8x16: size = 0; break; 4974 case Iop_SarN16x8: size = 1; break; 4975 case Iop_SarN32x4: size = 2; break; 4976 case Iop_SarN64x2: size = 3; break; 4977 default: vassert(0); 4978 } 4979 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 4980 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True)); 4981 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4982 res, argL, tmp, size, True)); 4983 return res; 4984 } 4985 case Iop_CmpGT8Ux16: 4986 case Iop_CmpGT16Ux8: 4987 case Iop_CmpGT32Ux4: { 4988 HReg res = newVRegV(env); 4989 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4990 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4991 UInt size; 4992 switch (e->Iex.Binop.op) { 4993 case Iop_CmpGT8Ux16: size = 0; break; 4994 case Iop_CmpGT16Ux8: size = 1; break; 4995 case Iop_CmpGT32Ux4: size = 2; break; 4996 default: vassert(0); 4997 } 4998 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 4999 res, argL, argR, size, True)); 5000 return res; 5001 } 5002 case Iop_CmpGT8Sx16: 5003 case Iop_CmpGT16Sx8: 5004 case Iop_CmpGT32Sx4: { 5005 HReg res = newVRegV(env); 5006 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5007 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5008 UInt size; 5009 switch (e->Iex.Binop.op) { 5010 case Iop_CmpGT8Sx16: size = 0; break; 5011 case Iop_CmpGT16Sx8: size = 1; break; 5012 case Iop_CmpGT32Sx4: size = 2; break; 5013 default: vassert(0); 5014 } 5015 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 5016 res, argL, argR, size, True)); 5017 return res; 5018 } 5019 case Iop_CmpEQ8x16: 5020 case Iop_CmpEQ16x8: 5021 case Iop_CmpEQ32x4: { 5022 HReg res = newVRegV(env); 5023 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5024 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5025 UInt size; 5026 switch (e->Iex.Binop.op) { 5027 case Iop_CmpEQ8x16: size = 0; break; 5028 case Iop_CmpEQ16x8: size = 1; break; 5029 case Iop_CmpEQ32x4: size = 2; break; 5030 default: vassert(0); 5031 } 5032 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 5033 res, argL, argR, size, True)); 5034 return res; 5035 } 5036 case Iop_Mul8x16: 5037 case Iop_Mul16x8: 5038 case Iop_Mul32x4: { 5039 HReg res = newVRegV(env); 5040 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5041 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5042 UInt size = 0; 5043 switch(e->Iex.Binop.op) { 5044 case Iop_Mul8x16: size = 0; break; 5045 case Iop_Mul16x8: size = 1; break; 5046 case Iop_Mul32x4: size = 2; break; 5047 default: vassert(0); 5048 } 5049 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 5050 res, argL, argR, size, True)); 5051 return res; 5052 } 5053 case Iop_Mull8Ux8: 5054 case Iop_Mull16Ux4: 5055 case Iop_Mull32Ux2: { 5056 HReg res = newVRegV(env); 5057 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5058 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5059 UInt size = 0; 5060 switch(e->Iex.Binop.op) { 5061 case Iop_Mull8Ux8: size = 0; break; 5062 case Iop_Mull16Ux4: size = 1; break; 5063 case Iop_Mull32Ux2: size = 2; break; 5064 default: vassert(0); 5065 } 5066 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU, 5067 res, argL, argR, size, True)); 5068 return res; 5069 } 5070 5071 case Iop_Mull8Sx8: 5072 case Iop_Mull16Sx4: 5073 case Iop_Mull32Sx2: { 5074 HReg res = newVRegV(env); 5075 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5076 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5077 UInt size = 0; 5078 switch(e->Iex.Binop.op) { 5079 case Iop_Mull8Sx8: size = 0; break; 5080 case Iop_Mull16Sx4: size = 1; break; 5081 case Iop_Mull32Sx2: size = 2; break; 5082 default: vassert(0); 5083 } 5084 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS, 5085 res, argL, argR, size, True)); 5086 return res; 5087 } 5088 5089 case Iop_QDMulHi16Sx8: 5090 case Iop_QDMulHi32Sx4: { 5091 HReg res = newVRegV(env); 5092 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5093 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5094 UInt size = 0; 5095 switch(e->Iex.Binop.op) { 5096 case Iop_QDMulHi16Sx8: size = 1; break; 5097 case Iop_QDMulHi32Sx4: size = 2; break; 5098 default: vassert(0); 5099 } 5100 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 5101 res, argL, argR, size, True)); 5102 return res; 5103 } 5104 5105 case Iop_QRDMulHi16Sx8: 5106 case Iop_QRDMulHi32Sx4: { 5107 HReg res = newVRegV(env); 5108 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5109 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5110 UInt size = 0; 5111 switch(e->Iex.Binop.op) { 5112 case Iop_QRDMulHi16Sx8: size = 1; break; 5113 case Iop_QRDMulHi32Sx4: size = 2; break; 5114 default: vassert(0); 5115 } 5116 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 5117 res, argL, argR, size, True)); 5118 return res; 5119 } 5120 5121 case Iop_QDMulLong16Sx4: 5122 case Iop_QDMulLong32Sx2: { 5123 HReg res = newVRegV(env); 5124 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5125 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5126 UInt size = 0; 5127 switch(e->Iex.Binop.op) { 5128 case Iop_QDMulLong16Sx4: size = 1; break; 5129 case Iop_QDMulLong32Sx2: size = 2; break; 5130 default: vassert(0); 5131 } 5132 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL, 5133 res, argL, argR, size, True)); 5134 return res; 5135 } 5136 case Iop_PolynomialMul8x16: { 5137 HReg res = newVRegV(env); 5138 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5139 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5140 UInt size = 0; 5141 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 5142 res, argL, argR, size, True)); 5143 return res; 5144 } 5145 case Iop_Max32Fx4: { 5146 HReg res = newVRegV(env); 5147 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5148 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5149 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 5150 res, argL, argR, 2, True)); 5151 return res; 5152 } 5153 case Iop_Min32Fx4: { 5154 HReg res = newVRegV(env); 5155 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5156 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5157 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, 5158 res, argL, argR, 2, True)); 5159 return res; 5160 } 5161 case Iop_PwMax32Fx4: { 5162 HReg res = newVRegV(env); 5163 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5164 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5165 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 5166 res, argL, argR, 2, True)); 5167 return res; 5168 } 5169 case Iop_PwMin32Fx4: { 5170 HReg res = newVRegV(env); 5171 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5172 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5173 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 5174 res, argL, argR, 2, True)); 5175 return res; 5176 } 5177 case Iop_CmpGT32Fx4: { 5178 HReg res = newVRegV(env); 5179 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5180 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5181 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 5182 res, argL, argR, 2, True)); 5183 return res; 5184 } 5185 case Iop_CmpGE32Fx4: { 5186 HReg res = newVRegV(env); 5187 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5188 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5189 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 5190 res, argL, argR, 2, True)); 5191 return res; 5192 } 5193 case Iop_CmpEQ32Fx4: { 5194 HReg res = newVRegV(env); 5195 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5196 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5197 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 5198 res, argL, argR, 2, True)); 5199 return res; 5200 } 5201 5202 case Iop_PolynomialMull8x8: { 5203 HReg res = newVRegV(env); 5204 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5205 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5206 UInt size = 0; 5207 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP, 5208 res, argL, argR, size, True)); 5209 return res; 5210 } 5211 case Iop_F32ToFixed32Ux4_RZ: 5212 case Iop_F32ToFixed32Sx4_RZ: 5213 case Iop_Fixed32UToF32x4_RN: 5214 case Iop_Fixed32SToF32x4_RN: { 5215 HReg res = newVRegV(env); 5216 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); 5217 ARMNeonUnOp op; 5218 UInt imm6; 5219 if (e->Iex.Binop.arg2->tag != Iex_Const || 5220 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5221 vpanic("ARM supports FP <-> Fixed conversion with constant " 5222 "second argument less than 33 only\n"); 5223 } 5224 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5225 vassert(imm6 <= 32 && imm6 > 0); 5226 imm6 = 64 - imm6; 5227 switch(e->Iex.Binop.op) { 5228 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break; 5229 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break; 5230 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break; 5231 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break; 5232 default: vassert(0); 5233 } 5234 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True)); 5235 return res; 5236 } 5237 /* 5238 FIXME remove if not used 5239 case Iop_VDup8x16: 5240 case Iop_VDup16x8: 5241 case Iop_VDup32x4: { 5242 HReg res = newVRegV(env); 5243 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5244 UInt imm4; 5245 UInt index; 5246 if (e->Iex.Binop.arg2->tag != Iex_Const || 5247 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5248 vpanic("ARM supports Iop_VDup with constant " 5249 "second argument less than 16 only\n"); 5250 } 5251 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5252 switch(e->Iex.Binop.op) { 5253 case Iop_VDup8x16: imm4 = (index << 1) + 1; break; 5254 case Iop_VDup16x8: imm4 = (index << 2) + 2; break; 5255 case Iop_VDup32x4: imm4 = (index << 3) + 4; break; 5256 default: vassert(0); 5257 } 5258 if (imm4 >= 16) { 5259 vpanic("ARM supports Iop_VDup with constant " 5260 "second argument less than 16 only\n"); 5261 } 5262 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 5263 res, argL, imm4, True)); 5264 return res; 5265 } 5266 */ 5267 case Iop_PwAdd8x16: 5268 case Iop_PwAdd16x8: 5269 case Iop_PwAdd32x4: { 5270 HReg res = newVRegV(env); 5271 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5272 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5273 UInt size = 0; 5274 switch(e->Iex.Binop.op) { 5275 case Iop_PwAdd8x16: size = 0; break; 5276 case Iop_PwAdd16x8: size = 1; break; 5277 case Iop_PwAdd32x4: size = 2; break; 5278 default: vassert(0); 5279 } 5280 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 5281 res, argL, argR, size, True)); 5282 return res; 5283 } 5284 /* ... */ 5285 default: 5286 break; 5287 } 5288 } 5289 5290 if (e->tag == Iex_Triop) { 5291 IRTriop *triop = e->Iex.Triop.details; 5292 5293 switch (triop->op) { 5294 case Iop_ExtractV128: { 5295 HReg res = newVRegV(env); 5296 HReg argL = iselNeonExpr(env, triop->arg1); 5297 HReg argR = iselNeonExpr(env, triop->arg2); 5298 UInt imm4; 5299 if (triop->arg3->tag != Iex_Const || 5300 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 5301 vpanic("ARM target supports Iop_ExtractV128 with constant " 5302 "third argument less than 16 only\n"); 5303 } 5304 imm4 = triop->arg3->Iex.Const.con->Ico.U8; 5305 if (imm4 >= 16) { 5306 vpanic("ARM target supports Iop_ExtractV128 with constant " 5307 "third argument less than 16 only\n"); 5308 } 5309 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, 5310 res, argL, argR, imm4, True)); 5311 return res; 5312 } 5313 case Iop_Mul32Fx4: 5314 case Iop_Sub32Fx4: 5315 case Iop_Add32Fx4: { 5316 HReg res = newVRegV(env); 5317 HReg argL = iselNeonExpr(env, triop->arg2); 5318 HReg argR = iselNeonExpr(env, triop->arg3); 5319 UInt size = 0; 5320 ARMNeonBinOp op = ARMneon_INVALID; 5321 switch (triop->op) { 5322 case Iop_Mul32Fx4: op = ARMneon_VMULFP; break; 5323 case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break; 5324 case Iop_Add32Fx4: op = ARMneon_VADDFP; break; 5325 default: vassert(0); 5326 } 5327 addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True)); 5328 return res; 5329 } 5330 default: 5331 break; 5332 } 5333 } 5334 5335 if (e->tag == Iex_ITE) { // VFD 5336 ARMCondCode cc; 5337 HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue); 5338 HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse); 5339 HReg dst = newVRegV(env); 5340 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True)); 5341 cc = iselCondCode(env, e->Iex.ITE.cond); 5342 addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0)); 5343 return dst; 5344 } 5345 5346 neon_expr_bad: 5347 ppIRExpr(e); 5348 vpanic("iselNeonExpr_wrk"); 5349 } 5350 5351 /*---------------------------------------------------------*/ 5352 /*--- ISEL: Floating point expressions (64 bit) ---*/ 5353 /*---------------------------------------------------------*/ 5354 5355 /* Compute a 64-bit floating point value into a register, the identity 5356 of which is returned. As with iselIntExpr_R, the reg may be either 5357 real or virtual; in any case it must not be changed by subsequent 5358 code emitted by the caller. */ 5359 5360 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 5361 { 5362 HReg r = iselDblExpr_wrk( env, e ); 5363 # if 0 5364 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5365 # endif 5366 vassert(hregClass(r) == HRcFlt64); 5367 vassert(hregIsVirtual(r)); 5368 return r; 5369 } 5370 5371 /* DO NOT CALL THIS DIRECTLY */ 5372 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 5373 { 5374 IRType ty = typeOfIRExpr(env->type_env,e); 5375 vassert(e); 5376 vassert(ty == Ity_F64); 5377 5378 if (e->tag == Iex_RdTmp) { 5379 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 5380 } 5381 5382 if (e->tag == Iex_Const) { 5383 /* Just handle the zero case. */ 5384 IRConst* con = e->Iex.Const.con; 5385 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) { 5386 HReg z32 = newVRegI(env); 5387 HReg dst = newVRegD(env); 5388 addInstr(env, ARMInstr_Imm32(z32, 0)); 5389 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32)); 5390 return dst; 5391 } 5392 } 5393 5394 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 5395 ARMAModeV* am; 5396 HReg res = newVRegD(env); 5397 vassert(e->Iex.Load.ty == Ity_F64); 5398 am = iselIntExpr_AModeV(env, e->Iex.Load.addr); 5399 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am)); 5400 return res; 5401 } 5402 5403 if (e->tag == Iex_Get) { 5404 // XXX This won't work if offset > 1020 or is not 0 % 4. 5405 // In which case we'll have to generate more longwinded code. 5406 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset); 5407 HReg res = newVRegD(env); 5408 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am)); 5409 return res; 5410 } 5411 5412 if (e->tag == Iex_Unop) { 5413 switch (e->Iex.Unop.op) { 5414 case Iop_ReinterpI64asF64: { 5415 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5416 return iselNeon64Expr(env, e->Iex.Unop.arg); 5417 } else { 5418 HReg srcHi, srcLo; 5419 HReg dst = newVRegD(env); 5420 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 5421 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo)); 5422 return dst; 5423 } 5424 } 5425 case Iop_NegF64: { 5426 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 5427 HReg dst = newVRegD(env); 5428 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src)); 5429 return dst; 5430 } 5431 case Iop_AbsF64: { 5432 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 5433 HReg dst = newVRegD(env); 5434 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src)); 5435 return dst; 5436 } 5437 case Iop_F32toF64: { 5438 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 5439 HReg dst = newVRegD(env); 5440 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src)); 5441 return dst; 5442 } 5443 case Iop_I32UtoF64: 5444 case Iop_I32StoF64: { 5445 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 5446 HReg f32 = newVRegF(env); 5447 HReg dst = newVRegD(env); 5448 Bool syned = e->Iex.Unop.op == Iop_I32StoF64; 5449 /* VMOV f32, src */ 5450 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src)); 5451 /* FSITOD dst, f32 */ 5452 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned, 5453 dst, f32)); 5454 return dst; 5455 } 5456 default: 5457 break; 5458 } 5459 } 5460 5461 if (e->tag == Iex_Binop) { 5462 switch (e->Iex.Binop.op) { 5463 case Iop_SqrtF64: { 5464 /* first arg is rounding mode; we ignore it. */ 5465 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 5466 HReg dst = newVRegD(env); 5467 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src)); 5468 return dst; 5469 } 5470 default: 5471 break; 5472 } 5473 } 5474 5475 if (e->tag == Iex_Triop) { 5476 IRTriop *triop = e->Iex.Triop.details; 5477 5478 switch (triop->op) { 5479 case Iop_DivF64: 5480 case Iop_MulF64: 5481 case Iop_AddF64: 5482 case Iop_SubF64: { 5483 ARMVfpOp op = 0; /*INVALID*/ 5484 HReg argL = iselDblExpr(env, triop->arg2); 5485 HReg argR = iselDblExpr(env, triop->arg3); 5486 HReg dst = newVRegD(env); 5487 switch (triop->op) { 5488 case Iop_DivF64: op = ARMvfp_DIV; break; 5489 case Iop_MulF64: op = ARMvfp_MUL; break; 5490 case Iop_AddF64: op = ARMvfp_ADD; break; 5491 case Iop_SubF64: op = ARMvfp_SUB; break; 5492 default: vassert(0); 5493 } 5494 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR)); 5495 return dst; 5496 } 5497 default: 5498 break; 5499 } 5500 } 5501 5502 if (e->tag == Iex_ITE) { // VFD 5503 if (ty == Ity_F64 5504 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 5505 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 5506 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 5507 HReg dst = newVRegD(env); 5508 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1)); 5509 ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond); 5510 addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0)); 5511 return dst; 5512 } 5513 } 5514 5515 ppIRExpr(e); 5516 vpanic("iselDblExpr_wrk"); 5517 } 5518 5519 5520 /*---------------------------------------------------------*/ 5521 /*--- ISEL: Floating point expressions (32 bit) ---*/ 5522 /*---------------------------------------------------------*/ 5523 5524 /* Compute a 32-bit floating point value into a register, the identity 5525 of which is returned. As with iselIntExpr_R, the reg may be either 5526 real or virtual; in any case it must not be changed by subsequent 5527 code emitted by the caller. */ 5528 5529 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 5530 { 5531 HReg r = iselFltExpr_wrk( env, e ); 5532 # if 0 5533 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5534 # endif 5535 vassert(hregClass(r) == HRcFlt32); 5536 vassert(hregIsVirtual(r)); 5537 return r; 5538 } 5539 5540 /* DO NOT CALL THIS DIRECTLY */ 5541 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 5542 { 5543 IRType ty = typeOfIRExpr(env->type_env,e); 5544 vassert(e); 5545 vassert(ty == Ity_F32); 5546 5547 if (e->tag == Iex_RdTmp) { 5548 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 5549 } 5550 5551 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 5552 ARMAModeV* am; 5553 HReg res = newVRegF(env); 5554 vassert(e->Iex.Load.ty == Ity_F32); 5555 am = iselIntExpr_AModeV(env, e->Iex.Load.addr); 5556 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am)); 5557 return res; 5558 } 5559 5560 if (e->tag == Iex_Get) { 5561 // XXX This won't work if offset > 1020 or is not 0 % 4. 5562 // In which case we'll have to generate more longwinded code. 5563 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset); 5564 HReg res = newVRegF(env); 5565 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am)); 5566 return res; 5567 } 5568 5569 if (e->tag == Iex_Unop) { 5570 switch (e->Iex.Unop.op) { 5571 case Iop_ReinterpI32asF32: { 5572 HReg dst = newVRegF(env); 5573 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 5574 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src)); 5575 return dst; 5576 } 5577 case Iop_NegF32: { 5578 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 5579 HReg dst = newVRegF(env); 5580 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src)); 5581 return dst; 5582 } 5583 case Iop_AbsF32: { 5584 HReg src = iselFltExpr(env, e->Iex.Unop.arg); 5585 HReg dst = newVRegF(env); 5586 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src)); 5587 return dst; 5588 } 5589 default: 5590 break; 5591 } 5592 } 5593 5594 if (e->tag == Iex_Binop) { 5595 switch (e->Iex.Binop.op) { 5596 case Iop_SqrtF32: { 5597 /* first arg is rounding mode; we ignore it. */ 5598 HReg src = iselFltExpr(env, e->Iex.Binop.arg2); 5599 HReg dst = newVRegF(env); 5600 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src)); 5601 return dst; 5602 } 5603 case Iop_F64toF32: { 5604 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2); 5605 set_VFP_rounding_mode(env, e->Iex.Binop.arg1); 5606 HReg valS = newVRegF(env); 5607 /* FCVTSD valS, valD */ 5608 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD)); 5609 set_VFP_rounding_default(env); 5610 return valS; 5611 } 5612 default: 5613 break; 5614 } 5615 } 5616 5617 if (e->tag == Iex_Triop) { 5618 IRTriop *triop = e->Iex.Triop.details; 5619 5620 switch (triop->op) { 5621 case Iop_DivF32: 5622 case Iop_MulF32: 5623 case Iop_AddF32: 5624 case Iop_SubF32: { 5625 ARMVfpOp op = 0; /*INVALID*/ 5626 HReg argL = iselFltExpr(env, triop->arg2); 5627 HReg argR = iselFltExpr(env, triop->arg3); 5628 HReg dst = newVRegF(env); 5629 switch (triop->op) { 5630 case Iop_DivF32: op = ARMvfp_DIV; break; 5631 case Iop_MulF32: op = ARMvfp_MUL; break; 5632 case Iop_AddF32: op = ARMvfp_ADD; break; 5633 case Iop_SubF32: op = ARMvfp_SUB; break; 5634 default: vassert(0); 5635 } 5636 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR)); 5637 return dst; 5638 } 5639 default: 5640 break; 5641 } 5642 } 5643 5644 if (e->tag == Iex_ITE) { // VFD 5645 if (ty == Ity_F32 5646 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 5647 ARMCondCode cc; 5648 HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); 5649 HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); 5650 HReg dst = newVRegF(env); 5651 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1)); 5652 cc = iselCondCode(env, e->Iex.ITE.cond); 5653 addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0)); 5654 return dst; 5655 } 5656 } 5657 5658 ppIRExpr(e); 5659 vpanic("iselFltExpr_wrk"); 5660 } 5661 5662 5663 /*---------------------------------------------------------*/ 5664 /*--- ISEL: Statements ---*/ 5665 /*---------------------------------------------------------*/ 5666 5667 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 5668 { 5669 if (vex_traceflags & VEX_TRACE_VCODE) { 5670 vex_printf("\n-- "); 5671 ppIRStmt(stmt); 5672 vex_printf("\n"); 5673 } 5674 switch (stmt->tag) { 5675 5676 /* --------- STORE --------- */ 5677 /* little-endian write to memory */ 5678 case Ist_Store: { 5679 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 5680 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 5681 IREndness end = stmt->Ist.Store.end; 5682 5683 if (tya != Ity_I32 || end != Iend_LE) 5684 goto stmt_fail; 5685 5686 if (tyd == Ity_I32) { 5687 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5688 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); 5689 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am)); 5690 return; 5691 } 5692 if (tyd == Ity_I16) { 5693 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5694 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr); 5695 addInstr(env, ARMInstr_LdSt16(ARMcc_AL, 5696 False/*!isLoad*/, 5697 False/*!isSignedLoad*/, rD, am)); 5698 return; 5699 } 5700 if (tyd == Ity_I8) { 5701 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5702 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); 5703 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am)); 5704 return; 5705 } 5706 if (tyd == Ity_I64) { 5707 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5708 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); 5709 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 5710 addInstr(env, ARMInstr_NLdStD(False, dD, am)); 5711 } else { 5712 HReg rDhi, rDlo, rA; 5713 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data); 5714 rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 5715 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi, 5716 ARMAMode1_RI(rA,4))); 5717 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo, 5718 ARMAMode1_RI(rA,0))); 5719 } 5720 return; 5721 } 5722 if (tyd == Ity_F64) { 5723 HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 5724 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 5725 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am)); 5726 return; 5727 } 5728 if (tyd == Ity_F32) { 5729 HReg fD = iselFltExpr(env, stmt->Ist.Store.data); 5730 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 5731 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am)); 5732 return; 5733 } 5734 if (tyd == Ity_V128) { 5735 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data); 5736 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 5737 addInstr(env, ARMInstr_NLdStQ(False, qD, am)); 5738 return; 5739 } 5740 5741 break; 5742 } 5743 5744 /* --------- CONDITIONAL STORE --------- */ 5745 /* conditional little-endian write to memory */ 5746 case Ist_StoreG: { 5747 IRStoreG* sg = stmt->Ist.StoreG.details; 5748 IRType tya = typeOfIRExpr(env->type_env, sg->addr); 5749 IRType tyd = typeOfIRExpr(env->type_env, sg->data); 5750 IREndness end = sg->end; 5751 5752 if (tya != Ity_I32 || end != Iend_LE) 5753 goto stmt_fail; 5754 5755 switch (tyd) { 5756 case Ity_I8: 5757 case Ity_I32: { 5758 HReg rD = iselIntExpr_R(env, sg->data); 5759 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr); 5760 ARMCondCode cc = iselCondCode(env, sg->guard); 5761 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U) 5762 (cc, False/*!isLoad*/, rD, am)); 5763 return; 5764 } 5765 case Ity_I16: { 5766 HReg rD = iselIntExpr_R(env, sg->data); 5767 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr); 5768 ARMCondCode cc = iselCondCode(env, sg->guard); 5769 addInstr(env, ARMInstr_LdSt16(cc, 5770 False/*!isLoad*/, 5771 False/*!isSignedLoad*/, rD, am)); 5772 return; 5773 } 5774 default: 5775 break; 5776 } 5777 break; 5778 } 5779 5780 /* --------- CONDITIONAL LOAD --------- */ 5781 /* conditional little-endian load from memory */ 5782 case Ist_LoadG: { 5783 IRLoadG* lg = stmt->Ist.LoadG.details; 5784 IRType tya = typeOfIRExpr(env->type_env, lg->addr); 5785 IREndness end = lg->end; 5786 5787 if (tya != Ity_I32 || end != Iend_LE) 5788 goto stmt_fail; 5789 5790 switch (lg->cvt) { 5791 case ILGop_8Uto32: 5792 case ILGop_Ident32: { 5793 HReg rAlt = iselIntExpr_R(env, lg->alt); 5794 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr); 5795 HReg rD = lookupIRTemp(env, lg->dst); 5796 addInstr(env, mk_iMOVds_RR(rD, rAlt)); 5797 ARMCondCode cc = iselCondCode(env, lg->guard); 5798 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32 5799 : ARMInstr_LdSt8U) 5800 (cc, True/*isLoad*/, rD, am)); 5801 return; 5802 } 5803 case ILGop_16Sto32: 5804 case ILGop_16Uto32: 5805 case ILGop_8Sto32: { 5806 HReg rAlt = iselIntExpr_R(env, lg->alt); 5807 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr); 5808 HReg rD = lookupIRTemp(env, lg->dst); 5809 addInstr(env, mk_iMOVds_RR(rD, rAlt)); 5810 ARMCondCode cc = iselCondCode(env, lg->guard); 5811 if (lg->cvt == ILGop_8Sto32) { 5812 addInstr(env, ARMInstr_Ld8S(cc, rD, am)); 5813 } else { 5814 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32); 5815 Bool sx = lg->cvt == ILGop_16Sto32; 5816 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am)); 5817 } 5818 return; 5819 } 5820 default: 5821 break; 5822 } 5823 break; 5824 } 5825 5826 /* --------- PUT --------- */ 5827 /* write guest state, fixed offset */ 5828 case Ist_Put: { 5829 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 5830 5831 if (tyd == Ity_I32) { 5832 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 5833 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset); 5834 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am)); 5835 return; 5836 } 5837 if (tyd == Ity_I64) { 5838 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5839 HReg addr = newVRegI(env); 5840 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); 5841 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), 5842 stmt->Ist.Put.offset)); 5843 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr))); 5844 } else { 5845 HReg rDhi, rDlo; 5846 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), 5847 stmt->Ist.Put.offset + 0); 5848 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), 5849 stmt->Ist.Put.offset + 4); 5850 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data); 5851 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, 5852 rDhi, am4)); 5853 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, 5854 rDlo, am0)); 5855 } 5856 return; 5857 } 5858 if (tyd == Ity_F64) { 5859 // XXX This won't work if offset > 1020 or is not 0 % 4. 5860 // In which case we'll have to generate more longwinded code. 5861 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 5862 HReg rD = iselDblExpr(env, stmt->Ist.Put.data); 5863 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am)); 5864 return; 5865 } 5866 if (tyd == Ity_F32) { 5867 // XXX This won't work if offset > 1020 or is not 0 % 4. 5868 // In which case we'll have to generate more longwinded code. 5869 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 5870 HReg rD = iselFltExpr(env, stmt->Ist.Put.data); 5871 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am)); 5872 return; 5873 } 5874 if (tyd == Ity_V128) { 5875 HReg addr = newVRegI(env); 5876 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data); 5877 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), 5878 stmt->Ist.Put.offset)); 5879 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr))); 5880 return; 5881 } 5882 break; 5883 } 5884 5885 /* --------- TMP --------- */ 5886 /* assign value to temporary */ 5887 case Ist_WrTmp: { 5888 IRTemp tmp = stmt->Ist.WrTmp.tmp; 5889 IRType ty = typeOfIRTemp(env->type_env, tmp); 5890 5891 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 5892 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False, 5893 env, stmt->Ist.WrTmp.data); 5894 HReg dst = lookupIRTemp(env, tmp); 5895 addInstr(env, ARMInstr_Mov(dst,ri84)); 5896 return; 5897 } 5898 if (ty == Ity_I1) { 5899 /* Here, we are generating a I1 value into a 32 bit register. 5900 Make sure the value in the register is only zero or one, 5901 but no other. This allows optimisation of the 5902 1Uto32(tmp:I1) case, by making it simply a copy of the 5903 register holding 'tmp'. The point being that the value in 5904 the register holding 'tmp' can only have been created 5905 here. */ 5906 HReg dst = lookupIRTemp(env, tmp); 5907 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 5908 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 5909 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 5910 return; 5911 } 5912 if (ty == Ity_I64) { 5913 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5914 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data); 5915 HReg dst = lookupIRTemp(env, tmp); 5916 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False)); 5917 } else { 5918 HReg rHi, rLo, dstHi, dstLo; 5919 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 5920 lookupIRTemp64( &dstHi, &dstLo, env, tmp); 5921 addInstr(env, mk_iMOVds_RR(dstHi, rHi) ); 5922 addInstr(env, mk_iMOVds_RR(dstLo, rLo) ); 5923 } 5924 return; 5925 } 5926 if (ty == Ity_F64) { 5927 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 5928 HReg dst = lookupIRTemp(env, tmp); 5929 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src)); 5930 return; 5931 } 5932 if (ty == Ity_F32) { 5933 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 5934 HReg dst = lookupIRTemp(env, tmp); 5935 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src)); 5936 return; 5937 } 5938 if (ty == Ity_V128) { 5939 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data); 5940 HReg dst = lookupIRTemp(env, tmp); 5941 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True)); 5942 return; 5943 } 5944 break; 5945 } 5946 5947 /* --------- Call to DIRTY helper --------- */ 5948 /* call complex ("dirty") helper function */ 5949 case Ist_Dirty: { 5950 IRDirty* d = stmt->Ist.Dirty.details; 5951 5952 /* Figure out the return type, if any. */ 5953 IRType retty = Ity_INVALID; 5954 if (d->tmp != IRTemp_INVALID) 5955 retty = typeOfIRTemp(env->type_env, d->tmp); 5956 5957 Bool retty_ok = False; 5958 switch (retty) { 5959 case Ity_INVALID: /* function doesn't return anything */ 5960 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: 5961 //case Ity_V128: //ATC 5962 retty_ok = True; break; 5963 default: 5964 break; 5965 } 5966 if (!retty_ok) 5967 break; /* will go to stmt_fail: */ 5968 5969 /* Marshal args, do the call, and set the return value to 0x555..555 5970 if this is a conditional call that returns a value and the 5971 call is skipped. */ 5972 UInt addToSp = 0; 5973 RetLoc rloc = mk_RetLoc_INVALID(); 5974 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); 5975 vassert(is_sane_RetLoc(rloc)); 5976 5977 /* Now figure out what to do with the returned value, if any. */ 5978 switch (retty) { 5979 case Ity_INVALID: { 5980 /* No return value. Nothing to do. */ 5981 vassert(d->tmp == IRTemp_INVALID); 5982 vassert(rloc.pri == RLPri_None); 5983 vassert(addToSp == 0); 5984 return; 5985 } 5986 case Ity_I64: { 5987 vassert(rloc.pri == RLPri_2Int); 5988 vassert(addToSp == 0); 5989 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5990 HReg tmp = lookupIRTemp(env, d->tmp); 5991 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(), 5992 hregARM_R0())); 5993 } else { 5994 HReg dstHi, dstLo; 5995 /* The returned value is in r1:r0. Park it in the 5996 register-pair associated with tmp. */ 5997 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp); 5998 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) ); 5999 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) ); 6000 } 6001 return; 6002 } 6003 case Ity_I32: case Ity_I16: case Ity_I8: { 6004 vassert(rloc.pri == RLPri_Int); 6005 vassert(addToSp == 0); 6006 /* The returned value is in r0. Park it in the register 6007 associated with tmp. */ 6008 HReg dst = lookupIRTemp(env, d->tmp); 6009 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) ); 6010 return; 6011 } 6012 case Ity_V128: { 6013 vassert(0); // ATC. The code that this produces really 6014 // needs to be looked at, to verify correctness. 6015 // I don't think this can ever happen though, since the 6016 // ARM front end never produces 128-bit loads/stores. 6017 // Hence the following is mostly theoretical. 6018 /* The returned value is on the stack, and *retloc tells 6019 us where. Fish it off the stack and then move the 6020 stack pointer upwards to clear it, as directed by 6021 doHelperCall. */ 6022 vassert(rloc.pri == RLPri_V128SpRel); 6023 vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it 6024 vassert(addToSp >= 16); 6025 vassert(addToSp < 256); // ditto reason as for rloc.spOff 6026 HReg dst = lookupIRTemp(env, d->tmp); 6027 HReg tmp = newVRegI(env); 6028 HReg r13 = hregARM_R13(); // sp 6029 addInstr(env, ARMInstr_Alu(ARMalu_ADD, 6030 tmp, r13, ARMRI84_I84(rloc.spOff,0))); 6031 ARMAModeN* am = mkARMAModeN_R(tmp); 6032 addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am)); 6033 addInstr(env, ARMInstr_Alu(ARMalu_ADD, 6034 r13, r13, ARMRI84_I84(addToSp,0))); 6035 return; 6036 } 6037 default: 6038 /*NOTREACHED*/ 6039 vassert(0); 6040 } 6041 break; 6042 } 6043 6044 /* --------- Load Linked and Store Conditional --------- */ 6045 case Ist_LLSC: { 6046 if (stmt->Ist.LLSC.storedata == NULL) { 6047 /* LL */ 6048 IRTemp res = stmt->Ist.LLSC.result; 6049 IRType ty = typeOfIRTemp(env->type_env, res); 6050 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 6051 Int szB = 0; 6052 HReg r_dst = lookupIRTemp(env, res); 6053 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6054 switch (ty) { 6055 case Ity_I8: szB = 1; break; 6056 case Ity_I16: szB = 2; break; 6057 case Ity_I32: szB = 4; break; 6058 default: vassert(0); 6059 } 6060 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr)); 6061 addInstr(env, ARMInstr_LdrEX(szB)); 6062 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2())); 6063 return; 6064 } 6065 if (ty == Ity_I64) { 6066 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6067 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr)); 6068 addInstr(env, ARMInstr_LdrEX(8)); 6069 /* Result is in r3:r2. On a non-NEON capable CPU, we must 6070 move it into a result register pair. On a NEON capable 6071 CPU, the result register will be a 64 bit NEON 6072 register, so we must move it there instead. */ 6073 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 6074 HReg dst = lookupIRTemp(env, res); 6075 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(), 6076 hregARM_R2())); 6077 } else { 6078 HReg r_dst_hi, r_dst_lo; 6079 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res); 6080 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2())); 6081 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3())); 6082 } 6083 return; 6084 } 6085 /*NOTREACHED*/ 6086 vassert(0); 6087 } else { 6088 /* SC */ 6089 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); 6090 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) { 6091 Int szB = 0; 6092 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); 6093 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6094 switch (tyd) { 6095 case Ity_I8: szB = 1; break; 6096 case Ity_I16: szB = 2; break; 6097 case Ity_I32: szB = 4; break; 6098 default: vassert(0); 6099 } 6100 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD)); 6101 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA)); 6102 addInstr(env, ARMInstr_StrEX(szB)); 6103 } else { 6104 vassert(tyd == Ity_I64); 6105 /* This is really ugly. There is no is/is-not NEON 6106 decision akin to the case for LL, because iselInt64Expr 6107 fudges this for us, and always gets the result into two 6108 GPRs even if this means moving it from a NEON 6109 register. */ 6110 HReg rDhi, rDlo; 6111 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata); 6112 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 6113 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo)); 6114 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi)); 6115 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA)); 6116 addInstr(env, ARMInstr_StrEX(8)); 6117 } 6118 /* now r0 is 1 if failed, 0 if success. Change to IR 6119 conventions (0 is fail, 1 is success). Also transfer 6120 result to r_res. */ 6121 IRTemp res = stmt->Ist.LLSC.result; 6122 IRType ty = typeOfIRTemp(env->type_env, res); 6123 HReg r_res = lookupIRTemp(env, res); 6124 ARMRI84* one = ARMRI84_I84(1,0); 6125 vassert(ty == Ity_I1); 6126 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one)); 6127 /* And be conservative -- mask off all but the lowest bit */ 6128 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one)); 6129 return; 6130 } 6131 break; 6132 } 6133 6134 /* --------- MEM FENCE --------- */ 6135 case Ist_MBE: 6136 switch (stmt->Ist.MBE.event) { 6137 case Imbe_Fence: 6138 addInstr(env, ARMInstr_MFence()); 6139 return; 6140 case Imbe_CancelReservation: 6141 addInstr(env, ARMInstr_CLREX()); 6142 return; 6143 default: 6144 break; 6145 } 6146 break; 6147 6148 /* --------- INSTR MARK --------- */ 6149 /* Doesn't generate any executable code ... */ 6150 case Ist_IMark: 6151 return; 6152 6153 /* --------- NO-OP --------- */ 6154 case Ist_NoOp: 6155 return; 6156 6157 /* --------- EXIT --------- */ 6158 case Ist_Exit: { 6159 if (stmt->Ist.Exit.dst->tag != Ico_U32) 6160 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value"); 6161 6162 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 6163 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), 6164 stmt->Ist.Exit.offsIP); 6165 6166 /* Case: boring transfer to known address */ 6167 if (stmt->Ist.Exit.jk == Ijk_Boring 6168 || stmt->Ist.Exit.jk == Ijk_Call 6169 || stmt->Ist.Exit.jk == Ijk_Ret) { 6170 if (env->chainingAllowed) { 6171 /* .. almost always true .. */ 6172 /* Skip the event check at the dst if this is a forwards 6173 edge. */ 6174 Bool toFastEP 6175 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; 6176 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 6177 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32, 6178 amR15T, cc, toFastEP)); 6179 } else { 6180 /* .. very occasionally .. */ 6181 /* We can't use chaining, so ask for an assisted transfer, 6182 as that's the only alternative that is allowable. */ 6183 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6184 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring)); 6185 } 6186 return; 6187 } 6188 6189 /* Case: assisted transfer to arbitrary address */ 6190 switch (stmt->Ist.Exit.jk) { 6191 /* Keep this list in sync with that in iselNext below */ 6192 case Ijk_ClientReq: 6193 case Ijk_NoDecode: 6194 case Ijk_NoRedir: 6195 case Ijk_Sys_syscall: 6196 case Ijk_InvalICache: 6197 case Ijk_Yield: 6198 { 6199 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6200 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, 6201 stmt->Ist.Exit.jk)); 6202 return; 6203 } 6204 default: 6205 break; 6206 } 6207 6208 /* Do we ever expect to see any other kind? */ 6209 goto stmt_fail; 6210 } 6211 6212 default: break; 6213 } 6214 stmt_fail: 6215 ppIRStmt(stmt); 6216 vpanic("iselStmt"); 6217 } 6218 6219 6220 /*---------------------------------------------------------*/ 6221 /*--- ISEL: Basic block terminators (Nexts) ---*/ 6222 /*---------------------------------------------------------*/ 6223 6224 static void iselNext ( ISelEnv* env, 6225 IRExpr* next, IRJumpKind jk, Int offsIP ) 6226 { 6227 if (vex_traceflags & VEX_TRACE_VCODE) { 6228 vex_printf( "\n-- PUT(%d) = ", offsIP); 6229 ppIRExpr( next ); 6230 vex_printf( "; exit-"); 6231 ppIRJumpKind(jk); 6232 vex_printf( "\n"); 6233 } 6234 6235 /* Case: boring transfer to known address */ 6236 if (next->tag == Iex_Const) { 6237 IRConst* cdst = next->Iex.Const.con; 6238 vassert(cdst->tag == Ico_U32); 6239 if (jk == Ijk_Boring || jk == Ijk_Call) { 6240 /* Boring transfer to known address */ 6241 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6242 if (env->chainingAllowed) { 6243 /* .. almost always true .. */ 6244 /* Skip the event check at the dst if this is a forwards 6245 edge. */ 6246 Bool toFastEP 6247 = ((Addr64)cdst->Ico.U32) > env->max_ga; 6248 if (0) vex_printf("%s", toFastEP ? "X" : "."); 6249 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32, 6250 amR15T, ARMcc_AL, 6251 toFastEP)); 6252 } else { 6253 /* .. very occasionally .. */ 6254 /* We can't use chaining, so ask for an assisted transfer, 6255 as that's the only alternative that is allowable. */ 6256 HReg r = iselIntExpr_R(env, next); 6257 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, 6258 Ijk_Boring)); 6259 } 6260 return; 6261 } 6262 } 6263 6264 /* Case: call/return (==boring) transfer to any address */ 6265 switch (jk) { 6266 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 6267 HReg r = iselIntExpr_R(env, next); 6268 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6269 if (env->chainingAllowed) { 6270 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL)); 6271 } else { 6272 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, 6273 Ijk_Boring)); 6274 } 6275 return; 6276 } 6277 default: 6278 break; 6279 } 6280 6281 /* Case: assisted transfer to arbitrary address */ 6282 switch (jk) { 6283 /* Keep this list in sync with that for Ist_Exit above */ 6284 case Ijk_ClientReq: 6285 case Ijk_NoDecode: 6286 case Ijk_NoRedir: 6287 case Ijk_Sys_syscall: 6288 case Ijk_InvalICache: 6289 case Ijk_Yield: 6290 { 6291 HReg r = iselIntExpr_R(env, next); 6292 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6293 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk)); 6294 return; 6295 } 6296 default: 6297 break; 6298 } 6299 6300 vex_printf( "\n-- PUT(%d) = ", offsIP); 6301 ppIRExpr( next ); 6302 vex_printf( "; exit-"); 6303 ppIRJumpKind(jk); 6304 vex_printf( "\n"); 6305 vassert(0); // are we expecting any other kind? 6306 } 6307 6308 6309 /*---------------------------------------------------------*/ 6310 /*--- Insn selector top-level ---*/ 6311 /*---------------------------------------------------------*/ 6312 6313 /* Translate an entire SB to arm code. */ 6314 6315 HInstrArray* iselSB_ARM ( IRSB* bb, 6316 VexArch arch_host, 6317 VexArchInfo* archinfo_host, 6318 VexAbiInfo* vbi/*UNUSED*/, 6319 Int offs_Host_EvC_Counter, 6320 Int offs_Host_EvC_FailAddr, 6321 Bool chainingAllowed, 6322 Bool addProfInc, 6323 Addr64 max_ga ) 6324 { 6325 Int i, j; 6326 HReg hreg, hregHI; 6327 ISelEnv* env; 6328 UInt hwcaps_host = archinfo_host->hwcaps; 6329 ARMAMode1 *amCounter, *amFailAddr; 6330 6331 /* sanity ... */ 6332 vassert(arch_host == VexArchARM); 6333 6334 /* guard against unexpected space regressions */ 6335 vassert(sizeof(ARMInstr) <= 28); 6336 6337 /* hwcaps should not change from one ISEL call to another. */ 6338 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM) 6339 6340 /* Make up an initial environment to use. */ 6341 env = LibVEX_Alloc(sizeof(ISelEnv)); 6342 env->vreg_ctr = 0; 6343 6344 /* Set up output code array. */ 6345 env->code = newHInstrArray(); 6346 6347 /* Copy BB's type env. */ 6348 env->type_env = bb->tyenv; 6349 6350 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 6351 change as we go along. */ 6352 env->n_vregmap = bb->tyenv->types_used; 6353 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 6354 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 6355 6356 /* and finally ... */ 6357 env->chainingAllowed = chainingAllowed; 6358 env->hwcaps = hwcaps_host; 6359 env->max_ga = max_ga; 6360 6361 /* For each IR temporary, allocate a suitably-kinded virtual 6362 register. */ 6363 j = 0; 6364 for (i = 0; i < env->n_vregmap; i++) { 6365 hregHI = hreg = INVALID_HREG; 6366 switch (bb->tyenv->types[i]) { 6367 case Ity_I1: 6368 case Ity_I8: 6369 case Ity_I16: 6370 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break; 6371 case Ity_I64: 6372 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) { 6373 hreg = mkHReg(j++, HRcFlt64, True); 6374 } else { 6375 hregHI = mkHReg(j++, HRcInt32, True); 6376 hreg = mkHReg(j++, HRcInt32, True); 6377 } 6378 break; 6379 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break; 6380 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break; 6381 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break; 6382 default: ppIRType(bb->tyenv->types[i]); 6383 vpanic("iselBB: IRTemp type"); 6384 } 6385 env->vregmap[i] = hreg; 6386 env->vregmapHI[i] = hregHI; 6387 } 6388 env->vreg_ctr = j; 6389 6390 /* The very first instruction must be an event check. */ 6391 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter); 6392 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr); 6393 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr)); 6394 6395 /* Possibly a block counter increment (for profiling). At this 6396 point we don't know the address of the counter, so just pretend 6397 it is zero. It will have to be patched later, but before this 6398 translation is used, by a call to LibVEX_patchProfCtr. */ 6399 if (addProfInc) { 6400 addInstr(env, ARMInstr_ProfInc()); 6401 } 6402 6403 /* Ok, finally we can iterate over the statements. */ 6404 for (i = 0; i < bb->stmts_used; i++) 6405 iselStmt(env, bb->stmts[i]); 6406 6407 iselNext(env, bb->next, bb->jumpkind, bb->offsIP); 6408 6409 /* record the number of vregs we used. */ 6410 env->code->n_vregs = env->vreg_ctr; 6411 return env->code; 6412 } 6413 6414 6415 /*---------------------------------------------------------------*/ 6416 /*--- end host_arm_isel.c ---*/ 6417 /*---------------------------------------------------------------*/ 6418