1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_arm_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2012 OpenWorks LLP 11 info (at) open-works.net 12 13 NEON support is 14 Copyright (C) 2010-2012 Samsung Electronics 15 contributed by Dmitry Zhurikhin <zhur (at) ispras.ru> 16 and Kirill Batuzov <batuzovk (at) ispras.ru> 17 18 This program is free software; you can redistribute it and/or 19 modify it under the terms of the GNU General Public License as 20 published by the Free Software Foundation; either version 2 of the 21 License, or (at your option) any later version. 22 23 This program is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of 25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 26 General Public License for more details. 27 28 You should have received a copy of the GNU General Public License 29 along with this program; if not, write to the Free Software 30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 31 02110-1301, USA. 32 33 The GNU General Public License is contained in the file COPYING. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_ir.h" 38 #include "libvex.h" 39 #include "ir_match.h" 40 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "host_generic_regs.h" 44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers 45 #include "host_arm_defs.h" 46 47 48 /*---------------------------------------------------------*/ 49 /*--- ARMvfp control word stuff ---*/ 50 /*---------------------------------------------------------*/ 51 52 /* Vex-generated code expects to run with the FPU set as follows: all 53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV 54 flags cleared, and FZ (flush to zero) disabled. 
   Curiously enough,
   this corresponds to a FPSCR value of zero.

   fpscr should therefore be zero on entry to Vex-generated code, and
   should be unchanged at exit.  (Or at least the bottom 28 bits
   should be zero).
*/

#define DEFAULT_FPSCR 0


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 64-bit integer-typed
             IRTemps.  It holds the identity of a second
             32-bit virtual HReg, which holds the high half
             of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host hardware capabilities word.  This is set at the start
     and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;

/* Return the (single, 32-bit) vreg associated with IRTemp 'tmp'.
   For 64-bit IRTemps use lookupIRTemp64 instead. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

/* Return the vreg pair for a 64-bit IRTemp: *vrHI holds the high
   32 bits, *vrLO the low 32 bits. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

/* Append 'instr' to the code being selected, optionally printing it
   when instruction-selection tracing is enabled. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARMInstr(instr);
      vex_printf("\n");
   }
#if 0
   /* Debug aid: print only the NEON instructions. */
   if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
       || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
       || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
      ppARMInstr(instr);
      vex_printf("\n");
   }
#endif
}

/* Allocate a new 32-bit integer virtual register. */
static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 64-bit float virtual register. */
static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 32-bit float virtual register. */
static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/* Allocate a new 128-bit vector virtual register. */
static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

/* These are duplicated in guest_arm_toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );

static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );

static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );

static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );

static ARMRI84*    iselIntExpr_RI84_wrk
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
static ARMRI84*    iselIntExpr_RI84
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );

static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );

static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );

static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );

/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Rotate 'x' right by 'sh' bit positions, 0 <= sh < 32. */
static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}

/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
   form, and if so return the components.  Each iteration rotates 'u'
   left by 2 (= ROR by 30); if the result fits in 8 bits after i such
   rotations, then 'u' is *u8 rotated right by 2 * *u4 positions
   (the ARM "modified immediate" scheme). */
static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      u = ROR32(u, 30);
   }
   vassert(i == 16);
   return False;
}

/* Make a int reg-reg move. */
static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return ARMInstr_Mov(dst, ARMRI84_R(src));
}

/* Set the VFP unit's rounding mode to default (round to nearest).
*/ 298 static void set_VFP_rounding_default ( ISelEnv* env ) 299 { 300 /* mov rTmp, #DEFAULT_FPSCR 301 fmxr fpscr, rTmp 302 */ 303 HReg rTmp = newVRegI(env); 304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR)); 305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp)); 306 } 307 308 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed 309 expression denoting a value in the range 0 .. 3, indicating a round 310 mode encoded as per type IRRoundingMode. Set FPSCR to have the 311 same rounding. 312 */ 313 static 314 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode ) 315 { 316 /* This isn't simple, because 'mode' carries an IR rounding 317 encoding, and we need to translate that to an ARMvfp one: 318 The IR encoding: 319 00 to nearest (the default) 320 10 to +infinity 321 01 to -infinity 322 11 to zero 323 The ARMvfp encoding: 324 00 to nearest 325 01 to +infinity 326 10 to -infinity 327 11 to zero 328 Easy enough to do; just swap the two bits. 329 */ 330 HReg irrm = iselIntExpr_R(env, mode); 331 HReg tL = newVRegI(env); 332 HReg tR = newVRegI(env); 333 HReg t3 = newVRegI(env); 334 /* tL = irrm << 1; 335 tR = irrm >> 1; if we're lucky, these will issue together 336 tL &= 2; 337 tR &= 1; ditto 338 t3 = tL | tR; 339 t3 <<= 22; 340 fmxr fpscr, t3 341 */ 342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1))); 343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1))); 344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0))); 345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0))); 346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR))); 347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22))); 348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3)); 349 } 350 351 352 /*---------------------------------------------------------*/ 353 /*--- ISEL: Function call helpers ---*/ 354 /*---------------------------------------------------------*/ 355 356 /* Used only in doHelperCall. 
   See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   switch (e->tag) {
      /* These three are evaluated without touching any fixed
         (real) registers, so args of these forms are safe for the
         fast scheme. */
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   ARMCondCode cc;
   HReg        argregs[ARM_N_ARGREGS];
   HReg        tmpregs[ARM_N_ARGREGS];
   Bool        go_fast;
   Int         n_args, i, nextArgReg;
   ULong       target;

   vassert(ARM_N_ARGREGS == 4);

   /* Marshal args for a call and do the call.

      If passBBP is True, r8 (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only ARM_N_REGPARMS
      x 32 integer bits in total can be passed.  In fact the only
      supported arg types are I32 and I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   argregs[0] = hregARM_R0();
   argregs[1] = hregARM_R1();
   argregs[2] = hregARM_R2();
   argregs[3] = hregARM_R3();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }
   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;
      if (passBBP) {
         addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                     hregARM_R8() ));
         nextArgReg++;
      }

      for (i = 0; i < n_args; i++) {
         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */
         if (aTy == Ity_I32) {
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
                                        iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* 64-bit args must be passed in a reg-pair of the form
               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
               On a little-endian host, the less significant word is
               passed in the lower-numbered register. */
            if (nextArgReg & 1) {
               /* Pad the odd register with a recognisable junk value
                  so the pair starts at an even register. */
               if (nextArgReg >= ARM_N_ARGREGS)
                  return False; /* out of argregs */
               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
               nextArgReg++;
            }
            /* nextArgReg is now even, so this check also guarantees
               nextArgReg+1 is in range. */
            if (nextArgReg >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, args[i]);
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
            nextArgReg++;
            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARMcc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      if (passBBP) {
         /* This is pretty stupid; better to move directly to r0
            after the rest of the args are done. */
         tmpregs[nextArgReg] = newVRegI(env);
         addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
                                     hregARM_R8() ));
         nextArgReg++;
      }

      for (i = 0; i < n_args; i++) {
         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
         if (nextArgReg >= ARM_N_ARGREGS)
            return False; /* out of argregs */
         if (aTy == Ity_I32) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (aTy == Ity_I64) {
            /* Same comment applies as in the Fast-scheme case.  The
               skipped odd slot is left as INVALID_HREG and filled
               with junk in the final move loop below. */
            if (nextArgReg & 1)
               nextArgReg++;
            if (nextArgReg + 1 >= ARM_N_ARGREGS)
               return False; /* out of argregs */
            HReg raHi, raLo;
            iselInt64Expr(&raHi, &raLo, env, args[i]);
            tmpregs[nextArgReg] = raLo;
            nextArgReg++;
            tmpregs[nextArgReg] = raHi;
            nextArgReg++;
         }
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARMcc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
            continue;
         }
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM_N_ARGREGS);

   target = (HWord)Ptr_to_ULong(cee->addr);

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order r0, r1, r2, r3, its numeric value at this
      point, which must be between 0 and 4 inclusive, is going to be
      equal to the number of arg regs in use for the call.  Hence bake
      that number into the call (we'll need to know it when doing
      register allocation, to know what regs the call reads.)

      There is a bit of a twist -- harmless but worth recording.
      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
      the first arg in r0 and the second in r3:r2, but r1 isn't used.
      We nevertheless have nextArgReg==4 and bake that into the call
      instruction.  This will mean the register allocator will believe
      this insn reads r1 when in fact it doesn't.  But that's
      harmless; it just artificially extends the live range of r1
      unnecessarily.  The best fix would be to put into the
      instruction, a bitmask indicating which of r0/1/2/3 carry live
      values.  But that's too much hassle. */

   /* Finally, the call itself. */
   addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));

   return True; /* success */
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.
The register allocator will do its best to map both 633 vregs to the same real register, so the copies will often disappear 634 later in the game. 635 636 This should handle expressions of 32, 16 and 8-bit type. All 637 results are returned in a 32-bit register. For 16- and 8-bit 638 expressions, the upper 16/24 bits are arbitrary, so you should mask 639 or sign extend partial values if necessary. 640 */ 641 642 /* --------------------- AMode1 --------------------- */ 643 644 /* Return an AMode1 which computes the value of the specified 645 expression, possibly also adding insns to the code list as a 646 result. The expression may only be a 32-bit one. 647 */ 648 649 static Bool sane_AMode1 ( ARMAMode1* am ) 650 { 651 switch (am->tag) { 652 case ARMam1_RI: 653 return 654 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32 655 && (hregIsVirtual(am->ARMam1.RI.reg) 656 || am->ARMam1.RI.reg == hregARM_R8()) 657 && am->ARMam1.RI.simm13 >= -4095 658 && am->ARMam1.RI.simm13 <= 4095 ); 659 case ARMam1_RRS: 660 return 661 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32 662 && hregIsVirtual(am->ARMam1.RRS.base) 663 && hregClass(am->ARMam1.RRS.index) == HRcInt32 664 && hregIsVirtual(am->ARMam1.RRS.index) 665 && am->ARMam1.RRS.shift >= 0 666 && am->ARMam1.RRS.shift <= 3 ); 667 default: 668 vpanic("sane_AMode: unknown ARM AMode1 tag"); 669 } 670 } 671 672 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e ) 673 { 674 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e); 675 vassert(sane_AMode1(am)); 676 return am; 677 } 678 679 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e ) 680 { 681 IRType ty = typeOfIRExpr(env->type_env,e); 682 vassert(ty == Ity_I32); 683 684 /* FIXME: add RRS matching */ 685 686 /* {Add32,Sub32}(expr,simm13) */ 687 if (e->tag == Iex_Binop 688 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 689 && e->Iex.Binop.arg2->tag == Iex_Const 690 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 691 Int simm = 
(Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 692 if (simm >= -4095 && simm <= 4095) { 693 HReg reg; 694 if (e->Iex.Binop.op == Iop_Sub32) 695 simm = -simm; 696 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 697 return ARMAMode1_RI(reg, simm); 698 } 699 } 700 701 /* Doesn't match anything in particular. Generate it into 702 a register and use that. */ 703 { 704 HReg reg = iselIntExpr_R(env, e); 705 return ARMAMode1_RI(reg, 0); 706 } 707 708 } 709 710 711 /* --------------------- AMode2 --------------------- */ 712 713 /* Return an AMode2 which computes the value of the specified 714 expression, possibly also adding insns to the code list as a 715 result. The expression may only be a 32-bit one. 716 */ 717 718 static Bool sane_AMode2 ( ARMAMode2* am ) 719 { 720 switch (am->tag) { 721 case ARMam2_RI: 722 return 723 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32 724 && hregIsVirtual(am->ARMam2.RI.reg) 725 && am->ARMam2.RI.simm9 >= -255 726 && am->ARMam2.RI.simm9 <= 255 ); 727 case ARMam2_RR: 728 return 729 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32 730 && hregIsVirtual(am->ARMam2.RR.base) 731 && hregClass(am->ARMam2.RR.index) == HRcInt32 732 && hregIsVirtual(am->ARMam2.RR.index) ); 733 default: 734 vpanic("sane_AMode: unknown ARM AMode2 tag"); 735 } 736 } 737 738 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e ) 739 { 740 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e); 741 vassert(sane_AMode2(am)); 742 return am; 743 } 744 745 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e ) 746 { 747 IRType ty = typeOfIRExpr(env->type_env,e); 748 vassert(ty == Ity_I32); 749 750 /* FIXME: add RR matching */ 751 752 /* {Add32,Sub32}(expr,simm8) */ 753 if (e->tag == Iex_Binop 754 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 755 && e->Iex.Binop.arg2->tag == Iex_Const 756 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 757 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 758 if (simm >= -255 && simm <= 255) 
{ 759 HReg reg; 760 if (e->Iex.Binop.op == Iop_Sub32) 761 simm = -simm; 762 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 763 return ARMAMode2_RI(reg, simm); 764 } 765 } 766 767 /* Doesn't match anything in particular. Generate it into 768 a register and use that. */ 769 { 770 HReg reg = iselIntExpr_R(env, e); 771 return ARMAMode2_RI(reg, 0); 772 } 773 774 } 775 776 777 /* --------------------- AModeV --------------------- */ 778 779 /* Return an AModeV which computes the value of the specified 780 expression, possibly also adding insns to the code list as a 781 result. The expression may only be a 32-bit one. 782 */ 783 784 static Bool sane_AModeV ( ARMAModeV* am ) 785 { 786 return toBool( hregClass(am->reg) == HRcInt32 787 && hregIsVirtual(am->reg) 788 && am->simm11 >= -1020 && am->simm11 <= 1020 789 && 0 == (am->simm11 & 3) ); 790 } 791 792 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e ) 793 { 794 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e); 795 vassert(sane_AModeV(am)); 796 return am; 797 } 798 799 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e ) 800 { 801 IRType ty = typeOfIRExpr(env->type_env,e); 802 vassert(ty == Ity_I32); 803 804 /* {Add32,Sub32}(expr, simm8 << 2) */ 805 if (e->tag == Iex_Binop 806 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) 807 && e->Iex.Binop.arg2->tag == Iex_Const 808 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 809 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 810 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) { 811 HReg reg; 812 if (e->Iex.Binop.op == Iop_Sub32) 813 simm = -simm; 814 reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 815 return mkARMAModeV(reg, simm); 816 } 817 } 818 819 /* Doesn't match anything in particular. Generate it into 820 a register and use that. 
*/ 821 { 822 HReg reg = iselIntExpr_R(env, e); 823 return mkARMAModeV(reg, 0); 824 } 825 826 } 827 828 /* -------------------- AModeN -------------------- */ 829 830 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e ) 831 { 832 return iselIntExpr_AModeN_wrk(env, e); 833 } 834 835 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e ) 836 { 837 HReg reg = iselIntExpr_R(env, e); 838 return mkARMAModeN_R(reg); 839 } 840 841 842 /* --------------------- RI84 --------------------- */ 843 844 /* Select instructions to generate 'e' into a RI84. If mayInv is 845 true, then the caller will also accept an I84 form that denotes 846 'not e'. In this case didInv may not be NULL, and *didInv is set 847 to True. This complication is so as to allow generation of an RI84 848 which is suitable for use in either an AND or BIC instruction, 849 without knowing (before this call) which one. 850 */ 851 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv, 852 ISelEnv* env, IRExpr* e ) 853 { 854 ARMRI84* ri; 855 if (mayInv) 856 vassert(didInv != NULL); 857 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e); 858 /* sanity checks ... */ 859 switch (ri->tag) { 860 case ARMri84_I84: 861 return ri; 862 case ARMri84_R: 863 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32); 864 vassert(hregIsVirtual(ri->ARMri84.R.reg)); 865 return ri; 866 default: 867 vpanic("iselIntExpr_RI84: unknown arm RI84 tag"); 868 } 869 } 870 871 /* DO NOT CALL THIS DIRECTLY ! 
*/
static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
                                       ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   if (didInv) *didInv = False;

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
      }
      /* Try the constant itself first ... */
      if (fitsIn8x4(&u8, &u4, u)) {
         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
      }
      /* ... and, if permitted, its bitwise complement. */
      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
         vassert(didInv);
         *didInv = True;
         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARMRI84_R(r);
   }
}


/* --------------------- RI5 --------------------- */

/* Select instructions to generate 'e' into a RI5. */

static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
{
   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARMri5_I5:
         return ri;
      case ARMri5_R:
         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->ARMri5.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
   }
}

/* DO NOT CALL THIS DIRECTLY !
*/
static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
      }
      /* Only 1 .. 31 are representable as an immediate shift
         amount here. */
      if (u >= 1 && u <= 31) {
         return ARMRI5_I5(u);
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARMRI5_R(r);
   }
}


/* ------------------- CondCode ------------------- */

/* Generate code to evaluated a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   ARMCondCode cc = iselCondCode_wrk(env,e);
   vassert(cc != ARMcc_NV);
   return cc;
}

static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* CmpOrTst doesn't modify rTmp; so this is OK. */
      ARMRI84* one  = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition.
         (ARM condition codes come in complementary pairs differing
         only in the bottom bit.) */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      /* Test just bit 0 of the value. */
      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* one  = ARMRI84_I84(1,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      /* Only the low 8 bits are defined; test just those. */
      HReg     r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* xFF = ARMRI84_I84(0xFF,0);
      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARMRI84* zero = ARMRI84_I84(0,0);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
      return ARMcc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      /* OR the two halves together; the result is nonzero iff the
         64-bit value is nonzero. */
      HReg     tHi, tLo;
      HReg     tmp  = newVRegI(env);
      ARMRI84* zero = ARMRI84_I84(0,0);
      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
      return ARMcc_NE;
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
                                       env, e->Iex.Binop.arg2);
      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: return ARMcc_EQ;
         case Iop_CmpNE32: return ARMcc_NE;
         case Iop_CmpLT32S: return ARMcc_LT;
         case Iop_CmpLT32U: return ARMcc_LO;
         case Iop_CmpLE32S: return ARMcc_LE;
         case Iop_CmpLE32U: return ARMcc_LS;
         default: vpanic("iselCondCode(arm): CmpXX32");
      }
   }

   /* --- CasCmpEQ* --- */
   /* Ist_Cas has a dummy argument to compare with, so comparison is
      always true. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
      return ARMcc_AL;
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}


/* --------------------- Reg --------------------- */

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY !
*/
/* Worker for iselIntExpr_R: pattern-matches the expression tree and
   emits ARM insns computing it into a fresh virtual register.
   Narrow (I8/I16) values live in the low bits of a 32-bit vreg. */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
//   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);

         /* Only little-endian loads are handled here. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I32) {
            ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
            addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I16) {
            /* 16-bit loads use the AMode2 (halfword) addressing form. */
            ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
            addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
                                          dst, amode));
            return dst;
         }
         if (ty == Ity_I8) {
            ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
            addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
            return dst;
         }

//zz        if (ty == Ity_I16) {
//zz           addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//zz           return dst;
//zz        }
//zz        if (ty == Ity_I8) {
//zz           addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
//zz           return dst;
//zz        }
         break;
      }

//zz      /* --------- TERNARY OP --------- */
//zz      case Iex_Triop: {
//zz         IRTriop *triop = e->Iex.Triop.details;
//zz         /* C3210 flags following FPU partial remainder (fprem), both
//zz            IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
//zz         if (triop->op == Iop_PRemC3210F64
//zz             || triop->op == Iop_PRem1C3210F64) {
//zz            HReg junk = newVRegF(env);
//zz            HReg dst  = newVRegI(env);
//zz            HReg srcL = iselDblExpr(env, triop->arg2);
//zz            HReg srcR = iselDblExpr(env, triop->arg3);
//zz            /* XXXROUNDINGFIXME */
//zz            /* set roundingmode here */
//zz            addInstr(env, X86Instr_FpBinary(
//zz                       e->Iex.Binop.op==Iop_PRemC3210F64
//zz                          ? Xfp_PREM : Xfp_PREM1,
//zz                       srcL,srcR,junk
//zz                    ));
//zz            /* The previous pseudo-insn will have left the FPU's C3210
//zz               flags set correctly.  So bag them. */
//zz            addInstr(env, X86Instr_FpStSW_AX());
//zz            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
//zz            return dst;
//zz         }
//zz
//zz         break;
//zz      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {

         ARMAluOp   aop = 0; /* invalid */
         ARMShiftOp sop = 0; /* invalid */

         /* ADD/SUB/AND/OR/XOR */
         switch (e->Iex.Binop.op) {
            case Iop_And32: {
               /* AND gets special treatment: if the RHS immediate is
                  more cheaply encoded inverted, use BIC instead. */
               Bool     didInv = False;
               HReg     dst    = newVRegI(env);
               HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
                                                  env, e->Iex.Binop.arg2);
               addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
                                          dst, argL, argR));
               return dst;
            }
            case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
            case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
            case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
            case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
            std_binop: {
               HReg     dst  = newVRegI(env);
               HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
                                                env, e->Iex.Binop.arg2);
               addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
               return dst;
            }
            default: break;
         }

         /* SDIV/UDIV */
         if (e->Iex.Binop.op == Iop_DivU32 || e->Iex.Binop.op == Iop_DivS32) {
            HReg dst  = newVRegI(env);
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);

            addInstr(env,
                     ARMInstr_Div(e->Iex.Binop.op == Iop_DivU32 ?
                                  ARMdiv_U : ARMdiv_S,
                                  dst, argL, argR));
            return dst;
         }

         /* SHL/SHR/SAR */
         switch (e->Iex.Binop.op) {
            case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
            case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
            case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
            sh_binop: {
               HReg    dst  = newVRegI(env);
               HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
               addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
               vassert(ty == Ity_I32); /* else the IR is ill-typed */
               return dst;
            }
            default: break;
         }

         /* MUL */
         if (e->Iex.Binop.op == Iop_Mul32) {
            /* The Mul pseudo-insn takes its operands in r2/r3 and
               leaves the result in r0, hence the fixed-reg moves. */
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegI(env);
            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
            addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
            addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
            return dst;
         }

         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_Max32U) {
            /* cmp argL, argR ; mov dst, argL ; movLO dst, argR */
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegI(env);
            addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
                                            ARMRI84_R(argR)));
            addInstr(env, mk_iMOVds_RR(dst, argL));
            addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_CmpF64) {
            HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
            HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
               FMSTAT, so we can examine the results directly. */
            addInstr(env, ARMInstr_VCmpD(dL, dR));
            /* Create in dst, the IRCmpF64Result encoded result. */
            addInstr(env, ARMInstr_Imm32(dst, 0));
            addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
            addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
            addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
            addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
            return dst;
         }

         if (e->Iex.Binop.op == Iop_F64toI32S
             || e->Iex.Binop.op == Iop_F64toI32U) {
            /* Wretched uglyness all round, due to having to deal
               with rounding modes.  Oh well. */
            /* FIXME: if arg1 is a constant indicating round-to-zero,
               then we could skip all this arsing around with FPSCR and
               simply emit FTO{S,U}IZD. */
            Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
            HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
            /* FTO{S,U}ID valF, valD */
            HReg valF = newVRegF(env);
            addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
                                          valF, valD));
            set_VFP_rounding_default(env);
            /* VMOV dst, valF */
            HReg dst = newVRegI(env);
            addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
            return dst;
         }

         /* GetElem on a 64-bit NEON register (scalar extract). */
         if (e->Iex.Binop.op == Iop_GetElem8x8
             || e->Iex.Binop.op == Iop_GetElem16x4
             || e->Iex.Binop.op == Iop_GetElem32x2) {
            HReg res = newVRegI(env);
            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
            UInt index, size;
            if (e->Iex.Binop.arg2->tag != Iex_Const ||
                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
               vpanic("ARM target supports GetElem with constant "
                      "second argument only\n");
            }
            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            switch (e->Iex.Binop.op) {
               case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
               case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
               case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
                                           mkARMNRS(ARMNRS_Reg, res, 0),
                                           mkARMNRS(ARMNRS_Scalar, arg, index),
                                           size, False));
            return res;
         }

         /* GetElem on a 128-bit NEON register (scalar extract). */
         if (e->Iex.Binop.op == Iop_GetElem8x16
             || e->Iex.Binop.op == Iop_GetElem16x8
             || e->Iex.Binop.op == Iop_GetElem32x4) {
            HReg res = newVRegI(env);
            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
            UInt index, size;
            if (e->Iex.Binop.arg2->tag != Iex_Const ||
                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
               vpanic("ARM target supports GetElem with constant "
                      "second argument only\n");
            }
            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            switch (e->Iex.Binop.op) {
               case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
               case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
               case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
                                           mkARMNRS(ARMNRS_Reg, res, 0),
                                           mkARMNRS(ARMNRS_Scalar, arg, index),
                                           size, True));
            return res;
         }

         /* All cases involving host-side helper calls. */
         void* fn = NULL;
         switch (e->Iex.Binop.op) {
            case Iop_Add16x2:
               fn = &h_generic_calc_Add16x2; break;
            case Iop_Sub16x2:
               fn = &h_generic_calc_Sub16x2; break;
            case Iop_HAdd16Ux2:
               fn = &h_generic_calc_HAdd16Ux2; break;
            case Iop_HAdd16Sx2:
               fn = &h_generic_calc_HAdd16Sx2; break;
            case Iop_HSub16Ux2:
               fn = &h_generic_calc_HSub16Ux2; break;
            case Iop_HSub16Sx2:
               fn = &h_generic_calc_HSub16Sx2; break;
            case Iop_QAdd16Sx2:
               fn = &h_generic_calc_QAdd16Sx2; break;
            case Iop_QSub16Sx2:
               fn = &h_generic_calc_QSub16Sx2; break;
            case Iop_Add8x4:
               fn = &h_generic_calc_Add8x4; break;
            case Iop_Sub8x4:
               fn = &h_generic_calc_Sub8x4; break;
            case Iop_HAdd8Ux4:
               fn = &h_generic_calc_HAdd8Ux4; break;
            case Iop_HAdd8Sx4:
               fn = &h_generic_calc_HAdd8Sx4; break;
            case Iop_HSub8Ux4:
               fn = &h_generic_calc_HSub8Ux4; break;
            case Iop_HSub8Sx4:
               fn = &h_generic_calc_HSub8Sx4; break;
            case Iop_QAdd8Sx4:
               fn = &h_generic_calc_QAdd8Sx4; break;
            case Iop_QAdd8Ux4:
               fn = &h_generic_calc_QAdd8Ux4; break;
            case Iop_QSub8Sx4:
               fn = &h_generic_calc_QSub8Sx4; break;
            case Iop_QSub8Ux4:
               fn = &h_generic_calc_QSub8Ux4; break;
            case Iop_Sad8Ux4:
               fn = &h_generic_calc_Sad8Ux4; break;
            case Iop_QAdd32S:
               fn = &h_generic_calc_QAdd32S; break;
            case Iop_QSub32S:
               fn = &h_generic_calc_QSub32S; break;
            case Iop_QSub16Ux2:
               fn = &h_generic_calc_QSub16Ux2; break;
            default:
               break;
         }

         if (fn) {
            /* Args in r0/r1, result in r0, per the ARM calling
               convention used by ARMInstr_Call. */
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg res  = newVRegI(env);
            addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
            addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
            addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
            addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
            return res;
         }

         break;
      }

      /* --------- UNARY OP --------- */
      case Iex_Unop: {

//zz         /* 1Uto8(32to1(expr32)) */
//zz         if (e->Iex.Unop.op == Iop_1Uto8) {
//zz            DECLARE_PATTERN(p_32to1_then_1Uto8);
//zz            DEFINE_PATTERN(p_32to1_then_1Uto8,
//zz                           unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
//zz            if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
//zz               IRExpr* expr32 = mi.bindee[0];
//zz               HReg dst = newVRegI(env);
//zz               HReg src = iselIntExpr_R(env, expr32);
//zz               addInstr(env, mk_iMOVsd_RR(src,dst) );
//zz               addInstr(env, X86Instr_Alu32R(Xalu_AND,
//zz                                             X86RMI_Imm(1), dst));
//zz               return dst;
//zz            }
//zz         }
//zz
//zz         /* 8Uto32(LDle(expr32)) */
//zz         if (e->Iex.Unop.op == Iop_8Uto32) {
//zz            DECLARE_PATTERN(p_LDle8_then_8Uto32);
//zz            DEFINE_PATTERN(p_LDle8_then_8Uto32,
//zz                           unop(Iop_8Uto32,
//zz                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
//zz            if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
//zz               HReg dst = newVRegI(env);
//zz               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//zz               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
//zz               return dst;
//zz            }
//zz         }
//zz
//zz         /* 8Sto32(LDle(expr32)) */
//zz         if (e->Iex.Unop.op == Iop_8Sto32) {
//zz            DECLARE_PATTERN(p_LDle8_then_8Sto32);
//zz            DEFINE_PATTERN(p_LDle8_then_8Sto32,
//zz                           unop(Iop_8Sto32,
//zz                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
//zz            if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
//zz               HReg dst = newVRegI(env);
//zz               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//zz               addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
//zz               return dst;
//zz            }
//zz         }
//zz
//zz         /* 16Uto32(LDle(expr32)) */
//zz         if (e->Iex.Unop.op == Iop_16Uto32) {
//zz            DECLARE_PATTERN(p_LDle16_then_16Uto32);
//zz            DEFINE_PATTERN(p_LDle16_then_16Uto32,
//zz                           unop(Iop_16Uto32,
//zz                                IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
//zz            if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
//zz               HReg dst = newVRegI(env);
//zz               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//zz               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//zz               return dst;
//zz            }
//zz         }
//zz
//zz         /* 8Uto32(GET:I8) */
//zz         if (e->Iex.Unop.op == Iop_8Uto32) {
//zz            if (e->Iex.Unop.arg->tag == Iex_Get) {
//zz               HReg dst;
//zz               X86AMode* amode;
//zz               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
//zz               dst = newVRegI(env);
//zz               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
//zz                                   hregX86_EBP());
//zz               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
//zz               return dst;
//zz            }
//zz         }
//zz
//zz         /* 16to32(GET:I16) */
//zz         if (e->Iex.Unop.op == Iop_16Uto32) {
//zz            if (e->Iex.Unop.arg->tag == Iex_Get) {
//zz               HReg dst;
//zz               X86AMode* amode;
//zz               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
//zz               dst = newVRegI(env);
//zz               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
//zz                                   hregX86_EBP());
//zz               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//zz               return dst;
//zz            }
//zz         }

         switch (e->Iex.Unop.op) {
            case Iop_8Uto32: {
               /* Zero-extend by masking with 0xFF. */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Alu(ARMalu_AND,
                                          dst, src, ARMRI84_I84(0xFF,0)));
               return dst;
            }
//zz            case Iop_8Uto16:
//zz            case Iop_8Uto32:
//zz            case Iop_16Uto32: {
//zz               HReg dst = newVRegI(env);
//zz               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//zz               UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
//zz               addInstr(env, mk_iMOVsd_RR(src,dst) );
//zz               addInstr(env, X86Instr_Alu32R(Xalu_AND,
//zz                                             X86RMI_Imm(mask), dst));
//zz               return dst;
//zz            }
//zz            case Iop_8Sto16:
//zz            case Iop_8Sto32:
            case Iop_16Uto32: {
               /* Zero-extend 16->32 via shift-left then logical
                  shift-right by 16 (0xFFFF is not a valid imm84). */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               ARMRI5* amt = ARMRI5_I5(16);
               addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
               addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
               return dst;
            }
            case Iop_8Sto32:
            case Iop_16Sto32: {
               /* Sign-extend via shift-left then arithmetic
                  shift-right by 24 (I8) or 16 (I16). */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
               addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
               addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
               return dst;
            }
//zz            case Iop_Not8:
//zz            case Iop_Not16:
            case Iop_Not32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
               return dst;
            }
            case Iop_64HIto32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo .. poor wee thing :-) */
            }
            case Iop_64to32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rLo; /* similar stupid comment to the above ... */
            }
            case Iop_64to8: {
               /* Low half returned as-is: I8 values live unmasked in
                  the low bits of a 32-bit vreg (see the 32to8 no-op
                  case below). */
               HReg rHi, rLo;
               if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
                  HReg tHi = newVRegI(env);
                  HReg tLo = newVRegI(env);
                  HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
                  addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
                  rHi = tHi;
                  rLo = tLo;
               } else {
                  iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               }
               return rLo;
            }
//zz            case Iop_16HIto8:
//zz            case Iop_32HIto16: {
//zz               HReg dst  = newVRegI(env);
//zz               HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
//zz               Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
//zz               addInstr(env, mk_iMOVsd_RR(src,dst) );
//zz               addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
//zz               return dst;
//zz            }
            case Iop_1Uto32:
            case Iop_1Uto8: {
               /* mov dst, #0 ; mov{cond} dst, #1 */
               HReg        dst  = newVRegI(env);
               ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
               addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
               return dst;
            }

            case Iop_1Sto32: {
               HReg        dst  = newVRegI(env);
               ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               ARMRI5*     amt  = ARMRI5_I5(31);
               /* This is really rough.  We could do much better here;
                  perhaps mvn{cond} dst, #0 as the second insn?
                  (same applies to 1Sto64) */
               addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
               addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
               addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
               addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
               return dst;
            }


//zz            case Iop_1Sto8:
//zz            case Iop_1Sto16:
//zz            case Iop_1Sto32: {
//zz               /* could do better than this, but for now ... */
//zz               HReg dst         = newVRegI(env);
//zz               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
//zz               addInstr(env, X86Instr_Set32(cond,dst));
//zz               addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
//zz               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
//zz               return dst;
//zz            }
//zz            case Iop_Ctz32: {
//zz               /* Count trailing zeroes, implemented by x86 'bsfl' */
//zz               HReg dst = newVRegI(env);
//zz               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//zz               addInstr(env, X86Instr_Bsfr32(True,src,dst));
//zz               return dst;
//zz            }
            case Iop_Clz32: {
               /* Count leading zeroes; easy on ARM. */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
               return dst;
            }

            case Iop_CmpwNEZ32: {
               /* dst = (src | -src) >>s 31 : all-ones if src != 0. */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
               addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
               addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
               return dst;
            }

            case Iop_Left32: {
               /* dst = src | -src */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
               addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
               return dst;
            }

//zz            case Iop_V128to32: {
//zz               HReg      dst  = newVRegI(env);
//zz               HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
//zz               X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
//zz               sub_from_esp(env, 16);
//zz               addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
//zz               addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
//zz               add_to_esp(env, 16);
//zz               return dst;
//zz            }
//zz
            case Iop_ReinterpF32asI32: {
               /* Bit-for-bit move out of a VFP single register. */
               HReg dst = newVRegI(env);
               HReg src = iselFltExpr(env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
               return dst;
            }

//zz
//zz      case Iop_16to8:
            case Iop_32to8:
            case Iop_32to16:
               /* These are no-ops. */
               return iselIntExpr_R(env, e->Iex.Unop.arg);

            default:
               break;
         }

         /* All Unop cases involving host-side helper calls. */
         void* fn = NULL;
         switch (e->Iex.Unop.op) {
            case Iop_CmpNEZ16x2:
               fn = &h_generic_calc_CmpNEZ16x2; break;
            case Iop_CmpNEZ8x4:
               fn = &h_generic_calc_CmpNEZ8x4; break;
            default:
               break;
         }

         if (fn) {
            /* One arg in r0, result back in r0. */
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg res = newVRegI(env);
            addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
            addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
            addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
            return res;
         }

         break;
      }

      /* --------- GET --------- */
      case Iex_Get: {
         /* Only aligned I32 GETs with an offset encodable in the
            12-bit load displacement are handled; r8 is the guest
            state pointer. */
         if (ty == Ity_I32
             && 0 == (e->Iex.Get.offset & 3)
             && e->Iex.Get.offset < 4096-4) {
            HReg dst = newVRegI(env);
            addInstr(env, ARMInstr_LdSt32(
                             True/*isLoad*/,
                             dst,
                             ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
            return dst;
         }
//zz        if (ty == Ity_I8 || ty == Ity_I16) {
//zz           HReg dst = newVRegI(env);
//zz           addInstr(env, X86Instr_LoadEX(
//zz                            toUChar(ty==Ity_I8 ? 1 : 2),
//zz                            False,
//zz                            X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
//zz                            dst));
//zz           return dst;
//zz        }
         break;
      }

//zz      case Iex_GetI: {
//zz         X86AMode* am
//zz            = genGuestArrayOffset(
//zz                 env, e->Iex.GetI.descr,
//zz                       e->Iex.GetI.ix, e->Iex.GetI.bias );
//zz         HReg dst = newVRegI(env);
//zz         if (ty == Ity_I8) {
//zz            addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
//zz            return dst;
//zz         }
//zz         if (ty == Ity_I32) {
//zz            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
//zz            return dst;
//zz         }
//zz         break;
//zz      }

      /* --------- CCALL --------- */
      case Iex_CCall: {
         HReg dst = newVRegI(env);
         vassert(ty == e->Iex.CCall.retty);

         /* be very restrictive for now.  Only 32/64-bit ints allowed
            for args, and 32 bits for return type. */
         if (e->Iex.CCall.retty != Ity_I32)
            goto irreducible;

         /* Marshal args, do the call, clear stack. */
         Bool ok = doHelperCall( env, False,
                                 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
         if (ok) {
            addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
            return dst;
         }
         /* else fall through; will hit the irreducible: label */
         /* NOTE(review): there is no 'break' here, so on doHelperCall
            failure control actually falls through into the Iex_Const
            case below -- which would then misread the Const union of
            a CCall node -- rather than reaching the 'irreducible'
            label as this comment claims.  Presumably doHelperCall
            cannot fail for an Ity_I32 return; confirm, or add a
            'goto irreducible'. */
      }

      /* --------- LITERAL --------- */
      /* 32 literals */
      case Iex_Const: {
         UInt u   = 0;
         HReg dst = newVRegI(env);
         switch (e->Iex.Const.con->tag) {
            case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
            case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
            case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
            default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
         }
         addInstr(env, ARMInstr_Imm32(dst, u));
         return dst;
      }

      /* --------- MULTIPLEX --------- */
      case Iex_Mux0X: {
         IRExpr* cond = e->Iex.Mux0X.cond;

         /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ):
            select directly on the condition code, avoiding
            materialising the boolean into a register. */
         if (ty == Ity_I32
             && cond->tag == Iex_Unop
             && cond->Iex.Unop.op == Iop_32to8
             && cond->Iex.Unop.arg->tag == Iex_Unop
             && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
            ARMCondCode cc;
            HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
            ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
            HReg     dst = newVRegI(env);
            addInstr(env, mk_iMOVds_RR(dst, rX));
            cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
            /* cc ^ 1 inverts the condition: move in expr0 when the
               condition is false. */
            addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
            return dst;
         }

         /* Mux0X(cond, expr0, exprX) (general case) */
         if (ty == Ity_I32) {
            HReg     r8;
            HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
            ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
            HReg     dst = newVRegI(env);
            addInstr(env, mk_iMOVds_RR(dst, rX));
            r8 = iselIntExpr_R(env, cond);
            /* tst cond, #0xFF ; moveq dst, expr0 */
            addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
                                            ARMRI84_I84(0xFF,0)));
            addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
            return dst;
         }
         break;
      }

      default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}


/* -------------------- 64-bit -------------------- */

/* Compute a 64-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal: materialise each half with an Imm32. */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt  wHi = toUInt(w64 >> 32);
      UInt  wLo = toUInt(w64);
      HReg  tHi = newVRegI(env);
      HReg  tLo = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      addInstr(env, ARMInstr_Imm32(tHi, wHi));
      addInstr(env, ARMInstr_Imm32(tLo, wLo));
      *rHi = tHi;
      *rLo = tLo;
      return;
   }

   /* read 64-bit IRTemp.  With NEON, 64-bit temps live in D regs,
      so transfer out into an integer pair. */
   if (e->tag == Iex_RdTmp) {
      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
         HReg tHi = newVRegI(env);
         HReg tLo = newVRegI(env);
         HReg tmp = iselNeon64Expr(env, e);
         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
         *rHi = tHi;
         *rLo = tLo;
      } else {
         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
      }
      return;
   }

   /* 64-bit load */
1868 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 1869 HReg tLo, tHi, rA; 1870 vassert(e->Iex.Load.ty == Ity_I64); 1871 rA = iselIntExpr_R(env, e->Iex.Load.addr); 1872 tHi = newVRegI(env); 1873 tLo = newVRegI(env); 1874 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4))); 1875 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0))); 1876 *rHi = tHi; 1877 *rLo = tLo; 1878 return; 1879 } 1880 1881 /* 64-bit GET */ 1882 if (e->tag == Iex_Get) { 1883 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0); 1884 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4); 1885 HReg tHi = newVRegI(env); 1886 HReg tLo = newVRegI(env); 1887 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4)); 1888 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0)); 1889 *rHi = tHi; 1890 *rLo = tLo; 1891 return; 1892 } 1893 1894 /* --------- BINARY ops --------- */ 1895 if (e->tag == Iex_Binop) { 1896 switch (e->Iex.Binop.op) { 1897 1898 /* 32 x 32 -> 64 multiply */ 1899 case Iop_MullS32: 1900 case Iop_MullU32: { 1901 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); 1902 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1903 HReg tHi = newVRegI(env); 1904 HReg tLo = newVRegI(env); 1905 ARMMulDivOp mop = e->Iex.Binop.op == Iop_MullS32 1906 ? 
ARMmul_SX : ARMmul_ZX; 1907 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); 1908 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); 1909 addInstr(env, ARMInstr_Mul(mop)); 1910 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1())); 1911 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0())); 1912 *rHi = tHi; 1913 *rLo = tLo; 1914 return; 1915 } 1916 1917 case Iop_Or64: { 1918 HReg xLo, xHi, yLo, yHi; 1919 HReg tHi = newVRegI(env); 1920 HReg tLo = newVRegI(env); 1921 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 1922 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 1923 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi))); 1924 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo))); 1925 *rHi = tHi; 1926 *rLo = tLo; 1927 return; 1928 } 1929 1930 case Iop_Add64: { 1931 HReg xLo, xHi, yLo, yHi; 1932 HReg tHi = newVRegI(env); 1933 HReg tLo = newVRegI(env); 1934 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 1935 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 1936 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo))); 1937 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi))); 1938 *rHi = tHi; 1939 *rLo = tLo; 1940 return; 1941 } 1942 1943 /* 32HLto64(e1,e2) */ 1944 case Iop_32HLto64: { 1945 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 1946 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 1947 return; 1948 } 1949 1950 default: 1951 break; 1952 } 1953 } 1954 1955 /* --------- UNARY ops --------- */ 1956 if (e->tag == Iex_Unop) { 1957 switch (e->Iex.Unop.op) { 1958 1959 /* ReinterpF64asI64 */ 1960 case Iop_ReinterpF64asI64: { 1961 HReg dstHi = newVRegI(env); 1962 HReg dstLo = newVRegI(env); 1963 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 1964 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo)); 1965 *rHi = dstHi; 1966 *rLo = dstLo; 1967 return; 1968 } 1969 1970 /* Left64(e) */ 1971 case Iop_Left64: { 1972 HReg yLo, yHi; 1973 HReg tHi = newVRegI(env); 1974 HReg tLo = newVRegI(env); 1975 HReg zero = newVRegI(env); 
1976 /* yHi:yLo = arg */ 1977 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 1978 /* zero = 0 */ 1979 addInstr(env, ARMInstr_Imm32(zero, 0)); 1980 /* tLo = 0 - yLo, and set carry */ 1981 addInstr(env, ARMInstr_Alu(ARMalu_SUBS, 1982 tLo, zero, ARMRI84_R(yLo))); 1983 /* tHi = 0 - yHi - carry */ 1984 addInstr(env, ARMInstr_Alu(ARMalu_SBC, 1985 tHi, zero, ARMRI84_R(yHi))); 1986 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 1987 back in, so as to give the final result 1988 tHi:tLo = arg | -arg. */ 1989 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi))); 1990 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo))); 1991 *rHi = tHi; 1992 *rLo = tLo; 1993 return; 1994 } 1995 1996 /* CmpwNEZ64(e) */ 1997 case Iop_CmpwNEZ64: { 1998 HReg srcLo, srcHi; 1999 HReg tmp1 = newVRegI(env); 2000 HReg tmp2 = newVRegI(env); 2001 /* srcHi:srcLo = arg */ 2002 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2003 /* tmp1 = srcHi | srcLo */ 2004 addInstr(env, ARMInstr_Alu(ARMalu_OR, 2005 tmp1, srcHi, ARMRI84_R(srcLo))); 2006 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2007 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1)); 2008 addInstr(env, ARMInstr_Alu(ARMalu_OR, 2009 tmp2, tmp2, ARMRI84_R(tmp1))); 2010 addInstr(env, ARMInstr_Shift(ARMsh_SAR, 2011 tmp2, tmp2, ARMRI5_I5(31))); 2012 *rHi = tmp2; 2013 *rLo = tmp2; 2014 return; 2015 } 2016 2017 case Iop_1Sto64: { 2018 HReg dst = newVRegI(env); 2019 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2020 ARMRI5* amt = ARMRI5_I5(31); 2021 /* This is really rough. We could do much better here; 2022 perhaps mvn{cond} dst, #0 as the second insn? 
2023 (same applies to 1Sto32) */ 2024 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 2025 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 2026 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); 2027 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); 2028 *rHi = dst; 2029 *rLo = dst; 2030 return; 2031 } 2032 2033 default: 2034 break; 2035 } 2036 } /* if (e->tag == Iex_Unop) */ 2037 2038 /* --------- MULTIPLEX --------- */ 2039 if (e->tag == Iex_Mux0X) { 2040 IRType ty8; 2041 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo; 2042 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond); 2043 vassert(ty8 == Ity_I8); 2044 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX); 2045 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0); 2046 dstHi = newVRegI(env); 2047 dstLo = newVRegI(env); 2048 addInstr(env, mk_iMOVds_RR(dstHi, rXhi)); 2049 addInstr(env, mk_iMOVds_RR(dstLo, rXlo)); 2050 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 2051 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8, 2052 ARMRI84_I84(0xFF,0))); 2053 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi))); 2054 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo))); 2055 *rHi = dstHi; 2056 *rLo = dstLo; 2057 return; 2058 } 2059 2060 /* It is convenient sometimes to call iselInt64Expr even when we 2061 have NEON support (e.g. in do_helper_call we need 64-bit 2062 arguments as 2 x 32 regs). 
   */
   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
      /* NEON fallback: compute the 64-bit value into a NEON D
         register, then transfer it out into two 32-bit integer
         registers. */
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      HReg tmp = iselNeon64Expr(env, e);
      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
      *rHi = tHi;
      *rLo = tLo;
      return ;
   }

   /* No selection strategy matched: print the expression and give up. */
   ppIRExpr(e);
   vpanic("iselInt64Expr");
}


/*---------------------------------------------------------*/
/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit value into a NEON (D-class) register.  This is a
   checked wrapper around iselNeon64Expr_wrk: it asserts that the
   returned register is virtual and of class HRcFlt64 before handing
   it back to the caller. */
static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselNeon64Expr_wrk( env, e );
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   MatchInfo mi;
   vassert(e);
   vassert(ty == Ity_I64);

   /* Already-computed temporary: just hand back its register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* Constant: materialise it in a pair of integer registers, then
      transfer the pair into a fresh D register. */
   if (e->tag == Iex_Const) {
      HReg rLo, rHi;
      HReg res = newVRegD(env);
      iselInt64Expr(&rHi, &rLo, env, e);
      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
      return res;
   }

   /* 64-bit load (little-endian only) */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg res = newVRegD(env);
      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
      vassert(ty == Ity_I64);
      addInstr(env, ARMInstr_NLdStD(True, res, am));
      return res;
   }

   /* 64-bit GET: address the guest state via R8 + offset, then load
      the D register from there. */
   if (e->tag == Iex_Get) {
      HReg addr = newVRegI(env);
      HReg res = newVRegD(env);
      vassert(ty == Ity_I64);
      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
      return res;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
2132 2133 /* 32 x 32 -> 64 multiply */ 2134 case Iop_MullS32: 2135 case Iop_MullU32: { 2136 HReg rLo, rHi; 2137 HReg res = newVRegD(env); 2138 iselInt64Expr(&rHi, &rLo, env, e); 2139 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2140 return res; 2141 } 2142 2143 case Iop_And64: { 2144 HReg res = newVRegD(env); 2145 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2146 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2147 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 2148 res, argL, argR, 4, False)); 2149 return res; 2150 } 2151 case Iop_Or64: { 2152 HReg res = newVRegD(env); 2153 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2154 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2155 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 2156 res, argL, argR, 4, False)); 2157 return res; 2158 } 2159 case Iop_Xor64: { 2160 HReg res = newVRegD(env); 2161 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2162 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2163 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 2164 res, argL, argR, 4, False)); 2165 return res; 2166 } 2167 2168 /* 32HLto64(e1,e2) */ 2169 case Iop_32HLto64: { 2170 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2171 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2172 HReg res = newVRegD(env); 2173 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 2174 return res; 2175 } 2176 2177 case Iop_Add8x8: 2178 case Iop_Add16x4: 2179 case Iop_Add32x2: 2180 case Iop_Add64: { 2181 HReg res = newVRegD(env); 2182 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2183 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2184 UInt size; 2185 switch (e->Iex.Binop.op) { 2186 case Iop_Add8x8: size = 0; break; 2187 case Iop_Add16x4: size = 1; break; 2188 case Iop_Add32x2: size = 2; break; 2189 case Iop_Add64: size = 3; break; 2190 default: vassert(0); 2191 } 2192 addInstr(env, ARMInstr_NBinary(ARMneon_VADD, 2193 res, argL, argR, size, False)); 2194 return res; 2195 } 2196 case 
Iop_Add32Fx2: { 2197 HReg res = newVRegD(env); 2198 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2199 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2200 UInt size = 0; 2201 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, 2202 res, argL, argR, size, False)); 2203 return res; 2204 } 2205 case Iop_Recps32Fx2: { 2206 HReg res = newVRegD(env); 2207 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2208 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2209 UInt size = 0; 2210 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 2211 res, argL, argR, size, False)); 2212 return res; 2213 } 2214 case Iop_Rsqrts32Fx2: { 2215 HReg res = newVRegD(env); 2216 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2217 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2218 UInt size = 0; 2219 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 2220 res, argL, argR, size, False)); 2221 return res; 2222 } 2223 case Iop_InterleaveOddLanes8x8: 2224 case Iop_InterleaveOddLanes16x4: 2225 case Iop_InterleaveLO32x2: 2226 case Iop_InterleaveEvenLanes8x8: 2227 case Iop_InterleaveEvenLanes16x4: 2228 case Iop_InterleaveHI32x2: { 2229 HReg tmp = newVRegD(env); 2230 HReg res = newVRegD(env); 2231 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2232 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2233 UInt size; 2234 UInt is_lo; 2235 switch (e->Iex.Binop.op) { 2236 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break; 2237 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break; 2238 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break; 2239 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break; 2240 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break; 2241 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break; 2242 default: vassert(0); 2243 } 2244 if (is_lo) { 2245 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2246 tmp, argL, 4, False)); 2247 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2248 res, argR, 4, False)); 2249 addInstr(env, 
ARMInstr_NDual(ARMneon_TRN, 2250 res, tmp, size, False)); 2251 } else { 2252 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2253 tmp, argR, 4, False)); 2254 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2255 res, argL, 4, False)); 2256 addInstr(env, ARMInstr_NDual(ARMneon_TRN, 2257 tmp, res, size, False)); 2258 } 2259 return res; 2260 } 2261 case Iop_InterleaveHI8x8: 2262 case Iop_InterleaveHI16x4: 2263 case Iop_InterleaveLO8x8: 2264 case Iop_InterleaveLO16x4: { 2265 HReg tmp = newVRegD(env); 2266 HReg res = newVRegD(env); 2267 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2268 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2269 UInt size; 2270 UInt is_lo; 2271 switch (e->Iex.Binop.op) { 2272 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break; 2273 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break; 2274 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break; 2275 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break; 2276 default: vassert(0); 2277 } 2278 if (is_lo) { 2279 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2280 tmp, argL, 4, False)); 2281 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2282 res, argR, 4, False)); 2283 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, 2284 res, tmp, size, False)); 2285 } else { 2286 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2287 tmp, argR, 4, False)); 2288 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2289 res, argL, 4, False)); 2290 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, 2291 tmp, res, size, False)); 2292 } 2293 return res; 2294 } 2295 case Iop_CatOddLanes8x8: 2296 case Iop_CatOddLanes16x4: 2297 case Iop_CatEvenLanes8x8: 2298 case Iop_CatEvenLanes16x4: { 2299 HReg tmp = newVRegD(env); 2300 HReg res = newVRegD(env); 2301 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2302 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2303 UInt size; 2304 UInt is_lo; 2305 switch (e->Iex.Binop.op) { 2306 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break; 2307 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break; 2308 case 
Iop_CatOddLanes16x4: is_lo = 1; size = 1; break; 2309 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break; 2310 default: vassert(0); 2311 } 2312 if (is_lo) { 2313 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2314 tmp, argL, 4, False)); 2315 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2316 res, argR, 4, False)); 2317 addInstr(env, ARMInstr_NDual(ARMneon_UZP, 2318 res, tmp, size, False)); 2319 } else { 2320 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2321 tmp, argR, 4, False)); 2322 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 2323 res, argL, 4, False)); 2324 addInstr(env, ARMInstr_NDual(ARMneon_UZP, 2325 tmp, res, size, False)); 2326 } 2327 return res; 2328 } 2329 case Iop_QAdd8Ux8: 2330 case Iop_QAdd16Ux4: 2331 case Iop_QAdd32Ux2: 2332 case Iop_QAdd64Ux1: { 2333 HReg res = newVRegD(env); 2334 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2335 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2336 UInt size; 2337 switch (e->Iex.Binop.op) { 2338 case Iop_QAdd8Ux8: size = 0; break; 2339 case Iop_QAdd16Ux4: size = 1; break; 2340 case Iop_QAdd32Ux2: size = 2; break; 2341 case Iop_QAdd64Ux1: size = 3; break; 2342 default: vassert(0); 2343 } 2344 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 2345 res, argL, argR, size, False)); 2346 return res; 2347 } 2348 case Iop_QAdd8Sx8: 2349 case Iop_QAdd16Sx4: 2350 case Iop_QAdd32Sx2: 2351 case Iop_QAdd64Sx1: { 2352 HReg res = newVRegD(env); 2353 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2354 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2355 UInt size; 2356 switch (e->Iex.Binop.op) { 2357 case Iop_QAdd8Sx8: size = 0; break; 2358 case Iop_QAdd16Sx4: size = 1; break; 2359 case Iop_QAdd32Sx2: size = 2; break; 2360 case Iop_QAdd64Sx1: size = 3; break; 2361 default: vassert(0); 2362 } 2363 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 2364 res, argL, argR, size, False)); 2365 return res; 2366 } 2367 case Iop_Sub8x8: 2368 case Iop_Sub16x4: 2369 case Iop_Sub32x2: 2370 case Iop_Sub64: { 2371 HReg res = 
newVRegD(env); 2372 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2373 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2374 UInt size; 2375 switch (e->Iex.Binop.op) { 2376 case Iop_Sub8x8: size = 0; break; 2377 case Iop_Sub16x4: size = 1; break; 2378 case Iop_Sub32x2: size = 2; break; 2379 case Iop_Sub64: size = 3; break; 2380 default: vassert(0); 2381 } 2382 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2383 res, argL, argR, size, False)); 2384 return res; 2385 } 2386 case Iop_Sub32Fx2: { 2387 HReg res = newVRegD(env); 2388 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2389 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2390 UInt size = 0; 2391 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, 2392 res, argL, argR, size, False)); 2393 return res; 2394 } 2395 case Iop_QSub8Ux8: 2396 case Iop_QSub16Ux4: 2397 case Iop_QSub32Ux2: 2398 case Iop_QSub64Ux1: { 2399 HReg res = newVRegD(env); 2400 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2401 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2402 UInt size; 2403 switch (e->Iex.Binop.op) { 2404 case Iop_QSub8Ux8: size = 0; break; 2405 case Iop_QSub16Ux4: size = 1; break; 2406 case Iop_QSub32Ux2: size = 2; break; 2407 case Iop_QSub64Ux1: size = 3; break; 2408 default: vassert(0); 2409 } 2410 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 2411 res, argL, argR, size, False)); 2412 return res; 2413 } 2414 case Iop_QSub8Sx8: 2415 case Iop_QSub16Sx4: 2416 case Iop_QSub32Sx2: 2417 case Iop_QSub64Sx1: { 2418 HReg res = newVRegD(env); 2419 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2420 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2421 UInt size; 2422 switch (e->Iex.Binop.op) { 2423 case Iop_QSub8Sx8: size = 0; break; 2424 case Iop_QSub16Sx4: size = 1; break; 2425 case Iop_QSub32Sx2: size = 2; break; 2426 case Iop_QSub64Sx1: size = 3; break; 2427 default: vassert(0); 2428 } 2429 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 2430 res, argL, argR, size, False)); 2431 return res; 2432 } 
2433 case Iop_Max8Ux8: 2434 case Iop_Max16Ux4: 2435 case Iop_Max32Ux2: { 2436 HReg res = newVRegD(env); 2437 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2438 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2439 UInt size; 2440 switch (e->Iex.Binop.op) { 2441 case Iop_Max8Ux8: size = 0; break; 2442 case Iop_Max16Ux4: size = 1; break; 2443 case Iop_Max32Ux2: size = 2; break; 2444 default: vassert(0); 2445 } 2446 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 2447 res, argL, argR, size, False)); 2448 return res; 2449 } 2450 case Iop_Max8Sx8: 2451 case Iop_Max16Sx4: 2452 case Iop_Max32Sx2: { 2453 HReg res = newVRegD(env); 2454 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2455 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2456 UInt size; 2457 switch (e->Iex.Binop.op) { 2458 case Iop_Max8Sx8: size = 0; break; 2459 case Iop_Max16Sx4: size = 1; break; 2460 case Iop_Max32Sx2: size = 2; break; 2461 default: vassert(0); 2462 } 2463 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 2464 res, argL, argR, size, False)); 2465 return res; 2466 } 2467 case Iop_Min8Ux8: 2468 case Iop_Min16Ux4: 2469 case Iop_Min32Ux2: { 2470 HReg res = newVRegD(env); 2471 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2472 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2473 UInt size; 2474 switch (e->Iex.Binop.op) { 2475 case Iop_Min8Ux8: size = 0; break; 2476 case Iop_Min16Ux4: size = 1; break; 2477 case Iop_Min32Ux2: size = 2; break; 2478 default: vassert(0); 2479 } 2480 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 2481 res, argL, argR, size, False)); 2482 return res; 2483 } 2484 case Iop_Min8Sx8: 2485 case Iop_Min16Sx4: 2486 case Iop_Min32Sx2: { 2487 HReg res = newVRegD(env); 2488 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2489 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2490 UInt size; 2491 switch (e->Iex.Binop.op) { 2492 case Iop_Min8Sx8: size = 0; break; 2493 case Iop_Min16Sx4: size = 1; break; 2494 case Iop_Min32Sx2: size = 2; break; 2495 
default: vassert(0); 2496 } 2497 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 2498 res, argL, argR, size, False)); 2499 return res; 2500 } 2501 case Iop_Sar8x8: 2502 case Iop_Sar16x4: 2503 case Iop_Sar32x2: { 2504 HReg res = newVRegD(env); 2505 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2506 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2507 HReg argR2 = newVRegD(env); 2508 HReg zero = newVRegD(env); 2509 UInt size; 2510 switch (e->Iex.Binop.op) { 2511 case Iop_Sar8x8: size = 0; break; 2512 case Iop_Sar16x4: size = 1; break; 2513 case Iop_Sar32x2: size = 2; break; 2514 case Iop_Sar64: size = 3; break; 2515 default: vassert(0); 2516 } 2517 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 2518 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2519 argR2, zero, argR, size, False)); 2520 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2521 res, argL, argR2, size, False)); 2522 return res; 2523 } 2524 case Iop_Sal8x8: 2525 case Iop_Sal16x4: 2526 case Iop_Sal32x2: 2527 case Iop_Sal64x1: { 2528 HReg res = newVRegD(env); 2529 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2530 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2531 UInt size; 2532 switch (e->Iex.Binop.op) { 2533 case Iop_Sal8x8: size = 0; break; 2534 case Iop_Sal16x4: size = 1; break; 2535 case Iop_Sal32x2: size = 2; break; 2536 case Iop_Sal64x1: size = 3; break; 2537 default: vassert(0); 2538 } 2539 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2540 res, argL, argR, size, False)); 2541 return res; 2542 } 2543 case Iop_Shr8x8: 2544 case Iop_Shr16x4: 2545 case Iop_Shr32x2: { 2546 HReg res = newVRegD(env); 2547 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2548 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2549 HReg argR2 = newVRegD(env); 2550 HReg zero = newVRegD(env); 2551 UInt size; 2552 switch (e->Iex.Binop.op) { 2553 case Iop_Shr8x8: size = 0; break; 2554 case Iop_Shr16x4: size = 1; break; 2555 case Iop_Shr32x2: size = 2; break; 2556 default: vassert(0); 2557 } 2558 
addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 2559 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 2560 argR2, zero, argR, size, False)); 2561 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2562 res, argL, argR2, size, False)); 2563 return res; 2564 } 2565 case Iop_Shl8x8: 2566 case Iop_Shl16x4: 2567 case Iop_Shl32x2: { 2568 HReg res = newVRegD(env); 2569 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2570 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2571 UInt size; 2572 switch (e->Iex.Binop.op) { 2573 case Iop_Shl8x8: size = 0; break; 2574 case Iop_Shl16x4: size = 1; break; 2575 case Iop_Shl32x2: size = 2; break; 2576 default: vassert(0); 2577 } 2578 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2579 res, argL, argR, size, False)); 2580 return res; 2581 } 2582 case Iop_QShl8x8: 2583 case Iop_QShl16x4: 2584 case Iop_QShl32x2: 2585 case Iop_QShl64x1: { 2586 HReg res = newVRegD(env); 2587 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2588 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2589 UInt size; 2590 switch (e->Iex.Binop.op) { 2591 case Iop_QShl8x8: size = 0; break; 2592 case Iop_QShl16x4: size = 1; break; 2593 case Iop_QShl32x2: size = 2; break; 2594 case Iop_QShl64x1: size = 3; break; 2595 default: vassert(0); 2596 } 2597 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 2598 res, argL, argR, size, False)); 2599 return res; 2600 } 2601 case Iop_QSal8x8: 2602 case Iop_QSal16x4: 2603 case Iop_QSal32x2: 2604 case Iop_QSal64x1: { 2605 HReg res = newVRegD(env); 2606 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2607 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2608 UInt size; 2609 switch (e->Iex.Binop.op) { 2610 case Iop_QSal8x8: size = 0; break; 2611 case Iop_QSal16x4: size = 1; break; 2612 case Iop_QSal32x2: size = 2; break; 2613 case Iop_QSal64x1: size = 3; break; 2614 default: vassert(0); 2615 } 2616 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 2617 res, argL, argR, size, False)); 2618 return res; 2619 } 2620 case Iop_QShlN8x8: 
2621 case Iop_QShlN16x4: 2622 case Iop_QShlN32x2: 2623 case Iop_QShlN64x1: { 2624 HReg res = newVRegD(env); 2625 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2626 UInt size, imm; 2627 if (e->Iex.Binop.arg2->tag != Iex_Const || 2628 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2629 vpanic("ARM taget supports Iop_QShlNAxB with constant " 2630 "second argument only\n"); 2631 } 2632 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2633 switch (e->Iex.Binop.op) { 2634 case Iop_QShlN8x8: size = 8 | imm; break; 2635 case Iop_QShlN16x4: size = 16 | imm; break; 2636 case Iop_QShlN32x2: size = 32 | imm; break; 2637 case Iop_QShlN64x1: size = 64 | imm; break; 2638 default: vassert(0); 2639 } 2640 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 2641 res, argL, size, False)); 2642 return res; 2643 } 2644 case Iop_QShlN8Sx8: 2645 case Iop_QShlN16Sx4: 2646 case Iop_QShlN32Sx2: 2647 case Iop_QShlN64Sx1: { 2648 HReg res = newVRegD(env); 2649 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2650 UInt size, imm; 2651 if (e->Iex.Binop.arg2->tag != Iex_Const || 2652 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2653 vpanic("ARM taget supports Iop_QShlNAxB with constant " 2654 "second argument only\n"); 2655 } 2656 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2657 switch (e->Iex.Binop.op) { 2658 case Iop_QShlN8Sx8: size = 8 | imm; break; 2659 case Iop_QShlN16Sx4: size = 16 | imm; break; 2660 case Iop_QShlN32Sx2: size = 32 | imm; break; 2661 case Iop_QShlN64Sx1: size = 64 | imm; break; 2662 default: vassert(0); 2663 } 2664 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 2665 res, argL, size, False)); 2666 return res; 2667 } 2668 case Iop_QSalN8x8: 2669 case Iop_QSalN16x4: 2670 case Iop_QSalN32x2: 2671 case Iop_QSalN64x1: { 2672 HReg res = newVRegD(env); 2673 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2674 UInt size, imm; 2675 if (e->Iex.Binop.arg2->tag != Iex_Const || 2676 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 2677 
vpanic("ARM taget supports Iop_QShlNAxB with constant " 2678 "second argument only\n"); 2679 } 2680 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 2681 switch (e->Iex.Binop.op) { 2682 case Iop_QSalN8x8: size = 8 | imm; break; 2683 case Iop_QSalN16x4: size = 16 | imm; break; 2684 case Iop_QSalN32x2: size = 32 | imm; break; 2685 case Iop_QSalN64x1: size = 64 | imm; break; 2686 default: vassert(0); 2687 } 2688 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 2689 res, argL, size, False)); 2690 return res; 2691 } 2692 case Iop_ShrN8x8: 2693 case Iop_ShrN16x4: 2694 case Iop_ShrN32x2: 2695 case Iop_Shr64: { 2696 HReg res = newVRegD(env); 2697 HReg tmp = newVRegD(env); 2698 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2699 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2700 HReg argR2 = newVRegI(env); 2701 UInt size; 2702 switch (e->Iex.Binop.op) { 2703 case Iop_ShrN8x8: size = 0; break; 2704 case Iop_ShrN16x4: size = 1; break; 2705 case Iop_ShrN32x2: size = 2; break; 2706 case Iop_Shr64: size = 3; break; 2707 default: vassert(0); 2708 } 2709 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 2710 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 2711 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2712 res, argL, tmp, size, False)); 2713 return res; 2714 } 2715 case Iop_ShlN8x8: 2716 case Iop_ShlN16x4: 2717 case Iop_ShlN32x2: 2718 case Iop_Shl64: { 2719 HReg res = newVRegD(env); 2720 HReg tmp = newVRegD(env); 2721 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2722 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2723 UInt size; 2724 switch (e->Iex.Binop.op) { 2725 case Iop_ShlN8x8: size = 0; break; 2726 case Iop_ShlN16x4: size = 1; break; 2727 case Iop_ShlN32x2: size = 2; break; 2728 case Iop_Shl64: size = 3; break; 2729 default: vassert(0); 2730 } 2731 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False)); 2732 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 2733 res, argL, tmp, size, False)); 2734 return res; 2735 } 2736 case 
Iop_SarN8x8: 2737 case Iop_SarN16x4: 2738 case Iop_SarN32x2: 2739 case Iop_Sar64: { 2740 HReg res = newVRegD(env); 2741 HReg tmp = newVRegD(env); 2742 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2743 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 2744 HReg argR2 = newVRegI(env); 2745 UInt size; 2746 switch (e->Iex.Binop.op) { 2747 case Iop_SarN8x8: size = 0; break; 2748 case Iop_SarN16x4: size = 1; break; 2749 case Iop_SarN32x2: size = 2; break; 2750 case Iop_Sar64: size = 3; break; 2751 default: vassert(0); 2752 } 2753 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 2754 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); 2755 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 2756 res, argL, tmp, size, False)); 2757 return res; 2758 } 2759 case Iop_CmpGT8Ux8: 2760 case Iop_CmpGT16Ux4: 2761 case Iop_CmpGT32Ux2: { 2762 HReg res = newVRegD(env); 2763 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2764 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2765 UInt size; 2766 switch (e->Iex.Binop.op) { 2767 case Iop_CmpGT8Ux8: size = 0; break; 2768 case Iop_CmpGT16Ux4: size = 1; break; 2769 case Iop_CmpGT32Ux2: size = 2; break; 2770 default: vassert(0); 2771 } 2772 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 2773 res, argL, argR, size, False)); 2774 return res; 2775 } 2776 case Iop_CmpGT8Sx8: 2777 case Iop_CmpGT16Sx4: 2778 case Iop_CmpGT32Sx2: { 2779 HReg res = newVRegD(env); 2780 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2781 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2782 UInt size; 2783 switch (e->Iex.Binop.op) { 2784 case Iop_CmpGT8Sx8: size = 0; break; 2785 case Iop_CmpGT16Sx4: size = 1; break; 2786 case Iop_CmpGT32Sx2: size = 2; break; 2787 default: vassert(0); 2788 } 2789 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 2790 res, argL, argR, size, False)); 2791 return res; 2792 } 2793 case Iop_CmpEQ8x8: 2794 case Iop_CmpEQ16x4: 2795 case Iop_CmpEQ32x2: { 2796 HReg res = newVRegD(env); 2797 HReg argL = 
iselNeon64Expr(env, e->Iex.Binop.arg1); 2798 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2799 UInt size; 2800 switch (e->Iex.Binop.op) { 2801 case Iop_CmpEQ8x8: size = 0; break; 2802 case Iop_CmpEQ16x4: size = 1; break; 2803 case Iop_CmpEQ32x2: size = 2; break; 2804 default: vassert(0); 2805 } 2806 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 2807 res, argL, argR, size, False)); 2808 return res; 2809 } 2810 case Iop_Mul8x8: 2811 case Iop_Mul16x4: 2812 case Iop_Mul32x2: { 2813 HReg res = newVRegD(env); 2814 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2815 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2816 UInt size = 0; 2817 switch(e->Iex.Binop.op) { 2818 case Iop_Mul8x8: size = 0; break; 2819 case Iop_Mul16x4: size = 1; break; 2820 case Iop_Mul32x2: size = 2; break; 2821 default: vassert(0); 2822 } 2823 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 2824 res, argL, argR, size, False)); 2825 return res; 2826 } 2827 case Iop_Mul32Fx2: { 2828 HReg res = newVRegD(env); 2829 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2830 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2831 UInt size = 0; 2832 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, 2833 res, argL, argR, size, False)); 2834 return res; 2835 } 2836 case Iop_QDMulHi16Sx4: 2837 case Iop_QDMulHi32Sx2: { 2838 HReg res = newVRegD(env); 2839 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2840 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2841 UInt size = 0; 2842 switch(e->Iex.Binop.op) { 2843 case Iop_QDMulHi16Sx4: size = 1; break; 2844 case Iop_QDMulHi32Sx2: size = 2; break; 2845 default: vassert(0); 2846 } 2847 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 2848 res, argL, argR, size, False)); 2849 return res; 2850 } 2851 2852 case Iop_QRDMulHi16Sx4: 2853 case Iop_QRDMulHi32Sx2: { 2854 HReg res = newVRegD(env); 2855 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2856 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2857 UInt size = 0; 2858 switch(e->Iex.Binop.op) { 
2859 case Iop_QRDMulHi16Sx4: size = 1; break; 2860 case Iop_QRDMulHi32Sx2: size = 2; break; 2861 default: vassert(0); 2862 } 2863 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 2864 res, argL, argR, size, False)); 2865 return res; 2866 } 2867 2868 case Iop_PwAdd8x8: 2869 case Iop_PwAdd16x4: 2870 case Iop_PwAdd32x2: { 2871 HReg res = newVRegD(env); 2872 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2873 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2874 UInt size = 0; 2875 switch(e->Iex.Binop.op) { 2876 case Iop_PwAdd8x8: size = 0; break; 2877 case Iop_PwAdd16x4: size = 1; break; 2878 case Iop_PwAdd32x2: size = 2; break; 2879 default: vassert(0); 2880 } 2881 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 2882 res, argL, argR, size, False)); 2883 return res; 2884 } 2885 case Iop_PwAdd32Fx2: { 2886 HReg res = newVRegD(env); 2887 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2888 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2889 UInt size = 0; 2890 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP, 2891 res, argL, argR, size, False)); 2892 return res; 2893 } 2894 case Iop_PwMin8Ux8: 2895 case Iop_PwMin16Ux4: 2896 case Iop_PwMin32Ux2: { 2897 HReg res = newVRegD(env); 2898 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2899 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2900 UInt size = 0; 2901 switch(e->Iex.Binop.op) { 2902 case Iop_PwMin8Ux8: size = 0; break; 2903 case Iop_PwMin16Ux4: size = 1; break; 2904 case Iop_PwMin32Ux2: size = 2; break; 2905 default: vassert(0); 2906 } 2907 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU, 2908 res, argL, argR, size, False)); 2909 return res; 2910 } 2911 case Iop_PwMin8Sx8: 2912 case Iop_PwMin16Sx4: 2913 case Iop_PwMin32Sx2: { 2914 HReg res = newVRegD(env); 2915 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2916 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2917 UInt size = 0; 2918 switch(e->Iex.Binop.op) { 2919 case Iop_PwMin8Sx8: size = 0; break; 2920 case Iop_PwMin16Sx4: size = 1; 
break; 2921 case Iop_PwMin32Sx2: size = 2; break; 2922 default: vassert(0); 2923 } 2924 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS, 2925 res, argL, argR, size, False)); 2926 return res; 2927 } 2928 case Iop_PwMax8Ux8: 2929 case Iop_PwMax16Ux4: 2930 case Iop_PwMax32Ux2: { 2931 HReg res = newVRegD(env); 2932 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2933 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2934 UInt size = 0; 2935 switch(e->Iex.Binop.op) { 2936 case Iop_PwMax8Ux8: size = 0; break; 2937 case Iop_PwMax16Ux4: size = 1; break; 2938 case Iop_PwMax32Ux2: size = 2; break; 2939 default: vassert(0); 2940 } 2941 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU, 2942 res, argL, argR, size, False)); 2943 return res; 2944 } 2945 case Iop_PwMax8Sx8: 2946 case Iop_PwMax16Sx4: 2947 case Iop_PwMax32Sx2: { 2948 HReg res = newVRegD(env); 2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2951 UInt size = 0; 2952 switch(e->Iex.Binop.op) { 2953 case Iop_PwMax8Sx8: size = 0; break; 2954 case Iop_PwMax16Sx4: size = 1; break; 2955 case Iop_PwMax32Sx2: size = 2; break; 2956 default: vassert(0); 2957 } 2958 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS, 2959 res, argL, argR, size, False)); 2960 return res; 2961 } 2962 case Iop_Perm8x8: { 2963 HReg res = newVRegD(env); 2964 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2965 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2966 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL, 2967 res, argL, argR, 0, False)); 2968 return res; 2969 } 2970 case Iop_PolynomialMul8x8: { 2971 HReg res = newVRegD(env); 2972 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2973 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2974 UInt size = 0; 2975 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 2976 res, argL, argR, size, False)); 2977 return res; 2978 } 2979 case Iop_Max32Fx2: { 2980 HReg res = newVRegD(env); 2981 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 
2982 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2983 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 2984 res, argL, argR, 2, False)); 2985 return res; 2986 } 2987 case Iop_Min32Fx2: { 2988 HReg res = newVRegD(env); 2989 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2990 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2991 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, 2992 res, argL, argR, 2, False)); 2993 return res; 2994 } 2995 case Iop_PwMax32Fx2: { 2996 HReg res = newVRegD(env); 2997 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 2998 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 2999 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 3000 res, argL, argR, 2, False)); 3001 return res; 3002 } 3003 case Iop_PwMin32Fx2: { 3004 HReg res = newVRegD(env); 3005 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3006 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3007 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 3008 res, argL, argR, 2, False)); 3009 return res; 3010 } 3011 case Iop_CmpGT32Fx2: { 3012 HReg res = newVRegD(env); 3013 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3014 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3015 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 3016 res, argL, argR, 2, False)); 3017 return res; 3018 } 3019 case Iop_CmpGE32Fx2: { 3020 HReg res = newVRegD(env); 3021 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3022 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3023 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 3024 res, argL, argR, 2, False)); 3025 return res; 3026 } 3027 case Iop_CmpEQ32Fx2: { 3028 HReg res = newVRegD(env); 3029 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3030 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 3031 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 3032 res, argL, argR, 2, False)); 3033 return res; 3034 } 3035 case Iop_F32ToFixed32Ux2_RZ: 3036 case Iop_F32ToFixed32Sx2_RZ: 3037 case Iop_Fixed32UToF32x2_RN: 3038 case Iop_Fixed32SToF32x2_RN: { 
3039 HReg res = newVRegD(env); 3040 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); 3041 ARMNeonUnOp op; 3042 UInt imm6; 3043 if (e->Iex.Binop.arg2->tag != Iex_Const || 3044 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3045 vpanic("ARM supports FP <-> Fixed conversion with constant " 3046 "second argument less than 33 only\n"); 3047 } 3048 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3049 vassert(imm6 <= 32 && imm6 > 0); 3050 imm6 = 64 - imm6; 3051 switch(e->Iex.Binop.op) { 3052 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break; 3053 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break; 3054 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break; 3055 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break; 3056 default: vassert(0); 3057 } 3058 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False)); 3059 return res; 3060 } 3061 /* 3062 FIXME: is this here or not? 3063 case Iop_VDup8x8: 3064 case Iop_VDup16x4: 3065 case Iop_VDup32x2: { 3066 HReg res = newVRegD(env); 3067 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3068 UInt index; 3069 UInt imm4; 3070 UInt size = 0; 3071 if (e->Iex.Binop.arg2->tag != Iex_Const || 3072 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 3073 vpanic("ARM supports Iop_VDup with constant " 3074 "second argument less than 16 only\n"); 3075 } 3076 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 3077 switch(e->Iex.Binop.op) { 3078 case Iop_VDup8x8: imm4 = (index << 1) + 1; break; 3079 case Iop_VDup16x4: imm4 = (index << 2) + 2; break; 3080 case Iop_VDup32x2: imm4 = (index << 3) + 4; break; 3081 default: vassert(0); 3082 } 3083 if (imm4 >= 16) { 3084 vpanic("ARM supports Iop_VDup with constant " 3085 "second argument less than 16 only\n"); 3086 } 3087 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 3088 res, argL, imm4, False)); 3089 return res; 3090 } 3091 */ 3092 default: 3093 break; 3094 } 3095 } 3096 3097 /* --------- UNARY ops --------- */ 3098 if (e->tag == 
Iex_Unop) { 3099 switch (e->Iex.Unop.op) { 3100 3101 /* ReinterpF64asI64 */ 3102 case Iop_ReinterpF64asI64: 3103 /* Left64(e) */ 3104 case Iop_Left64: 3105 /* CmpwNEZ64(e) */ 3106 //case Iop_CmpwNEZ64: 3107 case Iop_1Sto64: { 3108 HReg rLo, rHi; 3109 HReg res = newVRegD(env); 3110 iselInt64Expr(&rHi, &rLo, env, e); 3111 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3112 return res; 3113 } 3114 case Iop_Not64: { 3115 DECLARE_PATTERN(p_veqz_8x8); 3116 DECLARE_PATTERN(p_veqz_16x4); 3117 DECLARE_PATTERN(p_veqz_32x2); 3118 DECLARE_PATTERN(p_vcge_8sx8); 3119 DECLARE_PATTERN(p_vcge_16sx4); 3120 DECLARE_PATTERN(p_vcge_32sx2); 3121 DECLARE_PATTERN(p_vcge_8ux8); 3122 DECLARE_PATTERN(p_vcge_16ux4); 3123 DECLARE_PATTERN(p_vcge_32ux2); 3124 DEFINE_PATTERN(p_veqz_8x8, 3125 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0)))); 3126 DEFINE_PATTERN(p_veqz_16x4, 3127 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0)))); 3128 DEFINE_PATTERN(p_veqz_32x2, 3129 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0)))); 3130 DEFINE_PATTERN(p_vcge_8sx8, 3131 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0)))); 3132 DEFINE_PATTERN(p_vcge_16sx4, 3133 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0)))); 3134 DEFINE_PATTERN(p_vcge_32sx2, 3135 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0)))); 3136 DEFINE_PATTERN(p_vcge_8ux8, 3137 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0)))); 3138 DEFINE_PATTERN(p_vcge_16ux4, 3139 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0)))); 3140 DEFINE_PATTERN(p_vcge_32ux2, 3141 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0)))); 3142 if (matchIRExpr(&mi, p_veqz_8x8, e)) { 3143 HReg res = newVRegD(env); 3144 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3145 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False)); 3146 return res; 3147 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) { 3148 HReg res = newVRegD(env); 3149 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3150 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, 
False)); 3151 return res; 3152 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) { 3153 HReg res = newVRegD(env); 3154 HReg arg = iselNeon64Expr(env, mi.bindee[0]); 3155 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False)); 3156 return res; 3157 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) { 3158 HReg res = newVRegD(env); 3159 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3160 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3161 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3162 res, argL, argR, 0, False)); 3163 return res; 3164 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) { 3165 HReg res = newVRegD(env); 3166 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3167 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3168 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3169 res, argL, argR, 1, False)); 3170 return res; 3171 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) { 3172 HReg res = newVRegD(env); 3173 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3174 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3175 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3176 res, argL, argR, 2, False)); 3177 return res; 3178 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) { 3179 HReg res = newVRegD(env); 3180 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3181 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3182 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3183 res, argL, argR, 0, False)); 3184 return res; 3185 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) { 3186 HReg res = newVRegD(env); 3187 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3188 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3189 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3190 res, argL, argR, 1, False)); 3191 return res; 3192 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) { 3193 HReg res = newVRegD(env); 3194 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3195 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3196 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3197 res, argL, argR, 2, False)); 3198 return res; 3199 } else { 
3200 HReg res = newVRegD(env); 3201 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3202 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False)); 3203 return res; 3204 } 3205 } 3206 case Iop_Dup8x8: 3207 case Iop_Dup16x4: 3208 case Iop_Dup32x2: { 3209 HReg res, arg; 3210 UInt size; 3211 DECLARE_PATTERN(p_vdup_8x8); 3212 DECLARE_PATTERN(p_vdup_16x4); 3213 DECLARE_PATTERN(p_vdup_32x2); 3214 DEFINE_PATTERN(p_vdup_8x8, 3215 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1)))); 3216 DEFINE_PATTERN(p_vdup_16x4, 3217 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1)))); 3218 DEFINE_PATTERN(p_vdup_32x2, 3219 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1)))); 3220 if (matchIRExpr(&mi, p_vdup_8x8, e)) { 3221 UInt index; 3222 UInt imm4; 3223 if (mi.bindee[1]->tag == Iex_Const && 3224 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3225 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3226 imm4 = (index << 1) + 1; 3227 if (index < 8) { 3228 res = newVRegD(env); 3229 arg = iselNeon64Expr(env, mi.bindee[0]); 3230 addInstr(env, ARMInstr_NUnaryS( 3231 ARMneon_VDUP, 3232 mkARMNRS(ARMNRS_Reg, res, 0), 3233 mkARMNRS(ARMNRS_Scalar, arg, index), 3234 imm4, False 3235 )); 3236 return res; 3237 } 3238 } 3239 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) { 3240 UInt index; 3241 UInt imm4; 3242 if (mi.bindee[1]->tag == Iex_Const && 3243 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3244 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3245 imm4 = (index << 2) + 2; 3246 if (index < 4) { 3247 res = newVRegD(env); 3248 arg = iselNeon64Expr(env, mi.bindee[0]); 3249 addInstr(env, ARMInstr_NUnaryS( 3250 ARMneon_VDUP, 3251 mkARMNRS(ARMNRS_Reg, res, 0), 3252 mkARMNRS(ARMNRS_Scalar, arg, index), 3253 imm4, False 3254 )); 3255 return res; 3256 } 3257 } 3258 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) { 3259 UInt index; 3260 UInt imm4; 3261 if (mi.bindee[1]->tag == Iex_Const && 3262 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3263 index = 
mi.bindee[1]->Iex.Const.con->Ico.U8; 3264 imm4 = (index << 3) + 4; 3265 if (index < 2) { 3266 res = newVRegD(env); 3267 arg = iselNeon64Expr(env, mi.bindee[0]); 3268 addInstr(env, ARMInstr_NUnaryS( 3269 ARMneon_VDUP, 3270 mkARMNRS(ARMNRS_Reg, res, 0), 3271 mkARMNRS(ARMNRS_Scalar, arg, index), 3272 imm4, False 3273 )); 3274 return res; 3275 } 3276 } 3277 } 3278 arg = iselIntExpr_R(env, e->Iex.Unop.arg); 3279 res = newVRegD(env); 3280 switch (e->Iex.Unop.op) { 3281 case Iop_Dup8x8: size = 0; break; 3282 case Iop_Dup16x4: size = 1; break; 3283 case Iop_Dup32x2: size = 2; break; 3284 default: vassert(0); 3285 } 3286 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False)); 3287 return res; 3288 } 3289 case Iop_Abs8x8: 3290 case Iop_Abs16x4: 3291 case Iop_Abs32x2: { 3292 HReg res = newVRegD(env); 3293 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3294 UInt size = 0; 3295 switch(e->Iex.Binop.op) { 3296 case Iop_Abs8x8: size = 0; break; 3297 case Iop_Abs16x4: size = 1; break; 3298 case Iop_Abs32x2: size = 2; break; 3299 default: vassert(0); 3300 } 3301 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False)); 3302 return res; 3303 } 3304 case Iop_Reverse64_8x8: 3305 case Iop_Reverse64_16x4: 3306 case Iop_Reverse64_32x2: { 3307 HReg res = newVRegD(env); 3308 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3309 UInt size = 0; 3310 switch(e->Iex.Binop.op) { 3311 case Iop_Reverse64_8x8: size = 0; break; 3312 case Iop_Reverse64_16x4: size = 1; break; 3313 case Iop_Reverse64_32x2: size = 2; break; 3314 default: vassert(0); 3315 } 3316 addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 3317 res, arg, size, False)); 3318 return res; 3319 } 3320 case Iop_Reverse32_8x8: 3321 case Iop_Reverse32_16x4: { 3322 HReg res = newVRegD(env); 3323 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3324 UInt size = 0; 3325 switch(e->Iex.Binop.op) { 3326 case Iop_Reverse32_8x8: size = 0; break; 3327 case Iop_Reverse32_16x4: size = 1; break; 3328 default: vassert(0); 3329 } 3330 
addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 3331 res, arg, size, False)); 3332 return res; 3333 } 3334 case Iop_Reverse16_8x8: { 3335 HReg res = newVRegD(env); 3336 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3337 UInt size = 0; 3338 addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 3339 res, arg, size, False)); 3340 return res; 3341 } 3342 case Iop_CmpwNEZ64: { 3343 HReg x_lsh = newVRegD(env); 3344 HReg x_rsh = newVRegD(env); 3345 HReg lsh_amt = newVRegD(env); 3346 HReg rsh_amt = newVRegD(env); 3347 HReg zero = newVRegD(env); 3348 HReg tmp = newVRegD(env); 3349 HReg tmp2 = newVRegD(env); 3350 HReg res = newVRegD(env); 3351 HReg x = newVRegD(env); 3352 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3353 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False)); 3354 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False)); 3355 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 3356 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); 3357 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3358 rsh_amt, zero, lsh_amt, 2, False)); 3359 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3360 x_lsh, x, lsh_amt, 3, False)); 3361 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3362 x_rsh, x, rsh_amt, 3, False)); 3363 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3364 tmp, x_lsh, x_rsh, 0, False)); 3365 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3366 res, tmp, x, 0, False)); 3367 return res; 3368 } 3369 case Iop_CmpNEZ8x8: 3370 case Iop_CmpNEZ16x4: 3371 case Iop_CmpNEZ32x2: { 3372 HReg res = newVRegD(env); 3373 HReg tmp = newVRegD(env); 3374 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3375 UInt size; 3376 switch (e->Iex.Unop.op) { 3377 case Iop_CmpNEZ8x8: size = 0; break; 3378 case Iop_CmpNEZ16x4: size = 1; break; 3379 case Iop_CmpNEZ32x2: size = 2; break; 3380 default: vassert(0); 3381 } 3382 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False)); 3383 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False)); 3384 return res; 3385 } 
3386 case Iop_NarrowUn16to8x8: 3387 case Iop_NarrowUn32to16x4: 3388 case Iop_NarrowUn64to32x2: { 3389 HReg res = newVRegD(env); 3390 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3391 UInt size = 0; 3392 switch(e->Iex.Binop.op) { 3393 case Iop_NarrowUn16to8x8: size = 0; break; 3394 case Iop_NarrowUn32to16x4: size = 1; break; 3395 case Iop_NarrowUn64to32x2: size = 2; break; 3396 default: vassert(0); 3397 } 3398 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN, 3399 res, arg, size, False)); 3400 return res; 3401 } 3402 case Iop_QNarrowUn16Sto8Sx8: 3403 case Iop_QNarrowUn32Sto16Sx4: 3404 case Iop_QNarrowUn64Sto32Sx2: { 3405 HReg res = newVRegD(env); 3406 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3407 UInt size = 0; 3408 switch(e->Iex.Binop.op) { 3409 case Iop_QNarrowUn16Sto8Sx8: size = 0; break; 3410 case Iop_QNarrowUn32Sto16Sx4: size = 1; break; 3411 case Iop_QNarrowUn64Sto32Sx2: size = 2; break; 3412 default: vassert(0); 3413 } 3414 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS, 3415 res, arg, size, False)); 3416 return res; 3417 } 3418 case Iop_QNarrowUn16Sto8Ux8: 3419 case Iop_QNarrowUn32Sto16Ux4: 3420 case Iop_QNarrowUn64Sto32Ux2: { 3421 HReg res = newVRegD(env); 3422 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3423 UInt size = 0; 3424 switch(e->Iex.Binop.op) { 3425 case Iop_QNarrowUn16Sto8Ux8: size = 0; break; 3426 case Iop_QNarrowUn32Sto16Ux4: size = 1; break; 3427 case Iop_QNarrowUn64Sto32Ux2: size = 2; break; 3428 default: vassert(0); 3429 } 3430 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS, 3431 res, arg, size, False)); 3432 return res; 3433 } 3434 case Iop_QNarrowUn16Uto8Ux8: 3435 case Iop_QNarrowUn32Uto16Ux4: 3436 case Iop_QNarrowUn64Uto32Ux2: { 3437 HReg res = newVRegD(env); 3438 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3439 UInt size = 0; 3440 switch(e->Iex.Binop.op) { 3441 case Iop_QNarrowUn16Uto8Ux8: size = 0; break; 3442 case Iop_QNarrowUn32Uto16Ux4: size = 1; break; 3443 case Iop_QNarrowUn64Uto32Ux2: size = 2; break; 3444 default: 
vassert(0); 3445 } 3446 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU, 3447 res, arg, size, False)); 3448 return res; 3449 } 3450 case Iop_PwAddL8Sx8: 3451 case Iop_PwAddL16Sx4: 3452 case Iop_PwAddL32Sx2: { 3453 HReg res = newVRegD(env); 3454 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3455 UInt size = 0; 3456 switch(e->Iex.Binop.op) { 3457 case Iop_PwAddL8Sx8: size = 0; break; 3458 case Iop_PwAddL16Sx4: size = 1; break; 3459 case Iop_PwAddL32Sx2: size = 2; break; 3460 default: vassert(0); 3461 } 3462 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 3463 res, arg, size, False)); 3464 return res; 3465 } 3466 case Iop_PwAddL8Ux8: 3467 case Iop_PwAddL16Ux4: 3468 case Iop_PwAddL32Ux2: { 3469 HReg res = newVRegD(env); 3470 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3471 UInt size = 0; 3472 switch(e->Iex.Binop.op) { 3473 case Iop_PwAddL8Ux8: size = 0; break; 3474 case Iop_PwAddL16Ux4: size = 1; break; 3475 case Iop_PwAddL32Ux2: size = 2; break; 3476 default: vassert(0); 3477 } 3478 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 3479 res, arg, size, False)); 3480 return res; 3481 } 3482 case Iop_Cnt8x8: { 3483 HReg res = newVRegD(env); 3484 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3485 UInt size = 0; 3486 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, 3487 res, arg, size, False)); 3488 return res; 3489 } 3490 case Iop_Clz8Sx8: 3491 case Iop_Clz16Sx4: 3492 case Iop_Clz32Sx2: { 3493 HReg res = newVRegD(env); 3494 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3495 UInt size = 0; 3496 switch(e->Iex.Binop.op) { 3497 case Iop_Clz8Sx8: size = 0; break; 3498 case Iop_Clz16Sx4: size = 1; break; 3499 case Iop_Clz32Sx2: size = 2; break; 3500 default: vassert(0); 3501 } 3502 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, 3503 res, arg, size, False)); 3504 return res; 3505 } 3506 case Iop_Cls8Sx8: 3507 case Iop_Cls16Sx4: 3508 case Iop_Cls32Sx2: { 3509 HReg res = newVRegD(env); 3510 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3511 UInt size = 0; 3512 
switch(e->Iex.Binop.op) { 3513 case Iop_Cls8Sx8: size = 0; break; 3514 case Iop_Cls16Sx4: size = 1; break; 3515 case Iop_Cls32Sx2: size = 2; break; 3516 default: vassert(0); 3517 } 3518 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, 3519 res, arg, size, False)); 3520 return res; 3521 } 3522 case Iop_FtoI32Sx2_RZ: { 3523 HReg res = newVRegD(env); 3524 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3525 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 3526 res, arg, 2, False)); 3527 return res; 3528 } 3529 case Iop_FtoI32Ux2_RZ: { 3530 HReg res = newVRegD(env); 3531 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3532 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 3533 res, arg, 2, False)); 3534 return res; 3535 } 3536 case Iop_I32StoFx2: { 3537 HReg res = newVRegD(env); 3538 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3539 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 3540 res, arg, 2, False)); 3541 return res; 3542 } 3543 case Iop_I32UtoFx2: { 3544 HReg res = newVRegD(env); 3545 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3546 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 3547 res, arg, 2, False)); 3548 return res; 3549 } 3550 case Iop_F32toF16x4: { 3551 HReg res = newVRegD(env); 3552 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3553 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16, 3554 res, arg, 2, False)); 3555 return res; 3556 } 3557 case Iop_Recip32Fx2: { 3558 HReg res = newVRegD(env); 3559 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3560 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 3561 res, argL, 0, False)); 3562 return res; 3563 } 3564 case Iop_Recip32x2: { 3565 HReg res = newVRegD(env); 3566 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 3567 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 3568 res, argL, 0, False)); 3569 return res; 3570 } 3571 case Iop_Abs32Fx2: { 3572 DECLARE_PATTERN(p_vabd_32fx2); 3573 DEFINE_PATTERN(p_vabd_32fx2, 3574 unop(Iop_Abs32Fx2, 3575 binop(Iop_Sub32Fx2, 3576 bind(0), 3577 bind(1)))); 3578 if 
(matchIRExpr(&mi, p_vabd_32fx2, e)) { 3579 HReg res = newVRegD(env); 3580 HReg argL = iselNeon64Expr(env, mi.bindee[0]); 3581 HReg argR = iselNeon64Expr(env, mi.bindee[1]); 3582 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, 3583 res, argL, argR, 0, False)); 3584 return res; 3585 } else { 3586 HReg res = newVRegD(env); 3587 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3588 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, 3589 res, arg, 0, False)); 3590 return res; 3591 } 3592 } 3593 case Iop_Rsqrte32Fx2: { 3594 HReg res = newVRegD(env); 3595 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3596 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, 3597 res, arg, 0, False)); 3598 return res; 3599 } 3600 case Iop_Rsqrte32x2: { 3601 HReg res = newVRegD(env); 3602 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3603 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 3604 res, arg, 0, False)); 3605 return res; 3606 } 3607 case Iop_Neg32Fx2: { 3608 HReg res = newVRegD(env); 3609 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 3610 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, 3611 res, arg, 0, False)); 3612 return res; 3613 } 3614 default: 3615 break; 3616 } 3617 } /* if (e->tag == Iex_Unop) */ 3618 3619 if (e->tag == Iex_Triop) { 3620 IRTriop *triop = e->Iex.Triop.details; 3621 3622 switch (triop->op) { 3623 case Iop_Extract64: { 3624 HReg res = newVRegD(env); 3625 HReg argL = iselNeon64Expr(env, triop->arg1); 3626 HReg argR = iselNeon64Expr(env, triop->arg2); 3627 UInt imm4; 3628 if (triop->arg3->tag != Iex_Const || 3629 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 3630 vpanic("ARM target supports Iop_Extract64 with constant " 3631 "third argument less than 16 only\n"); 3632 } 3633 imm4 = triop->arg3->Iex.Const.con->Ico.U8; 3634 if (imm4 >= 8) { 3635 vpanic("ARM target supports Iop_Extract64 with constant " 3636 "third argument less than 16 only\n"); 3637 } 3638 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, 3639 res, argL, argR, imm4, False)); 3640 return res; 3641 } 
3642 case Iop_SetElem8x8: 3643 case Iop_SetElem16x4: 3644 case Iop_SetElem32x2: { 3645 HReg res = newVRegD(env); 3646 HReg dreg = iselNeon64Expr(env, triop->arg1); 3647 HReg arg = iselIntExpr_R(env, triop->arg3); 3648 UInt index, size; 3649 if (triop->arg2->tag != Iex_Const || 3650 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) { 3651 vpanic("ARM target supports SetElem with constant " 3652 "second argument only\n"); 3653 } 3654 index = triop->arg2->Iex.Const.con->Ico.U8; 3655 switch (triop->op) { 3656 case Iop_SetElem8x8: vassert(index < 8); size = 0; break; 3657 case Iop_SetElem16x4: vassert(index < 4); size = 1; break; 3658 case Iop_SetElem32x2: vassert(index < 2); size = 2; break; 3659 default: vassert(0); 3660 } 3661 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False)); 3662 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM, 3663 mkARMNRS(ARMNRS_Scalar, res, index), 3664 mkARMNRS(ARMNRS_Reg, arg, 0), 3665 size, False)); 3666 return res; 3667 } 3668 default: 3669 break; 3670 } 3671 } 3672 3673 /* --------- MULTIPLEX --------- */ 3674 if (e->tag == Iex_Mux0X) { 3675 HReg rLo, rHi; 3676 HReg res = newVRegD(env); 3677 iselInt64Expr(&rHi, &rLo, env, e); 3678 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); 3679 return res; 3680 } 3681 3682 ppIRExpr(e); 3683 vpanic("iselNeon64Expr"); 3684 } 3685 3686 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e ) 3687 { 3688 HReg r = iselNeonExpr_wrk( env, e ); 3689 vassert(hregClass(r) == HRcVec128); 3690 vassert(hregIsVirtual(r)); 3691 return r; 3692 } 3693 3694 /* DO NOT CALL THIS DIRECTLY */ 3695 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e ) 3696 { 3697 IRType ty = typeOfIRExpr(env->type_env, e); 3698 MatchInfo mi; 3699 vassert(e); 3700 vassert(ty == Ity_V128); 3701 3702 if (e->tag == Iex_RdTmp) { 3703 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3704 } 3705 3706 if (e->tag == Iex_Const) { 3707 /* At the moment there should be no 128-bit constants in IR for ARM 3708 generated during 
disassemble. They are represented as Iop_64HLtoV128 3709 binary operation and are handled among binary ops. */ 3710 /* But zero can be created by valgrind internal optimizer */ 3711 if (e->Iex.Const.con->Ico.V128 == 0) { 3712 HReg res = newVRegV(env); 3713 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0))); 3714 return res; 3715 } 3716 ppIRExpr(e); 3717 vpanic("128-bit constant is not implemented"); 3718 } 3719 3720 if (e->tag == Iex_Load) { 3721 HReg res = newVRegV(env); 3722 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); 3723 vassert(ty == Ity_V128); 3724 addInstr(env, ARMInstr_NLdStQ(True, res, am)); 3725 return res; 3726 } 3727 3728 if (e->tag == Iex_Get) { 3729 HReg addr = newVRegI(env); 3730 HReg res = newVRegV(env); 3731 vassert(ty == Ity_V128); 3732 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset)); 3733 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr))); 3734 return res; 3735 } 3736 3737 if (e->tag == Iex_Unop) { 3738 switch (e->Iex.Unop.op) { 3739 case Iop_NotV128: { 3740 DECLARE_PATTERN(p_veqz_8x16); 3741 DECLARE_PATTERN(p_veqz_16x8); 3742 DECLARE_PATTERN(p_veqz_32x4); 3743 DECLARE_PATTERN(p_vcge_8sx16); 3744 DECLARE_PATTERN(p_vcge_16sx8); 3745 DECLARE_PATTERN(p_vcge_32sx4); 3746 DECLARE_PATTERN(p_vcge_8ux16); 3747 DECLARE_PATTERN(p_vcge_16ux8); 3748 DECLARE_PATTERN(p_vcge_32ux4); 3749 DEFINE_PATTERN(p_veqz_8x16, 3750 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0)))); 3751 DEFINE_PATTERN(p_veqz_16x8, 3752 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0)))); 3753 DEFINE_PATTERN(p_veqz_32x4, 3754 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0)))); 3755 DEFINE_PATTERN(p_vcge_8sx16, 3756 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0)))); 3757 DEFINE_PATTERN(p_vcge_16sx8, 3758 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0)))); 3759 DEFINE_PATTERN(p_vcge_32sx4, 3760 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0)))); 3761 DEFINE_PATTERN(p_vcge_8ux16, 3762 unop(Iop_NotV128, 
binop(Iop_CmpGT8Ux16, bind(1), bind(0)))); 3763 DEFINE_PATTERN(p_vcge_16ux8, 3764 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0)))); 3765 DEFINE_PATTERN(p_vcge_32ux4, 3766 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0)))); 3767 if (matchIRExpr(&mi, p_veqz_8x16, e)) { 3768 HReg res = newVRegV(env); 3769 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3770 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True)); 3771 return res; 3772 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) { 3773 HReg res = newVRegV(env); 3774 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3775 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True)); 3776 return res; 3777 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) { 3778 HReg res = newVRegV(env); 3779 HReg arg = iselNeonExpr(env, mi.bindee[0]); 3780 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True)); 3781 return res; 3782 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) { 3783 HReg res = newVRegV(env); 3784 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3785 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3786 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3787 res, argL, argR, 0, True)); 3788 return res; 3789 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) { 3790 HReg res = newVRegV(env); 3791 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3792 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3793 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3794 res, argL, argR, 1, True)); 3795 return res; 3796 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) { 3797 HReg res = newVRegV(env); 3798 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3799 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3800 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, 3801 res, argL, argR, 2, True)); 3802 return res; 3803 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) { 3804 HReg res = newVRegV(env); 3805 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3806 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3807 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3808 res, argL, 
argR, 0, True)); 3809 return res; 3810 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) { 3811 HReg res = newVRegV(env); 3812 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3813 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3814 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3815 res, argL, argR, 1, True)); 3816 return res; 3817 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) { 3818 HReg res = newVRegV(env); 3819 HReg argL = iselNeonExpr(env, mi.bindee[0]); 3820 HReg argR = iselNeonExpr(env, mi.bindee[1]); 3821 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, 3822 res, argL, argR, 2, True)); 3823 return res; 3824 } else { 3825 HReg res = newVRegV(env); 3826 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3827 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True)); 3828 return res; 3829 } 3830 } 3831 case Iop_Dup8x16: 3832 case Iop_Dup16x8: 3833 case Iop_Dup32x4: { 3834 HReg res, arg; 3835 UInt size; 3836 DECLARE_PATTERN(p_vdup_8x16); 3837 DECLARE_PATTERN(p_vdup_16x8); 3838 DECLARE_PATTERN(p_vdup_32x4); 3839 DEFINE_PATTERN(p_vdup_8x16, 3840 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1)))); 3841 DEFINE_PATTERN(p_vdup_16x8, 3842 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1)))); 3843 DEFINE_PATTERN(p_vdup_32x4, 3844 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1)))); 3845 if (matchIRExpr(&mi, p_vdup_8x16, e)) { 3846 UInt index; 3847 UInt imm4; 3848 if (mi.bindee[1]->tag == Iex_Const && 3849 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3850 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3851 imm4 = (index << 1) + 1; 3852 if (index < 8) { 3853 res = newVRegV(env); 3854 arg = iselNeon64Expr(env, mi.bindee[0]); 3855 addInstr(env, ARMInstr_NUnaryS( 3856 ARMneon_VDUP, 3857 mkARMNRS(ARMNRS_Reg, res, 0), 3858 mkARMNRS(ARMNRS_Scalar, arg, index), 3859 imm4, True 3860 )); 3861 return res; 3862 } 3863 } 3864 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) { 3865 UInt index; 3866 UInt imm4; 3867 if (mi.bindee[1]->tag == Iex_Const && 3868 
typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3869 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3870 imm4 = (index << 2) + 2; 3871 if (index < 4) { 3872 res = newVRegV(env); 3873 arg = iselNeon64Expr(env, mi.bindee[0]); 3874 addInstr(env, ARMInstr_NUnaryS( 3875 ARMneon_VDUP, 3876 mkARMNRS(ARMNRS_Reg, res, 0), 3877 mkARMNRS(ARMNRS_Scalar, arg, index), 3878 imm4, True 3879 )); 3880 return res; 3881 } 3882 } 3883 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) { 3884 UInt index; 3885 UInt imm4; 3886 if (mi.bindee[1]->tag == Iex_Const && 3887 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { 3888 index = mi.bindee[1]->Iex.Const.con->Ico.U8; 3889 imm4 = (index << 3) + 4; 3890 if (index < 2) { 3891 res = newVRegV(env); 3892 arg = iselNeon64Expr(env, mi.bindee[0]); 3893 addInstr(env, ARMInstr_NUnaryS( 3894 ARMneon_VDUP, 3895 mkARMNRS(ARMNRS_Reg, res, 0), 3896 mkARMNRS(ARMNRS_Scalar, arg, index), 3897 imm4, True 3898 )); 3899 return res; 3900 } 3901 } 3902 } 3903 arg = iselIntExpr_R(env, e->Iex.Unop.arg); 3904 res = newVRegV(env); 3905 switch (e->Iex.Unop.op) { 3906 case Iop_Dup8x16: size = 0; break; 3907 case Iop_Dup16x8: size = 1; break; 3908 case Iop_Dup32x4: size = 2; break; 3909 default: vassert(0); 3910 } 3911 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True)); 3912 return res; 3913 } 3914 case Iop_Abs8x16: 3915 case Iop_Abs16x8: 3916 case Iop_Abs32x4: { 3917 HReg res = newVRegV(env); 3918 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3919 UInt size = 0; 3920 switch(e->Iex.Binop.op) { 3921 case Iop_Abs8x16: size = 0; break; 3922 case Iop_Abs16x8: size = 1; break; 3923 case Iop_Abs32x4: size = 2; break; 3924 default: vassert(0); 3925 } 3926 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True)); 3927 return res; 3928 } 3929 case Iop_Reverse64_8x16: 3930 case Iop_Reverse64_16x8: 3931 case Iop_Reverse64_32x4: { 3932 HReg res = newVRegV(env); 3933 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3934 UInt size = 0; 3935 
switch(e->Iex.Binop.op) { 3936 case Iop_Reverse64_8x16: size = 0; break; 3937 case Iop_Reverse64_16x8: size = 1; break; 3938 case Iop_Reverse64_32x4: size = 2; break; 3939 default: vassert(0); 3940 } 3941 addInstr(env, ARMInstr_NUnary(ARMneon_REV64, 3942 res, arg, size, True)); 3943 return res; 3944 } 3945 case Iop_Reverse32_8x16: 3946 case Iop_Reverse32_16x8: { 3947 HReg res = newVRegV(env); 3948 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3949 UInt size = 0; 3950 switch(e->Iex.Binop.op) { 3951 case Iop_Reverse32_8x16: size = 0; break; 3952 case Iop_Reverse32_16x8: size = 1; break; 3953 default: vassert(0); 3954 } 3955 addInstr(env, ARMInstr_NUnary(ARMneon_REV32, 3956 res, arg, size, True)); 3957 return res; 3958 } 3959 case Iop_Reverse16_8x16: { 3960 HReg res = newVRegV(env); 3961 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3962 UInt size = 0; 3963 addInstr(env, ARMInstr_NUnary(ARMneon_REV16, 3964 res, arg, size, True)); 3965 return res; 3966 } 3967 case Iop_CmpNEZ64x2: { 3968 HReg x_lsh = newVRegV(env); 3969 HReg x_rsh = newVRegV(env); 3970 HReg lsh_amt = newVRegV(env); 3971 HReg rsh_amt = newVRegV(env); 3972 HReg zero = newVRegV(env); 3973 HReg tmp = newVRegV(env); 3974 HReg tmp2 = newVRegV(env); 3975 HReg res = newVRegV(env); 3976 HReg x = newVRegV(env); 3977 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 3978 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True)); 3979 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True)); 3980 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); 3981 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); 3982 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 3983 rsh_amt, zero, lsh_amt, 2, True)); 3984 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3985 x_lsh, x, lsh_amt, 3, True)); 3986 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 3987 x_rsh, x, rsh_amt, 3, True)); 3988 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 3989 tmp, x_lsh, x_rsh, 0, True)); 3990 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 
3991 res, tmp, x, 0, True)); 3992 return res; 3993 } 3994 case Iop_CmpNEZ8x16: 3995 case Iop_CmpNEZ16x8: 3996 case Iop_CmpNEZ32x4: { 3997 HReg res = newVRegV(env); 3998 HReg tmp = newVRegV(env); 3999 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4000 UInt size; 4001 switch (e->Iex.Unop.op) { 4002 case Iop_CmpNEZ8x16: size = 0; break; 4003 case Iop_CmpNEZ16x8: size = 1; break; 4004 case Iop_CmpNEZ32x4: size = 2; break; 4005 default: vassert(0); 4006 } 4007 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True)); 4008 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True)); 4009 return res; 4010 } 4011 case Iop_Widen8Uto16x8: 4012 case Iop_Widen16Uto32x4: 4013 case Iop_Widen32Uto64x2: { 4014 HReg res = newVRegV(env); 4015 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4016 UInt size; 4017 switch (e->Iex.Unop.op) { 4018 case Iop_Widen8Uto16x8: size = 0; break; 4019 case Iop_Widen16Uto32x4: size = 1; break; 4020 case Iop_Widen32Uto64x2: size = 2; break; 4021 default: vassert(0); 4022 } 4023 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU, 4024 res, arg, size, True)); 4025 return res; 4026 } 4027 case Iop_Widen8Sto16x8: 4028 case Iop_Widen16Sto32x4: 4029 case Iop_Widen32Sto64x2: { 4030 HReg res = newVRegV(env); 4031 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4032 UInt size; 4033 switch (e->Iex.Unop.op) { 4034 case Iop_Widen8Sto16x8: size = 0; break; 4035 case Iop_Widen16Sto32x4: size = 1; break; 4036 case Iop_Widen32Sto64x2: size = 2; break; 4037 default: vassert(0); 4038 } 4039 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS, 4040 res, arg, size, True)); 4041 return res; 4042 } 4043 case Iop_PwAddL8Sx16: 4044 case Iop_PwAddL16Sx8: 4045 case Iop_PwAddL32Sx4: { 4046 HReg res = newVRegV(env); 4047 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4048 UInt size = 0; 4049 switch(e->Iex.Binop.op) { 4050 case Iop_PwAddL8Sx16: size = 0; break; 4051 case Iop_PwAddL16Sx8: size = 1; break; 4052 case Iop_PwAddL32Sx4: size = 2; break; 4053 default: vassert(0); 
4054 } 4055 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, 4056 res, arg, size, True)); 4057 return res; 4058 } 4059 case Iop_PwAddL8Ux16: 4060 case Iop_PwAddL16Ux8: 4061 case Iop_PwAddL32Ux4: { 4062 HReg res = newVRegV(env); 4063 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4064 UInt size = 0; 4065 switch(e->Iex.Binop.op) { 4066 case Iop_PwAddL8Ux16: size = 0; break; 4067 case Iop_PwAddL16Ux8: size = 1; break; 4068 case Iop_PwAddL32Ux4: size = 2; break; 4069 default: vassert(0); 4070 } 4071 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, 4072 res, arg, size, True)); 4073 return res; 4074 } 4075 case Iop_Cnt8x16: { 4076 HReg res = newVRegV(env); 4077 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4078 UInt size = 0; 4079 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True)); 4080 return res; 4081 } 4082 case Iop_Clz8Sx16: 4083 case Iop_Clz16Sx8: 4084 case Iop_Clz32Sx4: { 4085 HReg res = newVRegV(env); 4086 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4087 UInt size = 0; 4088 switch(e->Iex.Binop.op) { 4089 case Iop_Clz8Sx16: size = 0; break; 4090 case Iop_Clz16Sx8: size = 1; break; 4091 case Iop_Clz32Sx4: size = 2; break; 4092 default: vassert(0); 4093 } 4094 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True)); 4095 return res; 4096 } 4097 case Iop_Cls8Sx16: 4098 case Iop_Cls16Sx8: 4099 case Iop_Cls32Sx4: { 4100 HReg res = newVRegV(env); 4101 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4102 UInt size = 0; 4103 switch(e->Iex.Binop.op) { 4104 case Iop_Cls8Sx16: size = 0; break; 4105 case Iop_Cls16Sx8: size = 1; break; 4106 case Iop_Cls32Sx4: size = 2; break; 4107 default: vassert(0); 4108 } 4109 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True)); 4110 return res; 4111 } 4112 case Iop_FtoI32Sx4_RZ: { 4113 HReg res = newVRegV(env); 4114 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4115 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, 4116 res, arg, 2, True)); 4117 return res; 4118 } 4119 case Iop_FtoI32Ux4_RZ: { 4120 HReg 
res = newVRegV(env); 4121 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4122 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, 4123 res, arg, 2, True)); 4124 return res; 4125 } 4126 case Iop_I32StoFx4: { 4127 HReg res = newVRegV(env); 4128 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4129 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, 4130 res, arg, 2, True)); 4131 return res; 4132 } 4133 case Iop_I32UtoFx4: { 4134 HReg res = newVRegV(env); 4135 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4136 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, 4137 res, arg, 2, True)); 4138 return res; 4139 } 4140 case Iop_F16toF32x4: { 4141 HReg res = newVRegV(env); 4142 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); 4143 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32, 4144 res, arg, 2, True)); 4145 return res; 4146 } 4147 case Iop_Recip32Fx4: { 4148 HReg res = newVRegV(env); 4149 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4150 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, 4151 res, argL, 0, True)); 4152 return res; 4153 } 4154 case Iop_Recip32x4: { 4155 HReg res = newVRegV(env); 4156 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4157 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, 4158 res, argL, 0, True)); 4159 return res; 4160 } 4161 case Iop_Abs32Fx4: { 4162 DECLARE_PATTERN(p_vabd_32fx4); 4163 DEFINE_PATTERN(p_vabd_32fx4, 4164 unop(Iop_Abs32Fx4, 4165 binop(Iop_Sub32Fx4, 4166 bind(0), 4167 bind(1)))); 4168 if (matchIRExpr(&mi, p_vabd_32fx4, e)) { 4169 HReg res = newVRegV(env); 4170 HReg argL = iselNeonExpr(env, mi.bindee[0]); 4171 HReg argR = iselNeonExpr(env, mi.bindee[1]); 4172 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, 4173 res, argL, argR, 0, True)); 4174 return res; 4175 } else { 4176 HReg res = newVRegV(env); 4177 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4178 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, 4179 res, argL, 0, True)); 4180 return res; 4181 } 4182 } 4183 case Iop_Rsqrte32Fx4: { 4184 HReg res = newVRegV(env); 4185 HReg argL = 
iselNeonExpr(env, e->Iex.Unop.arg); 4186 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, 4187 res, argL, 0, True)); 4188 return res; 4189 } 4190 case Iop_Rsqrte32x4: { 4191 HReg res = newVRegV(env); 4192 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); 4193 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, 4194 res, argL, 0, True)); 4195 return res; 4196 } 4197 case Iop_Neg32Fx4: { 4198 HReg res = newVRegV(env); 4199 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); 4200 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, 4201 res, arg, 0, True)); 4202 return res; 4203 } 4204 /* ... */ 4205 default: 4206 break; 4207 } 4208 } 4209 4210 if (e->tag == Iex_Binop) { 4211 switch (e->Iex.Binop.op) { 4212 case Iop_64HLtoV128: 4213 /* Try to match into single "VMOV reg, imm" instruction */ 4214 if (e->Iex.Binop.arg1->tag == Iex_Const && 4215 e->Iex.Binop.arg2->tag == Iex_Const && 4216 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 && 4217 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 && 4218 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 4219 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) { 4220 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 4221 ARMNImm *imm = Imm64_to_ARMNImm(imm64); 4222 if (imm) { 4223 HReg res = newVRegV(env); 4224 addInstr(env, ARMInstr_NeonImm(res, imm)); 4225 return res; 4226 } 4227 if ((imm64 >> 32) == 0LL && 4228 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) { 4229 HReg tmp1 = newVRegV(env); 4230 HReg tmp2 = newVRegV(env); 4231 HReg res = newVRegV(env); 4232 if (imm->type < 10) { 4233 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f))); 4234 addInstr(env, ARMInstr_NeonImm(tmp2, imm)); 4235 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4236 res, tmp1, tmp2, 4, True)); 4237 return res; 4238 } 4239 } 4240 if ((imm64 & 0xFFFFFFFFLL) == 0LL && 4241 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) { 4242 HReg tmp1 = newVRegV(env); 4243 HReg tmp2 = newVRegV(env); 4244 HReg res = newVRegV(env); 4245 if (imm->type < 
10) { 4246 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0))); 4247 addInstr(env, ARMInstr_NeonImm(tmp2, imm)); 4248 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4249 res, tmp1, tmp2, 4, True)); 4250 return res; 4251 } 4252 } 4253 } 4254 /* Does not match "VMOV Reg, Imm" form. We'll have to do 4255 it the slow way. */ 4256 { 4257 /* local scope */ 4258 /* Done via the stack for ease of use. */ 4259 /* FIXME: assumes little endian host */ 4260 HReg w3, w2, w1, w0; 4261 HReg res = newVRegV(env); 4262 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0); 4263 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4); 4264 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8); 4265 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12); 4266 ARMRI84* c_16 = ARMRI84_I84(16,0); 4267 /* Make space for SP */ 4268 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(), 4269 hregARM_R13(), c_16)); 4270 4271 /* Store the less significant 64 bits */ 4272 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2); 4273 addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0)); 4274 addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4)); 4275 4276 /* Store the more significant 64 bits */ 4277 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1); 4278 addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8)); 4279 addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12)); 4280 4281 /* Load result back from stack. 
*/ 4282 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res, 4283 mkARMAModeN_R(hregARM_R13()))); 4284 4285 /* Restore SP */ 4286 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(), 4287 hregARM_R13(), c_16)); 4288 return res; 4289 } /* local scope */ 4290 goto neon_expr_bad; 4291 case Iop_AndV128: { 4292 HReg res = newVRegV(env); 4293 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4294 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4295 addInstr(env, ARMInstr_NBinary(ARMneon_VAND, 4296 res, argL, argR, 4, True)); 4297 return res; 4298 } 4299 case Iop_OrV128: { 4300 HReg res = newVRegV(env); 4301 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4302 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4303 addInstr(env, ARMInstr_NBinary(ARMneon_VORR, 4304 res, argL, argR, 4, True)); 4305 return res; 4306 } 4307 case Iop_XorV128: { 4308 HReg res = newVRegV(env); 4309 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4310 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4311 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, 4312 res, argL, argR, 4, True)); 4313 return res; 4314 } 4315 case Iop_Add8x16: 4316 case Iop_Add16x8: 4317 case Iop_Add32x4: 4318 case Iop_Add64x2: { 4319 /* 4320 FIXME: remove this if not used 4321 DECLARE_PATTERN(p_vrhadd_32sx4); 4322 ULong one = (1LL << 32) | 1LL; 4323 DEFINE_PATTERN(p_vrhadd_32sx4, 4324 binop(Iop_Add32x4, 4325 binop(Iop_Add32x4, 4326 binop(Iop_SarN32x4, 4327 bind(0), 4328 mkU8(1)), 4329 binop(Iop_SarN32x4, 4330 bind(1), 4331 mkU8(1))), 4332 binop(Iop_SarN32x4, 4333 binop(Iop_Add32x4, 4334 binop(Iop_Add32x4, 4335 binop(Iop_AndV128, 4336 bind(0), 4337 mkU128(one)), 4338 binop(Iop_AndV128, 4339 bind(1), 4340 mkU128(one))), 4341 mkU128(one)), 4342 mkU8(1)))); 4343 */ 4344 HReg res = newVRegV(env); 4345 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4346 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4347 UInt size; 4348 switch (e->Iex.Binop.op) { 4349 case Iop_Add8x16: size = 0; break; 4350 case Iop_Add16x8: size = 1; break; 
4351 case Iop_Add32x4: size = 2; break; 4352 case Iop_Add64x2: size = 3; break; 4353 default: 4354 ppIROp(e->Iex.Binop.op); 4355 vpanic("Illegal element size in VADD"); 4356 } 4357 addInstr(env, ARMInstr_NBinary(ARMneon_VADD, 4358 res, argL, argR, size, True)); 4359 return res; 4360 } 4361 case Iop_Add32Fx4: { 4362 HReg res = newVRegV(env); 4363 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4364 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4365 UInt size = 0; 4366 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, 4367 res, argL, argR, size, True)); 4368 return res; 4369 } 4370 case Iop_Recps32Fx4: { 4371 HReg res = newVRegV(env); 4372 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4373 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4374 UInt size = 0; 4375 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, 4376 res, argL, argR, size, True)); 4377 return res; 4378 } 4379 case Iop_Rsqrts32Fx4: { 4380 HReg res = newVRegV(env); 4381 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4382 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4383 UInt size = 0; 4384 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, 4385 res, argL, argR, size, True)); 4386 return res; 4387 } 4388 case Iop_InterleaveEvenLanes8x16: 4389 case Iop_InterleaveEvenLanes16x8: 4390 case Iop_InterleaveEvenLanes32x4: 4391 case Iop_InterleaveOddLanes8x16: 4392 case Iop_InterleaveOddLanes16x8: 4393 case Iop_InterleaveOddLanes32x4: { 4394 HReg tmp = newVRegV(env); 4395 HReg res = newVRegV(env); 4396 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4397 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4398 UInt size; 4399 UInt is_lo; 4400 switch (e->Iex.Binop.op) { 4401 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break; 4402 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break; 4403 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break; 4404 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break; 4405 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break; 4406 
case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break; 4407 default: 4408 ppIROp(e->Iex.Binop.op); 4409 vpanic("Illegal element size in VTRN"); 4410 } 4411 if (is_lo) { 4412 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4413 tmp, argL, 4, True)); 4414 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4415 res, argR, 4, True)); 4416 addInstr(env, ARMInstr_NDual(ARMneon_TRN, 4417 res, tmp, size, True)); 4418 } else { 4419 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4420 tmp, argR, 4, True)); 4421 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4422 res, argL, 4, True)); 4423 addInstr(env, ARMInstr_NDual(ARMneon_TRN, 4424 tmp, res, size, True)); 4425 } 4426 return res; 4427 } 4428 case Iop_InterleaveHI8x16: 4429 case Iop_InterleaveHI16x8: 4430 case Iop_InterleaveHI32x4: 4431 case Iop_InterleaveLO8x16: 4432 case Iop_InterleaveLO16x8: 4433 case Iop_InterleaveLO32x4: { 4434 HReg tmp = newVRegV(env); 4435 HReg res = newVRegV(env); 4436 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4437 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4438 UInt size; 4439 UInt is_lo; 4440 switch (e->Iex.Binop.op) { 4441 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break; 4442 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break; 4443 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break; 4444 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break; 4445 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break; 4446 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break; 4447 default: 4448 ppIROp(e->Iex.Binop.op); 4449 vpanic("Illegal element size in VZIP"); 4450 } 4451 if (is_lo) { 4452 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4453 tmp, argL, 4, True)); 4454 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4455 res, argR, 4, True)); 4456 addInstr(env, ARMInstr_NDual(ARMneon_ZIP, 4457 res, tmp, size, True)); 4458 } else { 4459 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4460 tmp, argR, 4, True)); 4461 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4462 res, argL, 4, True)); 4463 addInstr(env, 
ARMInstr_NDual(ARMneon_ZIP, 4464 tmp, res, size, True)); 4465 } 4466 return res; 4467 } 4468 case Iop_CatOddLanes8x16: 4469 case Iop_CatOddLanes16x8: 4470 case Iop_CatOddLanes32x4: 4471 case Iop_CatEvenLanes8x16: 4472 case Iop_CatEvenLanes16x8: 4473 case Iop_CatEvenLanes32x4: { 4474 HReg tmp = newVRegV(env); 4475 HReg res = newVRegV(env); 4476 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4477 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4478 UInt size; 4479 UInt is_lo; 4480 switch (e->Iex.Binop.op) { 4481 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break; 4482 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break; 4483 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break; 4484 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break; 4485 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break; 4486 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break; 4487 default: 4488 ppIROp(e->Iex.Binop.op); 4489 vpanic("Illegal element size in VUZP"); 4490 } 4491 if (is_lo) { 4492 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4493 tmp, argL, 4, True)); 4494 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4495 res, argR, 4, True)); 4496 addInstr(env, ARMInstr_NDual(ARMneon_UZP, 4497 res, tmp, size, True)); 4498 } else { 4499 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4500 tmp, argR, 4, True)); 4501 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, 4502 res, argL, 4, True)); 4503 addInstr(env, ARMInstr_NDual(ARMneon_UZP, 4504 tmp, res, size, True)); 4505 } 4506 return res; 4507 } 4508 case Iop_QAdd8Ux16: 4509 case Iop_QAdd16Ux8: 4510 case Iop_QAdd32Ux4: 4511 case Iop_QAdd64Ux2: { 4512 HReg res = newVRegV(env); 4513 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4514 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4515 UInt size; 4516 switch (e->Iex.Binop.op) { 4517 case Iop_QAdd8Ux16: size = 0; break; 4518 case Iop_QAdd16Ux8: size = 1; break; 4519 case Iop_QAdd32Ux4: size = 2; break; 4520 case Iop_QAdd64Ux2: size = 3; break; 4521 default: 4522 ppIROp(e->Iex.Binop.op); 4523 
vpanic("Illegal element size in VQADDU"); 4524 } 4525 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, 4526 res, argL, argR, size, True)); 4527 return res; 4528 } 4529 case Iop_QAdd8Sx16: 4530 case Iop_QAdd16Sx8: 4531 case Iop_QAdd32Sx4: 4532 case Iop_QAdd64Sx2: { 4533 HReg res = newVRegV(env); 4534 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4535 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4536 UInt size; 4537 switch (e->Iex.Binop.op) { 4538 case Iop_QAdd8Sx16: size = 0; break; 4539 case Iop_QAdd16Sx8: size = 1; break; 4540 case Iop_QAdd32Sx4: size = 2; break; 4541 case Iop_QAdd64Sx2: size = 3; break; 4542 default: 4543 ppIROp(e->Iex.Binop.op); 4544 vpanic("Illegal element size in VQADDS"); 4545 } 4546 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, 4547 res, argL, argR, size, True)); 4548 return res; 4549 } 4550 case Iop_Sub8x16: 4551 case Iop_Sub16x8: 4552 case Iop_Sub32x4: 4553 case Iop_Sub64x2: { 4554 HReg res = newVRegV(env); 4555 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4556 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4557 UInt size; 4558 switch (e->Iex.Binop.op) { 4559 case Iop_Sub8x16: size = 0; break; 4560 case Iop_Sub16x8: size = 1; break; 4561 case Iop_Sub32x4: size = 2; break; 4562 case Iop_Sub64x2: size = 3; break; 4563 default: 4564 ppIROp(e->Iex.Binop.op); 4565 vpanic("Illegal element size in VSUB"); 4566 } 4567 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4568 res, argL, argR, size, True)); 4569 return res; 4570 } 4571 case Iop_Sub32Fx4: { 4572 HReg res = newVRegV(env); 4573 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4574 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4575 UInt size = 0; 4576 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, 4577 res, argL, argR, size, True)); 4578 return res; 4579 } 4580 case Iop_QSub8Ux16: 4581 case Iop_QSub16Ux8: 4582 case Iop_QSub32Ux4: 4583 case Iop_QSub64Ux2: { 4584 HReg res = newVRegV(env); 4585 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4586 HReg argR = 
iselNeonExpr(env, e->Iex.Binop.arg2); 4587 UInt size; 4588 switch (e->Iex.Binop.op) { 4589 case Iop_QSub8Ux16: size = 0; break; 4590 case Iop_QSub16Ux8: size = 1; break; 4591 case Iop_QSub32Ux4: size = 2; break; 4592 case Iop_QSub64Ux2: size = 3; break; 4593 default: 4594 ppIROp(e->Iex.Binop.op); 4595 vpanic("Illegal element size in VQSUBU"); 4596 } 4597 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, 4598 res, argL, argR, size, True)); 4599 return res; 4600 } 4601 case Iop_QSub8Sx16: 4602 case Iop_QSub16Sx8: 4603 case Iop_QSub32Sx4: 4604 case Iop_QSub64Sx2: { 4605 HReg res = newVRegV(env); 4606 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4607 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4608 UInt size; 4609 switch (e->Iex.Binop.op) { 4610 case Iop_QSub8Sx16: size = 0; break; 4611 case Iop_QSub16Sx8: size = 1; break; 4612 case Iop_QSub32Sx4: size = 2; break; 4613 case Iop_QSub64Sx2: size = 3; break; 4614 default: 4615 ppIROp(e->Iex.Binop.op); 4616 vpanic("Illegal element size in VQSUBS"); 4617 } 4618 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, 4619 res, argL, argR, size, True)); 4620 return res; 4621 } 4622 case Iop_Max8Ux16: 4623 case Iop_Max16Ux8: 4624 case Iop_Max32Ux4: { 4625 HReg res = newVRegV(env); 4626 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4627 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4628 UInt size; 4629 switch (e->Iex.Binop.op) { 4630 case Iop_Max8Ux16: size = 0; break; 4631 case Iop_Max16Ux8: size = 1; break; 4632 case Iop_Max32Ux4: size = 2; break; 4633 default: vpanic("Illegal element size in VMAXU"); 4634 } 4635 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, 4636 res, argL, argR, size, True)); 4637 return res; 4638 } 4639 case Iop_Max8Sx16: 4640 case Iop_Max16Sx8: 4641 case Iop_Max32Sx4: { 4642 HReg res = newVRegV(env); 4643 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4644 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4645 UInt size; 4646 switch (e->Iex.Binop.op) { 4647 case Iop_Max8Sx16: size = 0; break; 
4648 case Iop_Max16Sx8: size = 1; break; 4649 case Iop_Max32Sx4: size = 2; break; 4650 default: vpanic("Illegal element size in VMAXU"); 4651 } 4652 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, 4653 res, argL, argR, size, True)); 4654 return res; 4655 } 4656 case Iop_Min8Ux16: 4657 case Iop_Min16Ux8: 4658 case Iop_Min32Ux4: { 4659 HReg res = newVRegV(env); 4660 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4661 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4662 UInt size; 4663 switch (e->Iex.Binop.op) { 4664 case Iop_Min8Ux16: size = 0; break; 4665 case Iop_Min16Ux8: size = 1; break; 4666 case Iop_Min32Ux4: size = 2; break; 4667 default: vpanic("Illegal element size in VMAXU"); 4668 } 4669 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, 4670 res, argL, argR, size, True)); 4671 return res; 4672 } 4673 case Iop_Min8Sx16: 4674 case Iop_Min16Sx8: 4675 case Iop_Min32Sx4: { 4676 HReg res = newVRegV(env); 4677 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4678 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4679 UInt size; 4680 switch (e->Iex.Binop.op) { 4681 case Iop_Min8Sx16: size = 0; break; 4682 case Iop_Min16Sx8: size = 1; break; 4683 case Iop_Min32Sx4: size = 2; break; 4684 default: vpanic("Illegal element size in VMAXU"); 4685 } 4686 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, 4687 res, argL, argR, size, True)); 4688 return res; 4689 } 4690 case Iop_Sar8x16: 4691 case Iop_Sar16x8: 4692 case Iop_Sar32x4: 4693 case Iop_Sar64x2: { 4694 HReg res = newVRegV(env); 4695 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4696 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4697 HReg argR2 = newVRegV(env); 4698 HReg zero = newVRegV(env); 4699 UInt size; 4700 switch (e->Iex.Binop.op) { 4701 case Iop_Sar8x16: size = 0; break; 4702 case Iop_Sar16x8: size = 1; break; 4703 case Iop_Sar32x4: size = 2; break; 4704 case Iop_Sar64x2: size = 3; break; 4705 default: vassert(0); 4706 } 4707 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 4708 addInstr(env, 
ARMInstr_NBinary(ARMneon_VSUB, 4709 argR2, zero, argR, size, True)); 4710 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4711 res, argL, argR2, size, True)); 4712 return res; 4713 } 4714 case Iop_Sal8x16: 4715 case Iop_Sal16x8: 4716 case Iop_Sal32x4: 4717 case Iop_Sal64x2: { 4718 HReg res = newVRegV(env); 4719 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4720 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4721 UInt size; 4722 switch (e->Iex.Binop.op) { 4723 case Iop_Sal8x16: size = 0; break; 4724 case Iop_Sal16x8: size = 1; break; 4725 case Iop_Sal32x4: size = 2; break; 4726 case Iop_Sal64x2: size = 3; break; 4727 default: vassert(0); 4728 } 4729 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4730 res, argL, argR, size, True)); 4731 return res; 4732 } 4733 case Iop_Shr8x16: 4734 case Iop_Shr16x8: 4735 case Iop_Shr32x4: 4736 case Iop_Shr64x2: { 4737 HReg res = newVRegV(env); 4738 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4739 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4740 HReg argR2 = newVRegV(env); 4741 HReg zero = newVRegV(env); 4742 UInt size; 4743 switch (e->Iex.Binop.op) { 4744 case Iop_Shr8x16: size = 0; break; 4745 case Iop_Shr16x8: size = 1; break; 4746 case Iop_Shr32x4: size = 2; break; 4747 case Iop_Shr64x2: size = 3; break; 4748 default: vassert(0); 4749 } 4750 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); 4751 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, 4752 argR2, zero, argR, size, True)); 4753 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4754 res, argL, argR2, size, True)); 4755 return res; 4756 } 4757 case Iop_Shl8x16: 4758 case Iop_Shl16x8: 4759 case Iop_Shl32x4: 4760 case Iop_Shl64x2: { 4761 HReg res = newVRegV(env); 4762 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4763 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4764 UInt size; 4765 switch (e->Iex.Binop.op) { 4766 case Iop_Shl8x16: size = 0; break; 4767 case Iop_Shl16x8: size = 1; break; 4768 case Iop_Shl32x4: size = 2; break; 4769 case Iop_Shl64x2: size = 3; 
break; 4770 default: vassert(0); 4771 } 4772 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4773 res, argL, argR, size, True)); 4774 return res; 4775 } 4776 case Iop_QShl8x16: 4777 case Iop_QShl16x8: 4778 case Iop_QShl32x4: 4779 case Iop_QShl64x2: { 4780 HReg res = newVRegV(env); 4781 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4782 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4783 UInt size; 4784 switch (e->Iex.Binop.op) { 4785 case Iop_QShl8x16: size = 0; break; 4786 case Iop_QShl16x8: size = 1; break; 4787 case Iop_QShl32x4: size = 2; break; 4788 case Iop_QShl64x2: size = 3; break; 4789 default: vassert(0); 4790 } 4791 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, 4792 res, argL, argR, size, True)); 4793 return res; 4794 } 4795 case Iop_QSal8x16: 4796 case Iop_QSal16x8: 4797 case Iop_QSal32x4: 4798 case Iop_QSal64x2: { 4799 HReg res = newVRegV(env); 4800 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4801 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4802 UInt size; 4803 switch (e->Iex.Binop.op) { 4804 case Iop_QSal8x16: size = 0; break; 4805 case Iop_QSal16x8: size = 1; break; 4806 case Iop_QSal32x4: size = 2; break; 4807 case Iop_QSal64x2: size = 3; break; 4808 default: vassert(0); 4809 } 4810 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, 4811 res, argL, argR, size, True)); 4812 return res; 4813 } 4814 case Iop_QShlN8x16: 4815 case Iop_QShlN16x8: 4816 case Iop_QShlN32x4: 4817 case Iop_QShlN64x2: { 4818 HReg res = newVRegV(env); 4819 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4820 UInt size, imm; 4821 if (e->Iex.Binop.arg2->tag != Iex_Const || 4822 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4823 vpanic("ARM taget supports Iop_QShlNAxB with constant " 4824 "second argument only\n"); 4825 } 4826 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4827 switch (e->Iex.Binop.op) { 4828 case Iop_QShlN8x16: size = 8 | imm; break; 4829 case Iop_QShlN16x8: size = 16 | imm; break; 4830 case Iop_QShlN32x4: size = 32 | imm; break; 4831 case 
Iop_QShlN64x2: size = 64 | imm; break; 4832 default: vassert(0); 4833 } 4834 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, 4835 res, argL, size, True)); 4836 return res; 4837 } 4838 case Iop_QShlN8Sx16: 4839 case Iop_QShlN16Sx8: 4840 case Iop_QShlN32Sx4: 4841 case Iop_QShlN64Sx2: { 4842 HReg res = newVRegV(env); 4843 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4844 UInt size, imm; 4845 if (e->Iex.Binop.arg2->tag != Iex_Const || 4846 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4847 vpanic("ARM taget supports Iop_QShlNASxB with constant " 4848 "second argument only\n"); 4849 } 4850 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4851 switch (e->Iex.Binop.op) { 4852 case Iop_QShlN8Sx16: size = 8 | imm; break; 4853 case Iop_QShlN16Sx8: size = 16 | imm; break; 4854 case Iop_QShlN32Sx4: size = 32 | imm; break; 4855 case Iop_QShlN64Sx2: size = 64 | imm; break; 4856 default: vassert(0); 4857 } 4858 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, 4859 res, argL, size, True)); 4860 return res; 4861 } 4862 case Iop_QSalN8x16: 4863 case Iop_QSalN16x8: 4864 case Iop_QSalN32x4: 4865 case Iop_QSalN64x2: { 4866 HReg res = newVRegV(env); 4867 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4868 UInt size, imm; 4869 if (e->Iex.Binop.arg2->tag != Iex_Const || 4870 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 4871 vpanic("ARM taget supports Iop_QShlNAxB with constant " 4872 "second argument only\n"); 4873 } 4874 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 4875 switch (e->Iex.Binop.op) { 4876 case Iop_QSalN8x16: size = 8 | imm; break; 4877 case Iop_QSalN16x8: size = 16 | imm; break; 4878 case Iop_QSalN32x4: size = 32 | imm; break; 4879 case Iop_QSalN64x2: size = 64 | imm; break; 4880 default: vassert(0); 4881 } 4882 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, 4883 res, argL, size, True)); 4884 return res; 4885 } 4886 case Iop_ShrN8x16: 4887 case Iop_ShrN16x8: 4888 case Iop_ShrN32x4: 4889 case Iop_ShrN64x2: { 4890 HReg res = 
newVRegV(env); 4891 HReg tmp = newVRegV(env); 4892 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4893 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4894 HReg argR2 = newVRegI(env); 4895 UInt size; 4896 switch (e->Iex.Binop.op) { 4897 case Iop_ShrN8x16: size = 0; break; 4898 case Iop_ShrN16x8: size = 1; break; 4899 case Iop_ShrN32x4: size = 2; break; 4900 case Iop_ShrN64x2: size = 3; break; 4901 default: vassert(0); 4902 } 4903 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 4904 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, 4905 tmp, argR2, 0, True)); 4906 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4907 res, argL, tmp, size, True)); 4908 return res; 4909 } 4910 case Iop_ShlN8x16: 4911 case Iop_ShlN16x8: 4912 case Iop_ShlN32x4: 4913 case Iop_ShlN64x2: { 4914 HReg res = newVRegV(env); 4915 HReg tmp = newVRegV(env); 4916 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4917 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4918 UInt size; 4919 switch (e->Iex.Binop.op) { 4920 case Iop_ShlN8x16: size = 0; break; 4921 case Iop_ShlN16x8: size = 1; break; 4922 case Iop_ShlN32x4: size = 2; break; 4923 case Iop_ShlN64x2: size = 3; break; 4924 default: vassert(0); 4925 } 4926 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True)); 4927 addInstr(env, ARMInstr_NShift(ARMneon_VSHL, 4928 res, argL, tmp, size, True)); 4929 return res; 4930 } 4931 case Iop_SarN8x16: 4932 case Iop_SarN16x8: 4933 case Iop_SarN32x4: 4934 case Iop_SarN64x2: { 4935 HReg res = newVRegV(env); 4936 HReg tmp = newVRegV(env); 4937 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4938 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 4939 HReg argR2 = newVRegI(env); 4940 UInt size; 4941 switch (e->Iex.Binop.op) { 4942 case Iop_SarN8x16: size = 0; break; 4943 case Iop_SarN16x8: size = 1; break; 4944 case Iop_SarN32x4: size = 2; break; 4945 case Iop_SarN64x2: size = 3; break; 4946 default: vassert(0); 4947 } 4948 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); 4949 
addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True)); 4950 addInstr(env, ARMInstr_NShift(ARMneon_VSAL, 4951 res, argL, tmp, size, True)); 4952 return res; 4953 } 4954 case Iop_CmpGT8Ux16: 4955 case Iop_CmpGT16Ux8: 4956 case Iop_CmpGT32Ux4: { 4957 HReg res = newVRegV(env); 4958 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4959 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4960 UInt size; 4961 switch (e->Iex.Binop.op) { 4962 case Iop_CmpGT8Ux16: size = 0; break; 4963 case Iop_CmpGT16Ux8: size = 1; break; 4964 case Iop_CmpGT32Ux4: size = 2; break; 4965 default: vassert(0); 4966 } 4967 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, 4968 res, argL, argR, size, True)); 4969 return res; 4970 } 4971 case Iop_CmpGT8Sx16: 4972 case Iop_CmpGT16Sx8: 4973 case Iop_CmpGT32Sx4: { 4974 HReg res = newVRegV(env); 4975 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4976 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4977 UInt size; 4978 switch (e->Iex.Binop.op) { 4979 case Iop_CmpGT8Sx16: size = 0; break; 4980 case Iop_CmpGT16Sx8: size = 1; break; 4981 case Iop_CmpGT32Sx4: size = 2; break; 4982 default: vassert(0); 4983 } 4984 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, 4985 res, argL, argR, size, True)); 4986 return res; 4987 } 4988 case Iop_CmpEQ8x16: 4989 case Iop_CmpEQ16x8: 4990 case Iop_CmpEQ32x4: { 4991 HReg res = newVRegV(env); 4992 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 4993 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 4994 UInt size; 4995 switch (e->Iex.Binop.op) { 4996 case Iop_CmpEQ8x16: size = 0; break; 4997 case Iop_CmpEQ16x8: size = 1; break; 4998 case Iop_CmpEQ32x4: size = 2; break; 4999 default: vassert(0); 5000 } 5001 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, 5002 res, argL, argR, size, True)); 5003 return res; 5004 } 5005 case Iop_Mul8x16: 5006 case Iop_Mul16x8: 5007 case Iop_Mul32x4: { 5008 HReg res = newVRegV(env); 5009 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5010 HReg argR = iselNeonExpr(env, 
e->Iex.Binop.arg2); 5011 UInt size = 0; 5012 switch(e->Iex.Binop.op) { 5013 case Iop_Mul8x16: size = 0; break; 5014 case Iop_Mul16x8: size = 1; break; 5015 case Iop_Mul32x4: size = 2; break; 5016 default: vassert(0); 5017 } 5018 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, 5019 res, argL, argR, size, True)); 5020 return res; 5021 } 5022 case Iop_Mul32Fx4: { 5023 HReg res = newVRegV(env); 5024 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5025 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5026 UInt size = 0; 5027 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, 5028 res, argL, argR, size, True)); 5029 return res; 5030 } 5031 case Iop_Mull8Ux8: 5032 case Iop_Mull16Ux4: 5033 case Iop_Mull32Ux2: { 5034 HReg res = newVRegV(env); 5035 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5036 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5037 UInt size = 0; 5038 switch(e->Iex.Binop.op) { 5039 case Iop_Mull8Ux8: size = 0; break; 5040 case Iop_Mull16Ux4: size = 1; break; 5041 case Iop_Mull32Ux2: size = 2; break; 5042 default: vassert(0); 5043 } 5044 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU, 5045 res, argL, argR, size, True)); 5046 return res; 5047 } 5048 5049 case Iop_Mull8Sx8: 5050 case Iop_Mull16Sx4: 5051 case Iop_Mull32Sx2: { 5052 HReg res = newVRegV(env); 5053 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5054 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5055 UInt size = 0; 5056 switch(e->Iex.Binop.op) { 5057 case Iop_Mull8Sx8: size = 0; break; 5058 case Iop_Mull16Sx4: size = 1; break; 5059 case Iop_Mull32Sx2: size = 2; break; 5060 default: vassert(0); 5061 } 5062 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS, 5063 res, argL, argR, size, True)); 5064 return res; 5065 } 5066 5067 case Iop_QDMulHi16Sx8: 5068 case Iop_QDMulHi32Sx4: { 5069 HReg res = newVRegV(env); 5070 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5071 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5072 UInt size = 0; 5073 switch(e->Iex.Binop.op) { 5074 case 
Iop_QDMulHi16Sx8: size = 1; break; 5075 case Iop_QDMulHi32Sx4: size = 2; break; 5076 default: vassert(0); 5077 } 5078 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, 5079 res, argL, argR, size, True)); 5080 return res; 5081 } 5082 5083 case Iop_QRDMulHi16Sx8: 5084 case Iop_QRDMulHi32Sx4: { 5085 HReg res = newVRegV(env); 5086 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5087 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5088 UInt size = 0; 5089 switch(e->Iex.Binop.op) { 5090 case Iop_QRDMulHi16Sx8: size = 1; break; 5091 case Iop_QRDMulHi32Sx4: size = 2; break; 5092 default: vassert(0); 5093 } 5094 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, 5095 res, argL, argR, size, True)); 5096 return res; 5097 } 5098 5099 case Iop_QDMulLong16Sx4: 5100 case Iop_QDMulLong32Sx2: { 5101 HReg res = newVRegV(env); 5102 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5103 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5104 UInt size = 0; 5105 switch(e->Iex.Binop.op) { 5106 case Iop_QDMulLong16Sx4: size = 1; break; 5107 case Iop_QDMulLong32Sx2: size = 2; break; 5108 default: vassert(0); 5109 } 5110 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL, 5111 res, argL, argR, size, True)); 5112 return res; 5113 } 5114 case Iop_PolynomialMul8x16: { 5115 HReg res = newVRegV(env); 5116 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5117 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5118 UInt size = 0; 5119 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, 5120 res, argL, argR, size, True)); 5121 return res; 5122 } 5123 case Iop_Max32Fx4: { 5124 HReg res = newVRegV(env); 5125 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5126 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5127 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, 5128 res, argL, argR, 2, True)); 5129 return res; 5130 } 5131 case Iop_Min32Fx4: { 5132 HReg res = newVRegV(env); 5133 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5134 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5135 addInstr(env, 
ARMInstr_NBinary(ARMneon_VMINF, 5136 res, argL, argR, 2, True)); 5137 return res; 5138 } 5139 case Iop_PwMax32Fx4: { 5140 HReg res = newVRegV(env); 5141 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5142 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5143 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, 5144 res, argL, argR, 2, True)); 5145 return res; 5146 } 5147 case Iop_PwMin32Fx4: { 5148 HReg res = newVRegV(env); 5149 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5150 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5151 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, 5152 res, argL, argR, 2, True)); 5153 return res; 5154 } 5155 case Iop_CmpGT32Fx4: { 5156 HReg res = newVRegV(env); 5157 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5158 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5159 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, 5160 res, argL, argR, 2, True)); 5161 return res; 5162 } 5163 case Iop_CmpGE32Fx4: { 5164 HReg res = newVRegV(env); 5165 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5166 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5167 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, 5168 res, argL, argR, 2, True)); 5169 return res; 5170 } 5171 case Iop_CmpEQ32Fx4: { 5172 HReg res = newVRegV(env); 5173 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5174 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); 5175 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, 5176 res, argL, argR, 2, True)); 5177 return res; 5178 } 5179 5180 case Iop_PolynomialMull8x8: { 5181 HReg res = newVRegV(env); 5182 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5183 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); 5184 UInt size = 0; 5185 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP, 5186 res, argL, argR, size, True)); 5187 return res; 5188 } 5189 case Iop_F32ToFixed32Ux4_RZ: 5190 case Iop_F32ToFixed32Sx4_RZ: 5191 case Iop_Fixed32UToF32x4_RN: 5192 case Iop_Fixed32SToF32x4_RN: { 5193 HReg res = newVRegV(env); 5194 HReg arg = iselNeonExpr(env, 
e->Iex.Binop.arg1); 5195 ARMNeonUnOp op; 5196 UInt imm6; 5197 if (e->Iex.Binop.arg2->tag != Iex_Const || 5198 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5199 vpanic("ARM supports FP <-> Fixed conversion with constant " 5200 "second argument less than 33 only\n"); 5201 } 5202 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5203 vassert(imm6 <= 32 && imm6 > 0); 5204 imm6 = 64 - imm6; 5205 switch(e->Iex.Binop.op) { 5206 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break; 5207 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break; 5208 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break; 5209 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break; 5210 default: vassert(0); 5211 } 5212 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True)); 5213 return res; 5214 } 5215 /* 5216 FIXME remove if not used 5217 case Iop_VDup8x16: 5218 case Iop_VDup16x8: 5219 case Iop_VDup32x4: { 5220 HReg res = newVRegV(env); 5221 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); 5222 UInt imm4; 5223 UInt index; 5224 if (e->Iex.Binop.arg2->tag != Iex_Const || 5225 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { 5226 vpanic("ARM supports Iop_VDup with constant " 5227 "second argument less than 16 only\n"); 5228 } 5229 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 5230 switch(e->Iex.Binop.op) { 5231 case Iop_VDup8x16: imm4 = (index << 1) + 1; break; 5232 case Iop_VDup16x8: imm4 = (index << 2) + 2; break; 5233 case Iop_VDup32x4: imm4 = (index << 3) + 4; break; 5234 default: vassert(0); 5235 } 5236 if (imm4 >= 16) { 5237 vpanic("ARM supports Iop_VDup with constant " 5238 "second argument less than 16 only\n"); 5239 } 5240 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, 5241 res, argL, imm4, True)); 5242 return res; 5243 } 5244 */ 5245 case Iop_PwAdd8x16: 5246 case Iop_PwAdd16x8: 5247 case Iop_PwAdd32x4: { 5248 HReg res = newVRegV(env); 5249 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); 5250 HReg argR = 
iselNeonExpr(env, e->Iex.Binop.arg2); 5251 UInt size = 0; 5252 switch(e->Iex.Binop.op) { 5253 case Iop_PwAdd8x16: size = 0; break; 5254 case Iop_PwAdd16x8: size = 1; break; 5255 case Iop_PwAdd32x4: size = 2; break; 5256 default: vassert(0); 5257 } 5258 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, 5259 res, argL, argR, size, True)); 5260 return res; 5261 } 5262 /* ... */ 5263 default: 5264 break; 5265 } 5266 } 5267 5268 if (e->tag == Iex_Triop) { 5269 IRTriop *triop = e->Iex.Triop.details; 5270 5271 switch (triop->op) { 5272 case Iop_ExtractV128: { 5273 HReg res = newVRegV(env); 5274 HReg argL = iselNeonExpr(env, triop->arg1); 5275 HReg argR = iselNeonExpr(env, triop->arg2); 5276 UInt imm4; 5277 if (triop->arg3->tag != Iex_Const || 5278 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { 5279 vpanic("ARM target supports Iop_ExtractV128 with constant " 5280 "third argument less than 16 only\n"); 5281 } 5282 imm4 = triop->arg3->Iex.Const.con->Ico.U8; 5283 if (imm4 >= 16) { 5284 vpanic("ARM target supports Iop_ExtractV128 with constant " 5285 "third argument less than 16 only\n"); 5286 } 5287 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, 5288 res, argL, argR, imm4, True)); 5289 return res; 5290 } 5291 default: 5292 break; 5293 } 5294 } 5295 5296 if (e->tag == Iex_Mux0X) { 5297 HReg r8; 5298 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX); 5299 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0); 5300 HReg dst = newVRegV(env); 5301 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True)); 5302 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 5303 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8, 5304 ARMRI84_I84(0xFF,0))); 5305 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0)); 5306 return dst; 5307 } 5308 5309 neon_expr_bad: 5310 ppIRExpr(e); 5311 vpanic("iselNeonExpr_wrk"); 5312 } 5313 5314 /*---------------------------------------------------------*/ 5315 /*--- ISEL: Floating point expressions (64 bit) ---*/ 5316 
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller. */

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity check the worker's result: must be a virtual 64-bit FP
      register. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Temporaries: simply hand back the vreg already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Just handle the zero case.  Materialise 0 in an integer
         register and transfer it to both 32-bit halves of the D
         register. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
         HReg z32 = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARMInstr_Imm32(z32, 0));
         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
         return dst;
      }
   }

   /* Little-endian 64-bit FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegD(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   /* Read of guest state: load from r8 (the baseblock pointer) plus a
      fixed offset. */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg res = newVRegD(env);
      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI64asF64: {
            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
               /* On a NEON host, 64-bit ints already live in D
                  registers, so no transfer is needed. */
               return iselNeon64Expr(env, e->Iex.Unop.arg);
            } else {
               /* Otherwise the I64 is in a GPR pair; move it across. */
               HReg srcHi, srcLo;
               HReg dst = newVRegD(env);
               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
               return dst;
            }
         }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion; cannot lose precision, so no
               rounding mode is needed. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Move the int into an S register first, then convert
               in place to a D register. */
            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg f32   = newVRegF(env);
            HReg dst   = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            /* VMOV f32, src */
            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
            /* FSITOD dst, f32 */
            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
                                          dst, f32));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         /* arg1 is the rounding mode, which these cases ignore. */
         case Iop_DivF64:
         case Iop_MulF64:
         case Iop_AddF64:
         case Iop_SubF64: {
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselDblExpr(env, triop->arg2);
            HReg argR = iselDblExpr(env, triop->arg3);
            HReg dst  = newVRegD(env);
            switch (triop->op) {
               case Iop_DivF64: op = ARMvfp_DIV; break;
               case Iop_MulF64: op = ARMvfp_MUL; break;
               case Iop_AddF64: op = ARMvfp_ADD; break;
               case Iop_SubF64: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Mux0X) {
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         /* dst := exprX; then if (cond & 0xFF) == 0, dst := expr0. */
         HReg r8;
         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
         HReg dst = newVRegD(env);
         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
                                         ARMRI84_I84(0xFF,0)));
         addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.
   As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity check the worker's result: must be a virtual 32-bit FP
      register. */
   vassert(hregClass(r) == HRcFlt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Temporaries: simply hand back the vreg already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* Little-endian 32-bit FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      ARMAModeV* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   /* Read of guest state: load from r8 (the baseblock pointer) plus a
      fixed offset. */
   if (e->tag == Iex_Get) {
      // XXX This won't work if offset > 1020 or is not 0 % 4.
      // In which case we'll have to generate more longwinded code.
      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
      HReg res = newVRegF(env);
      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
      return res;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_ReinterpI32asF32: {
            /* Bit-level move from a GPR into an S register. */
            HReg dst = newVRegF(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
            return dst;
         }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF32: {
            /* first arg is rounding mode; we ignore it. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegF(env);
            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion: honour the requested rounding
               mode (arg1) for the duration of the convert, then
               restore the default. */
            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
            HReg valS = newVRegF(env);
            /* FCVTSD valS, valD */
            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
            set_VFP_rounding_default(env);
            return valS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;

      switch (triop->op) {
         /* arg1 is the rounding mode, which these cases ignore. */
         case Iop_DivF32:
         case Iop_MulF32:
         case Iop_AddF32:
         case Iop_SubF32: {
            ARMVfpOp op = 0; /*INVALID*/
            HReg argL = iselFltExpr(env, triop->arg2);
            HReg argR = iselFltExpr(env, triop->arg3);
            HReg dst  = newVRegF(env);
            switch (triop->op) {
               case Iop_DivF32: op = ARMvfp_DIV; break;
               case Iop_MulF32: op = ARMvfp_MUL; break;
               case Iop_AddF32: op = ARMvfp_ADD; break;
               case Iop_SubF32: op = ARMvfp_SUB; break;
               default: vassert(0);
            }
            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Mux0X) {
      if (ty == Ity_F32
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         /* dst := exprX; then if (cond & 0xFF) == 0, dst := expr0. */
         HReg r8;
         HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
         HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
         HReg dst = newVRegF(env);
         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
                                         ARMRI84_I84(0xFF,0)));
         addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/ 5634 5635 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 5636 { 5637 if (vex_traceflags & VEX_TRACE_VCODE) { 5638 vex_printf("\n-- "); 5639 ppIRStmt(stmt); 5640 vex_printf("\n"); 5641 } 5642 switch (stmt->tag) { 5643 5644 /* --------- STORE --------- */ 5645 /* little-endian write to memory */ 5646 case Ist_Store: { 5647 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 5648 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 5649 IREndness end = stmt->Ist.Store.end; 5650 5651 if (tya != Ity_I32 || end != Iend_LE) 5652 goto stmt_fail; 5653 5654 if (tyd == Ity_I32) { 5655 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5656 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); 5657 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am)); 5658 return; 5659 } 5660 if (tyd == Ity_I16) { 5661 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5662 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr); 5663 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/, 5664 False/*!isSignedLoad*/, rD, am)); 5665 return; 5666 } 5667 if (tyd == Ity_I8) { 5668 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); 5669 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); 5670 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am)); 5671 return; 5672 } 5673 if (tyd == Ity_I64) { 5674 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5675 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); 5676 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 5677 addInstr(env, ARMInstr_NLdStD(False, dD, am)); 5678 } else { 5679 HReg rDhi, rDlo, rA; 5680 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data); 5681 rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 5682 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi, 5683 ARMAMode1_RI(rA,4))); 5684 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo, 5685 ARMAMode1_RI(rA,0))); 5686 } 5687 return; 5688 } 5689 if (tyd == Ity_F64) { 
5690 HReg dD = iselDblExpr(env, stmt->Ist.Store.data); 5691 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 5692 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am)); 5693 return; 5694 } 5695 if (tyd == Ity_F32) { 5696 HReg fD = iselFltExpr(env, stmt->Ist.Store.data); 5697 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); 5698 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am)); 5699 return; 5700 } 5701 if (tyd == Ity_V128) { 5702 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data); 5703 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); 5704 addInstr(env, ARMInstr_NLdStQ(False, qD, am)); 5705 return; 5706 } 5707 5708 break; 5709 } 5710 5711 /* --------- PUT --------- */ 5712 /* write guest state, fixed offset */ 5713 case Ist_Put: { 5714 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 5715 5716 if (tyd == Ity_I32) { 5717 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); 5718 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset); 5719 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am)); 5720 return; 5721 } 5722 if (tyd == Ity_I64) { 5723 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5724 HReg addr = newVRegI(env); 5725 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); 5726 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), 5727 stmt->Ist.Put.offset)); 5728 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr))); 5729 } else { 5730 HReg rDhi, rDlo; 5731 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), 5732 stmt->Ist.Put.offset + 0); 5733 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), 5734 stmt->Ist.Put.offset + 4); 5735 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data); 5736 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4)); 5737 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0)); 5738 } 5739 return; 5740 } 5741 if (tyd == Ity_F64) { 5742 // XXX This won't work if offset > 1020 or is not 0 % 4. 5743 // In which case we'll have to generate more longwinded code. 
5744 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 5745 HReg rD = iselDblExpr(env, stmt->Ist.Put.data); 5746 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am)); 5747 return; 5748 } 5749 if (tyd == Ity_F32) { 5750 // XXX This won't work if offset > 1020 or is not 0 % 4. 5751 // In which case we'll have to generate more longwinded code. 5752 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); 5753 HReg rD = iselFltExpr(env, stmt->Ist.Put.data); 5754 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am)); 5755 return; 5756 } 5757 if (tyd == Ity_V128) { 5758 HReg addr = newVRegI(env); 5759 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data); 5760 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), 5761 stmt->Ist.Put.offset)); 5762 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr))); 5763 return; 5764 } 5765 break; 5766 } 5767 5768 //zz /* --------- Indexed PUT --------- */ 5769 //zz /* write guest state, run-time offset */ 5770 //zz case Ist_PutI: { 5771 //zz ARMAMode2* am2 5772 //zz = genGuestArrayOffset( 5773 //zz env, stmt->Ist.PutI.descr, 5774 //zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias ); 5775 //zz 5776 //zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data); 5777 //zz 5778 //zz if (tyd == Ity_I8) { 5779 //zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data); 5780 //zz addInstr(env, ARMInstr_StoreB(reg, am2)); 5781 //zz return; 5782 //zz } 5783 //zz// CAB: Ity_I32, Ity_I16 ? 
5784 //zz break; 5785 //zz } 5786 5787 /* --------- TMP --------- */ 5788 /* assign value to temporary */ 5789 case Ist_WrTmp: { 5790 IRTemp tmp = stmt->Ist.WrTmp.tmp; 5791 IRType ty = typeOfIRTemp(env->type_env, tmp); 5792 5793 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 5794 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False, 5795 env, stmt->Ist.WrTmp.data); 5796 HReg dst = lookupIRTemp(env, tmp); 5797 addInstr(env, ARMInstr_Mov(dst,ri84)); 5798 return; 5799 } 5800 if (ty == Ity_I1) { 5801 HReg dst = lookupIRTemp(env, tmp); 5802 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 5803 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); 5804 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); 5805 return; 5806 } 5807 if (ty == Ity_I64) { 5808 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5809 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data); 5810 HReg dst = lookupIRTemp(env, tmp); 5811 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False)); 5812 } else { 5813 HReg rHi, rLo, dstHi, dstLo; 5814 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 5815 lookupIRTemp64( &dstHi, &dstLo, env, tmp); 5816 addInstr(env, mk_iMOVds_RR(dstHi, rHi) ); 5817 addInstr(env, mk_iMOVds_RR(dstLo, rLo) ); 5818 } 5819 return; 5820 } 5821 if (ty == Ity_F64) { 5822 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 5823 HReg dst = lookupIRTemp(env, tmp); 5824 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src)); 5825 return; 5826 } 5827 if (ty == Ity_F32) { 5828 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 5829 HReg dst = lookupIRTemp(env, tmp); 5830 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src)); 5831 return; 5832 } 5833 if (ty == Ity_V128) { 5834 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data); 5835 HReg dst = lookupIRTemp(env, tmp); 5836 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True)); 5837 return; 5838 } 5839 break; 5840 } 5841 5842 /* --------- Call to DIRTY helper --------- */ 5843 /* call complex ("dirty") helper 
function */ 5844 case Ist_Dirty: { 5845 IRType retty; 5846 IRDirty* d = stmt->Ist.Dirty.details; 5847 Bool passBBP = False; 5848 5849 if (d->nFxState == 0) 5850 vassert(!d->needsBBP); 5851 5852 passBBP = toBool(d->nFxState > 0 && d->needsBBP); 5853 5854 /* Marshal args, do the call, clear stack. */ 5855 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args ); 5856 if (!ok) 5857 break; /* will go to stmt_fail: */ 5858 5859 /* Now figure out what to do with the returned value, if any. */ 5860 if (d->tmp == IRTemp_INVALID) 5861 /* No return value. Nothing to do. */ 5862 return; 5863 5864 retty = typeOfIRTemp(env->type_env, d->tmp); 5865 5866 if (retty == Ity_I64) { 5867 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5868 HReg tmp = lookupIRTemp(env, d->tmp); 5869 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(), 5870 hregARM_R0())); 5871 } else { 5872 HReg dstHi, dstLo; 5873 /* The returned value is in r1:r0. Park it in the 5874 register-pair associated with tmp. */ 5875 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp); 5876 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) ); 5877 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) ); 5878 } 5879 return; 5880 } 5881 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) { 5882 /* The returned value is in r0. Park it in the register 5883 associated with tmp. 
*/ 5884 HReg dst = lookupIRTemp(env, d->tmp); 5885 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) ); 5886 return; 5887 } 5888 5889 break; 5890 } 5891 5892 /* --------- Load Linked and Store Conditional --------- */ 5893 case Ist_LLSC: { 5894 if (stmt->Ist.LLSC.storedata == NULL) { 5895 /* LL */ 5896 IRTemp res = stmt->Ist.LLSC.result; 5897 IRType ty = typeOfIRTemp(env->type_env, res); 5898 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 5899 Int szB = 0; 5900 HReg r_dst = lookupIRTemp(env, res); 5901 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 5902 switch (ty) { 5903 case Ity_I8: szB = 1; break; 5904 case Ity_I16: szB = 2; break; 5905 case Ity_I32: szB = 4; break; 5906 default: vassert(0); 5907 } 5908 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr)); 5909 addInstr(env, ARMInstr_LdrEX(szB)); 5910 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2())); 5911 return; 5912 } 5913 if (ty == Ity_I64) { 5914 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 5915 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr)); 5916 addInstr(env, ARMInstr_LdrEX(8)); 5917 /* Result is in r3:r2. On a non-NEON capable CPU, we must 5918 move it into a result register pair. On a NEON capable 5919 CPU, the result register will be a 64 bit NEON 5920 register, so we must move it there instead. 
*/ 5921 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { 5922 HReg dst = lookupIRTemp(env, res); 5923 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(), 5924 hregARM_R2())); 5925 } else { 5926 HReg r_dst_hi, r_dst_lo; 5927 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res); 5928 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2())); 5929 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3())); 5930 } 5931 return; 5932 } 5933 /*NOTREACHED*/ 5934 vassert(0); 5935 } else { 5936 /* SC */ 5937 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); 5938 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) { 5939 Int szB = 0; 5940 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); 5941 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 5942 switch (tyd) { 5943 case Ity_I8: szB = 1; break; 5944 case Ity_I16: szB = 2; break; 5945 case Ity_I32: szB = 4; break; 5946 default: vassert(0); 5947 } 5948 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD)); 5949 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA)); 5950 addInstr(env, ARMInstr_StrEX(szB)); 5951 } else { 5952 vassert(tyd == Ity_I64); 5953 /* This is really ugly. There is no is/is-not NEON 5954 decision akin to the case for LL, because iselInt64Expr 5955 fudges this for us, and always gets the result into two 5956 GPRs even if this means moving it from a NEON 5957 register. */ 5958 HReg rDhi, rDlo; 5959 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata); 5960 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); 5961 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo)); 5962 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi)); 5963 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA)); 5964 addInstr(env, ARMInstr_StrEX(8)); 5965 } 5966 /* now r0 is 1 if failed, 0 if success. Change to IR 5967 conventions (0 is fail, 1 is success). Also transfer 5968 result to r_res. 
*/ 5969 IRTemp res = stmt->Ist.LLSC.result; 5970 IRType ty = typeOfIRTemp(env->type_env, res); 5971 HReg r_res = lookupIRTemp(env, res); 5972 ARMRI84* one = ARMRI84_I84(1,0); 5973 vassert(ty == Ity_I1); 5974 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one)); 5975 /* And be conservative -- mask off all but the lowest bit */ 5976 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one)); 5977 return; 5978 } 5979 break; 5980 } 5981 5982 /* --------- MEM FENCE --------- */ 5983 case Ist_MBE: 5984 switch (stmt->Ist.MBE.event) { 5985 case Imbe_Fence: 5986 addInstr(env, ARMInstr_MFence()); 5987 return; 5988 case Imbe_CancelReservation: 5989 addInstr(env, ARMInstr_CLREX()); 5990 return; 5991 default: 5992 break; 5993 } 5994 break; 5995 5996 /* --------- INSTR MARK --------- */ 5997 /* Doesn't generate any executable code ... */ 5998 case Ist_IMark: 5999 return; 6000 6001 /* --------- NO-OP --------- */ 6002 case Ist_NoOp: 6003 return; 6004 6005 /* --------- EXIT --------- */ 6006 case Ist_Exit: { 6007 if (stmt->Ist.Exit.dst->tag != Ico_U32) 6008 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value"); 6009 6010 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 6011 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), 6012 stmt->Ist.Exit.offsIP); 6013 6014 /* Case: boring transfer to known address */ 6015 if (stmt->Ist.Exit.jk == Ijk_Boring 6016 || stmt->Ist.Exit.jk == Ijk_Call 6017 || stmt->Ist.Exit.jk == Ijk_Ret) { 6018 if (env->chainingAllowed) { 6019 /* .. almost always true .. */ 6020 /* Skip the event check at the dst if this is a forwards 6021 edge. */ 6022 Bool toFastEP 6023 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; 6024 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 6025 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32, 6026 amR15T, cc, toFastEP)); 6027 } else { 6028 /* .. very occasionally .. */ 6029 /* We can't use chaining, so ask for an assisted transfer, 6030 as that's the only alternative that is allowable. 
*/ 6031 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6032 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring)); 6033 } 6034 return; 6035 } 6036 6037 /* Case: assisted transfer to arbitrary address */ 6038 switch (stmt->Ist.Exit.jk) { 6039 /* Keep this list in sync with that in iselNext below */ 6040 case Ijk_ClientReq: 6041 case Ijk_NoDecode: 6042 case Ijk_NoRedir: 6043 case Ijk_Sys_syscall: 6044 case Ijk_TInval: 6045 { 6046 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 6047 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, 6048 stmt->Ist.Exit.jk)); 6049 return; 6050 } 6051 default: 6052 break; 6053 } 6054 6055 /* Do we ever expect to see any other kind? */ 6056 goto stmt_fail; 6057 } 6058 6059 default: break; 6060 } 6061 stmt_fail: 6062 ppIRStmt(stmt); 6063 vpanic("iselStmt"); 6064 } 6065 6066 6067 /*---------------------------------------------------------*/ 6068 /*--- ISEL: Basic block terminators (Nexts) ---*/ 6069 /*---------------------------------------------------------*/ 6070 6071 static void iselNext ( ISelEnv* env, 6072 IRExpr* next, IRJumpKind jk, Int offsIP ) 6073 { 6074 if (vex_traceflags & VEX_TRACE_VCODE) { 6075 vex_printf( "\n-- PUT(%d) = ", offsIP); 6076 ppIRExpr( next ); 6077 vex_printf( "; exit-"); 6078 ppIRJumpKind(jk); 6079 vex_printf( "\n"); 6080 } 6081 6082 /* Case: boring transfer to known address */ 6083 if (next->tag == Iex_Const) { 6084 IRConst* cdst = next->Iex.Const.con; 6085 vassert(cdst->tag == Ico_U32); 6086 if (jk == Ijk_Boring || jk == Ijk_Call) { 6087 /* Boring transfer to known address */ 6088 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6089 if (env->chainingAllowed) { 6090 /* .. almost always true .. */ 6091 /* Skip the event check at the dst if this is a forwards 6092 edge. */ 6093 Bool toFastEP 6094 = ((Addr64)cdst->Ico.U32) > env->max_ga; 6095 if (0) vex_printf("%s", toFastEP ? 
"X" : "."); 6096 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32, 6097 amR15T, ARMcc_AL, 6098 toFastEP)); 6099 } else { 6100 /* .. very occasionally .. */ 6101 /* We can't use chaining, so ask for an assisted transfer, 6102 as that's the only alternative that is allowable. */ 6103 HReg r = iselIntExpr_R(env, next); 6104 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, 6105 Ijk_Boring)); 6106 } 6107 return; 6108 } 6109 } 6110 6111 /* Case: call/return (==boring) transfer to any address */ 6112 switch (jk) { 6113 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 6114 HReg r = iselIntExpr_R(env, next); 6115 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6116 if (env->chainingAllowed) { 6117 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL)); 6118 } else { 6119 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, 6120 Ijk_Boring)); 6121 } 6122 return; 6123 } 6124 default: 6125 break; 6126 } 6127 6128 /* Case: assisted transfer to arbitrary address */ 6129 switch (jk) { 6130 /* Keep this list in sync with that for Ist_Exit above */ 6131 case Ijk_ClientReq: 6132 case Ijk_NoDecode: 6133 case Ijk_NoRedir: 6134 case Ijk_Sys_syscall: 6135 { 6136 HReg r = iselIntExpr_R(env, next); 6137 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); 6138 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk)); 6139 return; 6140 } 6141 default: 6142 break; 6143 } 6144 6145 vex_printf( "\n-- PUT(%d) = ", offsIP); 6146 ppIRExpr( next ); 6147 vex_printf( "; exit-"); 6148 ppIRJumpKind(jk); 6149 vex_printf( "\n"); 6150 vassert(0); // are we expecting any other kind? 6151 } 6152 6153 6154 /*---------------------------------------------------------*/ 6155 /*--- Insn selector top-level ---*/ 6156 /*---------------------------------------------------------*/ 6157 6158 /* Translate an entire SB to arm code. 
*/

/* Top-level entry point: convert one IR superblock into a fresh
   HInstrArray of ARM vcode.  The signature is fixed by the generic
   VEX driver; the returned array is allocated via the VEX allocator
   and owned by the caller. */
HInstrArray* iselSB_ARM ( IRSB* bb,
                          VexArch arch_host,
                          VexArchInfo* archinfo_host,
                          VexAbiInfo* vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr64 max_ga )
{
   Int        k, nextVReg;
   HReg       rLo, rHi;
   IRType     ty;
   ISelEnv*   env;
   UInt       hwcaps = archinfo_host->hwcaps;
   ARMAMode1  *amCounter, *amFailAddr;

   /* This selector only knows how to target ARM. */
   vassert(arch_host == VexArchARM);

   /* hwcaps should not change from one ISEL call to another. */
   arm_hwcaps = hwcaps; // JRS 2012 Mar 31: FIXME (RM)

   /* Build the environment consulted by the per-statement selectors,
      including an empty output instruction array. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;
   env->code     = newHInstrArray();
   env->type_env = bb->tyenv;

   /* The IRTemp -> vreg mapping is sized from the block's type env
      and fixed before selection starts.  vregmapHI is only used for
      I64 temps when NEON is unavailable (high 32-bit half). */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps;
   env->max_ga          = max_ga;

   /* Hand out suitably-kinded virtual registers, one (or, for a
      split I64, two) per IRTemp, numbering them consecutively from
      zero.  For a split I64 the high half deliberately receives the
      lower vreg number. */
   nextVReg = 0;
   for (k = 0; k < env->n_vregmap; k++) {
      rLo = rHi = INVALID_HREG;
      ty = bb->tyenv->types[k];
      switch (ty) {
         case Ity_I1: case Ity_I8: case Ity_I16: case Ity_I32:
            rLo = mkHReg(nextVReg++, HRcInt32, True);
            break;
         case Ity_I64:
            if (hwcaps & VEX_HWCAPS_ARM_NEON) {
               /* With NEON, a 64-bit int lives in one D register. */
               rLo = mkHReg(nextVReg++, HRcFlt64, True);
            } else {
               /* Otherwise it is split across two 32-bit vregs. */
               rHi = mkHReg(nextVReg++, HRcInt32, True);
               rLo = mkHReg(nextVReg++, HRcInt32, True);
            }
            break;
         case Ity_F32:
            rLo = mkHReg(nextVReg++, HRcFlt32, True);
            break;
         case Ity_F64:
            rLo = mkHReg(nextVReg++, HRcFlt64, True);
            break;
         case Ity_V128:
            rLo = mkHReg(nextVReg++, HRcVec128, True);
            break;
         default:
            ppIRType(ty);
            vpanic("iselBB: IRTemp type");
      }
      env->vregmap[k]   = rLo;
      env->vregmapHI[k] = rHi;
   }
   env->vreg_ctr = nextVReg;

   /* The very first instruction must be an event check. */
   amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
   amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
   addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  The real
      counter address is unknown at this point, so zero is emitted;
      it must be patched before the translation is used, by a call
      to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARMInstr_ProfInc());
   }

   /* Select code for each statement in turn, then the terminator. */
   for (k = 0; k < bb->stmts_used; k++)
      iselStmt(env, bb->stmts[k]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* Record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_arm_isel.c ---*/
/*---------------------------------------------------------------*/