/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
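/* For reference: in 0x027F the low six bits are the x87 exception
   masks (all set), the precision-control field (bits 8..9) is 10b,
   selecting 53-bit precision, and the rounding-control field (bits
   10..11) is 00b, round-to-nearest.  In 0x1F80, bits 7..12 are the
   six SSE exception masks (all set) and the rounding-control field
   (bits 13..14) is likewise 00b. */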

/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 128-bit integer-typed
             IRTemps.  It holds the identity of a second
             64-bit virtual HReg, which holds the high half
             of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
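/* For example: 0x000000007FFFFFFFULL and 0xFFFFFFFF80000000ULL both
   fit (their top 33 bits are all 0 and all 1 respectively), whereas
   0x0000000080000000ULL does not, since sign-extending its lower 32
   bits would yield 0xFFFFFFFF80000000ULL instead. */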
/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make a int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}
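/* Roughly speaking: a constant, a temporary, a 64-bit Get, or
   32Uto64 of a temporary can each be computed by one instruction
   here; anything more complex (an Add64, say) yields NULL, which in
   turn pushes doHelperCall below onto its slow scheme. */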
/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   argreg = 0;
   if (passBBP) {
      fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      fastinstrs[argreg]
         = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
      if (fastinstrs[argreg] == NULL)
         goto slowscheme;
      argreg++;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   vassert(argreg <= 6);
   for (i = 0; i < argreg; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif
   argreg = 0;

   if (passBBP) {
      /* This is pretty stupid; better to move directly to rdi
         after the rest of the args are done. */
      tmpregs[argreg] = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      tmpregs[argreg] = iselIntExpr_R(env, args[i]);
      argreg++;
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < argreg; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Finally, the call itself. */
  handle_call:
   addInstr(env, AMD64Instr_Call(
                    cc,
                    Ptr_to_ULong(cee->addr),
                    n_args + (passBBP ? 1 : 0)
                 )
   );
}
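/* To illustrate the two schemes (hypothetical IR, not taken from any
   real block): for an unconditional call h(t1, 0x42) both arguments
   pass the iselIntExpr_single_instruction test, so the fast scheme
   loads them straight into %rdi and %rsi.  For h(Add64(t1,t2)), or
   for any guarded call, we fall back to the slow scheme: each
   argument is first computed into a fresh vreg and only copied into
   the real argument registers once the guard has been evaluated. */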
/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp    (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
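/* Worked example (the numbers are invented): for a descriptor with
   base 0x300, nElems 8 and elemTy Ity_I64, an index expression 'ix'
   and bias 1, this emits roughly

      movq %ix, %tmp
      addq $1, %tmp
      andq $7, %tmp

   and returns the amode 0x300(%rbp, %tmp, 8), i.e. the guest state
   base plus eight bytes per (wrapped) index. */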
/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
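/* The shift-and-OR trick in the two preceding functions works because
   the IRRoundingMode encoding (0 = nearest, 1 = -infinity,
   2 = +infinity, 3 = zero) coincides with the hardware's RC encoding;
   the value only has to be moved into the right bit positions -- bits
   13..14 of %mxcsr, bits 10..11 of the x87 control word -- on top of
   default values whose rounding fields are zero. */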
/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
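/* For instance, bitmask8_to_bytemask64(0xA5), i.e. binary 10100101,
   yields 0xFF00FF0000FF00FFULL: each set bit of the input selects an
   all-ones byte in the corresponding lane of the result. */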
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }
      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;
         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg      rax     = hregAMD64_RAX();
         HReg      rdx     = hregAMD64_RDX();
         HReg      dst     = newVRegI(env);
         Bool      syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM*  rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
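      /* The shift-up/shift-down pairs above merely sign- or
         zero-extend the low 8/16/32 bits of each operand; after that
         a plain 64-bit multiply is exact, since the full product of
         two N-bit values needs at most 2N <= 64 bits. */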
      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(64to1(expr64)) */
      {
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle8_then_8Uto64,
                        unop(Iop_8Uto64,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle16_then_16Uto64,
                        unop(Iop_16Uto64,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }
      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
         Use 32 bit arithmetic and let the default zero-extend rule
         do the 32Uto64 for free. */
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
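         /* The two CmpwNEZ cases above, and the Left cases below, rely
            on the same identity: for any x, (x | -x) has its top bit
            set iff x is nonzero.  OR-ing a value with its own negation
            and then shifting arithmetically therefore yields the
            all-ones/all-zeroes result CmpwNEZ wants; the Left ops
            simply stop before the shift. */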
         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            HReg dst = newVRegI(env);
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
            HReg rsp = hregAMD64_RSP();
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            HReg vHi, vLo, vec;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            /* Do the first part of the selection by deciding which of
               the 128 bit registers to look at, and do the second part
               using the same scheme as for V128{HI}to64 above. */
            Int off = 0;
            switch (e->Iex.Unop.op) {
               case Iop_V256to64_0: vec = vLo; off = -16; break;
               case Iop_V256to64_1: vec = vLo; off = -8;  break;
               case Iop_V256to64_2: vec = vHi; off = -16; break;
               case Iop_V256to64_3: vec = vHi; off = -8;  break;
               default: vassert(0);
            }
            HReg dst = newVRegI(env);
            HReg rsp = hregAMD64_RSP();
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         default:
            break;
      }

      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature
               ULong fn ( ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }
   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed
         for args, and 64 or 32 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call. */
      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );

      /* Move to dst, and zero out the top 32 bits if the result type is
         Ity_I32.  Probably overkill, but still .. */
      if (e->Iex.CCall.retty == Ity_I64)
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      else
         addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));

      return dst;
   }

   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }

   /* --------- MULTIPLEX --------- */
   case Iex_Mux0X: {
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         HReg     r8;
         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
         AMD64RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
         HReg     dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(rX,dst));
         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
         addInstr(env, AMD64Instr_Test64(0xFF, r8));
         addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
         return dst;
      }
      break;
   }
   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      IRTriop *triop = e->Iex.Triop.details;
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (triop->op == Iop_PRemC3210F64
          || triop->op == Iop_PRem1C3210F64) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg1   = iselDblExpr(env, triop->arg2);
         HReg        arg2   = iselDblExpr(env, triop->arg3);
         HReg        dst    = newVRegI(env);
         addInstr(env, AMD64Instr_A87Free(2));

         /* one arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         /* other arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         switch (triop->op) {
            case Iop_PRemC3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
               break;
            case Iop_PRem1C3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
               break;
            default:
               vassert(0);
         }
         /* Ignore the result, and instead make off with the FPU's
            C3210 flags (in the status word). */
         addInstr(env, AMD64Instr_A87StSW(m8_rsp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}
*/ 1787 static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1788 { 1789 MatchInfo mi; 1790 DECLARE_PATTERN(p_complex); 1791 IRType ty = typeOfIRExpr(env->type_env,e); 1792 vassert(ty == Ity_I64); 1793 1794 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */ 1795 /* bind0 bind1 bind2 bind3 */ 1796 DEFINE_PATTERN(p_complex, 1797 binop( Iop_Add64, 1798 binop( Iop_Add64, 1799 bind(0), 1800 binop(Iop_Shl64, bind(1), bind(2)) 1801 ), 1802 bind(3) 1803 ) 1804 ); 1805 if (matchIRExpr(&mi, p_complex, e)) { 1806 IRExpr* expr1 = mi.bindee[0]; 1807 IRExpr* expr2 = mi.bindee[1]; 1808 IRExpr* imm8 = mi.bindee[2]; 1809 IRExpr* simm32 = mi.bindee[3]; 1810 if (imm8->tag == Iex_Const 1811 && imm8->Iex.Const.con->tag == Ico_U8 1812 && imm8->Iex.Const.con->Ico.U8 < 4 1813 /* imm8 is OK, now check simm32 */ 1814 && simm32->tag == Iex_Const 1815 && simm32->Iex.Const.con->tag == Ico_U64 1816 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) { 1817 UInt shift = imm8->Iex.Const.con->Ico.U8; 1818 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64); 1819 HReg r1 = iselIntExpr_R(env, expr1); 1820 HReg r2 = iselIntExpr_R(env, expr2); 1821 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3); 1822 return AMD64AMode_IRRS(offset, r1, r2, shift); 1823 } 1824 } 1825 1826 /* Add64(expr1, Shl64(expr2, imm)) */ 1827 if (e->tag == Iex_Binop 1828 && e->Iex.Binop.op == Iop_Add64 1829 && e->Iex.Binop.arg2->tag == Iex_Binop 1830 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64 1831 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1832 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1833 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1834 if (shift == 1 || shift == 2 || shift == 3) { 1835 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1836 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1837 return AMD64AMode_IRRS(0, r1, r2, shift); 1838 } 1839 } 1840 1841 /* Add64(expr,i) */ 1842 if (e->tag == Iex_Binop 1843 && e->Iex.Binop.op == Iop_Add64 1844 && e->Iex.Binop.arg2->tag == Iex_Const 1845 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64 1846 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) { 1847 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1848 return AMD64AMode_IR( 1849 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64), 1850 r1 1851 ); 1852 } 1853 1854 /* Doesn't match anything in particular. Generate it into 1855 a register and use that. */ 1856 { 1857 HReg r1 = iselIntExpr_R(env, e); 1858 return AMD64AMode_IR(0, r1); 1859 } 1860 } 1861 1862 1863 /* --------------------- RMIs --------------------- */ 1864 1865 /* Similarly, calculate an expression into an AMD64RMI operand. As with 1866 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */ 1867 1868 static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 1869 { 1870 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1871 /* sanity checks ... */ 1872 switch (rmi->tag) { 1873 case Armi_Imm: 1874 return rmi; 1875 case Armi_Reg: 1876 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64); 1877 vassert(hregIsVirtual(rmi->Armi.Reg.reg)); 1878 return rmi; 1879 case Armi_Mem: 1880 vassert(sane_AMode(rmi->Armi.Mem.am)); 1881 return rmi; 1882 default: 1883 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag"); 1884 } 1885 } 1886 1887 /* DO NOT CALL THIS DIRECTLY !
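The cases below prefer the cheapest operand form that is legal: a constant becomes Armi_Imm when it is 32 bits or narrower, or is a 64-bit value accepted by fitsIn32Bits; a 64-bit GET or a 64-bit little-endian load becomes Armi_Mem; anything else is computed into a fresh vreg and handed back as Armi_Reg.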
*/ 1888 static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ) 1889 { 1890 IRType ty = typeOfIRExpr(env->type_env,e); 1891 vassert(ty == Ity_I64 || ty == Ity_I32 1892 || ty == Ity_I16 || ty == Ity_I8); 1893 1894 /* special case: immediate 64/32/16/8 */ 1895 if (e->tag == Iex_Const) { 1896 switch (e->Iex.Const.con->tag) { 1897 case Ico_U64: 1898 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 1899 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 1900 } 1901 break; 1902 case Ico_U32: 1903 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break; 1904 case Ico_U16: 1905 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break; 1906 case Ico_U8: 1907 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break; 1908 default: 1909 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 1910 } 1911 } 1912 1913 /* special case: 64-bit GET */ 1914 if (e->tag == Iex_Get && ty == Ity_I64) { 1915 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset, 1916 hregAMD64_RBP())); 1917 } 1918 1919 /* special case: 64-bit load from memory */ 1920 if (e->tag == Iex_Load && ty == Ity_I64 1921 && e->Iex.Load.end == Iend_LE) { 1922 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 1923 return AMD64RMI_Mem(am); 1924 } 1925 1926 /* default case: calculate into a register and return that */ 1927 { 1928 HReg r = iselIntExpr_R ( env, e ); 1929 return AMD64RMI_Reg(r); 1930 } 1931 } 1932 1933 1934 /* --------------------- RIs --------------------- */ 1935 1936 /* Calculate an expression into an AMD64RI operand. As with 1937 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 1938 bits. */ 1939 1940 static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ) 1941 { 1942 AMD64RI* ri = iselIntExpr_RI_wrk(env, e); 1943 /* sanity checks ... */ 1944 switch (ri->tag) { 1945 case Ari_Imm: 1946 return ri; 1947 case Ari_Reg: 1948 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64); 1949 vassert(hregIsVirtual(ri->Ari.Reg.reg)); 1950 return ri; 1951 default: 1952 vpanic("iselIntExpr_RI: unknown amd64 RI tag"); 1953 } 1954 } 1955 1956 /* DO NOT CALL THIS DIRECTLY ! */ 1957 static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 1958 { 1959 IRType ty = typeOfIRExpr(env->type_env,e); 1960 vassert(ty == Ity_I64 || ty == Ity_I32 1961 || ty == Ity_I16 || ty == Ity_I8); 1962 1963 /* special case: immediate */ 1964 if (e->tag == Iex_Const) { 1965 switch (e->Iex.Const.con->tag) { 1966 case Ico_U64: 1967 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) { 1968 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64)); 1969 } 1970 break; 1971 case Ico_U32: 1972 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32); 1973 case Ico_U16: 1974 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); 1975 case Ico_U8: 1976 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8); 1977 default: 1978 vpanic("iselIntExpr_RMI.Iex_Const(amd64)"); 1979 } 1980 } 1981 1982 /* default case: calculate into a register and return that */ 1983 { 1984 HReg r = iselIntExpr_R ( env, e ); 1985 return AMD64RI_Reg(r); 1986 } 1987 } 1988 1989 1990 /* --------------------- RMs --------------------- */ 1991 1992 /* Similarly, calculate an expression into an AMD64RM operand. As 1993 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8 1994 bits. */ 1995 1996 static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 1997 { 1998 AMD64RM* rm = iselIntExpr_RM_wrk(env, e); 1999 /* sanity checks ... 
*/ 2000 switch (rm->tag) { 2001 case Arm_Reg: 2002 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64); 2003 vassert(hregIsVirtual(rm->Arm.Reg.reg)); 2004 return rm; 2005 case Arm_Mem: 2006 vassert(sane_AMode(rm->Arm.Mem.am)); 2007 return rm; 2008 default: 2009 vpanic("iselIntExpr_RM: unknown amd64 RM tag"); 2010 } 2011 } 2012 2013 /* DO NOT CALL THIS DIRECTLY ! */ 2014 static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ) 2015 { 2016 IRType ty = typeOfIRExpr(env->type_env,e); 2017 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 2018 2019 /* special case: 64-bit GET */ 2020 if (e->tag == Iex_Get && ty == Ity_I64) { 2021 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset, 2022 hregAMD64_RBP())); 2023 } 2024 2025 /* special case: load from memory */ 2026 2027 /* default case: calculate into a register and return that */ 2028 { 2029 HReg r = iselIntExpr_R ( env, e ); 2030 return AMD64RM_Reg(r); 2031 } 2032 } 2033 2034 2035 /* --------------------- CONDCODE --------------------- */ 2036 2037 /* Generate code to evaluate a bit-typed expression, returning the 2038 condition code which would be set if the expression had 2039 notionally evaluated to 1. */ 2040 2041 static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 2042 { 2043 /* Uh, there's nothing we can sanity check here, unfortunately. */ 2044 return iselCondCode_wrk(env,e); 2045 } 2046 2047 /* DO NOT CALL THIS DIRECTLY ! */ 2048 static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 2049 { 2050 MatchInfo mi; 2051 2052 vassert(e); 2053 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 2054 2055 /* var */ 2056 if (e->tag == Iex_RdTmp) { 2057 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 2058 HReg dst = newVRegI(env); 2059 addInstr(env, mk_iMOVsd_RR(r64,dst)); 2060 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst)); 2061 return Acc_NZ; 2062 } 2063 2064 /* Constant 1:Bit */ 2065 if (e->tag == Iex_Const) { 2066 HReg r; 2067 vassert(e->Iex.Const.con->tag == Ico_U1); 2068 vassert(e->Iex.Const.con->Ico.U1 == True 2069 || e->Iex.Const.con->Ico.U1 == False); 2070 r = newVRegI(env); 2071 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r)); 2072 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r)); 2073 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ; 2074 } 2075 2076 /* Not1(...)
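The xor-with-1 trick below works because AMD64CondCode follows the native condition-code encoding, in which a condition and its complement differ only in the least significant bit (Acc_Z versus Acc_NZ, for instance), so flipping bit 0 of the returned code negates the test.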
*/ 2077 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 2078 /* Generate code for the arg, and negate the test condition */ 2079 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 2080 } 2081 2082 /* --- patterns rooted at: 64to1 --- */ 2083 2084 /* 64to1 */ 2085 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { 2086 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2087 addInstr(env, AMD64Instr_Test64(1,reg)); 2088 return Acc_NZ; 2089 } 2090 2091 /* --- patterns rooted at: 32to1 --- */ 2092 2093 /* 32to1 */ 2094 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) { 2095 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); 2096 addInstr(env, AMD64Instr_Test64(1,reg)); 2097 return Acc_NZ; 2098 } 2099 2100 /* --- patterns rooted at: CmpNEZ8 --- */ 2101 2102 /* CmpNEZ8(x) */ 2103 if (e->tag == Iex_Unop 2104 && e->Iex.Unop.op == Iop_CmpNEZ8) { 2105 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2106 addInstr(env, AMD64Instr_Test64(0xFF,r)); 2107 return Acc_NZ; 2108 } 2109 2110 /* --- patterns rooted at: CmpNEZ16 --- */ 2111 2112 /* CmpNEZ16(x) */ 2113 if (e->tag == Iex_Unop 2114 && e->Iex.Unop.op == Iop_CmpNEZ16) { 2115 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg); 2116 addInstr(env, AMD64Instr_Test64(0xFFFF,r)); 2117 return Acc_NZ; 2118 } 2119 2120 /* --- patterns rooted at: CmpNEZ32 --- */ 2121 2122 /* CmpNEZ32(x) */ 2123 if (e->tag == Iex_Unop 2124 && e->Iex.Unop.op == Iop_CmpNEZ32) { 2125 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2126 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2127 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2128 return Acc_NZ; 2129 } 2130 2131 /* --- patterns rooted at: CmpNEZ64 --- */ 2132 2133 /* CmpNEZ64(Or64(x,y)) */ 2134 { 2135 DECLARE_PATTERN(p_CmpNEZ64_Or64); 2136 DEFINE_PATTERN(p_CmpNEZ64_Or64, 2137 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 2138 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 2139 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 2140 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 2141 HReg tmp = newVRegI(env); 2142 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 2143 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp)); 2144 return Acc_NZ; 2145 } 2146 } 2147 2148 /* CmpNEZ64(x) */ 2149 if (e->tag == Iex_Unop 2150 && e->Iex.Unop.op == Iop_CmpNEZ64) { 2151 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 2152 AMD64RMI* rmi2 = AMD64RMI_Imm(0); 2153 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2154 return Acc_NZ; 2155 } 2156 2157 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */ 2158 2159 /* CmpEQ8 / CmpNE8 */ 2160 if (e->tag == Iex_Binop 2161 && (e->Iex.Binop.op == Iop_CmpEQ8 2162 || e->Iex.Binop.op == Iop_CmpNE8 2163 || e->Iex.Binop.op == Iop_CasCmpEQ8 2164 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 2165 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2166 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2167 HReg r = newVRegI(env); 2168 addInstr(env, mk_iMOVsd_RR(r1,r)); 2169 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2170 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r)); 2171 switch (e->Iex.Binop.op) { 2172 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z; 2173 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ; 2174 default: vpanic("iselCondCode(amd64): CmpXX8"); 2175 } 2176 } 2177 2178 /* CmpEQ16 / CmpNE16 */ 2179 if (e->tag == Iex_Binop 2180 && (e->Iex.Binop.op == Iop_CmpEQ16 2181 || e->Iex.Binop.op == Iop_CmpNE16 2182 || e->Iex.Binop.op == Iop_CasCmpEQ16 2183 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 2184 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2185 AMD64RMI* rmi2 = iselIntExpr_RMI(env, 
e->Iex.Binop.arg2); 2186 HReg r = newVRegI(env); 2187 addInstr(env, mk_iMOVsd_RR(r1,r)); 2188 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r)); 2189 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r)); 2190 switch (e->Iex.Binop.op) { 2191 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z; 2192 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ; 2193 default: vpanic("iselCondCode(amd64): CmpXX16"); 2194 } 2195 } 2196 2197 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation). 2198 Saves a "movq %rax, %tmp" compared to the default route. */ 2199 if (e->tag == Iex_Binop 2200 && e->Iex.Binop.op == Iop_CmpNE64 2201 && e->Iex.Binop.arg1->tag == Iex_CCall 2202 && e->Iex.Binop.arg2->tag == Iex_Const) { 2203 IRExpr* cal = e->Iex.Binop.arg1; 2204 IRExpr* con = e->Iex.Binop.arg2; 2205 HReg tmp = newVRegI(env); 2206 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 2207 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */ 2208 vassert(con->Iex.Const.con->tag == Ico_U64); 2209 /* Marshal args, do the call. */ 2210 doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args ); 2211 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp)); 2212 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, 2213 AMD64RMI_Reg(hregAMD64_RAX()), tmp)); 2214 return Acc_NZ; 2215 } 2216 2217 /* Cmp*64*(x,y) */ 2218 if (e->tag == Iex_Binop 2219 && (e->Iex.Binop.op == Iop_CmpEQ64 2220 || e->Iex.Binop.op == Iop_CmpNE64 2221 || e->Iex.Binop.op == Iop_CmpLT64S 2222 || e->Iex.Binop.op == Iop_CmpLT64U 2223 || e->Iex.Binop.op == Iop_CmpLE64S 2224 || e->Iex.Binop.op == Iop_CmpLE64U 2225 || e->Iex.Binop.op == Iop_CasCmpEQ64 2226 || e->Iex.Binop.op == Iop_CasCmpNE64)) { 2227 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2228 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2229 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); 2230 switch (e->Iex.Binop.op) { 2231 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z; 2232 case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ; 2233 case Iop_CmpLT64S: return Acc_L; 2234 case Iop_CmpLT64U: return Acc_B; 2235 case Iop_CmpLE64S: return Acc_LE; 2236 case Iop_CmpLE64U: return Acc_BE; 2237 default: vpanic("iselCondCode(amd64): CmpXX64"); 2238 } 2239 } 2240 2241 /* Cmp*32*(x,y) */ 2242 if (e->tag == Iex_Binop 2243 && (e->Iex.Binop.op == Iop_CmpEQ32 2244 || e->Iex.Binop.op == Iop_CmpNE32 2245 || e->Iex.Binop.op == Iop_CmpLT32S 2246 || e->Iex.Binop.op == Iop_CmpLT32U 2247 || e->Iex.Binop.op == Iop_CmpLE32S 2248 || e->Iex.Binop.op == Iop_CmpLE32U 2249 || e->Iex.Binop.op == Iop_CasCmpEQ32 2250 || e->Iex.Binop.op == Iop_CasCmpNE32)) { 2251 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2252 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2253 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1)); 2254 switch (e->Iex.Binop.op) { 2255 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z; 2256 case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ; 2257 case Iop_CmpLT32S: return Acc_L; 2258 case Iop_CmpLT32U: return Acc_B; 2259 case Iop_CmpLE32S: return Acc_LE; 2260 case Iop_CmpLE32U: return Acc_BE; 2261 default: vpanic("iselCondCode(amd64): CmpXX32"); 2262 } 2263 } 2264 2265 ppIRExpr(e); 2266 vpanic("iselCondCode(amd64)"); 2267 } 2268 2269 2270 /*---------------------------------------------------------*/ 2271 /*--- ISEL: Integer expressions (128 bit) ---*/ 2272 /*---------------------------------------------------------*/ 2273 2274 /* Compute a 128-bit value into a register pair, which is returned as 2275 the 
first two parameters. As with iselIntExpr_R, these may be 2276 either real or virtual regs; in any case they must not be changed 2277 by subsequent code emitted by the caller. */ 2278 2279 static void iselInt128Expr ( HReg* rHi, HReg* rLo, 2280 ISelEnv* env, IRExpr* e ) 2281 { 2282 iselInt128Expr_wrk(rHi, rLo, env, e); 2283 # if 0 2284 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2285 # endif 2286 vassert(hregClass(*rHi) == HRcInt64); 2287 vassert(hregIsVirtual(*rHi)); 2288 vassert(hregClass(*rLo) == HRcInt64); 2289 vassert(hregIsVirtual(*rLo)); 2290 } 2291 2292 /* DO NOT CALL THIS DIRECTLY ! */ 2293 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 2294 ISelEnv* env, IRExpr* e ) 2295 { 2296 vassert(e); 2297 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); 2298 2299 /* read 128-bit IRTemp */ 2300 if (e->tag == Iex_RdTmp) { 2301 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 2302 return; 2303 } 2304 2305 /* --------- BINARY ops --------- */ 2306 if (e->tag == Iex_Binop) { 2307 switch (e->Iex.Binop.op) { 2308 /* 64 x 64 -> 128 multiply */ 2309 case Iop_MullU64: 2310 case Iop_MullS64: { 2311 /* get one operand into %rax, and the other into a R/M. 2312 Need to make an educated guess about which is better in 2313 which. */ 2314 HReg tLo = newVRegI(env); 2315 HReg tHi = newVRegI(env); 2316 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); 2317 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2318 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2319 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX())); 2320 addInstr(env, AMD64Instr_MulL(syned, rmLeft)); 2321 /* Result is now in RDX:RAX. Tell the caller. */ 2322 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2323 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2324 *rHi = tHi; 2325 *rLo = tLo; 2326 return; 2327 } 2328 2329 /* 128 x 64 -> (64(rem),64(div)) division */ 2330 case Iop_DivModU128to64: 2331 case Iop_DivModS128to64: { 2332 /* Get the 128-bit operand into rdx:rax, and the other into 2333 any old R/M. */ 2334 HReg sHi, sLo; 2335 HReg tLo = newVRegI(env); 2336 HReg tHi = newVRegI(env); 2337 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64); 2338 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2339 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2340 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX())); 2341 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX())); 2342 addInstr(env, AMD64Instr_Div(syned, 8, rmRight)); 2343 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi)); 2344 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo)); 2345 *rHi = tHi; 2346 *rLo = tLo; 2347 return; 2348 } 2349 2350 /* 64HLto128(e1,e2) */ 2351 case Iop_64HLto128: 2352 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2353 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2354 return; 2355 2356 default: 2357 break; 2358 } 2359 } /* if (e->tag == Iex_Binop) */ 2360 2361 ppIRExpr(e); 2362 vpanic("iselInt128Expr"); 2363 } 2364 2365 2366 /*---------------------------------------------------------*/ 2367 /*--- ISEL: Floating point expressions (32 bit) ---*/ 2368 /*---------------------------------------------------------*/ 2369 2370 /* Nothing interesting here; really just wrappers for 2371 64-bit stuff. 
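Note, though, that an F32 value is still carried in the low lane of a 128-bit vector vreg (class HRcVec128), exactly as for F64; only the load/store width (4 bytes rather than 8) differs.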
*/ 2372 2373 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2374 { 2375 HReg r = iselFltExpr_wrk( env, e ); 2376 # if 0 2377 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2378 # endif 2379 vassert(hregClass(r) == HRcVec128); 2380 vassert(hregIsVirtual(r)); 2381 return r; 2382 } 2383 2384 /* DO NOT CALL THIS DIRECTLY */ 2385 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2386 { 2387 IRType ty = typeOfIRExpr(env->type_env,e); 2388 vassert(ty == Ity_F32); 2389 2390 if (e->tag == Iex_RdTmp) { 2391 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2392 } 2393 2394 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2395 AMD64AMode* am; 2396 HReg res = newVRegV(env); 2397 vassert(e->Iex.Load.ty == Ity_F32); 2398 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2399 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am)); 2400 return res; 2401 } 2402 2403 if (e->tag == Iex_Binop 2404 && e->Iex.Binop.op == Iop_F64toF32) { 2405 /* Although the result is still held in a standard SSE register, 2406 we need to round it to reflect the loss of accuracy/range 2407 entailed in casting it to a 32-bit float. */ 2408 HReg dst = newVRegV(env); 2409 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2410 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2411 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst)); 2412 set_SSE_rounding_default( env ); 2413 return dst; 2414 } 2415 2416 if (e->tag == Iex_Get) { 2417 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2418 hregAMD64_RBP() ); 2419 HReg res = newVRegV(env); 2420 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am )); 2421 return res; 2422 } 2423 2424 if (e->tag == Iex_Unop 2425 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2426 /* Given an I32, produce an IEEE754 float with the same bit 2427 pattern. */ 2428 HReg dst = newVRegV(env); 2429 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2430 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP()); 2431 addInstr(env, AMD64Instr_Store(4, src, m4_rsp)); 2432 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp )); 2433 return dst; 2434 } 2435 2436 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2437 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2438 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2); 2439 HReg dst = newVRegV(env); 2440 2441 /* rf now holds the value to be rounded. The first thing to do 2442 is set the FPU's rounding mode accordingly. */ 2443 2444 /* Set host x87 rounding mode */ 2445 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2446 2447 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp)); 2448 addInstr(env, AMD64Instr_A87Free(1)); 2449 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4)); 2450 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2451 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4)); 2452 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp)); 2453 2454 /* Restore default x87 rounding. */ 2455 set_FPU_rounding_default( env ); 2456 2457 return dst; 2458 } 2459 2460 ppIRExpr(e); 2461 vpanic("iselFltExpr_wrk"); 2462 } 2463 2464 2465 /*---------------------------------------------------------*/ 2466 /*--- ISEL: Floating point expressions (64 bit) ---*/ 2467 /*---------------------------------------------------------*/ 2468 2469 /* Compute a 64-bit floating point value into the lower half of an xmm 2470 register, the identity of which is returned. 
As with 2471 iselIntExpr_R, the returned reg will be virtual, and it must not be 2472 changed by subsequent code emitted by the caller. 2473 */ 2474 2475 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2476 2477 Type S (1 bit) E (11 bits) F (52 bits) 2478 ---- --------- ----------- ----------- 2479 signalling NaN u 2047 (max) .0uuuuu---u 2480 (with at least 2481 one 1 bit) 2482 quiet NaN u 2047 (max) .1uuuuu---u 2483 2484 negative infinity 1 2047 (max) .000000---0 2485 2486 positive infinity 0 2047 (max) .000000---0 2487 2488 negative zero 1 0 .000000---0 2489 2490 positive zero 0 0 .000000---0 2491 */ 2492 2493 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2494 { 2495 HReg r = iselDblExpr_wrk( env, e ); 2496 # if 0 2497 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2498 # endif 2499 vassert(hregClass(r) == HRcVec128); 2500 vassert(hregIsVirtual(r)); 2501 return r; 2502 } 2503 2504 /* DO NOT CALL THIS DIRECTLY */ 2505 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2506 { 2507 IRType ty = typeOfIRExpr(env->type_env,e); 2508 vassert(e); 2509 vassert(ty == Ity_F64); 2510 2511 if (e->tag == Iex_RdTmp) { 2512 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2513 } 2514 2515 if (e->tag == Iex_Const) { 2516 union { ULong u64; Double f64; } u; 2517 HReg res = newVRegV(env); 2518 HReg tmp = newVRegI(env); 2519 vassert(sizeof(u) == 8); 2520 vassert(sizeof(u.u64) == 8); 2521 vassert(sizeof(u.f64) == 8); 2522 2523 if (e->Iex.Const.con->tag == Ico_F64) { 2524 u.f64 = e->Iex.Const.con->Ico.F64; 2525 } 2526 else if (e->Iex.Const.con->tag == Ico_F64i) { 2527 u.u64 = e->Iex.Const.con->Ico.F64i; 2528 } 2529 else 2530 vpanic("iselDblExpr(amd64): const"); 2531 2532 addInstr(env, AMD64Instr_Imm64(u.u64, tmp)); 2533 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp))); 2534 addInstr(env, AMD64Instr_SseLdSt( 2535 True/*load*/, 8, res, 2536 AMD64AMode_IR(0, hregAMD64_RSP()) 2537 )); 2538 add_to_rsp(env, 8); 2539 return res; 2540 } 2541 2542 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2543 AMD64AMode* am; 2544 HReg res = newVRegV(env); 2545 vassert(e->Iex.Load.ty == Ity_F64); 2546 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2547 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2548 return res; 2549 } 2550 2551 if (e->tag == Iex_Get) { 2552 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset, 2553 hregAMD64_RBP() ); 2554 HReg res = newVRegV(env); 2555 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2556 return res; 2557 } 2558 2559 if (e->tag == Iex_GetI) { 2560 AMD64AMode* am 2561 = genGuestArrayOffset( 2562 env, e->Iex.GetI.descr, 2563 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2564 HReg res = newVRegV(env); 2565 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am )); 2566 return res; 2567 } 2568 2569 if (e->tag == Iex_Triop) { 2570 IRTriop *triop = e->Iex.Triop.details; 2571 AMD64SseOp op = Asse_INVALID; 2572 switch (triop->op) { 2573 case Iop_AddF64: op = Asse_ADDF; break; 2574 case Iop_SubF64: op = Asse_SUBF; break; 2575 case Iop_MulF64: op = Asse_MULF; break; 2576 case Iop_DivF64: op = Asse_DIVF; break; 2577 default: break; 2578 } 2579 if (op != Asse_INVALID) { 2580 HReg dst = newVRegV(env); 2581 HReg argL = iselDblExpr(env, triop->arg2); 2582 HReg argR = iselDblExpr(env, triop->arg3); 2583 addInstr(env, mk_vMOVsd_RR(argL, dst)); 2584 /* XXXROUNDINGFIXME */ 2585 /* set roundingmode here */ 2586 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 2587 return dst; 2588 } 2589 } 2590 2591 if (e->tag == Iex_Binop && e->Iex.Binop.op == 
Iop_RoundF64toInt) { 2592 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2593 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 2594 HReg dst = newVRegV(env); 2595 2596 /* rf now holds the value to be rounded. The first thing to do 2597 is set the FPU's rounding mode accordingly. */ 2598 2599 /* Set host x87 rounding mode */ 2600 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2601 2602 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 2603 addInstr(env, AMD64Instr_A87Free(1)); 2604 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2605 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND)); 2606 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2607 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2608 2609 /* Restore default x87 rounding. */ 2610 set_FPU_rounding_default( env ); 2611 2612 return dst; 2613 } 2614 2615 IRTriop *triop = e->Iex.Triop.details; 2616 if (e->tag == Iex_Triop 2617 && (triop->op == Iop_ScaleF64 2618 || triop->op == Iop_AtanF64 2619 || triop->op == Iop_Yl2xF64 2620 || triop->op == Iop_Yl2xp1F64 2621 || triop->op == Iop_PRemF64 2622 || triop->op == Iop_PRem1F64) 2623 ) { 2624 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2625 HReg arg1 = iselDblExpr(env, triop->arg2); 2626 HReg arg2 = iselDblExpr(env, triop->arg3); 2627 HReg dst = newVRegV(env); 2628 Bool arg2first = toBool(triop->op == Iop_ScaleF64 2629 || triop->op == Iop_PRemF64 2630 || triop->op == Iop_PRem1F64); 2631 addInstr(env, AMD64Instr_A87Free(2)); 2632 2633 /* one arg -> top of x87 stack */ 2634 addInstr(env, AMD64Instr_SseLdSt( 2635 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp)); 2636 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2637 2638 /* other arg -> top of x87 stack */ 2639 addInstr(env, AMD64Instr_SseLdSt( 2640 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp)); 2641 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2642 2643 /* do it */ 2644 /* XXXROUNDINGFIXME */ 2645 /* set roundingmode here */ 2646 switch (triop->op) { 2647 case Iop_ScaleF64: 2648 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE)); 2649 break; 2650 case Iop_AtanF64: 2651 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN)); 2652 break; 2653 case Iop_Yl2xF64: 2654 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X)); 2655 break; 2656 case Iop_Yl2xp1F64: 2657 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1)); 2658 break; 2659 case Iop_PRemF64: 2660 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM)); 2661 break; 2662 case Iop_PRem1F64: 2663 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1)); 2664 break; 2665 default: 2666 vassert(0); 2667 } 2668 2669 /* save result */ 2670 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2671 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2672 return dst; 2673 } 2674 2675 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 2676 HReg dst = newVRegV(env); 2677 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); 2678 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 ); 2679 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst )); 2680 set_SSE_rounding_default( env ); 2681 return dst; 2682 } 2683 2684 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) { 2685 HReg dst = newVRegV(env); 2686 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2687 set_SSE_rounding_default( env ); 2688 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst )); 2689 return dst; 2690 } 2691 2692 if (e->tag == Iex_Unop 2693 && (e->Iex.Unop.op == Iop_NegF64 2694 || e->Iex.Unop.op == Iop_AbsF64)) { 2695 /* Sigh ... 
very rough code. Could do much better. */ 2696 /* Get the 128-bit literal 00---0 10---0 into a register 2697 and xor/nand it with the value to be negated. */ 2698 HReg r1 = newVRegI(env); 2699 HReg dst = newVRegV(env); 2700 HReg tmp = newVRegV(env); 2701 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 2702 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2703 addInstr(env, mk_vMOVsd_RR(src,tmp)); 2704 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 2705 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 )); 2706 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1))); 2707 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0)); 2708 2709 if (e->Iex.Unop.op == Iop_NegF64) 2710 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst)); 2711 else 2712 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst)); 2713 2714 add_to_rsp(env, 16); 2715 return dst; 2716 } 2717 2718 if (e->tag == Iex_Binop) { 2719 A87FpOp fpop = Afp_INVALID; 2720 switch (e->Iex.Binop.op) { 2721 case Iop_SqrtF64: fpop = Afp_SQRT; break; 2722 case Iop_SinF64: fpop = Afp_SIN; break; 2723 case Iop_CosF64: fpop = Afp_COS; break; 2724 case Iop_TanF64: fpop = Afp_TAN; break; 2725 case Iop_2xm1F64: fpop = Afp_2XM1; break; 2726 default: break; 2727 } 2728 if (fpop != Afp_INVALID) { 2729 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2730 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2); 2731 HReg dst = newVRegV(env); 2732 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1; 2733 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp)); 2734 addInstr(env, AMD64Instr_A87Free(nNeeded)); 2735 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); 2736 /* XXXROUNDINGFIXME */ 2737 /* set roundingmode here */ 2738 addInstr(env, AMD64Instr_A87FpOp(fpop)); 2739 if (e->Iex.Binop.op==Iop_TanF64) { 2740 /* get rid of the extra 1.0 that fptan pushes */ 2741 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2742 } 2743 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); 2744 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2745 return dst; 2746 } 2747 } 2748 2749 if (e->tag == Iex_Unop) { 2750 switch (e->Iex.Unop.op) { 2751 //.. case Iop_I32toF64: { 2752 //.. HReg dst = newVRegF(env); 2753 //.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 2754 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 2755 //.. set_FPU_rounding_default(env); 2756 //.. addInstr(env, X86Instr_FpLdStI( 2757 //.. True/*load*/, 4, dst, 2758 //.. X86AMode_IR(0, hregX86_ESP()))); 2759 //.. add_to_esp(env, 4); 2760 //.. return dst; 2761 //.. } 2762 case Iop_ReinterpI64asF64: { 2763 /* Given an I64, produce an IEEE754 double with the same 2764 bit pattern. */ 2765 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP()); 2766 HReg dst = newVRegV(env); 2767 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg); 2768 /* paranoia */ 2769 set_SSE_rounding_default(env); 2770 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp)); 2771 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); 2772 return dst; 2773 } 2774 case Iop_F32toF64: { 2775 HReg f32; 2776 HReg f64 = newVRegV(env); 2777 /* this shouldn't be necessary, but be paranoid ... 
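(the widening itself is exact -- every 32-bit IEEE value is representable as a 64-bit one -- so no rounding can actually occur; resetting the mode is purely defensive)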
*/ 2778 set_SSE_rounding_default(env); 2779 f32 = iselFltExpr(env, e->Iex.Unop.arg); 2780 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64)); 2781 return f64; 2782 } 2783 default: 2784 break; 2785 } 2786 } 2787 2788 /* --------- MULTIPLEX --------- */ 2789 if (e->tag == Iex_Mux0X) { 2790 HReg r8, rX, r0, dst; 2791 vassert(ty == Ity_F64); 2792 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8); 2793 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 2794 rX = iselDblExpr(env, e->Iex.Mux0X.exprX); 2795 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); 2796 dst = newVRegV(env); 2797 addInstr(env, mk_vMOVsd_RR(rX,dst)); 2798 addInstr(env, AMD64Instr_Test64(0xFF, r8)); 2799 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst)); 2800 return dst; 2801 } 2802 2803 ppIRExpr(e); 2804 vpanic("iselDblExpr_wrk"); 2805 } 2806 2807 2808 /*---------------------------------------------------------*/ 2809 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 2810 /*---------------------------------------------------------*/ 2811 2812 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 2813 { 2814 HReg r = iselVecExpr_wrk( env, e ); 2815 # if 0 2816 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2817 # endif 2818 vassert(hregClass(r) == HRcVec128); 2819 vassert(hregIsVirtual(r)); 2820 return r; 2821 } 2822 2823 2824 /* DO NOT CALL THIS DIRECTLY */ 2825 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 2826 { 2827 HWord fn = 0; /* address of helper fn, if required */ 2828 Bool arg1isEReg = False; 2829 AMD64SseOp op = Asse_INVALID; 2830 IRType ty = typeOfIRExpr(env->type_env,e); 2831 vassert(e); 2832 vassert(ty == Ity_V128); 2833 2834 if (e->tag == Iex_RdTmp) { 2835 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2836 } 2837 2838 if (e->tag == Iex_Get) { 2839 HReg dst = newVRegV(env); 2840 addInstr(env, AMD64Instr_SseLdSt( 2841 True/*load*/, 2842 16, 2843 dst, 2844 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP()) 2845 ) 2846 ); 2847 return dst; 2848 } 2849 2850 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2851 HReg dst = newVRegV(env); 2852 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2853 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am )); 2854 return dst; 2855 } 2856 2857 if (e->tag == Iex_Const) { 2858 HReg dst = newVRegV(env); 2859 vassert(e->Iex.Const.con->tag == Ico_V128); 2860 switch (e->Iex.Const.con->Ico.V128) { 2861 case 0x0000: 2862 dst = generate_zeroes_V128(env); 2863 break; 2864 case 0xFFFF: 2865 dst = generate_ones_V128(env); 2866 break; 2867 default: { 2868 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2869 /* do push_uimm64 twice, first time for the high-order half. */ 2870 push_uimm64(env, bitmask8_to_bytemask64( 2871 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF 2872 )); 2873 push_uimm64(env, bitmask8_to_bytemask64( 2874 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF 2875 )); 2876 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 )); 2877 add_to_rsp(env, 16); 2878 break; 2879 } 2880 } 2881 return dst; 2882 } 2883 2884 if (e->tag == Iex_Unop) { 2885 switch (e->Iex.Unop.op) { 2886 2887 case Iop_NotV128: { 2888 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2889 return do_sse_NotV128(env, arg); 2890 } 2891 2892 case Iop_CmpNEZ64x2: { 2893 /* We can use SSE2 instructions for this. */ 2894 /* Ideally, we want to do a 64Ix2 comparison against zero of 2895 the operand. Problem is no such insn exists. Solution 2896 therefore is to do a 32Ix4 comparison instead, and bitwise- 2897 negate (NOT) the result. 
Let a,b,c,d be 32-bit lanes, and 2898 let the not'd result of this initial comparison be a:b:c:d. 2899 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 2900 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 2901 giving the required result. 2902 2903 The required selection sequence is 2,3,0,1, which 2904 according to Intel's documentation means the pshufd 2905 literal value is 0xB1, that is, 2906 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 2907 */ 2908 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2909 HReg tmp = generate_zeroes_V128(env); 2910 HReg dst = newVRegV(env); 2911 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp)); 2912 tmp = do_sse_NotV128(env, tmp); 2913 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst)); 2914 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst)); 2915 return dst; 2916 } 2917 2918 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 2919 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector; 2920 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector; 2921 do_CmpNEZ_vector: 2922 { 2923 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2924 HReg tmp = newVRegV(env); 2925 HReg zero = generate_zeroes_V128(env); 2926 HReg dst; 2927 addInstr(env, mk_vMOVsd_RR(arg, tmp)); 2928 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp)); 2929 dst = do_sse_NotV128(env, tmp); 2930 return dst; 2931 } 2932 2933 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary; 2934 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary; 2935 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary; 2936 do_32Fx4_unary: 2937 { 2938 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2939 HReg dst = newVRegV(env); 2940 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst)); 2941 return dst; 2942 } 2943 2944 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary; 2945 do_64Fx2_unary: 2946 { 2947 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2948 HReg dst = newVRegV(env); 2949 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst)); 2950 return dst; 2951 } 2952 2953 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary; 2954 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary; 2955 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary; 2956 do_32F0x4_unary: 2957 { 2958 /* A bit subtle. We have to copy the arg to the result 2959 register first, because actually doing the SSE scalar insn 2960 leaves the upper 3/4 of the destination register 2961 unchanged. Whereas the required semantics of these 2962 primops is that the upper 3/4 is simply copied in from the 2963 argument. */ 2964 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2965 HReg dst = newVRegV(env); 2966 addInstr(env, mk_vMOVsd_RR(arg, dst)); 2967 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst)); 2968 return dst; 2969 } 2970 2971 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary; 2972 do_64F0x2_unary: 2973 { 2974 /* A bit subtle. We have to copy the arg to the result 2975 register first, because actually doing the SSE scalar insn 2976 leaves the upper half of the destination register 2977 unchanged. Whereas the required semantics of these 2978 primops is that the upper half is simply copied in from the 2979 argument. 
*/ 2980 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 2981 HReg dst = newVRegV(env); 2982 addInstr(env, mk_vMOVsd_RR(arg, dst)); 2983 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst)); 2984 return dst; 2985 } 2986 2987 case Iop_32UtoV128: { 2988 HReg dst = newVRegV(env); 2989 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP()); 2990 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg); 2991 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32)); 2992 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32)); 2993 return dst; 2994 } 2995 2996 case Iop_64UtoV128: { 2997 HReg dst = newVRegV(env); 2998 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 2999 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3000 addInstr(env, AMD64Instr_Push(rmi)); 3001 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0)); 3002 add_to_rsp(env, 8); 3003 return dst; 3004 } 3005 3006 case Iop_V256toV128_0: 3007 case Iop_V256toV128_1: { 3008 HReg vHi, vLo; 3009 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg); 3010 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo; 3011 } 3012 3013 default: 3014 break; 3015 } /* switch (e->Iex.Unop.op) */ 3016 } /* if (e->tag == Iex_Unop) */ 3017 3018 if (e->tag == Iex_Binop) { 3019 switch (e->Iex.Binop.op) { 3020 3021 /* FIXME: could we generate MOVQ here? */ 3022 case Iop_SetV128lo64: { 3023 HReg dst = newVRegV(env); 3024 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3025 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3026 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3027 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3028 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16)); 3029 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3030 return dst; 3031 } 3032 3033 /* FIXME: could we generate MOVD here? 
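Probably not on its own: a movd from an integer register zeroes the upper 96 bits of the destination xmm register, whereas SetV128lo32 must preserve them, so it would need to be paired with something like an SSE4.1 pinsrd or a shuffle. Hence the store/reload sequence below.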
*/ 3034 case Iop_SetV128lo32: { 3035 HReg dst = newVRegV(env); 3036 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3037 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3038 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP()); 3039 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16)); 3040 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16)); 3041 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16)); 3042 return dst; 3043 } 3044 3045 case Iop_64HLtoV128: { 3046 HReg rsp = hregAMD64_RSP(); 3047 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 3048 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 3049 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1); 3050 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2); 3051 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp)); 3052 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp)); 3053 HReg dst = newVRegV(env); 3054 /* One store-forwarding stall coming up, oh well :-( */ 3055 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp)); 3056 return dst; 3057 } 3058 3059 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4; 3060 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4; 3061 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4; 3062 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4; 3063 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4; 3064 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4; 3065 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4; 3066 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4; 3067 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4; 3068 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4; 3069 do_32Fx4: 3070 { 3071 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3072 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3073 HReg dst = newVRegV(env); 3074 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3075 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst)); 3076 return dst; 3077 } 3078 3079 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2; 3080 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2; 3081 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2; 3082 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2; 3083 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2; 3084 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2; 3085 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2; 3086 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2; 3087 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2; 3088 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2; 3089 do_64Fx2: 3090 { 3091 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3092 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3093 HReg dst = newVRegV(env); 3094 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3095 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst)); 3096 return dst; 3097 } 3098 3099 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4; 3100 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4; 3101 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4; 3102 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4; 3103 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4; 3104 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4; 3105 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4; 3106 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4; 3107 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4; 3108 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4; 3109 do_32F0x4: { 3110 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3111 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3112 HReg dst = newVRegV(env); 3113 addInstr(env, 
mk_vMOVsd_RR(argL, dst)); 3114 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst)); 3115 return dst; 3116 } 3117 3118 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2; 3119 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2; 3120 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2; 3121 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2; 3122 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2; 3123 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2; 3124 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2; 3125 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2; 3126 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2; 3127 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2; 3128 do_64F0x2: { 3129 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3130 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3131 HReg dst = newVRegV(env); 3132 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3133 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst)); 3134 return dst; 3135 } 3136 3137 case Iop_QNarrowBin32Sto16Sx8: 3138 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3139 case Iop_QNarrowBin16Sto8Sx16: 3140 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3141 case Iop_QNarrowBin16Sto8Ux16: 3142 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3143 3144 case Iop_InterleaveHI8x16: 3145 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3146 case Iop_InterleaveHI16x8: 3147 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3148 case Iop_InterleaveHI32x4: 3149 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3150 case Iop_InterleaveHI64x2: 3151 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3152 3153 case Iop_InterleaveLO8x16: 3154 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3155 case Iop_InterleaveLO16x8: 3156 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3157 case Iop_InterleaveLO32x4: 3158 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3159 case Iop_InterleaveLO64x2: 3160 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3161 3162 case Iop_AndV128: op = Asse_AND; goto do_SseReRg; 3163 case Iop_OrV128: op = Asse_OR; goto do_SseReRg; 3164 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg; 3165 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg; 3166 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg; 3167 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg; 3168 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg; 3169 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg; 3170 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg; 3171 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg; 3172 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg; 3173 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg; 3174 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg; 3175 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg; 3176 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg; 3177 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg; 3178 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg; 3179 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg; 3180 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg; 3181 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg; 3182 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg; 3183 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg; 3184 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg; 3185 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg; 3186 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg; 3187 case Iop_Mul16x8: op = Asse_MUL16; goto 
do_SseReRg; 3188 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg; 3189 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg; 3190 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg; 3191 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg; 3192 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg; 3193 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg; 3194 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg; 3195 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg; 3196 do_SseReRg: { 3197 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3198 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3199 HReg dst = newVRegV(env); 3200 if (arg1isEReg) { 3201 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3202 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst)); 3203 } else { 3204 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3205 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst)); 3206 } 3207 return dst; 3208 } 3209 3210 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift; 3211 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift; 3212 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift; 3213 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift; 3214 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift; 3215 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift; 3216 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift; 3217 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift; 3218 do_SseShift: { 3219 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3220 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3221 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP()); 3222 HReg ereg = newVRegV(env); 3223 HReg dst = newVRegV(env); 3224 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0))); 3225 addInstr(env, AMD64Instr_Push(rmi)); 3226 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0)); 3227 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3228 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst)); 3229 add_to_rsp(env, 16); 3230 return dst; 3231 } 3232 3233 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4; 3234 goto do_SseAssistedBinary; 3235 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4; 3236 goto do_SseAssistedBinary; 3237 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4; 3238 goto do_SseAssistedBinary; 3239 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4; 3240 goto do_SseAssistedBinary; 3241 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4; 3242 goto do_SseAssistedBinary; 3243 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8; 3244 goto do_SseAssistedBinary; 3245 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8; 3246 goto do_SseAssistedBinary; 3247 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16; 3248 goto do_SseAssistedBinary; 3249 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16; 3250 goto do_SseAssistedBinary; 3251 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2; 3252 goto do_SseAssistedBinary; 3253 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2; 3254 goto do_SseAssistedBinary; 3255 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4; 3256 goto do_SseAssistedBinary; 3257 case Iop_QNarrowBin32Sto16Ux8: 3258 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8; 3259 goto do_SseAssistedBinary; 3260 case Iop_NarrowBin16to8x16: 3261 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3262 goto do_SseAssistedBinary; 3263 case Iop_NarrowBin32to16x8: 3264 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3265 goto do_SseAssistedBinary; 3266 do_SseAssistedBinary: { 3267 /* RRRufff! RRRufff code is what we're generating here. Oh 3268 well. 
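The scheme, visible below: carve 112 bytes off the stack, derive a 16-aligned pointer (argp) into that area, and pass the helper three pointers -- the result slot at 0(argp) in %rdi and the two vector arguments at 16(argp) and 32(argp) in %rsi and %rdx. The h_generic_calc_* helper is expected to write its result through the first (%rdi) argument, and the result is reloaded from 0(argp) afterwards; argp itself is kept alive across the call by the register allocator.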
*/ 3269 vassert(fn != 0); 3270 HReg dst = newVRegV(env); 3271 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3272 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3273 HReg argp = newVRegI(env); 3274 /* subq $112, %rsp -- make a space*/ 3275 sub_from_rsp(env, 112); 3276 /* leaq 48(%rsp), %r_argp -- point into it */ 3277 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3278 argp)); 3279 /* andq $-16, %r_argp -- 16-align the pointer */ 3280 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3281 AMD64RMI_Imm( ~(UInt)15 ), 3282 argp)); 3283 /* Prepare 3 arg regs: 3284 leaq 0(%r_argp), %rdi 3285 leaq 16(%r_argp), %rsi 3286 leaq 32(%r_argp), %rdx 3287 */ 3288 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3289 hregAMD64_RDI())); 3290 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3291 hregAMD64_RSI())); 3292 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp), 3293 hregAMD64_RDX())); 3294 /* Store the two args, at (%rsi) and (%rdx): 3295 movupd %argL, 0(%rsi) 3296 movupd %argR, 0(%rdx) 3297 */ 3298 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3299 AMD64AMode_IR(0, hregAMD64_RSI()))); 3300 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR, 3301 AMD64AMode_IR(0, hregAMD64_RDX()))); 3302 /* call the helper */ 3303 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 )); 3304 /* fetch the result from memory, using %r_argp, which the 3305 register allocator will keep alive across the call. */ 3306 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3307 AMD64AMode_IR(0, argp))); 3308 /* and finally, clear the space */ 3309 add_to_rsp(env, 112); 3310 return dst; 3311 } 3312 3313 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2; 3314 goto do_SseAssistedVectorAndScalar; 3315 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16; 3316 goto do_SseAssistedVectorAndScalar; 3317 do_SseAssistedVectorAndScalar: { 3318 /* RRRufff! RRRufff code is what we're generating here. Oh 3319 well. */ 3320 vassert(fn != 0); 3321 HReg dst = newVRegV(env); 3322 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3323 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); 3324 HReg argp = newVRegI(env); 3325 /* subq $112, %rsp -- make a space*/ 3326 sub_from_rsp(env, 112); 3327 /* leaq 48(%rsp), %r_argp -- point into it */ 3328 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()), 3329 argp)); 3330 /* andq $-16, %r_argp -- 16-align the pointer */ 3331 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, 3332 AMD64RMI_Imm( ~(UInt)15 ), 3333 argp)); 3334 /* Prepare 2 vector arg regs: 3335 leaq 0(%r_argp), %rdi 3336 leaq 16(%r_argp), %rsi 3337 */ 3338 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp), 3339 hregAMD64_RDI())); 3340 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp), 3341 hregAMD64_RSI())); 3342 /* Store the vector arg, at (%rsi): 3343 movupd %argL, 0(%rsi) 3344 */ 3345 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL, 3346 AMD64AMode_IR(0, hregAMD64_RSI()))); 3347 /* And get the scalar value into rdx */ 3348 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX())); 3349 3350 /* call the helper */ 3351 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 )); 3352 /* fetch the result from memory, using %r_argp, which the 3353 register allocator will keep alive across the call. 
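This is the same convention as the SseAssistedBinary case above, except that the second operand is a scalar and is passed by value in %rdx rather than through the scratch area.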
*/ 3354 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst, 3355 AMD64AMode_IR(0, argp))); 3356 /* and finally, clear the space */ 3357 add_to_rsp(env, 112); 3358 return dst; 3359 } 3360 3361 default: 3362 break; 3363 } /* switch (e->Iex.Binop.op) */ 3364 } /* if (e->tag == Iex_Binop) */ 3365 3366 if (e->tag == Iex_Mux0X) { 3367 HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond); 3368 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); 3369 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); 3370 HReg dst = newVRegV(env); 3371 addInstr(env, mk_vMOVsd_RR(rX,dst)); 3372 addInstr(env, AMD64Instr_Test64(0xFF, r8)); 3373 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst)); 3374 return dst; 3375 } 3376 3377 //vec_fail: 3378 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n", 3379 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 3380 ppIRExpr(e); 3381 vpanic("iselVecExpr_wrk"); 3382 } 3383 3384 3385 /*---------------------------------------------------------*/ 3386 /*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/ 3387 /*---------------------------------------------------------*/ 3388 3389 static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3390 ISelEnv* env, IRExpr* e ) 3391 { 3392 iselDVecExpr_wrk( rHi, rLo, env, e ); 3393 # if 0 3394 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3395 # endif 3396 vassert(hregClass(*rHi) == HRcVec128); 3397 vassert(hregClass(*rLo) == HRcVec128); 3398 vassert(hregIsVirtual(*rHi)); 3399 vassert(hregIsVirtual(*rLo)); 3400 } 3401 3402 3403 /* DO NOT CALL THIS DIRECTLY */ 3404 static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, 3405 ISelEnv* env, IRExpr* e ) 3406 { 3407 vassert(e); 3408 IRType ty = typeOfIRExpr(env->type_env,e); 3409 vassert(ty == Ity_V256); 3410 3411 AMD64SseOp op = Asse_INVALID; 3412 3413 /* read 256-bit IRTemp */ 3414 if (e->tag == Iex_RdTmp) { 3415 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp); 3416 return; 3417 } 3418 3419 if (e->tag == Iex_Get) { 3420 HReg vHi = newVRegV(env); 3421 HReg vLo = newVRegV(env); 3422 HReg rbp = hregAMD64_RBP(); 3423 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp); 3424 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp); 3425 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3426 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3427 *rHi = vHi; 3428 *rLo = vLo; 3429 return; 3430 } 3431 3432 if (e->tag == Iex_Load) { 3433 HReg vHi = newVRegV(env); 3434 HReg vLo = newVRegV(env); 3435 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr); 3436 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 3437 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 3438 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0)); 3439 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16)); 3440 *rHi = vHi; 3441 *rLo = vLo; 3442 return; 3443 } 3444 3445 if (e->tag == Iex_Const) { 3446 vassert(e->Iex.Const.con->tag == Ico_V256); 3447 switch (e->Iex.Const.con->Ico.V256) { 3448 case 0x00000000: { 3449 HReg vHi = generate_zeroes_V128(env); 3450 HReg vLo = newVRegV(env); 3451 addInstr(env, mk_vMOVsd_RR(vHi, vLo)); 3452 *rHi = vHi; 3453 *rLo = vLo; 3454 return; 3455 } 3456 default: 3457 break; /* give up. Until such time as is necessary. 
*/ 3458 } 3459 } 3460 3461 if (e->tag == Iex_Unop) { 3462 switch (e->Iex.Unop.op) { 3463 3464 case Iop_NotV256: { 3465 HReg argHi, argLo; 3466 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3467 *rHi = do_sse_NotV128(env, argHi); 3468 *rLo = do_sse_NotV128(env, argLo); 3469 return; 3470 } 3471 3472 case Iop_Recip32Fx8: op = Asse_RCPF; goto do_32Fx8_unary; 3473 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary; 3474 case Iop_RSqrt32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary; 3475 do_32Fx8_unary: 3476 { 3477 HReg argHi, argLo; 3478 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3479 HReg dstHi = newVRegV(env); 3480 HReg dstLo = newVRegV(env); 3481 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi)); 3482 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo)); 3483 *rHi = dstHi; 3484 *rLo = dstLo; 3485 return; 3486 } 3487 3488 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary; 3489 do_64Fx4_unary: 3490 { 3491 HReg argHi, argLo; 3492 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3493 HReg dstHi = newVRegV(env); 3494 HReg dstLo = newVRegV(env); 3495 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi)); 3496 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo)); 3497 *rHi = dstHi; 3498 *rLo = dstLo; 3499 return; 3500 } 3501 3502 case Iop_CmpNEZ64x4: { 3503 /* We can use SSE2 instructions for this. */ 3504 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide 3505 (obviously). See comment on Iop_CmpNEZ64x2 for 3506 explanation of what's going on here. */ 3507 HReg argHi, argLo; 3508 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3509 HReg tmpHi = generate_zeroes_V128(env); 3510 HReg tmpLo = newVRegV(env); 3511 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo)); 3512 HReg dstHi = newVRegV(env); 3513 HReg dstLo = newVRegV(env); 3514 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi)); 3515 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo)); 3516 tmpHi = do_sse_NotV128(env, tmpHi); 3517 tmpLo = do_sse_NotV128(env, tmpLo); 3518 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi)); 3519 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo)); 3520 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi)); 3521 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo)); 3522 *rHi = dstHi; 3523 *rLo = dstLo; 3524 return; 3525 } 3526 3527 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector; 3528 do_CmpNEZ_vector: 3529 { 3530 HReg argHi, argLo; 3531 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg); 3532 HReg tmpHi = newVRegV(env); 3533 HReg tmpLo = newVRegV(env); 3534 HReg zero = generate_zeroes_V128(env); 3535 HReg dstHi, dstLo; 3536 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi)); 3537 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo)); 3538 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi)); 3539 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo)); 3540 dstHi = do_sse_NotV128(env, tmpHi); 3541 dstLo = do_sse_NotV128(env, tmpLo); 3542 *rHi = dstHi; 3543 *rLo = dstLo; 3544 return; 3545 } 3546 3547 default: 3548 break; 3549 } /* switch (e->Iex.Unop.op) */ 3550 } /* if (e->tag == Iex_Unop) */ 3551 3552 if (e->tag == Iex_Binop) { 3553 switch (e->Iex.Binop.op) { 3554 3555 case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4; 3556 case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4; 3557 case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4; 3558 case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4; 3559 case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4; 3560 case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4; 3561 do_64Fx4: 3562 { 3563 HReg argLhi, argLlo, argRhi, argRlo; 3564 
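/* Carry out the 256-bit operation as two independent 128-bit SSE ops on the high and low halves. The left operand is copied into the destination registers first because the underlying SSE instructions are two-address: dst = dst op argR. */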
iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3565 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3566 HReg dstHi = newVRegV(env); 3567 HReg dstLo = newVRegV(env); 3568 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3569 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3570 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi)); 3571 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo)); 3572 *rHi = dstHi; 3573 *rLo = dstLo; 3574 return; 3575 } 3576 3577 case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8; 3578 case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8; 3579 case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8; 3580 case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8; 3581 case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8; 3582 case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8; 3583 do_32Fx8: 3584 { 3585 HReg argLhi, argLlo, argRhi, argRlo; 3586 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3587 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3588 HReg dstHi = newVRegV(env); 3589 HReg dstLo = newVRegV(env); 3590 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3591 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3592 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi)); 3593 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo)); 3594 *rHi = dstHi; 3595 *rLo = dstLo; 3596 return; 3597 } 3598 3599 case Iop_AndV256: op = Asse_AND; goto do_SseReRg; 3600 case Iop_OrV256: op = Asse_OR; goto do_SseReRg; 3601 case Iop_XorV256: op = Asse_XOR; goto do_SseReRg; 3602 do_SseReRg: 3603 { 3604 HReg argLhi, argLlo, argRhi, argRlo; 3605 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1); 3606 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2); 3607 HReg dstHi = newVRegV(env); 3608 HReg dstLo = newVRegV(env); 3609 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi)); 3610 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo)); 3611 addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi)); 3612 addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo)); 3613 *rHi = dstHi; 3614 *rLo = dstLo; 3615 return; 3616 } 3617 3618 case Iop_V128HLtoV256: { 3619 *rHi = iselVecExpr(env, e->Iex.Binop.arg1); 3620 *rLo = iselVecExpr(env, e->Iex.Binop.arg2); 3621 return; 3622 } 3623 3624 default: 3625 break; 3626 } /* switch (e->Iex.Binop.op) */ 3627 } /* if (e->tag == Iex_Binop) */ 3628 3629 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) { 3630 HReg rsp = hregAMD64_RSP(); 3631 HReg vHi = newVRegV(env); 3632 HReg vLo = newVRegV(env); 3633 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp); 3634 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp); 3635 /* arg1 is the most significant (Q3), arg4 the least (Q0) */ 3636 /* Get all the args into regs, before messing with the stack. */ 3637 AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1); 3638 AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2); 3639 AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3); 3640 AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4); 3641 /* less significant lane (Q2) at the lower address (-16(rsp)) */ 3642 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp)); 3643 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp)); 3644 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp)); 3645 /* and then the lower half .. 
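that is, (Q1:Q0) is built the same way: Q1 is stored at -8(%rsp), Q0 at -16(%rsp), and the 16-byte load from -16(%rsp) picks up Q0 in the low lane and Q1 in the high lane, mirroring the (Q3:Q2) half above.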
*/ 3646 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp)); 3647 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp)); 3648 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp)); 3649 *rHi = vHi; 3650 *rLo = vLo; 3651 return; 3652 } 3653 3654 //avx_fail: 3655 vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n", 3656 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); 3657 ppIRExpr(e); 3658 vpanic("iselDVecExpr_wrk"); 3659 } 3660 3661 3662 /*---------------------------------------------------------*/ 3663 /*--- ISEL: Statements ---*/ 3664 /*---------------------------------------------------------*/ 3665 3666 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3667 { 3668 if (vex_traceflags & VEX_TRACE_VCODE) { 3669 vex_printf("\n-- "); 3670 ppIRStmt(stmt); 3671 vex_printf("\n"); 3672 } 3673 3674 switch (stmt->tag) { 3675 3676 /* --------- STORE --------- */ 3677 case Ist_Store: { 3678 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3679 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3680 IREndness end = stmt->Ist.Store.end; 3681 3682 if (tya != Ity_I64 || end != Iend_LE) 3683 goto stmt_fail; 3684 3685 if (tyd == Ity_I64) { 3686 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3687 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 3688 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am)); 3689 return; 3690 } 3691 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) { 3692 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3693 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 3694 addInstr(env, AMD64Instr_Store( 3695 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)), 3696 r,am)); 3697 return; 3698 } 3699 if (tyd == Ity_F64) { 3700 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3701 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 3702 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am)); 3703 return; 3704 } 3705 if (tyd == Ity_F32) { 3706 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3707 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 3708 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am)); 3709 return; 3710 } 3711 if (tyd == Ity_V128) { 3712 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3713 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 3714 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am)); 3715 return; 3716 } 3717 if (tyd == Ity_V256) { 3718 HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 3719 AMD64AMode* am0 = AMD64AMode_IR(0, rA); 3720 AMD64AMode* am16 = AMD64AMode_IR(16, rA); 3721 HReg vHi, vLo; 3722 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data); 3723 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 3724 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 3725 return; 3726 } 3727 break; 3728 } 3729 3730 /* --------- PUT --------- */ 3731 case Ist_Put: { 3732 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3733 if (ty == Ity_I64) { 3734 /* We're going to write to memory, so compute the RHS into an 3735 AMD64RI. */ 3736 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 3737 addInstr(env, 3738 AMD64Instr_Alu64M( 3739 Aalu_MOV, 3740 ri, 3741 AMD64AMode_IR(stmt->Ist.Put.offset, 3742 hregAMD64_RBP()) 3743 )); 3744 return; 3745 } 3746 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { 3747 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 3748 addInstr(env, AMD64Instr_Store( 3749 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 
2 : 4)), 3750 r, 3751 AMD64AMode_IR(stmt->Ist.Put.offset, 3752 hregAMD64_RBP()))); 3753 return; 3754 } 3755 if (ty == Ity_F32) { 3756 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 3757 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP()); 3758 set_SSE_rounding_default(env); /* paranoia */ 3759 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am )); 3760 return; 3761 } 3762 if (ty == Ity_F64) { 3763 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 3764 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset, 3765 hregAMD64_RBP() ); 3766 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am )); 3767 return; 3768 } 3769 if (ty == Ity_V128) { 3770 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 3771 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, 3772 hregAMD64_RBP()); 3773 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am)); 3774 return; 3775 } 3776 if (ty == Ity_V256) { 3777 HReg vHi, vLo; 3778 iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data); 3779 HReg rbp = hregAMD64_RBP(); 3780 AMD64AMode* am0 = AMD64AMode_IR(stmt->Ist.Put.offset + 0, rbp); 3781 AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp); 3782 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0)); 3783 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16)); 3784 return; 3785 } 3786 break; 3787 } 3788 3789 /* --------- Indexed PUT --------- */ 3790 case Ist_PutI: { 3791 IRPutI *puti = stmt->Ist.PutI.details; 3792 3793 AMD64AMode* am 3794 = genGuestArrayOffset( 3795 env, puti->descr, 3796 puti->ix, puti->bias ); 3797 3798 IRType ty = typeOfIRExpr(env->type_env, puti->data); 3799 if (ty == Ity_F64) { 3800 HReg val = iselDblExpr(env, puti->data); 3801 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am )); 3802 return; 3803 } 3804 if (ty == Ity_I8) { 3805 HReg r = iselIntExpr_R(env, puti->data); 3806 addInstr(env, AMD64Instr_Store( 1, r, am )); 3807 return; 3808 } 3809 if (ty == Ity_I64) { 3810 AMD64RI* ri = iselIntExpr_RI(env, puti->data); 3811 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am )); 3812 return; 3813 } 3814 break; 3815 } 3816 3817 /* --------- TMP --------- */ 3818 case Ist_WrTmp: { 3819 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3820 IRType ty = typeOfIRTemp(env->type_env, tmp); 3821 3822 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..), 3823 compute it into an AMode and then use LEA. This usually 3824 produces fewer instructions, often because (for memcheck 3825 created IR) we get t = address-expression, (t is later used 3826 twice) and so doing this naturally turns address-expression 3827 back into an AMD64 amode. */ 3828 if (ty == Ity_I64 3829 && stmt->Ist.WrTmp.data->tag == Iex_Binop 3830 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) { 3831 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 3832 HReg dst = lookupIRTemp(env, tmp); 3833 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) { 3834 /* Hmm, iselIntExpr_AMode wimped out and just computed the 3835 value into a register. Just emit a normal reg-reg move 3836 so reg-alloc can coalesce it away in the usual way. 
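In other words, for t = Add64(..,..) we would normally emit a single leaq of the computed amode into t; when the amode collapses to a bare register (zero displacement, no index) that leaq would just be a copy, so a plain movq is emitted instead and the coalescer can remove it.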
*/ 3837 HReg src = am->Aam.IR.reg; 3838 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst)); 3839 } else { 3840 addInstr(env, AMD64Instr_Lea64(am,dst)); 3841 } 3842 return; 3843 } 3844 3845 if (ty == Ity_I64 || ty == Ity_I32 3846 || ty == Ity_I16 || ty == Ity_I8) { 3847 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 3848 HReg dst = lookupIRTemp(env, tmp); 3849 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst)); 3850 return; 3851 } 3852 if (ty == Ity_I128) { 3853 HReg rHi, rLo, dstHi, dstLo; 3854 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 3855 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 3856 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 3857 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 3858 return; 3859 } 3860 if (ty == Ity_I1) { 3861 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 3862 HReg dst = lookupIRTemp(env, tmp); 3863 addInstr(env, AMD64Instr_Set64(cond, dst)); 3864 return; 3865 } 3866 if (ty == Ity_F64) { 3867 HReg dst = lookupIRTemp(env, tmp); 3868 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3869 addInstr(env, mk_vMOVsd_RR(src, dst)); 3870 return; 3871 } 3872 if (ty == Ity_F32) { 3873 HReg dst = lookupIRTemp(env, tmp); 3874 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3875 addInstr(env, mk_vMOVsd_RR(src, dst)); 3876 return; 3877 } 3878 if (ty == Ity_V128) { 3879 HReg dst = lookupIRTemp(env, tmp); 3880 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 3881 addInstr(env, mk_vMOVsd_RR(src, dst)); 3882 return; 3883 } 3884 if (ty == Ity_V256) { 3885 HReg rHi, rLo, dstHi, dstLo; 3886 iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 3887 lookupIRTempPair( &dstHi, &dstLo, env, tmp); 3888 addInstr(env, mk_vMOVsd_RR(rHi,dstHi) ); 3889 addInstr(env, mk_vMOVsd_RR(rLo,dstLo) ); 3890 return; 3891 } 3892 break; 3893 } 3894 3895 /* --------- Call to DIRTY helper --------- */ 3896 case Ist_Dirty: { 3897 IRType retty; 3898 IRDirty* d = stmt->Ist.Dirty.details; 3899 Bool passBBP = False; 3900 3901 if (d->nFxState == 0) 3902 vassert(!d->needsBBP); 3903 3904 passBBP = toBool(d->nFxState > 0 && d->needsBBP); 3905 3906 /* Marshal args, do the call, clear stack. */ 3907 doHelperCall( env, passBBP, d->guard, d->cee, d->args ); 3908 3909 /* Now figure out what to do with the returned value, if any. */ 3910 if (d->tmp == IRTemp_INVALID) 3911 /* No return value. Nothing to do. */ 3912 return; 3913 3914 retty = typeOfIRTemp(env->type_env, d->tmp); 3915 if (retty == Ity_I64 || retty == Ity_I32 3916 || retty == Ity_I16 || retty == Ity_I8) { 3917 /* The returned value is in %rax. Park it in the register 3918 associated with tmp. 
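Note that I8/I16/I32 temps are carried in 64-bit vregs anyway (see the vregmap setup in iselSB_AMD64 below), so a single 64-bit move covers all four return types; only the low 8/16/32 bits should be relied on for the narrower ones.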
*/ 3919 HReg dst = lookupIRTemp(env, d->tmp); 3920 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) ); 3921 return; 3922 } 3923 break; 3924 } 3925 3926 /* --------- MEM FENCE --------- */ 3927 case Ist_MBE: 3928 switch (stmt->Ist.MBE.event) { 3929 case Imbe_Fence: 3930 addInstr(env, AMD64Instr_MFence()); 3931 return; 3932 default: 3933 break; 3934 } 3935 break; 3936 3937 /* --------- ACAS --------- */ 3938 case Ist_CAS: 3939 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 3940 /* "normal" singleton CAS */ 3941 UChar sz; 3942 IRCAS* cas = stmt->Ist.CAS.details; 3943 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3944 /* get: cas->expd into %rax, and cas->data into %rbx */ 3945 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 3946 HReg rData = iselIntExpr_R(env, cas->dataLo); 3947 HReg rExpd = iselIntExpr_R(env, cas->expdLo); 3948 HReg rOld = lookupIRTemp(env, cas->oldLo); 3949 vassert(cas->expdHi == NULL); 3950 vassert(cas->dataHi == NULL); 3951 addInstr(env, mk_iMOVsd_RR(rExpd, rOld)); 3952 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX())); 3953 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX())); 3954 switch (ty) { 3955 case Ity_I64: sz = 8; break; 3956 case Ity_I32: sz = 4; break; 3957 case Ity_I16: sz = 2; break; 3958 case Ity_I8: sz = 1; break; 3959 default: goto unhandled_cas; 3960 } 3961 addInstr(env, AMD64Instr_ACAS(am, sz)); 3962 addInstr(env, AMD64Instr_CMov64( 3963 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld)); 3964 return; 3965 } else { 3966 /* double CAS */ 3967 UChar sz; 3968 IRCAS* cas = stmt->Ist.CAS.details; 3969 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3970 /* only 32-bit and 64-bit allowed in this case */ 3971 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */ 3972 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */ 3973 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr); 3974 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 3975 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 3976 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 3977 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 3978 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 3979 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 3980 switch (ty) { 3981 case Ity_I64: 3982 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16)) 3983 goto unhandled_cas; /* we'd have to generate 3984 cmpxchg16b, but the host 3985 doesn't support that */ 3986 sz = 8; 3987 break; 3988 case Ity_I32: 3989 sz = 4; 3990 break; 3991 default: 3992 goto unhandled_cas; 3993 } 3994 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 3995 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 3996 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX())); 3997 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX())); 3998 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX())); 3999 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX())); 4000 addInstr(env, AMD64Instr_DACAS(am, sz)); 4001 addInstr(env, 4002 AMD64Instr_CMov64( 4003 Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi)); 4004 addInstr(env, 4005 AMD64Instr_CMov64( 4006 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo)); 4007 return; 4008 } 4009 unhandled_cas: 4010 break; 4011 4012 /* --------- INSTR MARK --------- */ 4013 /* Doesn't generate any executable code ... */ 4014 case Ist_IMark: 4015 return; 4016 4017 /* --------- ABI HINT --------- */ 4018 /* These have no meaning (denotation in the IR) and so we ignore 4019 them ... if any actually made it this far. 
*/ 4020 case Ist_AbiHint: 4021 return; 4022 4023 /* --------- NO-OP --------- */ 4024 case Ist_NoOp: 4025 return; 4026 4027 /* --------- EXIT --------- */ 4028 case Ist_Exit: { 4029 if (stmt->Ist.Exit.dst->tag != Ico_U64) 4030 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value"); 4031 4032 AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 4033 AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP, 4034 hregAMD64_RBP()); 4035 4036 /* Case: boring transfer to known address */ 4037 if (stmt->Ist.Exit.jk == Ijk_Boring) { 4038 if (env->chainingAllowed) { 4039 /* .. almost always true .. */ 4040 /* Skip the event check at the dst if this is a forwards 4041 edge. */ 4042 Bool toFastEP 4043 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; 4044 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 4045 addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, 4046 amRIP, cc, toFastEP)); 4047 } else { 4048 /* .. very occasionally .. */ 4049 /* We can't use chaining, so ask for an assisted transfer, 4050 as that's the only alternative that is allowable. */ 4051 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4052 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring)); 4053 } 4054 return; 4055 } 4056 4057 /* Case: assisted transfer to arbitrary address */ 4058 switch (stmt->Ist.Exit.jk) { 4059 /* Keep this list in sync with that in iselNext below */ 4060 case Ijk_ClientReq: 4061 case Ijk_EmWarn: 4062 case Ijk_NoDecode: 4063 case Ijk_NoRedir: 4064 case Ijk_SigSEGV: 4065 case Ijk_SigTRAP: 4066 case Ijk_Sys_syscall: 4067 case Ijk_TInval: 4068 case Ijk_Yield: 4069 { 4070 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4071 addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk)); 4072 return; 4073 } 4074 default: 4075 break; 4076 } 4077 4078 /* Do we ever expect to see any other kind? */ 4079 goto stmt_fail; 4080 } 4081 4082 default: break; 4083 } 4084 stmt_fail: 4085 ppIRStmt(stmt); 4086 vpanic("iselStmt(amd64)"); 4087 } 4088 4089 4090 /*---------------------------------------------------------*/ 4091 /*--- ISEL: Basic block terminators (Nexts) ---*/ 4092 /*---------------------------------------------------------*/ 4093 4094 static void iselNext ( ISelEnv* env, 4095 IRExpr* next, IRJumpKind jk, Int offsIP ) 4096 { 4097 if (vex_traceflags & VEX_TRACE_VCODE) { 4098 vex_printf( "\n-- PUT(%d) = ", offsIP); 4099 ppIRExpr( next ); 4100 vex_printf( "; exit-"); 4101 ppIRJumpKind(jk); 4102 vex_printf( "\n"); 4103 } 4104 4105 /* Case: boring transfer to known address */ 4106 if (next->tag == Iex_Const) { 4107 IRConst* cdst = next->Iex.Const.con; 4108 vassert(cdst->tag == Ico_U64); 4109 if (jk == Ijk_Boring || jk == Ijk_Call) { 4110 /* Boring transfer to known address */ 4111 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4112 if (env->chainingAllowed) { 4113 /* .. almost always true .. */ 4114 /* Skip the event check at the dst if this is a forwards 4115 edge. */ 4116 Bool toFastEP 4117 = ((Addr64)cdst->Ico.U64) > env->max_ga; 4118 if (0) vex_printf("%s", toFastEP ? "X" : "."); 4119 addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64, 4120 amRIP, Acc_ALWAYS, 4121 toFastEP)); 4122 } else { 4123 /* .. very occasionally .. */ 4124 /* We can't use chaining, so ask for an indirect transfer, 4125 as that's the cheapest alternative that is 4126 allowable. 
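Concretely this is the XAssisted form below with Ijk_Boring: the target address is computed into a register and control is handed back to the run-time dispatcher rather than being chained directly to the destination.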
*/ 4127 HReg r = iselIntExpr_R(env, next); 4128 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4129 Ijk_Boring)); 4130 } 4131 return; 4132 } 4133 } 4134 4135 /* Case: call/return (==boring) transfer to any address */ 4136 switch (jk) { 4137 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 4138 HReg r = iselIntExpr_R(env, next); 4139 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4140 if (env->chainingAllowed) { 4141 addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS)); 4142 } else { 4143 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, 4144 Ijk_Boring)); 4145 } 4146 return; 4147 } 4148 default: 4149 break; 4150 } 4151 4152 /* Case: assisted transfer to arbitrary address */ 4153 switch (jk) { 4154 /* Keep this list in sync with that for Ist_Exit above */ 4155 case Ijk_ClientReq: 4156 case Ijk_EmWarn: 4157 case Ijk_NoDecode: 4158 case Ijk_NoRedir: 4159 case Ijk_SigSEGV: 4160 case Ijk_SigTRAP: 4161 case Ijk_Sys_syscall: 4162 case Ijk_TInval: 4163 case Ijk_Yield: { 4164 HReg r = iselIntExpr_R(env, next); 4165 AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); 4166 addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk)); 4167 return; 4168 } 4169 default: 4170 break; 4171 } 4172 4173 vex_printf( "\n-- PUT(%d) = ", offsIP); 4174 ppIRExpr( next ); 4175 vex_printf( "; exit-"); 4176 ppIRJumpKind(jk); 4177 vex_printf( "\n"); 4178 vassert(0); // are we expecting any other kind? 4179 } 4180 4181 4182 /*---------------------------------------------------------*/ 4183 /*--- Insn selector top-level ---*/ 4184 /*---------------------------------------------------------*/ 4185 4186 /* Translate an entire SB to amd64 code. */ 4187 4188 HInstrArray* iselSB_AMD64 ( IRSB* bb, 4189 VexArch arch_host, 4190 VexArchInfo* archinfo_host, 4191 VexAbiInfo* vbi/*UNUSED*/, 4192 Int offs_Host_EvC_Counter, 4193 Int offs_Host_EvC_FailAddr, 4194 Bool chainingAllowed, 4195 Bool addProfInc, 4196 Addr64 max_ga ) 4197 { 4198 Int i, j; 4199 HReg hreg, hregHI; 4200 ISelEnv* env; 4201 UInt hwcaps_host = archinfo_host->hwcaps; 4202 AMD64AMode *amCounter, *amFailAddr; 4203 4204 /* sanity ... */ 4205 vassert(arch_host == VexArchAMD64); 4206 vassert(0 == (hwcaps_host 4207 & ~(VEX_HWCAPS_AMD64_SSE3 4208 | VEX_HWCAPS_AMD64_CX16 4209 | VEX_HWCAPS_AMD64_LZCNT 4210 | VEX_HWCAPS_AMD64_AVX))); 4211 4212 /* Make up an initial environment to use. */ 4213 env = LibVEX_Alloc(sizeof(ISelEnv)); 4214 env->vreg_ctr = 0; 4215 4216 /* Set up output code array. */ 4217 env->code = newHInstrArray(); 4218 4219 /* Copy BB's type env. */ 4220 env->type_env = bb->tyenv; 4221 4222 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4223 change as we go along. */ 4224 env->n_vregmap = bb->tyenv->types_used; 4225 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4226 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4227 4228 /* and finally ... */ 4229 env->chainingAllowed = chainingAllowed; 4230 env->hwcaps = hwcaps_host; 4231 env->max_ga = max_ga; 4232 4233 /* For each IR temporary, allocate a suitably-kinded virtual 4234 register. 
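Integer types (I1 through I64) get one 64-bit integer vreg; I128 gets a pair, with the high half recorded in vregmapHI; F32, F64 and V128 each get one 128-bit vector vreg; V256 gets a pair of vector vregs, again with the second one in vregmapHI.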
*/ 4235 j = 0; 4236 for (i = 0; i < env->n_vregmap; i++) { 4237 hregHI = hreg = INVALID_HREG; 4238 switch (bb->tyenv->types[i]) { 4239 case Ity_I1: 4240 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: 4241 hreg = mkHReg(j++, HRcInt64, True); 4242 break; 4243 case Ity_I128: 4244 hreg = mkHReg(j++, HRcInt64, True); 4245 hregHI = mkHReg(j++, HRcInt64, True); 4246 break; 4247 case Ity_F32: 4248 case Ity_F64: 4249 case Ity_V128: 4250 hreg = mkHReg(j++, HRcVec128, True); 4251 break; 4252 case Ity_V256: 4253 hreg = mkHReg(j++, HRcVec128, True); 4254 hregHI = mkHReg(j++, HRcVec128, True); 4255 break; 4256 default: 4257 ppIRType(bb->tyenv->types[i]); 4258 vpanic("iselBB(amd64): IRTemp type"); 4259 } 4260 env->vregmap[i] = hreg; 4261 env->vregmapHI[i] = hregHI; 4262 } 4263 env->vreg_ctr = j; 4264 4265 /* The very first instruction must be an event check. */ 4266 amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP()); 4267 amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP()); 4268 addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr)); 4269 4270 /* Possibly a block counter increment (for profiling). At this 4271 point we don't know the address of the counter, so just pretend 4272 it is zero. It will have to be patched later, but before this 4273 translation is used, by a call to LibVEX_patchProfCtr. */ 4274 if (addProfInc) { 4275 addInstr(env, AMD64Instr_ProfInc()); 4276 } 4277 4278 /* Ok, finally we can iterate over the statements. */ 4279 for (i = 0; i < bb->stmts_used; i++) 4280 if (bb->stmts[i]) 4281 iselStmt(env, bb->stmts[i]); 4282 4283 iselNext(env, bb->next, bb->jumpkind, bb->offsIP); 4284 4285 /* record the number of vregs we used. */ 4286 env->code->n_vregs = env->vreg_ctr; 4287 return env->code; 4288 } 4289 4290 4291 /*---------------------------------------------------------------*/ 4292 /*--- end host_amd64_isel.c ---*/ 4293 /*---------------------------------------------------------------*/ 4294