1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2010 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_ir.h" 38 #include "libvex.h" 39 40 #include "ir_match.h" 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "host_generic_regs.h" 44 #include "host_generic_simd64.h" 45 #include "host_x86_defs.h" 46 47 /* TODO 21 Apr 2005: 48 49 -- (Really an assembler issue) don't emit CMov32 as a cmov 50 insn, since that's expensive on P4 and conditional branch 51 is cheaper if (as we expect) the condition is highly predictable 52 53 -- preserve xmm registers across function calls (by declaring them 54 as trashed by call insns) 55 56 -- preserve x87 ST stack discipline across function calls. Sigh. 57 58 -- Check doHelperCall: if a call is conditional, we cannot safely 59 compute any regparm args directly to registers. Hence, the 60 fast-regparm marshalling should be restricted to unconditional 61 calls only. 62 */ 63 64 /*---------------------------------------------------------*/ 65 /*--- x87 control word stuff ---*/ 66 /*---------------------------------------------------------*/ 67 68 /* Vex-generated code expects to run with the FPU set as follows: all 69 exceptions masked, round-to-nearest, precision = 53 bits. This 70 corresponds to a FPU control word value of 0x027F. 71 72 Similarly the SSE control word (%mxcsr) should be 0x1F80. 73 74 %fpucw and %mxcsr should have these values on entry to 75 Vex-generated code, and should those values should be 76 unchanged at exit. 77 */ 78 79 #define DEFAULT_FPUCW 0x027F 80 81 /* debugging only, do not use */ 82 /* define DEFAULT_FPUCW 0x037F */ 83 84 85 /*---------------------------------------------------------*/ 86 /*--- misc helpers ---*/ 87 /*---------------------------------------------------------*/ 88 89 /* These are duplicated in guest-x86/toIR.c */ 90 static IRExpr* unop ( IROp op, IRExpr* a ) 91 { 92 return IRExpr_Unop(op, a); 93 } 94 95 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 96 { 97 return IRExpr_Binop(op, a1, a2); 98 } 99 100 static IRExpr* bind ( Int binder ) 101 { 102 return IRExpr_Binder(binder); 103 } 104 105 static Bool isZeroU8 ( IRExpr* e ) 106 { 107 return e->tag == Iex_Const 108 && e->Iex.Const.con->tag == Ico_U8 109 && e->Iex.Const.con->Ico.U8 == 0; 110 } 111 112 static Bool isZeroU32 ( IRExpr* e ) 113 { 114 return e->tag == Iex_Const 115 && e->Iex.Const.con->tag == Ico_U32 116 && e->Iex.Const.con->Ico.U32 == 0; 117 } 118 119 static Bool isZeroU64 ( IRExpr* e ) 120 { 121 return e->tag == Iex_Const 122 && e->Iex.Const.con->tag == Ico_U64 123 && e->Iex.Const.con->Ico.U64 == 0ULL; 124 } 125 126 127 /*---------------------------------------------------------*/ 128 /*--- ISelEnv ---*/ 129 /*---------------------------------------------------------*/ 130 131 /* This carries around: 132 133 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 134 might encounter. This is computed before insn selection starts, 135 and does not change. 136 137 - A mapping from IRTemp to HReg. This tells the insn selector 138 which virtual register(s) are associated with each IRTemp 139 temporary. This is computed before insn selection starts, and 140 does not change. We expect this mapping to map precisely the 141 same set of IRTemps as the type mapping does. 142 143 - vregmap holds the primary register for the IRTemp. 144 - vregmapHI is only used for 64-bit integer-typed 145 IRTemps. It holds the identity of a second 146 32-bit virtual HReg, which holds the high half 147 of the value. 148 149 - The code array, that is, the insns selected so far. 150 151 - A counter, for generating new virtual registers. 152 153 - The host subarchitecture we are selecting insns for. 154 This is set at the start and does not change. 155 156 Note, this is all host-independent. */ 157 158 typedef 159 struct { 160 IRTypeEnv* type_env; 161 162 HReg* vregmap; 163 HReg* vregmapHI; 164 Int n_vregmap; 165 166 HInstrArray* code; 167 168 Int vreg_ctr; 169 170 UInt hwcaps; 171 } 172 ISelEnv; 173 174 175 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 176 { 177 vassert(tmp >= 0); 178 vassert(tmp < env->n_vregmap); 179 return env->vregmap[tmp]; 180 } 181 182 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp ) 183 { 184 vassert(tmp >= 0); 185 vassert(tmp < env->n_vregmap); 186 vassert(env->vregmapHI[tmp] != INVALID_HREG); 187 *vrLO = env->vregmap[tmp]; 188 *vrHI = env->vregmapHI[tmp]; 189 } 190 191 static void addInstr ( ISelEnv* env, X86Instr* instr ) 192 { 193 addHInstr(env->code, instr); 194 if (vex_traceflags & VEX_TRACE_VCODE) { 195 ppX86Instr(instr, False); 196 vex_printf("\n"); 197 } 198 } 199 200 static HReg newVRegI ( ISelEnv* env ) 201 { 202 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/); 203 env->vreg_ctr++; 204 return reg; 205 } 206 207 static HReg newVRegF ( ISelEnv* env ) 208 { 209 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); 210 env->vreg_ctr++; 211 return reg; 212 } 213 214 static HReg newVRegV ( ISelEnv* env ) 215 { 216 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); 217 env->vreg_ctr++; 218 return reg; 219 } 220 221 222 /*---------------------------------------------------------*/ 223 /*--- ISEL: Forward declarations ---*/ 224 /*---------------------------------------------------------*/ 225 226 /* These are organised as iselXXX and iselXXX_wrk pairs. The 227 iselXXX_wrk do the real work, but are not to be called directly. 228 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 229 checks that all returned registers are virtual. You should not 230 call the _wrk version directly. 231 */ 232 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ); 233 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ); 234 235 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ); 236 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ); 237 238 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ); 239 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ); 240 241 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 242 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 243 244 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ); 245 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ); 246 247 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 248 ISelEnv* env, IRExpr* e ); 249 static void iselInt64Expr ( HReg* rHi, HReg* rLo, 250 ISelEnv* env, IRExpr* e ); 251 252 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 253 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 254 255 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 256 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 257 258 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 259 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 260 261 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ); 262 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ); 263 264 265 /*---------------------------------------------------------*/ 266 /*--- ISEL: Misc helpers ---*/ 267 /*---------------------------------------------------------*/ 268 269 /* Make a int reg-reg move. */ 270 271 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 272 { 273 vassert(hregClass(src) == HRcInt32); 274 vassert(hregClass(dst) == HRcInt32); 275 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst); 276 } 277 278 279 /* Make a vector reg-reg move. */ 280 281 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 282 { 283 vassert(hregClass(src) == HRcVec128); 284 vassert(hregClass(dst) == HRcVec128); 285 return X86Instr_SseReRg(Xsse_MOV, src, dst); 286 } 287 288 /* Advance/retreat %esp by n. */ 289 290 static void add_to_esp ( ISelEnv* env, Int n ) 291 { 292 vassert(n > 0 && n < 256 && (n%4) == 0); 293 addInstr(env, 294 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP())); 295 } 296 297 static void sub_from_esp ( ISelEnv* env, Int n ) 298 { 299 vassert(n > 0 && n < 256 && (n%4) == 0); 300 addInstr(env, 301 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP())); 302 } 303 304 305 /* Given an amode, return one which references 4 bytes further 306 along. */ 307 308 static X86AMode* advance4 ( X86AMode* am ) 309 { 310 X86AMode* am4 = dopyX86AMode(am); 311 switch (am4->tag) { 312 case Xam_IRRS: 313 am4->Xam.IRRS.imm += 4; break; 314 case Xam_IR: 315 am4->Xam.IR.imm += 4; break; 316 default: 317 vpanic("advance4(x86,host)"); 318 } 319 return am4; 320 } 321 322 323 /* Push an arg onto the host stack, in preparation for a call to a 324 helper function of some kind. Returns the number of 32-bit words 325 pushed. */ 326 327 static Int pushArg ( ISelEnv* env, IRExpr* arg ) 328 { 329 IRType arg_ty = typeOfIRExpr(env->type_env, arg); 330 if (arg_ty == Ity_I32) { 331 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg))); 332 return 1; 333 } else 334 if (arg_ty == Ity_I64) { 335 HReg rHi, rLo; 336 iselInt64Expr(&rHi, &rLo, env, arg); 337 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 338 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 339 return 2; 340 } 341 ppIRExpr(arg); 342 vpanic("pushArg(x86): can't handle arg of this type"); 343 } 344 345 346 /* Complete the call to a helper function, by calling the 347 helper and clearing the args off the stack. */ 348 349 static 350 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc, 351 IRCallee* cee, Int n_arg_ws ) 352 { 353 /* Complication. Need to decide which reg to use as the fn address 354 pointer, in a way that doesn't trash regparm-passed 355 parameters. */ 356 vassert(sizeof(void*) == 4); 357 358 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)), 359 cee->regparms)); 360 if (n_arg_ws > 0) 361 add_to_esp(env, 4*n_arg_ws); 362 } 363 364 365 /* Used only in doHelperCall. See big comment in doHelperCall re 366 handling of regparm args. This function figures out whether 367 evaluation of an expression might require use of a fixed register. 368 If in doubt return True (safe but suboptimal). 369 */ 370 static 371 Bool mightRequireFixedRegs ( IRExpr* e ) 372 { 373 switch (e->tag) { 374 case Iex_RdTmp: case Iex_Const: case Iex_Get: 375 return False; 376 default: 377 return True; 378 } 379 } 380 381 382 /* Do a complete function call. guard is a Ity_Bit expression 383 indicating whether or not the call happens. If guard==NULL, the 384 call is unconditional. */ 385 386 static 387 void doHelperCall ( ISelEnv* env, 388 Bool passBBP, 389 IRExpr* guard, IRCallee* cee, IRExpr** args ) 390 { 391 X86CondCode cc; 392 HReg argregs[3]; 393 HReg tmpregs[3]; 394 Bool danger; 395 Int not_done_yet, n_args, n_arg_ws, stack_limit, 396 i, argreg, argregX; 397 398 /* Marshal args for a call, do the call, and clear the stack. 399 Complexities to consider: 400 401 * if passBBP is True, %ebp (the baseblock pointer) is to be 402 passed as the first arg. 403 404 * If the callee claims regparmness of 1, 2 or 3, we must pass the 405 first 1, 2 or 3 args in registers (EAX, EDX, and ECX 406 respectively). To keep things relatively simple, only args of 407 type I32 may be passed as regparms -- just bomb out if anything 408 else turns up. Clearly this depends on the front ends not 409 trying to pass any other types as regparms. 410 */ 411 412 /* 16 Nov 2004: the regparm handling is complicated by the 413 following problem. 414 415 Consider a call two a function with two regparm parameters: 416 f(e1,e2). We need to compute e1 into %eax and e2 into %edx. 417 Suppose code is first generated to compute e1 into %eax. Then, 418 code is generated to compute e2 into %edx. Unfortunately, if 419 the latter code sequence uses %eax, it will trash the value of 420 e1 computed by the former sequence. This could happen if (for 421 example) e2 itself involved a function call. In the code below, 422 args are evaluated right-to-left, not left-to-right, but the 423 principle and the problem are the same. 424 425 One solution is to compute all regparm-bound args into vregs 426 first, and once they are all done, move them to the relevant 427 real regs. This always gives correct code, but it also gives 428 a bunch of vreg-to-rreg moves which are usually redundant but 429 are hard for the register allocator to get rid of. 430 431 A compromise is to first examine all regparm'd argument 432 expressions. If they are all so simple that it is clear 433 they will be evaluated without use of any fixed registers, 434 use the old compute-directly-to-fixed-target scheme. If not, 435 be safe and use the via-vregs scheme. 436 437 Note this requires being able to examine an expression and 438 determine whether or not evaluation of it might use a fixed 439 register. That requires knowledge of how the rest of this 440 insn selector works. Currently just the following 3 are 441 regarded as safe -- hopefully they cover the majority of 442 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 443 */ 444 vassert(cee->regparms >= 0 && cee->regparms <= 3); 445 446 n_args = n_arg_ws = 0; 447 while (args[n_args]) n_args++; 448 449 not_done_yet = n_args; 450 if (passBBP) 451 not_done_yet++; 452 453 stack_limit = cee->regparms; 454 if (cee->regparms > 0 && passBBP) stack_limit--; 455 456 /* ------ BEGIN marshall all arguments ------ */ 457 458 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */ 459 for (i = n_args-1; i >= stack_limit; i--) { 460 n_arg_ws += pushArg(env, args[i]); 461 not_done_yet--; 462 } 463 464 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in 465 registers. */ 466 467 if (cee->regparms > 0) { 468 469 /* ------ BEGIN deal with regparms ------ */ 470 471 /* deal with regparms, not forgetting %ebp if needed. */ 472 argregs[0] = hregX86_EAX(); 473 argregs[1] = hregX86_EDX(); 474 argregs[2] = hregX86_ECX(); 475 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG; 476 477 argreg = cee->regparms; 478 479 /* In keeping with big comment above, detect potential danger 480 and use the via-vregs scheme if needed. */ 481 danger = False; 482 for (i = stack_limit-1; i >= 0; i--) { 483 if (mightRequireFixedRegs(args[i])) { 484 danger = True; 485 break; 486 } 487 } 488 489 if (danger) { 490 491 /* Move via temporaries */ 492 argregX = argreg; 493 for (i = stack_limit-1; i >= 0; i--) { 494 495 if (0) { 496 vex_printf("x86 host: register param is complex: "); 497 ppIRExpr(args[i]); 498 vex_printf("\n"); 499 } 500 501 argreg--; 502 vassert(argreg >= 0); 503 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32); 504 tmpregs[argreg] = iselIntExpr_R(env, args[i]); 505 not_done_yet--; 506 } 507 for (i = stack_limit-1; i >= 0; i--) { 508 argregX--; 509 vassert(argregX >= 0); 510 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) ); 511 } 512 513 } else { 514 /* It's safe to compute all regparm args directly into their 515 target registers. */ 516 for (i = stack_limit-1; i >= 0; i--) { 517 argreg--; 518 vassert(argreg >= 0); 519 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32); 520 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 521 iselIntExpr_RMI(env, args[i]), 522 argregs[argreg])); 523 not_done_yet--; 524 } 525 526 } 527 528 /* Not forgetting %ebp if needed. */ 529 if (passBBP) { 530 vassert(argreg == 1); 531 addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0])); 532 not_done_yet--; 533 } 534 535 /* ------ END deal with regparms ------ */ 536 537 } else { 538 539 /* No regparms. Heave %ebp on the stack if needed. */ 540 if (passBBP) { 541 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP()))); 542 n_arg_ws++; 543 not_done_yet--; 544 } 545 546 } 547 548 vassert(not_done_yet == 0); 549 550 /* ------ END marshall all arguments ------ */ 551 552 /* Now we can compute the condition. We can't do it earlier 553 because the argument computations could trash the condition 554 codes. Be a bit clever to handle the common case where the 555 guard is 1:Bit. */ 556 cc = Xcc_ALWAYS; 557 if (guard) { 558 if (guard->tag == Iex_Const 559 && guard->Iex.Const.con->tag == Ico_U1 560 && guard->Iex.Const.con->Ico.U1 == True) { 561 /* unconditional -- do nothing */ 562 } else { 563 cc = iselCondCode( env, guard ); 564 } 565 } 566 567 /* call the helper, and get the args off the stack afterwards. */ 568 callHelperAndClearArgs( env, cc, cee, n_arg_ws ); 569 } 570 571 572 /* Given a guest-state array descriptor, an index expression and a 573 bias, generate an X86AMode holding the relevant guest state 574 offset. */ 575 576 static 577 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, 578 IRExpr* off, Int bias ) 579 { 580 HReg tmp, roff; 581 Int elemSz = sizeofIRType(descr->elemTy); 582 Int nElems = descr->nElems; 583 Int shift = 0; 584 585 /* throw out any cases not generated by an x86 front end. In 586 theory there might be a day where we need to handle them -- if 587 we ever run non-x86-guest on x86 host. */ 588 589 if (nElems != 8) 590 vpanic("genGuestArrayOffset(x86 host)(1)"); 591 592 switch (elemSz) { 593 case 1: shift = 0; break; 594 case 4: shift = 2; break; 595 case 8: shift = 3; break; 596 default: vpanic("genGuestArrayOffset(x86 host)(2)"); 597 } 598 599 /* Compute off into a reg, %off. Then return: 600 601 movl %off, %tmp 602 addl $bias, %tmp (if bias != 0) 603 andl %tmp, 7 604 ... base(%ebp, %tmp, shift) ... 605 */ 606 tmp = newVRegI(env); 607 roff = iselIntExpr_R(env, off); 608 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 609 if (bias != 0) { 610 addInstr(env, 611 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp)); 612 } 613 addInstr(env, 614 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp)); 615 return 616 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift ); 617 } 618 619 620 /* Mess with the FPU's rounding mode: set to the default rounding mode 621 (DEFAULT_FPUCW). */ 622 static 623 void set_FPU_rounding_default ( ISelEnv* env ) 624 { 625 /* pushl $DEFAULT_FPUCW 626 fldcw 0(%esp) 627 addl $4, %esp 628 */ 629 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 630 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); 631 addInstr(env, X86Instr_FpLdCW(zero_esp)); 632 add_to_esp(env, 4); 633 } 634 635 636 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed 637 expression denoting a value in the range 0 .. 3, indicating a round 638 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 639 the same rounding. 640 */ 641 static 642 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 643 { 644 HReg rrm = iselIntExpr_R(env, mode); 645 HReg rrm2 = newVRegI(env); 646 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 647 648 /* movl %rrm, %rrm2 649 andl $3, %rrm2 -- shouldn't be needed; paranoia 650 shll $10, %rrm2 651 orl $DEFAULT_FPUCW, %rrm2 652 pushl %rrm2 653 fldcw 0(%esp) 654 addl $4, %esp 655 */ 656 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 657 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2)); 658 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2)); 659 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2)); 660 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2))); 661 addInstr(env, X86Instr_FpLdCW(zero_esp)); 662 add_to_esp(env, 4); 663 } 664 665 666 /* Generate !src into a new vector register, and be sure that the code 667 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy 668 way to do this. 669 */ 670 static HReg do_sse_Not128 ( ISelEnv* env, HReg src ) 671 { 672 HReg dst = newVRegV(env); 673 /* Set dst to zero. If dst contains a NaN then all hell might 674 break loose after the comparison. So, first zero it. */ 675 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst)); 676 /* And now make it all 1s ... */ 677 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst)); 678 /* Finally, xor 'src' into it. */ 679 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst)); 680 /* Doesn't that just totally suck? */ 681 return dst; 682 } 683 684 685 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used 686 after most non-simple FPU operations (simple = +, -, *, / and 687 sqrt). 688 689 This could be done a lot more efficiently if needed, by loading 690 zero and adding it to the value to be rounded (fldz ; faddp?). 691 */ 692 static void roundToF64 ( ISelEnv* env, HReg reg ) 693 { 694 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 695 sub_from_esp(env, 8); 696 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 697 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 698 add_to_esp(env, 8); 699 } 700 701 702 /*---------------------------------------------------------*/ 703 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 704 /*---------------------------------------------------------*/ 705 706 /* Select insns for an integer-typed expression, and add them to the 707 code list. Return a reg holding the result. This reg will be a 708 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 709 want to modify it, ask for a new vreg, copy it in there, and modify 710 the copy. The register allocator will do its best to map both 711 vregs to the same real register, so the copies will often disappear 712 later in the game. 713 714 This should handle expressions of 32, 16 and 8-bit type. All 715 results are returned in a 32-bit register. For 16- and 8-bit 716 expressions, the upper 16/24 bits are arbitrary, so you should mask 717 or sign extend partial values if necessary. 718 */ 719 720 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 721 { 722 HReg r = iselIntExpr_R_wrk(env, e); 723 /* sanity checks ... */ 724 # if 0 725 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 726 # endif 727 vassert(hregClass(r) == HRcInt32); 728 vassert(hregIsVirtual(r)); 729 return r; 730 } 731 732 /* DO NOT CALL THIS DIRECTLY ! */ 733 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 734 { 735 MatchInfo mi; 736 737 IRType ty = typeOfIRExpr(env->type_env,e); 738 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 739 740 switch (e->tag) { 741 742 /* --------- TEMP --------- */ 743 case Iex_RdTmp: { 744 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 745 } 746 747 /* --------- LOAD --------- */ 748 case Iex_Load: { 749 HReg dst = newVRegI(env); 750 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr ); 751 752 /* We can't handle big-endian loads, nor load-linked. */ 753 if (e->Iex.Load.end != Iend_LE) 754 goto irreducible; 755 756 if (ty == Ity_I32) { 757 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 758 X86RMI_Mem(amode), dst) ); 759 return dst; 760 } 761 if (ty == Ity_I16) { 762 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 763 return dst; 764 } 765 if (ty == Ity_I8) { 766 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 767 return dst; 768 } 769 break; 770 } 771 772 /* --------- TERNARY OP --------- */ 773 case Iex_Triop: { 774 /* C3210 flags following FPU partial remainder (fprem), both 775 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 776 if (e->Iex.Triop.op == Iop_PRemC3210F64 777 || e->Iex.Triop.op == Iop_PRem1C3210F64) { 778 HReg junk = newVRegF(env); 779 HReg dst = newVRegI(env); 780 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2); 781 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3); 782 /* XXXROUNDINGFIXME */ 783 /* set roundingmode here */ 784 addInstr(env, X86Instr_FpBinary( 785 e->Iex.Binop.op==Iop_PRemC3210F64 786 ? Xfp_PREM : Xfp_PREM1, 787 srcL,srcR,junk 788 )); 789 /* The previous pseudo-insn will have left the FPU's C3210 790 flags set correctly. So bag them. */ 791 addInstr(env, X86Instr_FpStSW_AX()); 792 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 793 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst)); 794 return dst; 795 } 796 797 break; 798 } 799 800 /* --------- BINARY OP --------- */ 801 case Iex_Binop: { 802 X86AluOp aluOp; 803 X86ShiftOp shOp; 804 805 /* Pattern: Sub32(0,x) */ 806 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) { 807 HReg dst = newVRegI(env); 808 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); 809 addInstr(env, mk_iMOVsd_RR(reg,dst)); 810 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 811 return dst; 812 } 813 814 /* Is it an addition or logical style op? */ 815 switch (e->Iex.Binop.op) { 816 case Iop_Add8: case Iop_Add16: case Iop_Add32: 817 aluOp = Xalu_ADD; break; 818 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: 819 aluOp = Xalu_SUB; break; 820 case Iop_And8: case Iop_And16: case Iop_And32: 821 aluOp = Xalu_AND; break; 822 case Iop_Or8: case Iop_Or16: case Iop_Or32: 823 aluOp = Xalu_OR; break; 824 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: 825 aluOp = Xalu_XOR; break; 826 case Iop_Mul16: case Iop_Mul32: 827 aluOp = Xalu_MUL; break; 828 default: 829 aluOp = Xalu_INVALID; break; 830 } 831 /* For commutative ops we assume any literal 832 values are on the second operand. */ 833 if (aluOp != Xalu_INVALID) { 834 HReg dst = newVRegI(env); 835 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 836 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 837 addInstr(env, mk_iMOVsd_RR(reg,dst)); 838 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst)); 839 return dst; 840 } 841 /* Could do better here; forcing the first arg into a reg 842 isn't always clever. 843 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)), 844 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32( 845 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32))) 846 movl 0xFFFFFFA0(%vr41),%vr107 847 movl 0xFFFFFFA4(%vr41),%vr108 848 movl %vr107,%vr106 849 xorl %vr108,%vr106 850 movl 0xFFFFFFA8(%vr41),%vr109 851 movl %vr106,%vr105 852 andl %vr109,%vr105 853 movl 0xFFFFFFA0(%vr41),%vr110 854 movl %vr105,%vr104 855 xorl %vr110,%vr104 856 movl %vr104,%vr70 857 */ 858 859 /* Perhaps a shift op? */ 860 switch (e->Iex.Binop.op) { 861 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 862 shOp = Xsh_SHL; break; 863 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 864 shOp = Xsh_SHR; break; 865 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 866 shOp = Xsh_SAR; break; 867 default: 868 shOp = Xsh_INVALID; break; 869 } 870 if (shOp != Xsh_INVALID) { 871 HReg dst = newVRegI(env); 872 873 /* regL = the value to be shifted */ 874 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 875 addInstr(env, mk_iMOVsd_RR(regL,dst)); 876 877 /* Do any necessary widening for 16/8 bit operands */ 878 switch (e->Iex.Binop.op) { 879 case Iop_Shr8: 880 addInstr(env, X86Instr_Alu32R( 881 Xalu_AND, X86RMI_Imm(0xFF), dst)); 882 break; 883 case Iop_Shr16: 884 addInstr(env, X86Instr_Alu32R( 885 Xalu_AND, X86RMI_Imm(0xFFFF), dst)); 886 break; 887 case Iop_Sar8: 888 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst)); 889 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst)); 890 break; 891 case Iop_Sar16: 892 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst)); 893 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst)); 894 break; 895 default: break; 896 } 897 898 /* Now consider the shift amount. If it's a literal, we 899 can do a much better job than the general case. */ 900 if (e->Iex.Binop.arg2->tag == Iex_Const) { 901 /* assert that the IR is well-typed */ 902 Int nshift; 903 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 904 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 905 vassert(nshift >= 0); 906 if (nshift > 0) 907 /* Can't allow nshift==0 since that means %cl */ 908 addInstr(env, X86Instr_Sh32( shOp, nshift, dst )); 909 } else { 910 /* General case; we have to force the amount into %cl. */ 911 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 912 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX())); 913 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst)); 914 } 915 return dst; 916 } 917 918 /* Handle misc other ops. */ 919 920 if (e->Iex.Binop.op == Iop_Max32U) { 921 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 922 HReg dst = newVRegI(env); 923 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 924 addInstr(env, mk_iMOVsd_RR(src1,dst)); 925 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst)); 926 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst)); 927 return dst; 928 } 929 930 if (e->Iex.Binop.op == Iop_8HLto16) { 931 HReg hi8 = newVRegI(env); 932 HReg lo8 = newVRegI(env); 933 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 934 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 935 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 936 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 937 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8)); 938 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8)); 939 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8)); 940 return hi8; 941 } 942 943 if (e->Iex.Binop.op == Iop_16HLto32) { 944 HReg hi16 = newVRegI(env); 945 HReg lo16 = newVRegI(env); 946 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 947 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 948 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 949 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 950 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16)); 951 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16)); 952 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16)); 953 return hi16; 954 } 955 956 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8 957 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) { 958 HReg a16 = newVRegI(env); 959 HReg b16 = newVRegI(env); 960 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 961 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 962 Int shift = (e->Iex.Binop.op == Iop_MullS8 963 || e->Iex.Binop.op == Iop_MullU8) 964 ? 24 : 16; 965 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8 966 || e->Iex.Binop.op == Iop_MullS16) 967 ? Xsh_SAR : Xsh_SHR; 968 969 addInstr(env, mk_iMOVsd_RR(a16s, a16)); 970 addInstr(env, mk_iMOVsd_RR(b16s, b16)); 971 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16)); 972 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16)); 973 addInstr(env, X86Instr_Sh32(shr_op, shift, a16)); 974 addInstr(env, X86Instr_Sh32(shr_op, shift, b16)); 975 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16)); 976 return b16; 977 } 978 979 if (e->Iex.Binop.op == Iop_CmpF64) { 980 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 981 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 982 HReg dst = newVRegI(env); 983 addInstr(env, X86Instr_FpCmp(fL,fR,dst)); 984 /* shift this right 8 bits so as to conform to CmpF64 985 definition. */ 986 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst)); 987 return dst; 988 } 989 990 if (e->Iex.Binop.op == Iop_F64toI32S 991 || e->Iex.Binop.op == Iop_F64toI16S) { 992 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4; 993 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 994 HReg dst = newVRegI(env); 995 996 /* Used several times ... */ 997 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 998 999 /* rf now holds the value to be converted, and rrm holds the 1000 rounding mode value, encoded as per the IRRoundingMode 1001 enum. The first thing to do is set the FPU's rounding 1002 mode accordingly. */ 1003 1004 /* Create a space for the format conversion. */ 1005 /* subl $4, %esp */ 1006 sub_from_esp(env, 4); 1007 1008 /* Set host rounding mode */ 1009 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 1010 1011 /* gistw/l %rf, 0(%esp) */ 1012 addInstr(env, X86Instr_FpLdStI(False/*store*/, 1013 toUChar(sz), rf, zero_esp)); 1014 1015 if (sz == 2) { 1016 /* movzwl 0(%esp), %dst */ 1017 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst)); 1018 } else { 1019 /* movl 0(%esp), %dst */ 1020 vassert(sz == 4); 1021 addInstr(env, X86Instr_Alu32R( 1022 Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1023 } 1024 1025 /* Restore default FPU rounding. */ 1026 set_FPU_rounding_default( env ); 1027 1028 /* addl $4, %esp */ 1029 add_to_esp(env, 4); 1030 return dst; 1031 } 1032 1033 break; 1034 } 1035 1036 /* --------- UNARY OP --------- */ 1037 case Iex_Unop: { 1038 1039 /* 1Uto8(32to1(expr32)) */ 1040 if (e->Iex.Unop.op == Iop_1Uto8) { 1041 DECLARE_PATTERN(p_32to1_then_1Uto8); 1042 DEFINE_PATTERN(p_32to1_then_1Uto8, 1043 unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1044 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1045 IRExpr* expr32 = mi.bindee[0]; 1046 HReg dst = newVRegI(env); 1047 HReg src = iselIntExpr_R(env, expr32); 1048 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1049 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1050 X86RMI_Imm(1), dst)); 1051 return dst; 1052 } 1053 } 1054 1055 /* 8Uto32(LDle(expr32)) */ 1056 if (e->Iex.Unop.op == Iop_8Uto32) { 1057 DECLARE_PATTERN(p_LDle8_then_8Uto32); 1058 DEFINE_PATTERN(p_LDle8_then_8Uto32, 1059 unop(Iop_8Uto32, 1060 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1061 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1062 HReg dst = newVRegI(env); 1063 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1064 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1065 return dst; 1066 } 1067 } 1068 1069 /* 8Sto32(LDle(expr32)) */ 1070 if (e->Iex.Unop.op == Iop_8Sto32) { 1071 DECLARE_PATTERN(p_LDle8_then_8Sto32); 1072 DEFINE_PATTERN(p_LDle8_then_8Sto32, 1073 unop(Iop_8Sto32, 1074 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1075 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1076 HReg dst = newVRegI(env); 1077 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1078 addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1079 return dst; 1080 } 1081 } 1082 1083 /* 16Uto32(LDle(expr32)) */ 1084 if (e->Iex.Unop.op == Iop_16Uto32) { 1085 DECLARE_PATTERN(p_LDle16_then_16Uto32); 1086 DEFINE_PATTERN(p_LDle16_then_16Uto32, 1087 unop(Iop_16Uto32, 1088 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1089 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1090 HReg dst = newVRegI(env); 1091 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1092 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1093 return dst; 1094 } 1095 } 1096 1097 /* 8Uto32(GET:I8) */ 1098 if (e->Iex.Unop.op == Iop_8Uto32) { 1099 if (e->Iex.Unop.arg->tag == Iex_Get) { 1100 HReg dst; 1101 X86AMode* amode; 1102 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1103 dst = newVRegI(env); 1104 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1105 hregX86_EBP()); 1106 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1107 return dst; 1108 } 1109 } 1110 1111 /* 16to32(GET:I16) */ 1112 if (e->Iex.Unop.op == Iop_16Uto32) { 1113 if (e->Iex.Unop.arg->tag == Iex_Get) { 1114 HReg dst; 1115 X86AMode* amode; 1116 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1117 dst = newVRegI(env); 1118 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1119 hregX86_EBP()); 1120 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1121 return dst; 1122 } 1123 } 1124 1125 switch (e->Iex.Unop.op) { 1126 case Iop_8Uto16: 1127 case Iop_8Uto32: 1128 case Iop_16Uto32: { 1129 HReg dst = newVRegI(env); 1130 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1131 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF; 1132 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1133 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1134 X86RMI_Imm(mask), dst)); 1135 return dst; 1136 } 1137 case Iop_8Sto16: 1138 case Iop_8Sto32: 1139 case Iop_16Sto32: { 1140 HReg dst = newVRegI(env); 1141 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1142 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24; 1143 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1144 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst)); 1145 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst)); 1146 return dst; 1147 } 1148 case Iop_Not8: 1149 case Iop_Not16: 1150 case Iop_Not32: { 1151 HReg dst = newVRegI(env); 1152 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1153 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1154 addInstr(env, X86Instr_Unary32(Xun_NOT,dst)); 1155 return dst; 1156 } 1157 case Iop_64HIto32: { 1158 HReg rHi, rLo; 1159 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1160 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1161 } 1162 case Iop_64to32: { 1163 HReg rHi, rLo; 1164 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1165 return rLo; /* similar stupid comment to the above ... */ 1166 } 1167 case Iop_16HIto8: 1168 case Iop_32HIto16: { 1169 HReg dst = newVRegI(env); 1170 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1171 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16; 1172 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1173 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst)); 1174 return dst; 1175 } 1176 case Iop_1Uto32: 1177 case Iop_1Uto8: { 1178 HReg dst = newVRegI(env); 1179 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1180 addInstr(env, X86Instr_Set32(cond,dst)); 1181 return dst; 1182 } 1183 case Iop_1Sto8: 1184 case Iop_1Sto16: 1185 case Iop_1Sto32: { 1186 /* could do better than this, but for now ... */ 1187 HReg dst = newVRegI(env); 1188 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1189 addInstr(env, X86Instr_Set32(cond,dst)); 1190 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1191 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1192 return dst; 1193 } 1194 case Iop_Ctz32: { 1195 /* Count trailing zeroes, implemented by x86 'bsfl' */ 1196 HReg dst = newVRegI(env); 1197 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1198 addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1199 return dst; 1200 } 1201 case Iop_Clz32: { 1202 /* Count leading zeroes. Do 'bsrl' to establish the index 1203 of the highest set bit, and subtract that value from 1204 31. */ 1205 HReg tmp = newVRegI(env); 1206 HReg dst = newVRegI(env); 1207 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1208 addInstr(env, X86Instr_Bsfr32(False,src,tmp)); 1209 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 1210 X86RMI_Imm(31), dst)); 1211 addInstr(env, X86Instr_Alu32R(Xalu_SUB, 1212 X86RMI_Reg(tmp), dst)); 1213 return dst; 1214 } 1215 1216 case Iop_CmpwNEZ32: { 1217 HReg dst = newVRegI(env); 1218 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1219 addInstr(env, mk_iMOVsd_RR(src,dst)); 1220 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 1221 addInstr(env, X86Instr_Alu32R(Xalu_OR, 1222 X86RMI_Reg(src), dst)); 1223 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1224 return dst; 1225 } 1226 case Iop_Left8: 1227 case Iop_Left16: 1228 case Iop_Left32: { 1229 HReg dst = newVRegI(env); 1230 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1231 addInstr(env, mk_iMOVsd_RR(src, dst)); 1232 addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); 1233 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); 1234 return dst; 1235 } 1236 1237 case Iop_V128to32: { 1238 HReg dst = newVRegI(env); 1239 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1240 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1241 sub_from_esp(env, 16); 1242 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1243 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst )); 1244 add_to_esp(env, 16); 1245 return dst; 1246 } 1247 1248 /* ReinterpF32asI32(e) */ 1249 /* Given an IEEE754 single, produce an I32 with the same bit 1250 pattern. Keep stack 8-aligned even though only using 4 1251 bytes. */ 1252 case Iop_ReinterpF32asI32: { 1253 HReg rf = iselFltExpr(env, e->Iex.Unop.arg); 1254 HReg dst = newVRegI(env); 1255 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1256 /* paranoia */ 1257 set_FPU_rounding_default(env); 1258 /* subl $8, %esp */ 1259 sub_from_esp(env, 8); 1260 /* gstF %rf, 0(%esp) */ 1261 addInstr(env, 1262 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp)); 1263 /* movl 0(%esp), %dst */ 1264 addInstr(env, 1265 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1266 /* addl $8, %esp */ 1267 add_to_esp(env, 8); 1268 return dst; 1269 } 1270 1271 case Iop_16to8: 1272 case Iop_32to8: 1273 case Iop_32to16: 1274 /* These are no-ops. */ 1275 return iselIntExpr_R(env, e->Iex.Unop.arg); 1276 1277 default: 1278 break; 1279 } 1280 break; 1281 } 1282 1283 /* --------- GET --------- */ 1284 case Iex_Get: { 1285 if (ty == Ity_I32) { 1286 HReg dst = newVRegI(env); 1287 addInstr(env, X86Instr_Alu32R( 1288 Xalu_MOV, 1289 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1290 hregX86_EBP())), 1291 dst)); 1292 return dst; 1293 } 1294 if (ty == Ity_I8 || ty == Ity_I16) { 1295 HReg dst = newVRegI(env); 1296 addInstr(env, X86Instr_LoadEX( 1297 toUChar(ty==Ity_I8 ? 1 : 2), 1298 False, 1299 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1300 dst)); 1301 return dst; 1302 } 1303 break; 1304 } 1305 1306 case Iex_GetI: { 1307 X86AMode* am 1308 = genGuestArrayOffset( 1309 env, e->Iex.GetI.descr, 1310 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1311 HReg dst = newVRegI(env); 1312 if (ty == Ity_I8) { 1313 addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1314 return dst; 1315 } 1316 if (ty == Ity_I32) { 1317 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1318 return dst; 1319 } 1320 break; 1321 } 1322 1323 /* --------- CCALL --------- */ 1324 case Iex_CCall: { 1325 HReg dst = newVRegI(env); 1326 vassert(ty == e->Iex.CCall.retty); 1327 1328 /* be very restrictive for now. Only 32/64-bit ints allowed 1329 for args, and 32 bits for return type. */ 1330 if (e->Iex.CCall.retty != Ity_I32) 1331 goto irreducible; 1332 1333 /* Marshal args, do the call, clear stack. */ 1334 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 1335 1336 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1337 return dst; 1338 } 1339 1340 /* --------- LITERAL --------- */ 1341 /* 32/16/8-bit literals */ 1342 case Iex_Const: { 1343 X86RMI* rmi = iselIntExpr_RMI ( env, e ); 1344 HReg r = newVRegI(env); 1345 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r)); 1346 return r; 1347 } 1348 1349 /* --------- MULTIPLEX --------- */ 1350 case Iex_Mux0X: { 1351 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1352 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 1353 X86RM* r8; 1354 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX); 1355 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0); 1356 HReg dst = newVRegI(env); 1357 addInstr(env, mk_iMOVsd_RR(rX,dst)); 1358 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 1359 addInstr(env, X86Instr_Test32(0xFF, r8)); 1360 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst)); 1361 return dst; 1362 } 1363 break; 1364 } 1365 1366 default: 1367 break; 1368 } /* switch (e->tag) */ 1369 1370 /* We get here if no pattern matched. */ 1371 irreducible: 1372 ppIRExpr(e); 1373 vpanic("iselIntExpr_R: cannot reduce tree"); 1374 } 1375 1376 1377 /*---------------------------------------------------------*/ 1378 /*--- ISEL: Integer expression auxiliaries ---*/ 1379 /*---------------------------------------------------------*/ 1380 1381 /* --------------------- AMODEs --------------------- */ 1382 1383 /* Return an AMode which computes the value of the specified 1384 expression, possibly also adding insns to the code list as a 1385 result. The expression may only be a 32-bit one. 1386 */ 1387 1388 static Bool sane_AMode ( X86AMode* am ) 1389 { 1390 switch (am->tag) { 1391 case Xam_IR: 1392 return 1393 toBool( hregClass(am->Xam.IR.reg) == HRcInt32 1394 && (hregIsVirtual(am->Xam.IR.reg) 1395 || am->Xam.IR.reg == hregX86_EBP()) ); 1396 case Xam_IRRS: 1397 return 1398 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32 1399 && hregIsVirtual(am->Xam.IRRS.base) 1400 && hregClass(am->Xam.IRRS.index) == HRcInt32 1401 && hregIsVirtual(am->Xam.IRRS.index) ); 1402 default: 1403 vpanic("sane_AMode: unknown x86 amode tag"); 1404 } 1405 } 1406 1407 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ) 1408 { 1409 X86AMode* am = iselIntExpr_AMode_wrk(env, e); 1410 vassert(sane_AMode(am)); 1411 return am; 1412 } 1413 1414 /* DO NOT CALL THIS DIRECTLY ! */ 1415 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1416 { 1417 IRType ty = typeOfIRExpr(env->type_env,e); 1418 vassert(ty == Ity_I32); 1419 1420 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */ 1421 if (e->tag == Iex_Binop 1422 && e->Iex.Binop.op == Iop_Add32 1423 && e->Iex.Binop.arg2->tag == Iex_Const 1424 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32 1425 && e->Iex.Binop.arg1->tag == Iex_Binop 1426 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32 1427 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop 1428 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1429 && e->Iex.Binop.arg1 1430 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1431 && e->Iex.Binop.arg1 1432 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1433 UInt shift = e->Iex.Binop.arg1 1434 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1435 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1436 if (shift == 1 || shift == 2 || shift == 3) { 1437 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1); 1438 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1 1439 ->Iex.Binop.arg2->Iex.Binop.arg1 ); 1440 return X86AMode_IRRS(imm32, r1, r2, shift); 1441 } 1442 } 1443 1444 /* Add32(expr1, Shl32(expr2, imm)) */ 1445 if (e->tag == Iex_Binop 1446 && e->Iex.Binop.op == Iop_Add32 1447 && e->Iex.Binop.arg2->tag == Iex_Binop 1448 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1449 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1450 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1451 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1452 if (shift == 1 || shift == 2 || shift == 3) { 1453 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1454 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1455 return X86AMode_IRRS(0, r1, r2, shift); 1456 } 1457 } 1458 1459 /* Add32(expr,i) */ 1460 if (e->tag == Iex_Binop 1461 && e->Iex.Binop.op == Iop_Add32 1462 && e->Iex.Binop.arg2->tag == Iex_Const 1463 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 1464 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1465 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1); 1466 } 1467 1468 /* Doesn't match anything in particular. Generate it into 1469 a register and use that. */ 1470 { 1471 HReg r1 = iselIntExpr_R(env, e); 1472 return X86AMode_IR(0, r1); 1473 } 1474 } 1475 1476 1477 /* --------------------- RMIs --------------------- */ 1478 1479 /* Similarly, calculate an expression into an X86RMI operand. As with 1480 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1481 1482 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 1483 { 1484 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1485 /* sanity checks ... */ 1486 switch (rmi->tag) { 1487 case Xrmi_Imm: 1488 return rmi; 1489 case Xrmi_Reg: 1490 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32); 1491 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg)); 1492 return rmi; 1493 case Xrmi_Mem: 1494 vassert(sane_AMode(rmi->Xrmi.Mem.am)); 1495 return rmi; 1496 default: 1497 vpanic("iselIntExpr_RMI: unknown x86 RMI tag"); 1498 } 1499 } 1500 1501 /* DO NOT CALL THIS DIRECTLY ! */ 1502 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ) 1503 { 1504 IRType ty = typeOfIRExpr(env->type_env,e); 1505 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1506 1507 /* special case: immediate */ 1508 if (e->tag == Iex_Const) { 1509 UInt u; 1510 switch (e->Iex.Const.con->tag) { 1511 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1512 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1513 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1514 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1515 } 1516 return X86RMI_Imm(u); 1517 } 1518 1519 /* special case: 32-bit GET */ 1520 if (e->tag == Iex_Get && ty == Ity_I32) { 1521 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1522 hregX86_EBP())); 1523 } 1524 1525 /* special case: 32-bit load from memory */ 1526 if (e->tag == Iex_Load && ty == Ity_I32 1527 && e->Iex.Load.end == Iend_LE) { 1528 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 1529 return X86RMI_Mem(am); 1530 } 1531 1532 /* default case: calculate into a register and return that */ 1533 { 1534 HReg r = iselIntExpr_R ( env, e ); 1535 return X86RMI_Reg(r); 1536 } 1537 } 1538 1539 1540 /* --------------------- RIs --------------------- */ 1541 1542 /* Calculate an expression into an X86RI operand. As with 1543 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1544 1545 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ) 1546 { 1547 X86RI* ri = iselIntExpr_RI_wrk(env, e); 1548 /* sanity checks ... */ 1549 switch (ri->tag) { 1550 case Xri_Imm: 1551 return ri; 1552 case Xri_Reg: 1553 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32); 1554 vassert(hregIsVirtual(ri->Xri.Reg.reg)); 1555 return ri; 1556 default: 1557 vpanic("iselIntExpr_RI: unknown x86 RI tag"); 1558 } 1559 } 1560 1561 /* DO NOT CALL THIS DIRECTLY ! */ 1562 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 1563 { 1564 IRType ty = typeOfIRExpr(env->type_env,e); 1565 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1566 1567 /* special case: immediate */ 1568 if (e->tag == Iex_Const) { 1569 UInt u; 1570 switch (e->Iex.Const.con->tag) { 1571 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1572 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1573 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1574 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1575 } 1576 return X86RI_Imm(u); 1577 } 1578 1579 /* default case: calculate into a register and return that */ 1580 { 1581 HReg r = iselIntExpr_R ( env, e ); 1582 return X86RI_Reg(r); 1583 } 1584 } 1585 1586 1587 /* --------------------- RMs --------------------- */ 1588 1589 /* Similarly, calculate an expression into an X86RM operand. As with 1590 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1591 1592 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 1593 { 1594 X86RM* rm = iselIntExpr_RM_wrk(env, e); 1595 /* sanity checks ... */ 1596 switch (rm->tag) { 1597 case Xrm_Reg: 1598 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32); 1599 vassert(hregIsVirtual(rm->Xrm.Reg.reg)); 1600 return rm; 1601 case Xrm_Mem: 1602 vassert(sane_AMode(rm->Xrm.Mem.am)); 1603 return rm; 1604 default: 1605 vpanic("iselIntExpr_RM: unknown x86 RM tag"); 1606 } 1607 } 1608 1609 /* DO NOT CALL THIS DIRECTLY ! */ 1610 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ) 1611 { 1612 IRType ty = typeOfIRExpr(env->type_env,e); 1613 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1614 1615 /* special case: 32-bit GET */ 1616 if (e->tag == Iex_Get && ty == Ity_I32) { 1617 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset, 1618 hregX86_EBP())); 1619 } 1620 1621 /* special case: load from memory */ 1622 1623 /* default case: calculate into a register and return that */ 1624 { 1625 HReg r = iselIntExpr_R ( env, e ); 1626 return X86RM_Reg(r); 1627 } 1628 } 1629 1630 1631 /* --------------------- CONDCODE --------------------- */ 1632 1633 /* Generate code to evaluated a bit-typed expression, returning the 1634 condition code which would correspond when the expression would 1635 notionally have returned 1. */ 1636 1637 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 1638 { 1639 /* Uh, there's nothing we can sanity check here, unfortunately. */ 1640 return iselCondCode_wrk(env,e); 1641 } 1642 1643 /* DO NOT CALL THIS DIRECTLY ! */ 1644 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 1645 { 1646 MatchInfo mi; 1647 1648 vassert(e); 1649 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 1650 1651 /* var */ 1652 if (e->tag == Iex_RdTmp) { 1653 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 1654 /* Test32 doesn't modify r32; so this is OK. */ 1655 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32))); 1656 return Xcc_NZ; 1657 } 1658 1659 /* Constant 1:Bit */ 1660 if (e->tag == Iex_Const) { 1661 HReg r; 1662 vassert(e->Iex.Const.con->tag == Ico_U1); 1663 vassert(e->Iex.Const.con->Ico.U1 == True 1664 || e->Iex.Const.con->Ico.U1 == False); 1665 r = newVRegI(env); 1666 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r)); 1667 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r)); 1668 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ; 1669 } 1670 1671 /* Not1(e) */ 1672 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1673 /* Generate code for the arg, and negate the test condition */ 1674 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 1675 } 1676 1677 /* --- patterns rooted at: 32to1 --- */ 1678 1679 if (e->tag == Iex_Unop 1680 && e->Iex.Unop.op == Iop_32to1) { 1681 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1682 addInstr(env, X86Instr_Test32(1,rm)); 1683 return Xcc_NZ; 1684 } 1685 1686 /* --- patterns rooted at: CmpNEZ8 --- */ 1687 1688 /* CmpNEZ8(x) */ 1689 if (e->tag == Iex_Unop 1690 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1691 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1692 addInstr(env, X86Instr_Test32(0xFF,rm)); 1693 return Xcc_NZ; 1694 } 1695 1696 /* --- patterns rooted at: CmpNEZ16 --- */ 1697 1698 /* CmpNEZ16(x) */ 1699 if (e->tag == Iex_Unop 1700 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1701 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1702 addInstr(env, X86Instr_Test32(0xFFFF,rm)); 1703 return Xcc_NZ; 1704 } 1705 1706 /* --- patterns rooted at: CmpNEZ32 --- */ 1707 1708 /* CmpNEZ32(And32(x,y)) */ 1709 { 1710 DECLARE_PATTERN(p_CmpNEZ32_And32); 1711 DEFINE_PATTERN(p_CmpNEZ32_And32, 1712 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1)))); 1713 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) { 1714 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1715 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1716 HReg tmp = newVRegI(env); 1717 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1718 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp)); 1719 return Xcc_NZ; 1720 } 1721 } 1722 1723 /* CmpNEZ32(Or32(x,y)) */ 1724 { 1725 DECLARE_PATTERN(p_CmpNEZ32_Or32); 1726 DEFINE_PATTERN(p_CmpNEZ32_Or32, 1727 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1)))); 1728 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) { 1729 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1730 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1731 HReg tmp = newVRegI(env); 1732 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1733 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp)); 1734 return Xcc_NZ; 1735 } 1736 } 1737 1738 /* CmpNEZ32(GET(..):I32) */ 1739 if (e->tag == Iex_Unop 1740 && e->Iex.Unop.op == Iop_CmpNEZ32 1741 && e->Iex.Unop.arg->tag == Iex_Get) { 1742 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1743 hregX86_EBP()); 1744 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am)); 1745 return Xcc_NZ; 1746 } 1747 1748 /* CmpNEZ32(x) */ 1749 if (e->tag == Iex_Unop 1750 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1751 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1752 X86RMI* rmi2 = X86RMI_Imm(0); 1753 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1754 return Xcc_NZ; 1755 } 1756 1757 /* --- patterns rooted at: CmpNEZ64 --- */ 1758 1759 /* CmpNEZ64(Or64(x,y)) */ 1760 { 1761 DECLARE_PATTERN(p_CmpNEZ64_Or64); 1762 DEFINE_PATTERN(p_CmpNEZ64_Or64, 1763 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 1764 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 1765 HReg hi1, lo1, hi2, lo2; 1766 HReg tmp = newVRegI(env); 1767 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] ); 1768 addInstr(env, mk_iMOVsd_RR(hi1, tmp)); 1769 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp)); 1770 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] ); 1771 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp)); 1772 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp)); 1773 return Xcc_NZ; 1774 } 1775 } 1776 1777 /* CmpNEZ64(x) */ 1778 if (e->tag == Iex_Unop 1779 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1780 HReg hi, lo; 1781 HReg tmp = newVRegI(env); 1782 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg ); 1783 addInstr(env, mk_iMOVsd_RR(hi, tmp)); 1784 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp)); 1785 return Xcc_NZ; 1786 } 1787 1788 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */ 1789 1790 /* CmpEQ8 / CmpNE8 */ 1791 if (e->tag == Iex_Binop 1792 && (e->Iex.Binop.op == Iop_CmpEQ8 1793 || e->Iex.Binop.op == Iop_CmpNE8 1794 || e->Iex.Binop.op == Iop_CasCmpEQ8 1795 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 1796 if (isZeroU8(e->Iex.Binop.arg2)) { 1797 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1798 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1))); 1799 switch (e->Iex.Binop.op) { 1800 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1801 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1802 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)"); 1803 } 1804 } else { 1805 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1806 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1807 HReg r = newVRegI(env); 1808 addInstr(env, mk_iMOVsd_RR(r1,r)); 1809 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1810 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r))); 1811 switch (e->Iex.Binop.op) { 1812 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1813 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1814 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)"); 1815 } 1816 } 1817 } 1818 1819 /* CmpEQ16 / CmpNE16 */ 1820 if (e->tag == Iex_Binop 1821 && (e->Iex.Binop.op == Iop_CmpEQ16 1822 || e->Iex.Binop.op == Iop_CmpNE16 1823 || e->Iex.Binop.op == Iop_CasCmpEQ16 1824 || e->Iex.Binop.op == Iop_CasCmpNE16)) { 1825 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1826 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1827 HReg r = newVRegI(env); 1828 addInstr(env, mk_iMOVsd_RR(r1,r)); 1829 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1830 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r))); 1831 switch (e->Iex.Binop.op) { 1832 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z; 1833 case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ; 1834 default: vpanic("iselCondCode(x86): CmpXX16"); 1835 } 1836 } 1837 1838 /* Cmp*32*(x,y) */ 1839 if (e->tag == Iex_Binop 1840 && (e->Iex.Binop.op == Iop_CmpEQ32 1841 || e->Iex.Binop.op == Iop_CmpNE32 1842 || e->Iex.Binop.op == Iop_CmpLT32S 1843 || e->Iex.Binop.op == Iop_CmpLT32U 1844 || e->Iex.Binop.op == Iop_CmpLE32S 1845 || e->Iex.Binop.op == Iop_CmpLE32U 1846 || e->Iex.Binop.op == Iop_CasCmpEQ32 1847 || e->Iex.Binop.op == Iop_CasCmpNE32)) { 1848 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1849 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1850 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1851 switch (e->Iex.Binop.op) { 1852 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z; 1853 case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ; 1854 case Iop_CmpLT32S: return Xcc_L; 1855 case Iop_CmpLT32U: return Xcc_B; 1856 case Iop_CmpLE32S: return Xcc_LE; 1857 case Iop_CmpLE32U: return Xcc_BE; 1858 default: vpanic("iselCondCode(x86): CmpXX32"); 1859 } 1860 } 1861 1862 /* CmpNE64 */ 1863 if (e->tag == Iex_Binop 1864 && (e->Iex.Binop.op == Iop_CmpNE64 1865 || e->Iex.Binop.op == Iop_CmpEQ64)) { 1866 HReg hi1, hi2, lo1, lo2; 1867 HReg tHi = newVRegI(env); 1868 HReg tLo = newVRegI(env); 1869 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 ); 1870 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 ); 1871 addInstr(env, mk_iMOVsd_RR(hi1, tHi)); 1872 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi)); 1873 addInstr(env, mk_iMOVsd_RR(lo1, tLo)); 1874 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo)); 1875 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo)); 1876 switch (e->Iex.Binop.op) { 1877 case Iop_CmpNE64: return Xcc_NZ; 1878 case Iop_CmpEQ64: return Xcc_Z; 1879 default: vpanic("iselCondCode(x86): CmpXX64"); 1880 } 1881 } 1882 1883 ppIRExpr(e); 1884 vpanic("iselCondCode"); 1885 } 1886 1887 1888 /*---------------------------------------------------------*/ 1889 /*--- ISEL: Integer expressions (64 bit) ---*/ 1890 /*---------------------------------------------------------*/ 1891 1892 /* Compute a 64-bit value into a register pair, which is returned as 1893 the first two parameters. As with iselIntExpr_R, these may be 1894 either real or virtual regs; in any case they must not be changed 1895 by subsequent code emitted by the caller. */ 1896 1897 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1898 { 1899 iselInt64Expr_wrk(rHi, rLo, env, e); 1900 # if 0 1901 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 1902 # endif 1903 vassert(hregClass(*rHi) == HRcInt32); 1904 vassert(hregIsVirtual(*rHi)); 1905 vassert(hregClass(*rLo) == HRcInt32); 1906 vassert(hregIsVirtual(*rLo)); 1907 } 1908 1909 /* DO NOT CALL THIS DIRECTLY ! */ 1910 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 1911 { 1912 MatchInfo mi; 1913 HWord fn = 0; /* helper fn for most SIMD64 stuff */ 1914 vassert(e); 1915 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 1916 1917 /* 64-bit literal */ 1918 if (e->tag == Iex_Const) { 1919 ULong w64 = e->Iex.Const.con->Ico.U64; 1920 UInt wHi = toUInt(w64 >> 32); 1921 UInt wLo = toUInt(w64); 1922 HReg tLo = newVRegI(env); 1923 HReg tHi = newVRegI(env); 1924 vassert(e->Iex.Const.con->tag == Ico_U64); 1925 if (wLo == wHi) { 1926 /* Save a precious Int register in this special case. */ 1927 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1928 *rHi = tLo; 1929 *rLo = tLo; 1930 } else { 1931 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 1932 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1933 *rHi = tHi; 1934 *rLo = tLo; 1935 } 1936 return; 1937 } 1938 1939 /* read 64-bit IRTemp */ 1940 if (e->tag == Iex_RdTmp) { 1941 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 1942 return; 1943 } 1944 1945 /* 64-bit load */ 1946 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 1947 HReg tLo, tHi; 1948 X86AMode *am0, *am4; 1949 vassert(e->Iex.Load.ty == Ity_I64); 1950 tLo = newVRegI(env); 1951 tHi = newVRegI(env); 1952 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr); 1953 am4 = advance4(am0); 1954 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 1955 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1956 *rHi = tHi; 1957 *rLo = tLo; 1958 return; 1959 } 1960 1961 /* 64-bit GET */ 1962 if (e->tag == Iex_Get) { 1963 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 1964 X86AMode* am4 = advance4(am); 1965 HReg tLo = newVRegI(env); 1966 HReg tHi = newVRegI(env); 1967 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 1968 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1969 *rHi = tHi; 1970 *rLo = tLo; 1971 return; 1972 } 1973 1974 /* 64-bit GETI */ 1975 if (e->tag == Iex_GetI) { 1976 X86AMode* am 1977 = genGuestArrayOffset( env, e->Iex.GetI.descr, 1978 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1979 X86AMode* am4 = advance4(am); 1980 HReg tLo = newVRegI(env); 1981 HReg tHi = newVRegI(env); 1982 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 1983 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1984 *rHi = tHi; 1985 *rLo = tLo; 1986 return; 1987 } 1988 1989 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */ 1990 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) { 1991 X86RM* r8; 1992 HReg e0Lo, e0Hi; 1993 HReg tLo = newVRegI(env); 1994 HReg tHi = newVRegI(env); 1995 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1996 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 1997 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 1998 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 1999 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2000 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2001 addInstr(env, X86Instr_Test32(0xFF, r8)); 2002 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi)); 2003 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo)); 2004 add_to_esp(env, 4); 2005 *rHi = tHi; 2006 *rLo = tLo; 2007 return; 2008 } 2009 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */ 2010 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) { 2011 X86RM* r8; 2012 HReg e0Lo, e0Hi; 2013 HReg tLo = newVRegI(env); 2014 HReg tHi = newVRegI(env); 2015 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2016 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX); 2017 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2018 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 2019 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2020 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2021 addInstr(env, X86Instr_Test32(0xFF, r8)); 2022 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi)); 2023 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo)); 2024 add_to_esp(env, 4); 2025 *rHi = tHi; 2026 *rLo = tLo; 2027 return; 2028 } 2029 2030 /* 64-bit Mux0X: Mux0X(g, expr, expr) */ 2031 if (e->tag == Iex_Mux0X) { 2032 X86RM* r8; 2033 HReg e0Lo, e0Hi, eXLo, eXHi; 2034 HReg tLo = newVRegI(env); 2035 HReg tHi = newVRegI(env); 2036 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2037 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX); 2038 addInstr(env, mk_iMOVsd_RR(eXHi, tHi)); 2039 addInstr(env, mk_iMOVsd_RR(eXLo, tLo)); 2040 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2041 addInstr(env, X86Instr_Test32(0xFF, r8)); 2042 /* This assumes the first cmov32 doesn't trash the condition 2043 codes, so they are still available for the second cmov32 */ 2044 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi)); 2045 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo)); 2046 *rHi = tHi; 2047 *rLo = tLo; 2048 return; 2049 } 2050 2051 /* --------- BINARY ops --------- */ 2052 if (e->tag == Iex_Binop) { 2053 switch (e->Iex.Binop.op) { 2054 /* 32 x 32 -> 64 multiply */ 2055 case Iop_MullU32: 2056 case Iop_MullS32: { 2057 /* get one operand into %eax, and the other into a R/M. 2058 Need to make an educated guess about which is better in 2059 which. */ 2060 HReg tLo = newVRegI(env); 2061 HReg tHi = newVRegI(env); 2062 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); 2063 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2064 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2065 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX())); 2066 addInstr(env, X86Instr_MulL(syned, rmLeft)); 2067 /* Result is now in EDX:EAX. Tell the caller. */ 2068 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2069 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2070 *rHi = tHi; 2071 *rLo = tLo; 2072 return; 2073 } 2074 2075 /* 64 x 32 -> (32(rem),32(div)) division */ 2076 case Iop_DivModU64to32: 2077 case Iop_DivModS64to32: { 2078 /* Get the 64-bit operand into edx:eax, and the other into 2079 any old R/M. */ 2080 HReg sHi, sLo; 2081 HReg tLo = newVRegI(env); 2082 HReg tHi = newVRegI(env); 2083 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 2084 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2085 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2086 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX())); 2087 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX())); 2088 addInstr(env, X86Instr_Div(syned, rmRight)); 2089 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2090 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2091 *rHi = tHi; 2092 *rLo = tLo; 2093 return; 2094 } 2095 2096 /* Or64/And64/Xor64 */ 2097 case Iop_Or64: 2098 case Iop_And64: 2099 case Iop_Xor64: { 2100 HReg xLo, xHi, yLo, yHi; 2101 HReg tLo = newVRegI(env); 2102 HReg tHi = newVRegI(env); 2103 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2104 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND 2105 : Xalu_XOR; 2106 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2107 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2108 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2109 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2110 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2111 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2112 *rHi = tHi; 2113 *rLo = tLo; 2114 return; 2115 } 2116 2117 /* Add64/Sub64 */ 2118 case Iop_Add64: 2119 if (e->Iex.Binop.arg2->tag == Iex_Const) { 2120 /* special case Add64(e, const) */ 2121 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2122 UInt wHi = toUInt(w64 >> 32); 2123 UInt wLo = toUInt(w64); 2124 HReg tLo = newVRegI(env); 2125 HReg tHi = newVRegI(env); 2126 HReg xLo, xHi; 2127 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64); 2128 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2129 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2130 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2131 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo)); 2132 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi)); 2133 *rHi = tHi; 2134 *rLo = tLo; 2135 return; 2136 } 2137 /* else fall through to the generic case */ 2138 case Iop_Sub64: { 2139 HReg xLo, xHi, yLo, yHi; 2140 HReg tLo = newVRegI(env); 2141 HReg tHi = newVRegI(env); 2142 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2143 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2144 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2145 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2146 if (e->Iex.Binop.op==Iop_Add64) { 2147 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2148 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2149 } else { 2150 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2151 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2152 } 2153 *rHi = tHi; 2154 *rLo = tLo; 2155 return; 2156 } 2157 2158 /* 32HLto64(e1,e2) */ 2159 case Iop_32HLto64: 2160 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2161 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2162 return; 2163 2164 /* 64-bit shifts */ 2165 case Iop_Shl64: { 2166 /* We use the same ingenious scheme as gcc. Put the value 2167 to be shifted into %hi:%lo, and the shift amount into 2168 %cl. Then (dsts on right, a la ATT syntax): 2169 2170 shldl %cl, %lo, %hi -- make %hi be right for the 2171 -- shift amt %cl % 32 2172 shll %cl, %lo -- make %lo be right for the 2173 -- shift amt %cl % 32 2174 2175 Now, if (shift amount % 64) is in the range 32 .. 63, 2176 we have to do a fixup, which puts the result low half 2177 into the result high half, and zeroes the low half: 2178 2179 testl $32, %ecx 2180 2181 cmovnz %lo, %hi 2182 movl $0, %tmp -- sigh; need yet another reg 2183 cmovnz %tmp, %lo 2184 */ 2185 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2186 tLo = newVRegI(env); 2187 tHi = newVRegI(env); 2188 tTemp = newVRegI(env); 2189 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2190 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2191 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2192 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2193 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2194 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2195 and those regs are legitimately modifiable. */ 2196 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi)); 2197 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo)); 2198 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2199 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi)); 2200 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2201 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo)); 2202 *rHi = tHi; 2203 *rLo = tLo; 2204 return; 2205 } 2206 2207 case Iop_Shr64: { 2208 /* We use the same ingenious scheme as gcc. Put the value 2209 to be shifted into %hi:%lo, and the shift amount into 2210 %cl. Then: 2211 2212 shrdl %cl, %hi, %lo -- make %lo be right for the 2213 -- shift amt %cl % 32 2214 shrl %cl, %hi -- make %hi be right for the 2215 -- shift amt %cl % 32 2216 2217 Now, if (shift amount % 64) is in the range 32 .. 63, 2218 we have to do a fixup, which puts the result high half 2219 into the result low half, and zeroes the high half: 2220 2221 testl $32, %ecx 2222 2223 cmovnz %hi, %lo 2224 movl $0, %tmp -- sigh; need yet another reg 2225 cmovnz %tmp, %hi 2226 */ 2227 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2228 tLo = newVRegI(env); 2229 tHi = newVRegI(env); 2230 tTemp = newVRegI(env); 2231 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2232 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2233 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2234 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2235 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2236 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2237 and those regs are legitimately modifiable. */ 2238 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo)); 2239 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi)); 2240 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2241 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo)); 2242 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2243 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi)); 2244 *rHi = tHi; 2245 *rLo = tLo; 2246 return; 2247 } 2248 2249 /* F64 -> I64 */ 2250 /* Sigh, this is an almost exact copy of the F64 -> I32/I16 2251 case. Unfortunately I see no easy way to avoid the 2252 duplication. */ 2253 case Iop_F64toI64S: { 2254 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2255 HReg tLo = newVRegI(env); 2256 HReg tHi = newVRegI(env); 2257 2258 /* Used several times ... */ 2259 /* Careful ... this sharing is only safe because 2260 zero_esp/four_esp do not hold any registers which the 2261 register allocator could attempt to swizzle later. */ 2262 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2263 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2264 2265 /* rf now holds the value to be converted, and rrm holds 2266 the rounding mode value, encoded as per the 2267 IRRoundingMode enum. The first thing to do is set the 2268 FPU's rounding mode accordingly. */ 2269 2270 /* Create a space for the format conversion. */ 2271 /* subl $8, %esp */ 2272 sub_from_esp(env, 8); 2273 2274 /* Set host rounding mode */ 2275 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2276 2277 /* gistll %rf, 0(%esp) */ 2278 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp)); 2279 2280 /* movl 0(%esp), %dstLo */ 2281 /* movl 4(%esp), %dstHi */ 2282 addInstr(env, X86Instr_Alu32R( 2283 Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2284 addInstr(env, X86Instr_Alu32R( 2285 Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2286 2287 /* Restore default FPU rounding. */ 2288 set_FPU_rounding_default( env ); 2289 2290 /* addl $8, %esp */ 2291 add_to_esp(env, 8); 2292 2293 *rHi = tHi; 2294 *rLo = tLo; 2295 return; 2296 } 2297 2298 case Iop_Add8x8: 2299 fn = (HWord)h_generic_calc_Add8x8; goto binnish; 2300 case Iop_Add16x4: 2301 fn = (HWord)h_generic_calc_Add16x4; goto binnish; 2302 case Iop_Add32x2: 2303 fn = (HWord)h_generic_calc_Add32x2; goto binnish; 2304 2305 case Iop_Avg8Ux8: 2306 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish; 2307 case Iop_Avg16Ux4: 2308 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish; 2309 2310 case Iop_CmpEQ8x8: 2311 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish; 2312 case Iop_CmpEQ16x4: 2313 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish; 2314 case Iop_CmpEQ32x2: 2315 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish; 2316 2317 case Iop_CmpGT8Sx8: 2318 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish; 2319 case Iop_CmpGT16Sx4: 2320 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish; 2321 case Iop_CmpGT32Sx2: 2322 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish; 2323 2324 case Iop_InterleaveHI8x8: 2325 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish; 2326 case Iop_InterleaveLO8x8: 2327 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish; 2328 case Iop_InterleaveHI16x4: 2329 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish; 2330 case Iop_InterleaveLO16x4: 2331 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish; 2332 case Iop_InterleaveHI32x2: 2333 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; 2334 case Iop_InterleaveLO32x2: 2335 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; 2336 case Iop_CatOddLanes16x4: 2337 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; 2338 case Iop_CatEvenLanes16x4: 2339 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; 2340 case Iop_Perm8x8: 2341 fn = (HWord)h_generic_calc_Perm8x8; goto binnish; 2342 2343 case Iop_Max8Ux8: 2344 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; 2345 case Iop_Max16Sx4: 2346 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish; 2347 case Iop_Min8Ux8: 2348 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish; 2349 case Iop_Min16Sx4: 2350 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish; 2351 2352 case Iop_Mul16x4: 2353 fn = (HWord)h_generic_calc_Mul16x4; goto binnish; 2354 case Iop_Mul32x2: 2355 fn = (HWord)h_generic_calc_Mul32x2; goto binnish; 2356 case Iop_MulHi16Sx4: 2357 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; 2358 case Iop_MulHi16Ux4: 2359 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish; 2360 2361 case Iop_QAdd8Sx8: 2362 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish; 2363 case Iop_QAdd16Sx4: 2364 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish; 2365 case Iop_QAdd8Ux8: 2366 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish; 2367 case Iop_QAdd16Ux4: 2368 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; 2369 2370 case Iop_QNarrow32Sx2: 2371 fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish; 2372 case Iop_QNarrow16Sx4: 2373 fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish; 2374 case Iop_QNarrow16Ux4: 2375 fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish; 2376 2377 case Iop_QSub8Sx8: 2378 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; 2379 case Iop_QSub16Sx4: 2380 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish; 2381 case Iop_QSub8Ux8: 2382 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish; 2383 case Iop_QSub16Ux4: 2384 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish; 2385 2386 case Iop_Sub8x8: 2387 fn = (HWord)h_generic_calc_Sub8x8; goto binnish; 2388 case Iop_Sub16x4: 2389 fn = (HWord)h_generic_calc_Sub16x4; goto binnish; 2390 case Iop_Sub32x2: 2391 fn = (HWord)h_generic_calc_Sub32x2; goto binnish; 2392 2393 binnish: { 2394 /* Note: the following assumes all helpers are of 2395 signature 2396 ULong fn ( ULong, ULong ), and they are 2397 not marked as regparm functions. 2398 */ 2399 HReg xLo, xHi, yLo, yHi; 2400 HReg tLo = newVRegI(env); 2401 HReg tHi = newVRegI(env); 2402 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2403 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi))); 2404 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo))); 2405 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2406 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2407 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2408 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2409 add_to_esp(env, 4*4); 2410 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2411 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2412 *rHi = tHi; 2413 *rLo = tLo; 2414 return; 2415 } 2416 2417 case Iop_ShlN32x2: 2418 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; 2419 case Iop_ShlN16x4: 2420 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; 2421 case Iop_ShlN8x8: 2422 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; 2423 case Iop_ShrN32x2: 2424 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; 2425 case Iop_ShrN16x4: 2426 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty; 2427 case Iop_SarN32x2: 2428 fn = (HWord)h_generic_calc_SarN32x2; goto shifty; 2429 case Iop_SarN16x4: 2430 fn = (HWord)h_generic_calc_SarN16x4; goto shifty; 2431 case Iop_SarN8x8: 2432 fn = (HWord)h_generic_calc_SarN8x8; goto shifty; 2433 shifty: { 2434 /* Note: the following assumes all helpers are of 2435 signature 2436 ULong fn ( ULong, UInt ), and they are 2437 not marked as regparm functions. 2438 */ 2439 HReg xLo, xHi; 2440 HReg tLo = newVRegI(env); 2441 HReg tHi = newVRegI(env); 2442 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2443 addInstr(env, X86Instr_Push(y)); 2444 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2445 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2446 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2447 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2448 add_to_esp(env, 3*4); 2449 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2450 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2451 *rHi = tHi; 2452 *rLo = tLo; 2453 return; 2454 } 2455 2456 default: 2457 break; 2458 } 2459 } /* if (e->tag == Iex_Binop) */ 2460 2461 2462 /* --------- UNARY ops --------- */ 2463 if (e->tag == Iex_Unop) { 2464 switch (e->Iex.Unop.op) { 2465 2466 /* 32Sto64(e) */ 2467 case Iop_32Sto64: { 2468 HReg tLo = newVRegI(env); 2469 HReg tHi = newVRegI(env); 2470 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2471 addInstr(env, mk_iMOVsd_RR(src,tHi)); 2472 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2473 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi)); 2474 *rHi = tHi; 2475 *rLo = tLo; 2476 return; 2477 } 2478 2479 /* 32Uto64(e) */ 2480 case Iop_32Uto64: { 2481 HReg tLo = newVRegI(env); 2482 HReg tHi = newVRegI(env); 2483 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2484 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2485 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2486 *rHi = tHi; 2487 *rLo = tLo; 2488 return; 2489 } 2490 2491 /* 16Uto64(e) */ 2492 case Iop_16Uto64: { 2493 HReg tLo = newVRegI(env); 2494 HReg tHi = newVRegI(env); 2495 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2496 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2497 addInstr(env, X86Instr_Alu32R(Xalu_AND, 2498 X86RMI_Imm(0xFFFF), tLo)); 2499 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2500 *rHi = tHi; 2501 *rLo = tLo; 2502 return; 2503 } 2504 2505 /* V128{HI}to64 */ 2506 case Iop_V128HIto64: 2507 case Iop_V128to64: { 2508 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0; 2509 HReg tLo = newVRegI(env); 2510 HReg tHi = newVRegI(env); 2511 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 2512 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 2513 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP()); 2514 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP()); 2515 sub_from_esp(env, 16); 2516 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 2517 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2518 X86RMI_Mem(espLO), tLo )); 2519 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2520 X86RMI_Mem(espHI), tHi )); 2521 add_to_esp(env, 16); 2522 *rHi = tHi; 2523 *rLo = tLo; 2524 return; 2525 } 2526 2527 /* could do better than this, but for now ... */ 2528 case Iop_1Sto64: { 2529 HReg tLo = newVRegI(env); 2530 HReg tHi = newVRegI(env); 2531 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2532 addInstr(env, X86Instr_Set32(cond,tLo)); 2533 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo)); 2534 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo)); 2535 addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2536 *rHi = tHi; 2537 *rLo = tLo; 2538 return; 2539 } 2540 2541 /* Not64(e) */ 2542 case Iop_Not64: { 2543 HReg tLo = newVRegI(env); 2544 HReg tHi = newVRegI(env); 2545 HReg sHi, sLo; 2546 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2547 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2548 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2549 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi)); 2550 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo)); 2551 *rHi = tHi; 2552 *rLo = tLo; 2553 return; 2554 } 2555 2556 /* Left64(e) */ 2557 case Iop_Left64: { 2558 HReg yLo, yHi; 2559 HReg tLo = newVRegI(env); 2560 HReg tHi = newVRegI(env); 2561 /* yHi:yLo = arg */ 2562 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2563 /* tLo = 0 - yLo, and set carry */ 2564 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo)); 2565 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2566 /* tHi = 0 - yHi - carry */ 2567 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2568 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2569 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2570 back in, so as to give the final result 2571 tHi:tLo = arg | -arg. */ 2572 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); 2573 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); 2574 *rHi = tHi; 2575 *rLo = tLo; 2576 return; 2577 } 2578 2579 /* --- patterns rooted at: CmpwNEZ64 --- */ 2580 2581 /* CmpwNEZ64(e) */ 2582 case Iop_CmpwNEZ64: { 2583 2584 DECLARE_PATTERN(p_CmpwNEZ64_Or64); 2585 DEFINE_PATTERN(p_CmpwNEZ64_Or64, 2586 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); 2587 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { 2588 /* CmpwNEZ64(Or64(x,y)) */ 2589 HReg xHi,xLo,yHi,yLo; 2590 HReg xBoth = newVRegI(env); 2591 HReg merged = newVRegI(env); 2592 HReg tmp2 = newVRegI(env); 2593 2594 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); 2595 addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); 2596 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2597 X86RMI_Reg(xLo),xBoth)); 2598 2599 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); 2600 addInstr(env, mk_iMOVsd_RR(yHi,merged)); 2601 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2602 X86RMI_Reg(yLo),merged)); 2603 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2604 X86RMI_Reg(xBoth),merged)); 2605 2606 /* tmp2 = (merged | -merged) >>s 31 */ 2607 addInstr(env, mk_iMOVsd_RR(merged,tmp2)); 2608 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2609 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2610 X86RMI_Reg(merged), tmp2)); 2611 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2612 *rHi = tmp2; 2613 *rLo = tmp2; 2614 return; 2615 } else { 2616 /* CmpwNEZ64(e) */ 2617 HReg srcLo, srcHi; 2618 HReg tmp1 = newVRegI(env); 2619 HReg tmp2 = newVRegI(env); 2620 /* srcHi:srcLo = arg */ 2621 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2622 /* tmp1 = srcHi | srcLo */ 2623 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); 2624 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2625 X86RMI_Reg(srcLo), tmp1)); 2626 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2627 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); 2628 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2629 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2630 X86RMI_Reg(tmp1), tmp2)); 2631 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2632 *rHi = tmp2; 2633 *rLo = tmp2; 2634 return; 2635 } 2636 } 2637 2638 /* ReinterpF64asI64(e) */ 2639 /* Given an IEEE754 double, produce an I64 with the same bit 2640 pattern. */ 2641 case Iop_ReinterpF64asI64: { 2642 HReg rf = iselDblExpr(env, e->Iex.Unop.arg); 2643 HReg tLo = newVRegI(env); 2644 HReg tHi = newVRegI(env); 2645 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2646 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2647 /* paranoia */ 2648 set_FPU_rounding_default(env); 2649 /* subl $8, %esp */ 2650 sub_from_esp(env, 8); 2651 /* gstD %rf, 0(%esp) */ 2652 addInstr(env, 2653 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp)); 2654 /* movl 0(%esp), %tLo */ 2655 addInstr(env, 2656 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2657 /* movl 4(%esp), %tHi */ 2658 addInstr(env, 2659 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2660 /* addl $8, %esp */ 2661 add_to_esp(env, 8); 2662 *rHi = tHi; 2663 *rLo = tLo; 2664 return; 2665 } 2666 2667 case Iop_CmpNEZ32x2: 2668 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish; 2669 case Iop_CmpNEZ16x4: 2670 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish; 2671 case Iop_CmpNEZ8x8: 2672 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish; 2673 unish: { 2674 /* Note: the following assumes all helpers are of 2675 signature 2676 ULong fn ( ULong ), and they are 2677 not marked as regparm functions. 2678 */ 2679 HReg xLo, xHi; 2680 HReg tLo = newVRegI(env); 2681 HReg tHi = newVRegI(env); 2682 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 2683 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2684 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2685 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2686 add_to_esp(env, 2*4); 2687 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2688 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2689 *rHi = tHi; 2690 *rLo = tLo; 2691 return; 2692 } 2693 2694 default: 2695 break; 2696 } 2697 } /* if (e->tag == Iex_Unop) */ 2698 2699 2700 /* --------- CCALL --------- */ 2701 if (e->tag == Iex_CCall) { 2702 HReg tLo = newVRegI(env); 2703 HReg tHi = newVRegI(env); 2704 2705 /* Marshal args, do the call, clear stack. */ 2706 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 2707 2708 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2709 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2710 *rHi = tHi; 2711 *rLo = tLo; 2712 return; 2713 } 2714 2715 ppIRExpr(e); 2716 vpanic("iselInt64Expr"); 2717 } 2718 2719 2720 /*---------------------------------------------------------*/ 2721 /*--- ISEL: Floating point expressions (32 bit) ---*/ 2722 /*---------------------------------------------------------*/ 2723 2724 /* Nothing interesting here; really just wrappers for 2725 64-bit stuff. */ 2726 2727 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2728 { 2729 HReg r = iselFltExpr_wrk( env, e ); 2730 # if 0 2731 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2732 # endif 2733 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */ 2734 vassert(hregIsVirtual(r)); 2735 return r; 2736 } 2737 2738 /* DO NOT CALL THIS DIRECTLY */ 2739 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2740 { 2741 IRType ty = typeOfIRExpr(env->type_env,e); 2742 vassert(ty == Ity_F32); 2743 2744 if (e->tag == Iex_RdTmp) { 2745 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2746 } 2747 2748 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2749 X86AMode* am; 2750 HReg res = newVRegF(env); 2751 vassert(e->Iex.Load.ty == Ity_F32); 2752 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2753 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am)); 2754 return res; 2755 } 2756 2757 if (e->tag == Iex_Binop 2758 && e->Iex.Binop.op == Iop_F64toF32) { 2759 /* Although the result is still held in a standard FPU register, 2760 we need to round it to reflect the loss of accuracy/range 2761 entailed in casting it to a 32-bit float. */ 2762 HReg dst = newVRegF(env); 2763 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2764 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2765 addInstr(env, X86Instr_Fp64to32(src,dst)); 2766 set_FPU_rounding_default( env ); 2767 return dst; 2768 } 2769 2770 if (e->tag == Iex_Get) { 2771 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2772 hregX86_EBP() ); 2773 HReg res = newVRegF(env); 2774 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am )); 2775 return res; 2776 } 2777 2778 if (e->tag == Iex_Unop 2779 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2780 /* Given an I32, produce an IEEE754 float with the same bit 2781 pattern. */ 2782 HReg dst = newVRegF(env); 2783 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 2784 /* paranoia */ 2785 addInstr(env, X86Instr_Push(rmi)); 2786 addInstr(env, X86Instr_FpLdSt( 2787 True/*load*/, 4, dst, 2788 X86AMode_IR(0, hregX86_ESP()))); 2789 add_to_esp(env, 4); 2790 return dst; 2791 } 2792 2793 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2794 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2); 2795 HReg dst = newVRegF(env); 2796 2797 /* rf now holds the value to be rounded. The first thing to do 2798 is set the FPU's rounding mode accordingly. */ 2799 2800 /* Set host rounding mode */ 2801 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2802 2803 /* grndint %rf, %dst */ 2804 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2805 2806 /* Restore default FPU rounding. */ 2807 set_FPU_rounding_default( env ); 2808 2809 return dst; 2810 } 2811 2812 ppIRExpr(e); 2813 vpanic("iselFltExpr_wrk"); 2814 } 2815 2816 2817 /*---------------------------------------------------------*/ 2818 /*--- ISEL: Floating point expressions (64 bit) ---*/ 2819 /*---------------------------------------------------------*/ 2820 2821 /* Compute a 64-bit floating point value into a register, the identity 2822 of which is returned. As with iselIntExpr_R, the reg may be either 2823 real or virtual; in any case it must not be changed by subsequent 2824 code emitted by the caller. */ 2825 2826 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2827 2828 Type S (1 bit) E (11 bits) F (52 bits) 2829 ---- --------- ----------- ----------- 2830 signalling NaN u 2047 (max) .0uuuuu---u 2831 (with at least 2832 one 1 bit) 2833 quiet NaN u 2047 (max) .1uuuuu---u 2834 2835 negative infinity 1 2047 (max) .000000---0 2836 2837 positive infinity 0 2047 (max) .000000---0 2838 2839 negative zero 1 0 .000000---0 2840 2841 positive zero 0 0 .000000---0 2842 */ 2843 2844 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2845 { 2846 HReg r = iselDblExpr_wrk( env, e ); 2847 # if 0 2848 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2849 # endif 2850 vassert(hregClass(r) == HRcFlt64); 2851 vassert(hregIsVirtual(r)); 2852 return r; 2853 } 2854 2855 /* DO NOT CALL THIS DIRECTLY */ 2856 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 2857 { 2858 IRType ty = typeOfIRExpr(env->type_env,e); 2859 vassert(e); 2860 vassert(ty == Ity_F64); 2861 2862 if (e->tag == Iex_RdTmp) { 2863 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2864 } 2865 2866 if (e->tag == Iex_Const) { 2867 union { UInt u32x2[2]; ULong u64; Double f64; } u; 2868 HReg freg = newVRegF(env); 2869 vassert(sizeof(u) == 8); 2870 vassert(sizeof(u.u64) == 8); 2871 vassert(sizeof(u.f64) == 8); 2872 vassert(sizeof(u.u32x2) == 8); 2873 2874 if (e->Iex.Const.con->tag == Ico_F64) { 2875 u.f64 = e->Iex.Const.con->Ico.F64; 2876 } 2877 else if (e->Iex.Const.con->tag == Ico_F64i) { 2878 u.u64 = e->Iex.Const.con->Ico.F64i; 2879 } 2880 else 2881 vpanic("iselDblExpr(x86): const"); 2882 2883 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1]))); 2884 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0]))); 2885 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg, 2886 X86AMode_IR(0, hregX86_ESP()))); 2887 add_to_esp(env, 8); 2888 return freg; 2889 } 2890 2891 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2892 X86AMode* am; 2893 HReg res = newVRegF(env); 2894 vassert(e->Iex.Load.ty == Ity_F64); 2895 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2896 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am)); 2897 return res; 2898 } 2899 2900 if (e->tag == Iex_Get) { 2901 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2902 hregX86_EBP() ); 2903 HReg res = newVRegF(env); 2904 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 2905 return res; 2906 } 2907 2908 if (e->tag == Iex_GetI) { 2909 X86AMode* am 2910 = genGuestArrayOffset( 2911 env, e->Iex.GetI.descr, 2912 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2913 HReg res = newVRegF(env); 2914 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 2915 return res; 2916 } 2917 2918 if (e->tag == Iex_Triop) { 2919 X86FpOp fpop = Xfp_INVALID; 2920 switch (e->Iex.Triop.op) { 2921 case Iop_AddF64: fpop = Xfp_ADD; break; 2922 case Iop_SubF64: fpop = Xfp_SUB; break; 2923 case Iop_MulF64: fpop = Xfp_MUL; break; 2924 case Iop_DivF64: fpop = Xfp_DIV; break; 2925 case Iop_ScaleF64: fpop = Xfp_SCALE; break; 2926 case Iop_Yl2xF64: fpop = Xfp_YL2X; break; 2927 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break; 2928 case Iop_AtanF64: fpop = Xfp_ATAN; break; 2929 case Iop_PRemF64: fpop = Xfp_PREM; break; 2930 case Iop_PRem1F64: fpop = Xfp_PREM1; break; 2931 default: break; 2932 } 2933 if (fpop != Xfp_INVALID) { 2934 HReg res = newVRegF(env); 2935 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2); 2936 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3); 2937 /* XXXROUNDINGFIXME */ 2938 /* set roundingmode here */ 2939 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res)); 2940 if (fpop != Xfp_ADD && fpop != Xfp_SUB 2941 && fpop != Xfp_MUL && fpop != Xfp_DIV) 2942 roundToF64(env, res); 2943 return res; 2944 } 2945 } 2946 2947 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 2948 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2949 HReg dst = newVRegF(env); 2950 2951 /* rf now holds the value to be rounded. The first thing to do 2952 is set the FPU's rounding mode accordingly. */ 2953 2954 /* Set host rounding mode */ 2955 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2956 2957 /* grndint %rf, %dst */ 2958 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2959 2960 /* Restore default FPU rounding. */ 2961 set_FPU_rounding_default( env ); 2962 2963 return dst; 2964 } 2965 2966 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 2967 HReg dst = newVRegF(env); 2968 HReg rHi,rLo; 2969 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2); 2970 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 2971 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 2972 2973 /* Set host rounding mode */ 2974 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2975 2976 addInstr(env, X86Instr_FpLdStI( 2977 True/*load*/, 8, dst, 2978 X86AMode_IR(0, hregX86_ESP()))); 2979 2980 /* Restore default FPU rounding. */ 2981 set_FPU_rounding_default( env ); 2982 2983 add_to_esp(env, 8); 2984 return dst; 2985 } 2986 2987 if (e->tag == Iex_Binop) { 2988 X86FpOp fpop = Xfp_INVALID; 2989 switch (e->Iex.Binop.op) { 2990 case Iop_SinF64: fpop = Xfp_SIN; break; 2991 case Iop_CosF64: fpop = Xfp_COS; break; 2992 case Iop_TanF64: fpop = Xfp_TAN; break; 2993 case Iop_2xm1F64: fpop = Xfp_2XM1; break; 2994 case Iop_SqrtF64: fpop = Xfp_SQRT; break; 2995 default: break; 2996 } 2997 if (fpop != Xfp_INVALID) { 2998 HReg res = newVRegF(env); 2999 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 3000 /* XXXROUNDINGFIXME */ 3001 /* set roundingmode here */ 3002 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3003 if (fpop != Xfp_SQRT 3004 && fpop != Xfp_NEG && fpop != Xfp_ABS) 3005 roundToF64(env, res); 3006 return res; 3007 } 3008 } 3009 3010 if (e->tag == Iex_Unop) { 3011 X86FpOp fpop = Xfp_INVALID; 3012 switch (e->Iex.Unop.op) { 3013 case Iop_NegF64: fpop = Xfp_NEG; break; 3014 case Iop_AbsF64: fpop = Xfp_ABS; break; 3015 default: break; 3016 } 3017 if (fpop != Xfp_INVALID) { 3018 HReg res = newVRegF(env); 3019 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3020 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3021 if (fpop != Xfp_NEG && fpop != Xfp_ABS) 3022 roundToF64(env, res); 3023 return res; 3024 } 3025 } 3026 3027 if (e->tag == Iex_Unop) { 3028 switch (e->Iex.Unop.op) { 3029 case Iop_I32StoF64: { 3030 HReg dst = newVRegF(env); 3031 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3032 addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3033 set_FPU_rounding_default(env); 3034 addInstr(env, X86Instr_FpLdStI( 3035 True/*load*/, 4, dst, 3036 X86AMode_IR(0, hregX86_ESP()))); 3037 add_to_esp(env, 4); 3038 return dst; 3039 } 3040 case Iop_ReinterpI64asF64: { 3041 /* Given an I64, produce an IEEE754 double with the same 3042 bit pattern. */ 3043 HReg dst = newVRegF(env); 3044 HReg rHi, rLo; 3045 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg); 3046 /* paranoia */ 3047 set_FPU_rounding_default(env); 3048 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3049 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3050 addInstr(env, X86Instr_FpLdSt( 3051 True/*load*/, 8, dst, 3052 X86AMode_IR(0, hregX86_ESP()))); 3053 add_to_esp(env, 8); 3054 return dst; 3055 } 3056 case Iop_F32toF64: { 3057 /* this is a no-op */ 3058 HReg res = iselFltExpr(env, e->Iex.Unop.arg); 3059 return res; 3060 } 3061 default: 3062 break; 3063 } 3064 } 3065 3066 /* --------- MULTIPLEX --------- */ 3067 if (e->tag == Iex_Mux0X) { 3068 if (ty == Ity_F64 3069 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 3070 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 3071 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX); 3072 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); 3073 HReg dst = newVRegF(env); 3074 addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst)); 3075 addInstr(env, X86Instr_Test32(0xFF, r8)); 3076 addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst)); 3077 return dst; 3078 } 3079 } 3080 3081 ppIRExpr(e); 3082 vpanic("iselDblExpr_wrk"); 3083 } 3084 3085 3086 /*---------------------------------------------------------*/ 3087 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3088 /*---------------------------------------------------------*/ 3089 3090 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3091 { 3092 HReg r = iselVecExpr_wrk( env, e ); 3093 # if 0 3094 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3095 # endif 3096 vassert(hregClass(r) == HRcVec128); 3097 vassert(hregIsVirtual(r)); 3098 return r; 3099 } 3100 3101 3102 /* DO NOT CALL THIS DIRECTLY */ 3103 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3104 { 3105 3106 # define REQUIRE_SSE1 \ 3107 do { if (env->hwcaps == 0/*baseline, no sse*/) \ 3108 goto vec_fail; \ 3109 } while (0) 3110 3111 # define REQUIRE_SSE2 \ 3112 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ 3113 goto vec_fail; \ 3114 } while (0) 3115 3116 # define SSE2_OR_ABOVE \ 3117 (env->hwcaps & VEX_HWCAPS_X86_SSE2) 3118 3119 MatchInfo mi; 3120 Bool arg1isEReg = False; 3121 X86SseOp op = Xsse_INVALID; 3122 IRType ty = typeOfIRExpr(env->type_env,e); 3123 vassert(e); 3124 vassert(ty == Ity_V128); 3125 3126 REQUIRE_SSE1; 3127 3128 if (e->tag == Iex_RdTmp) { 3129 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3130 } 3131 3132 if (e->tag == Iex_Get) { 3133 HReg dst = newVRegV(env); 3134 addInstr(env, X86Instr_SseLdSt( 3135 True/*load*/, 3136 dst, 3137 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()) 3138 ) 3139 ); 3140 return dst; 3141 } 3142 3143 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3144 HReg dst = newVRegV(env); 3145 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3146 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 3147 return dst; 3148 } 3149 3150 if (e->tag == Iex_Const) { 3151 HReg dst = newVRegV(env); 3152 vassert(e->Iex.Const.con->tag == Ico_V128); 3153 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst)); 3154 return dst; 3155 } 3156 3157 if (e->tag == Iex_Unop) { 3158 3159 if (SSE2_OR_ABOVE) { 3160 /* 64UtoV128(LDle:I64(addr)) */ 3161 DECLARE_PATTERN(p_zwiden_load64); 3162 DEFINE_PATTERN(p_zwiden_load64, 3163 unop(Iop_64UtoV128, 3164 IRExpr_Load(Iend_LE,Ity_I64,bind(0)))); 3165 if (matchIRExpr(&mi, p_zwiden_load64, e)) { 3166 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]); 3167 HReg dst = newVRegV(env); 3168 addInstr(env, X86Instr_SseLdzLO(8, dst, am)); 3169 return dst; 3170 } 3171 } 3172 3173 switch (e->Iex.Unop.op) { 3174 3175 case Iop_NotV128: { 3176 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3177 return do_sse_Not128(env, arg); 3178 } 3179 3180 case Iop_CmpNEZ64x2: { 3181 /* We can use SSE2 instructions for this. */ 3182 /* Ideally, we want to do a 64Ix2 comparison against zero of 3183 the operand. Problem is no such insn exists. Solution 3184 therefore is to do a 32Ix4 comparison instead, and bitwise- 3185 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3186 let the not'd result of this initial comparison be a:b:c:d. 3187 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3188 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3189 giving the required result. 3190 3191 The required selection sequence is 2,3,0,1, which 3192 according to Intel's documentation means the pshufd 3193 literal value is 0xB1, that is, 3194 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3195 */ 3196 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3197 HReg tmp = newVRegV(env); 3198 HReg dst = newVRegV(env); 3199 REQUIRE_SSE2; 3200 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp)); 3201 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp)); 3202 tmp = do_sse_Not128(env, tmp); 3203 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst)); 3204 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst)); 3205 return dst; 3206 } 3207 3208 case Iop_CmpNEZ32x4: { 3209 /* Sigh, we have to generate lousy code since this has to 3210 work on SSE1 hosts */ 3211 /* basically, the idea is: for each lane: 3212 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1) 3213 sbbl %r, %r (now %r = 1Sto32(CF)) 3214 movl %r, lane 3215 */ 3216 Int i; 3217 X86AMode* am; 3218 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3219 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3220 HReg dst = newVRegV(env); 3221 HReg r32 = newVRegI(env); 3222 sub_from_esp(env, 16); 3223 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0)); 3224 for (i = 0; i < 4; i++) { 3225 am = X86AMode_IR(i*4, hregX86_ESP()); 3226 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32)); 3227 addInstr(env, X86Instr_Unary32(Xun_NEG, r32)); 3228 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32)); 3229 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am)); 3230 } 3231 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3232 add_to_esp(env, 16); 3233 return dst; 3234 } 3235 3236 case Iop_CmpNEZ8x16: 3237 case Iop_CmpNEZ16x8: { 3238 /* We can use SSE2 instructions for this. */ 3239 HReg arg; 3240 HReg vec0 = newVRegV(env); 3241 HReg vec1 = newVRegV(env); 3242 HReg dst = newVRegV(env); 3243 X86SseOp cmpOp 3244 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16 3245 : Xsse_CMPEQ8; 3246 REQUIRE_SSE2; 3247 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0)); 3248 addInstr(env, mk_vMOVsd_RR(vec0, vec1)); 3249 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1)); 3250 /* defer arg computation to here so as to give CMPEQF as long 3251 as possible to complete */ 3252 arg = iselVecExpr(env, e->Iex.Unop.arg); 3253 /* vec0 is all 0s; vec1 is all 1s */ 3254 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3255 /* 16x8 or 8x16 comparison == */ 3256 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst)); 3257 /* invert result */ 3258 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst)); 3259 return dst; 3260 } 3261 3262 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary; 3263 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary; 3264 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary; 3265 do_32Fx4_unary: 3266 { 3267 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3268 HReg dst = newVRegV(env); 3269 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst)); 3270 return dst; 3271 } 3272 3273 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary; 3274 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary; 3275 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary; 3276 do_64Fx2_unary: 3277 { 3278 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3279 HReg dst = newVRegV(env); 3280 REQUIRE_SSE2; 3281 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst)); 3282 return dst; 3283 } 3284 3285 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary; 3286 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary; 3287 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary; 3288 do_32F0x4_unary: 3289 { 3290 /* A bit subtle. We have to copy the arg to the result 3291 register first, because actually doing the SSE scalar insn 3292 leaves the upper 3/4 of the destination register 3293 unchanged. Whereas the required semantics of these 3294 primops is that the upper 3/4 is simply copied in from the 3295 argument. */ 3296 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3297 HReg dst = newVRegV(env); 3298 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3299 addInstr(env, X86Instr_Sse32FLo(op, arg, dst)); 3300 return dst; 3301 } 3302 3303 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary; 3304 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary; 3305 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary; 3306 do_64F0x2_unary: 3307 { 3308 /* A bit subtle. We have to copy the arg to the result 3309 register first, because actually doing the SSE scalar insn 3310 leaves the upper half of the destination register 3311 unchanged. Whereas the required semantics of these 3312 primops is that the upper half is simply copied in from the 3313 argument. */ 3314 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3315 HReg dst = newVRegV(env); 3316 REQUIRE_SSE2; 3317 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3318 addInstr(env, X86Instr_Sse64FLo(op, arg, dst)); 3319 return dst; 3320 } 3321 3322 case Iop_32UtoV128: { 3323 HReg dst = newVRegV(env); 3324 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3325 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3326 addInstr(env, X86Instr_Push(rmi)); 3327 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0)); 3328 add_to_esp(env, 4); 3329 return dst; 3330 } 3331 3332 case Iop_64UtoV128: { 3333 HReg rHi, rLo; 3334 HReg dst = newVRegV(env); 3335 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3336 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); 3337 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3338 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3339 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0)); 3340 add_to_esp(env, 8); 3341 return dst; 3342 } 3343 3344 default: 3345 break; 3346 } /* switch (e->Iex.Unop.op) */ 3347 } /* if (e->tag == Iex_Unop) */ 3348 3349 if (e->tag == Iex_Binop) { 3350 switch (e->Iex.Binop.op) { 3351 3352 case Iop_SetV128lo32: { 3353 HReg dst = newVRegV(env); 3354 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3355 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3356 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3357 sub_from_esp(env, 16); 3358 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3359 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0)); 3360 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3361 add_to_esp(env, 16); 3362 return dst; 3363 } 3364 3365 case Iop_SetV128lo64: { 3366 HReg dst = newVRegV(env); 3367 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3368 HReg srcIhi, srcIlo; 3369 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3370 X86AMode* esp4 = advance4(esp0); 3371 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2); 3372 sub_from_esp(env, 16); 3373 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3374 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0)); 3375 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4)); 3376 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3377 add_to_esp(env, 16); 3378 return dst; 3379 } 3380 3381 case Iop_64HLtoV128: { 3382 HReg r3, r2, r1, r0; 3383 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3384 X86AMode* esp4 = advance4(esp0); 3385 X86AMode* esp8 = advance4(esp4); 3386 X86AMode* esp12 = advance4(esp8); 3387 HReg dst = newVRegV(env); 3388 /* do this via the stack (easy, convenient, etc) */ 3389 sub_from_esp(env, 16); 3390 /* Do the less significant 64 bits */ 3391 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2); 3392 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0)); 3393 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4)); 3394 /* Do the more significant 64 bits */ 3395 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1); 3396 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8)); 3397 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12)); 3398 /* Fetch result back from stack. */ 3399 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3400 add_to_esp(env, 16); 3401 return dst; 3402 } 3403 3404 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4; 3405 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4; 3406 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4; 3407 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4; 3408 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4; 3409 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4; 3410 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4; 3411 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4; 3412 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4; 3413 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4; 3414 do_32Fx4: 3415 { 3416 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3417 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3418 HReg dst = newVRegV(env); 3419 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3420 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); 3421 return dst; 3422 } 3423 3424 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2; 3425 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; 3426 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; 3427 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2; 3428 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2; 3429 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2; 3430 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2; 3431 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2; 3432 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2; 3433 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2; 3434 do_64Fx2: 3435 { 3436 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3437 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3438 HReg dst = newVRegV(env); 3439 REQUIRE_SSE2; 3440 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3441 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); 3442 return dst; 3443 } 3444 3445 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4; 3446 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4; 3447 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4; 3448 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4; 3449 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4; 3450 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4; 3451 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4; 3452 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4; 3453 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4; 3454 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4; 3455 do_32F0x4: { 3456 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3457 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3458 HReg dst = newVRegV(env); 3459 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3460 addInstr(env, X86Instr_Sse32FLo(op, argR, dst)); 3461 return dst; 3462 } 3463 3464 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2; 3465 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2; 3466 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2; 3467 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2; 3468 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2; 3469 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2; 3470 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2; 3471 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2; 3472 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2; 3473 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2; 3474 do_64F0x2: { 3475 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3476 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3477 HReg dst = newVRegV(env); 3478 REQUIRE_SSE2; 3479 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3480 addInstr(env, X86Instr_Sse64FLo(op, argR, dst)); 3481 return dst; 3482 } 3483 3484 case Iop_QNarrow32Sx4: 3485 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3486 case Iop_QNarrow16Sx8: 3487 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3488 case Iop_QNarrow16Ux8: 3489 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3490 3491 case Iop_InterleaveHI8x16: 3492 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 3493 case Iop_InterleaveHI16x8: 3494 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3495 case Iop_InterleaveHI32x4: 3496 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3497 case Iop_InterleaveHI64x2: 3498 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3499 3500 case Iop_InterleaveLO8x16: 3501 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3502 case Iop_InterleaveLO16x8: 3503 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3504 case Iop_InterleaveLO32x4: 3505 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3506 case Iop_InterleaveLO64x2: 3507 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3508 3509 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg; 3510 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg; 3511 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg; 3512 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg; 3513 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg; 3514 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg; 3515 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg; 3516 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg; 3517 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg; 3518 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg; 3519 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg; 3520 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg; 3521 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg; 3522 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg; 3523 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg; 3524 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg; 3525 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg; 3526 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg; 3527 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg; 3528 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg; 3529 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg; 3530 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg; 3531 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg; 3532 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg; 3533 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg; 3534 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg; 3535 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg; 3536 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg; 3537 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg; 3538 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg; 3539 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg; 3540 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg; 3541 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; 3542 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; 3543 do_SseReRg: { 3544 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3545 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3546 HReg dst = newVRegV(env); 3547 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR) 3548 REQUIRE_SSE2; 3549 if (arg1isEReg) { 3550 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3551 addInstr(env, X86Instr_SseReRg(op, arg1, dst)); 3552 } else { 3553 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3554 addInstr(env, X86Instr_SseReRg(op, arg2, dst)); 3555 } 3556 return dst; 3557 } 3558 3559 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift; 3560 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift; 3561 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift; 3562 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift; 3563 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift; 3564 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift; 3565 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift; 3566 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift; 3567 do_SseShift: { 3568 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3569 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3570 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3571 HReg ereg = newVRegV(env); 3572 HReg dst = newVRegV(env); 3573 REQUIRE_SSE2; 3574 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3575 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3576 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3577 addInstr(env, X86Instr_Push(rmi)); 3578 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0)); 3579 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3580 addInstr(env, X86Instr_SseReRg(op, ereg, dst)); 3581 add_to_esp(env, 16); 3582 return dst; 3583 } 3584 3585 default: 3586 break; 3587 } /* switch (e->Iex.Binop.op) */ 3588 } /* if (e->tag == Iex_Binop) */ 3589 3590 if (e->tag == Iex_Mux0X) { 3591 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 3592 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); 3593 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); 3594 HReg dst = newVRegV(env); 3595 addInstr(env, mk_vMOVsd_RR(rX,dst)); 3596 addInstr(env, X86Instr_Test32(0xFF, r8)); 3597 addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst)); 3598 return dst; 3599 } 3600 3601 vec_fail: 3602 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n", 3603 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps)); 3604 ppIRExpr(e); 3605 vpanic("iselVecExpr_wrk"); 3606 3607 # undef REQUIRE_SSE1 3608 # undef REQUIRE_SSE2 3609 # undef SSE2_OR_ABOVE 3610 } 3611 3612 3613 /*---------------------------------------------------------*/ 3614 /*--- ISEL: Statements ---*/ 3615 /*---------------------------------------------------------*/ 3616 3617 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3618 { 3619 if (vex_traceflags & VEX_TRACE_VCODE) { 3620 vex_printf("\n-- "); 3621 ppIRStmt(stmt); 3622 vex_printf("\n"); 3623 } 3624 3625 switch (stmt->tag) { 3626 3627 /* --------- STORE --------- */ 3628 case Ist_Store: { 3629 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3630 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3631 IREndness end = stmt->Ist.Store.end; 3632 3633 if (tya != Ity_I32 || end != Iend_LE) 3634 goto stmt_fail; 3635 3636 if (tyd == Ity_I32) { 3637 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3638 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 3639 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am)); 3640 return; 3641 } 3642 if (tyd == Ity_I8 || tyd == Ity_I16) { 3643 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3644 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 3645 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2), 3646 r,am )); 3647 return; 3648 } 3649 if (tyd == Ity_F64) { 3650 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3651 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 3652 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am)); 3653 return; 3654 } 3655 if (tyd == Ity_F32) { 3656 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3657 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 3658 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am)); 3659 return; 3660 } 3661 if (tyd == Ity_I64) { 3662 HReg vHi, vLo, rA; 3663 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data); 3664 rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 3665 addInstr(env, X86Instr_Alu32M( 3666 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA))); 3667 addInstr(env, X86Instr_Alu32M( 3668 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA))); 3669 return; 3670 } 3671 if (tyd == Ity_V128) { 3672 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3673 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 3674 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am)); 3675 return; 3676 } 3677 break; 3678 } 3679 3680 /* --------- PUT --------- */ 3681 case Ist_Put: { 3682 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3683 if (ty == Ity_I32) { 3684 /* We're going to write to memory, so compute the RHS into an 3685 X86RI. */ 3686 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 3687 addInstr(env, 3688 X86Instr_Alu32M( 3689 Xalu_MOV, 3690 ri, 3691 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP()) 3692 )); 3693 return; 3694 } 3695 if (ty == Ity_I8 || ty == Ity_I16) { 3696 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 3697 addInstr(env, X86Instr_Store( 3698 toUChar(ty==Ity_I8 ? 1 : 2), 3699 r, 3700 X86AMode_IR(stmt->Ist.Put.offset, 3701 hregX86_EBP()))); 3702 return; 3703 } 3704 if (ty == Ity_I64) { 3705 HReg vHi, vLo; 3706 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3707 X86AMode* am4 = advance4(am); 3708 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data); 3709 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am )); 3710 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 )); 3711 return; 3712 } 3713 if (ty == Ity_V128) { 3714 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 3715 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3716 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am)); 3717 return; 3718 } 3719 if (ty == Ity_F32) { 3720 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 3721 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3722 set_FPU_rounding_default(env); /* paranoia */ 3723 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am )); 3724 return; 3725 } 3726 if (ty == Ity_F64) { 3727 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 3728 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3729 set_FPU_rounding_default(env); /* paranoia */ 3730 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am )); 3731 return; 3732 } 3733 break; 3734 } 3735 3736 /* --------- Indexed PUT --------- */ 3737 case Ist_PutI: { 3738 X86AMode* am 3739 = genGuestArrayOffset( 3740 env, stmt->Ist.PutI.descr, 3741 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias ); 3742 3743 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data); 3744 if (ty == Ity_F64) { 3745 HReg val = iselDblExpr(env, stmt->Ist.PutI.data); 3746 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am )); 3747 return; 3748 } 3749 if (ty == Ity_I8) { 3750 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data); 3751 addInstr(env, X86Instr_Store( 1, r, am )); 3752 return; 3753 } 3754 if (ty == Ity_I32) { 3755 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data); 3756 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am )); 3757 return; 3758 } 3759 if (ty == Ity_I64) { 3760 HReg rHi, rLo; 3761 X86AMode* am4 = advance4(am); 3762 iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data); 3763 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am )); 3764 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 )); 3765 return; 3766 } 3767 break; 3768 } 3769 3770 /* --------- TMP --------- */ 3771 case Ist_WrTmp: { 3772 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3773 IRType ty = typeOfIRTemp(env->type_env, tmp); 3774 3775 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..), 3776 compute it into an AMode and then use LEA. This usually 3777 produces fewer instructions, often because (for memcheck 3778 created IR) we get t = address-expression, (t is later used 3779 twice) and so doing this naturally turns address-expression 3780 back into an X86 amode. */ 3781 if (ty == Ity_I32 3782 && stmt->Ist.WrTmp.data->tag == Iex_Binop 3783 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) { 3784 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 3785 HReg dst = lookupIRTemp(env, tmp); 3786 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) { 3787 /* Hmm, iselIntExpr_AMode wimped out and just computed the 3788 value into a register. Just emit a normal reg-reg move 3789 so reg-alloc can coalesce it away in the usual way. */ 3790 HReg src = am->Xam.IR.reg; 3791 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst)); 3792 } else { 3793 addInstr(env, X86Instr_Lea32(am,dst)); 3794 } 3795 return; 3796 } 3797 3798 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 3799 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 3800 HReg dst = lookupIRTemp(env, tmp); 3801 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst)); 3802 return; 3803 } 3804 if (ty == Ity_I64) { 3805 HReg rHi, rLo, dstHi, dstLo; 3806 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 3807 lookupIRTemp64( &dstHi, &dstLo, env, tmp); 3808 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 3809 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 3810 return; 3811 } 3812 if (ty == Ity_I1) { 3813 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 3814 HReg dst = lookupIRTemp(env, tmp); 3815 addInstr(env, X86Instr_Set32(cond, dst)); 3816 return; 3817 } 3818 if (ty == Ity_F64) { 3819 HReg dst = lookupIRTemp(env, tmp); 3820 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3821 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 3822 return; 3823 } 3824 if (ty == Ity_F32) { 3825 HReg dst = lookupIRTemp(env, tmp); 3826 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3827 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 3828 return; 3829 } 3830 if (ty == Ity_V128) { 3831 HReg dst = lookupIRTemp(env, tmp); 3832 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 3833 addInstr(env, mk_vMOVsd_RR(src,dst)); 3834 return; 3835 } 3836 break; 3837 } 3838 3839 /* --------- Call to DIRTY helper --------- */ 3840 case Ist_Dirty: { 3841 IRType retty; 3842 IRDirty* d = stmt->Ist.Dirty.details; 3843 Bool passBBP = False; 3844 3845 if (d->nFxState == 0) 3846 vassert(!d->needsBBP); 3847 3848 passBBP = toBool(d->nFxState > 0 && d->needsBBP); 3849 3850 /* Marshal args, do the call, clear stack. */ 3851 doHelperCall( env, passBBP, d->guard, d->cee, d->args ); 3852 3853 /* Now figure out what to do with the returned value, if any. */ 3854 if (d->tmp == IRTemp_INVALID) 3855 /* No return value. Nothing to do. */ 3856 return; 3857 3858 retty = typeOfIRTemp(env->type_env, d->tmp); 3859 if (retty == Ity_I64) { 3860 HReg dstHi, dstLo; 3861 /* The returned value is in %edx:%eax. Park it in the 3862 register-pair associated with tmp. */ 3863 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp); 3864 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) ); 3865 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) ); 3866 return; 3867 } 3868 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) { 3869 /* The returned value is in %eax. Park it in the register 3870 associated with tmp. */ 3871 HReg dst = lookupIRTemp(env, d->tmp); 3872 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) ); 3873 return; 3874 } 3875 break; 3876 } 3877 3878 /* --------- MEM FENCE --------- */ 3879 case Ist_MBE: 3880 switch (stmt->Ist.MBE.event) { 3881 case Imbe_Fence: 3882 addInstr(env, X86Instr_MFence(env->hwcaps)); 3883 return; 3884 default: 3885 break; 3886 } 3887 break; 3888 3889 /* --------- ACAS --------- */ 3890 case Ist_CAS: 3891 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 3892 /* "normal" singleton CAS */ 3893 UChar sz; 3894 IRCAS* cas = stmt->Ist.CAS.details; 3895 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3896 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 3897 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 3898 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 3899 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 3900 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 3901 vassert(cas->expdHi == NULL); 3902 vassert(cas->dataHi == NULL); 3903 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 3904 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 3905 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 3906 switch (ty) { 3907 case Ity_I32: sz = 4; break; 3908 case Ity_I16: sz = 2; break; 3909 case Ity_I8: sz = 1; break; 3910 default: goto unhandled_cas; 3911 } 3912 addInstr(env, X86Instr_ACAS(am, sz)); 3913 addInstr(env, 3914 X86Instr_CMov32(Xcc_NZ, 3915 X86RM_Reg(hregX86_EAX()), rOldLo)); 3916 return; 3917 } else { 3918 /* double CAS */ 3919 IRCAS* cas = stmt->Ist.CAS.details; 3920 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3921 /* only 32-bit allowed in this case */ 3922 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 3923 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */ 3924 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 3925 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 3926 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 3927 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 3928 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 3929 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 3930 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 3931 if (ty != Ity_I32) 3932 goto unhandled_cas; 3933 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 3934 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 3935 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX())); 3936 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 3937 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX())); 3938 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 3939 addInstr(env, X86Instr_DACAS(am)); 3940 addInstr(env, 3941 X86Instr_CMov32(Xcc_NZ, 3942 X86RM_Reg(hregX86_EDX()), rOldHi)); 3943 addInstr(env, 3944 X86Instr_CMov32(Xcc_NZ, 3945 X86RM_Reg(hregX86_EAX()), rOldLo)); 3946 return; 3947 } 3948 unhandled_cas: 3949 break; 3950 3951 /* --------- INSTR MARK --------- */ 3952 /* Doesn't generate any executable code ... */ 3953 case Ist_IMark: 3954 return; 3955 3956 /* --------- NO-OP --------- */ 3957 /* Fairly self-explanatory, wouldn't you say? */ 3958 case Ist_NoOp: 3959 return; 3960 3961 /* --------- EXIT --------- */ 3962 case Ist_Exit: { 3963 X86RI* dst; 3964 X86CondCode cc; 3965 if (stmt->Ist.Exit.dst->tag != Ico_U32) 3966 vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value"); 3967 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst)); 3968 cc = iselCondCode(env,stmt->Ist.Exit.guard); 3969 addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst)); 3970 return; 3971 } 3972 3973 default: break; 3974 } 3975 stmt_fail: 3976 ppIRStmt(stmt); 3977 vpanic("iselStmt"); 3978 } 3979 3980 3981 /*---------------------------------------------------------*/ 3982 /*--- ISEL: Basic block terminators (Nexts) ---*/ 3983 /*---------------------------------------------------------*/ 3984 3985 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) 3986 { 3987 X86RI* ri; 3988 if (vex_traceflags & VEX_TRACE_VCODE) { 3989 vex_printf("\n-- goto {"); 3990 ppIRJumpKind(jk); 3991 vex_printf("} "); 3992 ppIRExpr(next); 3993 vex_printf("\n"); 3994 } 3995 ri = iselIntExpr_RI(env, next); 3996 addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri)); 3997 } 3998 3999 4000 /*---------------------------------------------------------*/ 4001 /*--- Insn selector top-level ---*/ 4002 /*---------------------------------------------------------*/ 4003 4004 /* Translate an entire SB to x86 code. */ 4005 4006 HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, 4007 VexArchInfo* archinfo_host, 4008 VexAbiInfo* vbi/*UNUSED*/ ) 4009 { 4010 Int i, j; 4011 HReg hreg, hregHI; 4012 ISelEnv* env; 4013 UInt hwcaps_host = archinfo_host->hwcaps; 4014 4015 /* sanity ... */ 4016 vassert(arch_host == VexArchX86); 4017 vassert(0 == (hwcaps_host 4018 & ~(VEX_HWCAPS_X86_SSE1 4019 | VEX_HWCAPS_X86_SSE2 4020 | VEX_HWCAPS_X86_SSE3 4021 | VEX_HWCAPS_X86_LZCNT))); 4022 4023 /* Make up an initial environment to use. */ 4024 env = LibVEX_Alloc(sizeof(ISelEnv)); 4025 env->vreg_ctr = 0; 4026 4027 /* Set up output code array. */ 4028 env->code = newHInstrArray(); 4029 4030 /* Copy BB's type env. */ 4031 env->type_env = bb->tyenv; 4032 4033 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4034 change as we go along. */ 4035 env->n_vregmap = bb->tyenv->types_used; 4036 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4037 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4038 4039 /* and finally ... */ 4040 env->hwcaps = hwcaps_host; 4041 4042 /* For each IR temporary, allocate a suitably-kinded virtual 4043 register. */ 4044 j = 0; 4045 for (i = 0; i < env->n_vregmap; i++) { 4046 hregHI = hreg = INVALID_HREG; 4047 switch (bb->tyenv->types[i]) { 4048 case Ity_I1: 4049 case Ity_I8: 4050 case Ity_I16: 4051 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break; 4052 case Ity_I64: hreg = mkHReg(j++, HRcInt32, True); 4053 hregHI = mkHReg(j++, HRcInt32, True); break; 4054 case Ity_F32: 4055 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break; 4056 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break; 4057 default: ppIRType(bb->tyenv->types[i]); 4058 vpanic("iselBB: IRTemp type"); 4059 } 4060 env->vregmap[i] = hreg; 4061 env->vregmapHI[i] = hregHI; 4062 } 4063 env->vreg_ctr = j; 4064 4065 /* Ok, finally we can iterate over the statements. */ 4066 for (i = 0; i < bb->stmts_used; i++) 4067 iselStmt(env,bb->stmts[i]); 4068 4069 iselNext(env,bb->next,bb->jumpkind); 4070 4071 /* record the number of vregs we used. */ 4072 env->code->n_vregs = env->vreg_ctr; 4073 return env->code; 4074 } 4075 4076 4077 /*---------------------------------------------------------------*/ 4078 /*--- end host_x86_isel.c ---*/ 4079 /*---------------------------------------------------------------*/ 4080