1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2012 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"

/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
78 */ 79 80 #define DEFAULT_FPUCW 0x027F 81 82 /* debugging only, do not use */ 83 /* define DEFAULT_FPUCW 0x037F */ 84 85 86 /*---------------------------------------------------------*/ 87 /*--- misc helpers ---*/ 88 /*---------------------------------------------------------*/ 89 90 /* These are duplicated in guest-x86/toIR.c */ 91 static IRExpr* unop ( IROp op, IRExpr* a ) 92 { 93 return IRExpr_Unop(op, a); 94 } 95 96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 97 { 98 return IRExpr_Binop(op, a1, a2); 99 } 100 101 static IRExpr* bind ( Int binder ) 102 { 103 return IRExpr_Binder(binder); 104 } 105 106 static Bool isZeroU8 ( IRExpr* e ) 107 { 108 return e->tag == Iex_Const 109 && e->Iex.Const.con->tag == Ico_U8 110 && e->Iex.Const.con->Ico.U8 == 0; 111 } 112 113 static Bool isZeroU32 ( IRExpr* e ) 114 { 115 return e->tag == Iex_Const 116 && e->Iex.Const.con->tag == Ico_U32 117 && e->Iex.Const.con->Ico.U32 == 0; 118 } 119 120 static Bool isZeroU64 ( IRExpr* e ) 121 { 122 return e->tag == Iex_Const 123 && e->Iex.Const.con->tag == Ico_U64 124 && e->Iex.Const.con->Ico.U64 == 0ULL; 125 } 126 127 128 /*---------------------------------------------------------*/ 129 /*--- ISelEnv ---*/ 130 /*---------------------------------------------------------*/ 131 132 /* This carries around: 133 134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 135 might encounter. This is computed before insn selection starts, 136 and does not change. 137 138 - A mapping from IRTemp to HReg. This tells the insn selector 139 which virtual register(s) are associated with each IRTemp 140 temporary. This is computed before insn selection starts, and 141 does not change. We expect this mapping to map precisely the 142 same set of IRTemps as the type mapping does. 143 144 - vregmap holds the primary register for the IRTemp. 145 - vregmapHI is only used for 64-bit integer-typed 146 IRTemps. 
It holds the identity of a second 147 32-bit virtual HReg, which holds the high half 148 of the value. 149 150 - The code array, that is, the insns selected so far. 151 152 - A counter, for generating new virtual registers. 153 154 - The host subarchitecture we are selecting insns for. 155 This is set at the start and does not change. 156 157 - A Bool for indicating whether we may generate chain-me 158 instructions for control flow transfers, or whether we must use 159 XAssisted. 160 161 - The maximum guest address of any guest insn in this block. 162 Actually, the address of the highest-addressed byte from any insn 163 in this block. Is set at the start and does not change. This is 164 used for detecting jumps which are definitely forward-edges from 165 this block, and therefore can be made (chained) to the fast entry 166 point of the destination, thereby avoiding the destination's 167 event check. 168 169 Note, this is all (well, mostly) host-independent. 170 */ 171 172 typedef 173 struct { 174 /* Constant -- are set at the start and do not change. */ 175 IRTypeEnv* type_env; 176 177 HReg* vregmap; 178 HReg* vregmapHI; 179 Int n_vregmap; 180 181 UInt hwcaps; 182 183 Bool chainingAllowed; 184 Addr64 max_ga; 185 186 /* These are modified as we go along. 
*/ 187 HInstrArray* code; 188 Int vreg_ctr; 189 } 190 ISelEnv; 191 192 193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 194 { 195 vassert(tmp >= 0); 196 vassert(tmp < env->n_vregmap); 197 return env->vregmap[tmp]; 198 } 199 200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp ) 201 { 202 vassert(tmp >= 0); 203 vassert(tmp < env->n_vregmap); 204 vassert(env->vregmapHI[tmp] != INVALID_HREG); 205 *vrLO = env->vregmap[tmp]; 206 *vrHI = env->vregmapHI[tmp]; 207 } 208 209 static void addInstr ( ISelEnv* env, X86Instr* instr ) 210 { 211 addHInstr(env->code, instr); 212 if (vex_traceflags & VEX_TRACE_VCODE) { 213 ppX86Instr(instr, False); 214 vex_printf("\n"); 215 } 216 } 217 218 static HReg newVRegI ( ISelEnv* env ) 219 { 220 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/); 221 env->vreg_ctr++; 222 return reg; 223 } 224 225 static HReg newVRegF ( ISelEnv* env ) 226 { 227 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); 228 env->vreg_ctr++; 229 return reg; 230 } 231 232 static HReg newVRegV ( ISelEnv* env ) 233 { 234 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); 235 env->vreg_ctr++; 236 return reg; 237 } 238 239 240 /*---------------------------------------------------------*/ 241 /*--- ISEL: Forward declarations ---*/ 242 /*---------------------------------------------------------*/ 243 244 /* These are organised as iselXXX and iselXXX_wrk pairs. The 245 iselXXX_wrk do the real work, but are not to be called directly. 246 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 247 checks that all returned registers are virtual. You should not 248 call the _wrk version directly. 
249 */ 250 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ); 251 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ); 252 253 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ); 254 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ); 255 256 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ); 257 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ); 258 259 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 260 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 261 262 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ); 263 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ); 264 265 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 266 ISelEnv* env, IRExpr* e ); 267 static void iselInt64Expr ( HReg* rHi, HReg* rLo, 268 ISelEnv* env, IRExpr* e ); 269 270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 271 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 272 273 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 274 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 275 276 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 277 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 278 279 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ); 280 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ); 281 282 283 /*---------------------------------------------------------*/ 284 /*--- ISEL: Misc helpers ---*/ 285 /*---------------------------------------------------------*/ 286 287 /* Make a int reg-reg move. */ 288 289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 290 { 291 vassert(hregClass(src) == HRcInt32); 292 vassert(hregClass(dst) == HRcInt32); 293 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst); 294 } 295 296 297 /* Make a vector reg-reg move. 
*/ 298 299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 300 { 301 vassert(hregClass(src) == HRcVec128); 302 vassert(hregClass(dst) == HRcVec128); 303 return X86Instr_SseReRg(Xsse_MOV, src, dst); 304 } 305 306 /* Advance/retreat %esp by n. */ 307 308 static void add_to_esp ( ISelEnv* env, Int n ) 309 { 310 vassert(n > 0 && n < 256 && (n%4) == 0); 311 addInstr(env, 312 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP())); 313 } 314 315 static void sub_from_esp ( ISelEnv* env, Int n ) 316 { 317 vassert(n > 0 && n < 256 && (n%4) == 0); 318 addInstr(env, 319 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP())); 320 } 321 322 323 /* Given an amode, return one which references 4 bytes further 324 along. */ 325 326 static X86AMode* advance4 ( X86AMode* am ) 327 { 328 X86AMode* am4 = dopyX86AMode(am); 329 switch (am4->tag) { 330 case Xam_IRRS: 331 am4->Xam.IRRS.imm += 4; break; 332 case Xam_IR: 333 am4->Xam.IR.imm += 4; break; 334 default: 335 vpanic("advance4(x86,host)"); 336 } 337 return am4; 338 } 339 340 341 /* Push an arg onto the host stack, in preparation for a call to a 342 helper function of some kind. Returns the number of 32-bit words 343 pushed. */ 344 345 static Int pushArg ( ISelEnv* env, IRExpr* arg ) 346 { 347 IRType arg_ty = typeOfIRExpr(env->type_env, arg); 348 if (arg_ty == Ity_I32) { 349 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg))); 350 return 1; 351 } else 352 if (arg_ty == Ity_I64) { 353 HReg rHi, rLo; 354 iselInt64Expr(&rHi, &rLo, env, arg); 355 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 356 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 357 return 2; 358 } 359 ppIRExpr(arg); 360 vpanic("pushArg(x86): can't handle arg of this type"); 361 } 362 363 364 /* Complete the call to a helper function, by calling the 365 helper and clearing the args off the stack. */ 366 367 static 368 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc, 369 IRCallee* cee, Int n_arg_ws ) 370 { 371 /* Complication. 
Need to decide which reg to use as the fn address 372 pointer, in a way that doesn't trash regparm-passed 373 parameters. */ 374 vassert(sizeof(void*) == 4); 375 376 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)), 377 cee->regparms)); 378 if (n_arg_ws > 0) 379 add_to_esp(env, 4*n_arg_ws); 380 } 381 382 383 /* Used only in doHelperCall. See big comment in doHelperCall re 384 handling of regparm args. This function figures out whether 385 evaluation of an expression might require use of a fixed register. 386 If in doubt return True (safe but suboptimal). 387 */ 388 static 389 Bool mightRequireFixedRegs ( IRExpr* e ) 390 { 391 switch (e->tag) { 392 case Iex_RdTmp: case Iex_Const: case Iex_Get: 393 return False; 394 default: 395 return True; 396 } 397 } 398 399 400 /* Do a complete function call. guard is a Ity_Bit expression 401 indicating whether or not the call happens. If guard==NULL, the 402 call is unconditional. */ 403 404 static 405 void doHelperCall ( ISelEnv* env, 406 Bool passBBP, 407 IRExpr* guard, IRCallee* cee, IRExpr** args ) 408 { 409 X86CondCode cc; 410 HReg argregs[3]; 411 HReg tmpregs[3]; 412 Bool danger; 413 Int not_done_yet, n_args, n_arg_ws, stack_limit, 414 i, argreg, argregX; 415 416 /* Marshal args for a call, do the call, and clear the stack. 417 Complexities to consider: 418 419 * if passBBP is True, %ebp (the baseblock pointer) is to be 420 passed as the first arg. 421 422 * If the callee claims regparmness of 1, 2 or 3, we must pass the 423 first 1, 2 or 3 args in registers (EAX, EDX, and ECX 424 respectively). To keep things relatively simple, only args of 425 type I32 may be passed as regparms -- just bomb out if anything 426 else turns up. Clearly this depends on the front ends not 427 trying to pass any other types as regparms. 428 */ 429 430 /* 16 Nov 2004: the regparm handling is complicated by the 431 following problem. 432 433 Consider a call two a function with two regparm parameters: 434 f(e1,e2). 
We need to compute e1 into %eax and e2 into %edx. 435 Suppose code is first generated to compute e1 into %eax. Then, 436 code is generated to compute e2 into %edx. Unfortunately, if 437 the latter code sequence uses %eax, it will trash the value of 438 e1 computed by the former sequence. This could happen if (for 439 example) e2 itself involved a function call. In the code below, 440 args are evaluated right-to-left, not left-to-right, but the 441 principle and the problem are the same. 442 443 One solution is to compute all regparm-bound args into vregs 444 first, and once they are all done, move them to the relevant 445 real regs. This always gives correct code, but it also gives 446 a bunch of vreg-to-rreg moves which are usually redundant but 447 are hard for the register allocator to get rid of. 448 449 A compromise is to first examine all regparm'd argument 450 expressions. If they are all so simple that it is clear 451 they will be evaluated without use of any fixed registers, 452 use the old compute-directly-to-fixed-target scheme. If not, 453 be safe and use the via-vregs scheme. 454 455 Note this requires being able to examine an expression and 456 determine whether or not evaluation of it might use a fixed 457 register. That requires knowledge of how the rest of this 458 insn selector works. Currently just the following 3 are 459 regarded as safe -- hopefully they cover the majority of 460 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 461 */ 462 vassert(cee->regparms >= 0 && cee->regparms <= 3); 463 464 n_args = n_arg_ws = 0; 465 while (args[n_args]) n_args++; 466 467 not_done_yet = n_args; 468 if (passBBP) 469 not_done_yet++; 470 471 stack_limit = cee->regparms; 472 if (cee->regparms > 0 && passBBP) stack_limit--; 473 474 /* ------ BEGIN marshall all arguments ------ */ 475 476 /* Push (R to L) the stack-passed args, [n_args-1 .. 
stack_limit] */ 477 for (i = n_args-1; i >= stack_limit; i--) { 478 n_arg_ws += pushArg(env, args[i]); 479 not_done_yet--; 480 } 481 482 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in 483 registers. */ 484 485 if (cee->regparms > 0) { 486 487 /* ------ BEGIN deal with regparms ------ */ 488 489 /* deal with regparms, not forgetting %ebp if needed. */ 490 argregs[0] = hregX86_EAX(); 491 argregs[1] = hregX86_EDX(); 492 argregs[2] = hregX86_ECX(); 493 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG; 494 495 argreg = cee->regparms; 496 497 /* In keeping with big comment above, detect potential danger 498 and use the via-vregs scheme if needed. */ 499 danger = False; 500 for (i = stack_limit-1; i >= 0; i--) { 501 if (mightRequireFixedRegs(args[i])) { 502 danger = True; 503 break; 504 } 505 } 506 507 if (danger) { 508 509 /* Move via temporaries */ 510 argregX = argreg; 511 for (i = stack_limit-1; i >= 0; i--) { 512 513 if (0) { 514 vex_printf("x86 host: register param is complex: "); 515 ppIRExpr(args[i]); 516 vex_printf("\n"); 517 } 518 519 argreg--; 520 vassert(argreg >= 0); 521 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32); 522 tmpregs[argreg] = iselIntExpr_R(env, args[i]); 523 not_done_yet--; 524 } 525 for (i = stack_limit-1; i >= 0; i--) { 526 argregX--; 527 vassert(argregX >= 0); 528 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) ); 529 } 530 531 } else { 532 /* It's safe to compute all regparm args directly into their 533 target registers. */ 534 for (i = stack_limit-1; i >= 0; i--) { 535 argreg--; 536 vassert(argreg >= 0); 537 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32); 538 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 539 iselIntExpr_RMI(env, args[i]), 540 argregs[argreg])); 541 not_done_yet--; 542 } 543 544 } 545 546 /* Not forgetting %ebp if needed. 
*/ 547 if (passBBP) { 548 vassert(argreg == 1); 549 addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0])); 550 not_done_yet--; 551 } 552 553 /* ------ END deal with regparms ------ */ 554 555 } else { 556 557 /* No regparms. Heave %ebp on the stack if needed. */ 558 if (passBBP) { 559 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP()))); 560 n_arg_ws++; 561 not_done_yet--; 562 } 563 564 } 565 566 vassert(not_done_yet == 0); 567 568 /* ------ END marshall all arguments ------ */ 569 570 /* Now we can compute the condition. We can't do it earlier 571 because the argument computations could trash the condition 572 codes. Be a bit clever to handle the common case where the 573 guard is 1:Bit. */ 574 cc = Xcc_ALWAYS; 575 if (guard) { 576 if (guard->tag == Iex_Const 577 && guard->Iex.Const.con->tag == Ico_U1 578 && guard->Iex.Const.con->Ico.U1 == True) { 579 /* unconditional -- do nothing */ 580 } else { 581 cc = iselCondCode( env, guard ); 582 } 583 } 584 585 /* call the helper, and get the args off the stack afterwards. */ 586 callHelperAndClearArgs( env, cc, cee, n_arg_ws ); 587 } 588 589 590 /* Given a guest-state array descriptor, an index expression and a 591 bias, generate an X86AMode holding the relevant guest state 592 offset. */ 593 594 static 595 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, 596 IRExpr* off, Int bias ) 597 { 598 HReg tmp, roff; 599 Int elemSz = sizeofIRType(descr->elemTy); 600 Int nElems = descr->nElems; 601 Int shift = 0; 602 603 /* throw out any cases not generated by an x86 front end. In 604 theory there might be a day where we need to handle them -- if 605 we ever run non-x86-guest on x86 host. */ 606 607 if (nElems != 8) 608 vpanic("genGuestArrayOffset(x86 host)(1)"); 609 610 switch (elemSz) { 611 case 1: shift = 0; break; 612 case 4: shift = 2; break; 613 case 8: shift = 3; break; 614 default: vpanic("genGuestArrayOffset(x86 host)(2)"); 615 } 616 617 /* Compute off into a reg, %off. 
Then return: 618 619 movl %off, %tmp 620 addl $bias, %tmp (if bias != 0) 621 andl %tmp, 7 622 ... base(%ebp, %tmp, shift) ... 623 */ 624 tmp = newVRegI(env); 625 roff = iselIntExpr_R(env, off); 626 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 627 if (bias != 0) { 628 addInstr(env, 629 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp)); 630 } 631 addInstr(env, 632 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp)); 633 return 634 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift ); 635 } 636 637 638 /* Mess with the FPU's rounding mode: set to the default rounding mode 639 (DEFAULT_FPUCW). */ 640 static 641 void set_FPU_rounding_default ( ISelEnv* env ) 642 { 643 /* pushl $DEFAULT_FPUCW 644 fldcw 0(%esp) 645 addl $4, %esp 646 */ 647 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 648 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); 649 addInstr(env, X86Instr_FpLdCW(zero_esp)); 650 add_to_esp(env, 4); 651 } 652 653 654 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed 655 expression denoting a value in the range 0 .. 3, indicating a round 656 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 657 the same rounding. 
658 */ 659 static 660 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 661 { 662 HReg rrm = iselIntExpr_R(env, mode); 663 HReg rrm2 = newVRegI(env); 664 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 665 666 /* movl %rrm, %rrm2 667 andl $3, %rrm2 -- shouldn't be needed; paranoia 668 shll $10, %rrm2 669 orl $DEFAULT_FPUCW, %rrm2 670 pushl %rrm2 671 fldcw 0(%esp) 672 addl $4, %esp 673 */ 674 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 675 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2)); 676 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2)); 677 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2)); 678 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2))); 679 addInstr(env, X86Instr_FpLdCW(zero_esp)); 680 add_to_esp(env, 4); 681 } 682 683 684 /* Generate !src into a new vector register, and be sure that the code 685 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy 686 way to do this. 687 */ 688 static HReg do_sse_Not128 ( ISelEnv* env, HReg src ) 689 { 690 HReg dst = newVRegV(env); 691 /* Set dst to zero. If dst contains a NaN then all hell might 692 break loose after the comparison. So, first zero it. */ 693 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst)); 694 /* And now make it all 1s ... */ 695 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst)); 696 /* Finally, xor 'src' into it. */ 697 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst)); 698 /* Doesn't that just totally suck? */ 699 return dst; 700 } 701 702 703 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used 704 after most non-simple FPU operations (simple = +, -, *, / and 705 sqrt). 706 707 This could be done a lot more efficiently if needed, by loading 708 zero and adding it to the value to be rounded (fldz ; faddp?). 
709 */ 710 static void roundToF64 ( ISelEnv* env, HReg reg ) 711 { 712 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 713 sub_from_esp(env, 8); 714 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 715 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 716 add_to_esp(env, 8); 717 } 718 719 720 /*---------------------------------------------------------*/ 721 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 722 /*---------------------------------------------------------*/ 723 724 /* Select insns for an integer-typed expression, and add them to the 725 code list. Return a reg holding the result. This reg will be a 726 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you 727 want to modify it, ask for a new vreg, copy it in there, and modify 728 the copy. The register allocator will do its best to map both 729 vregs to the same real register, so the copies will often disappear 730 later in the game. 731 732 This should handle expressions of 32, 16 and 8-bit type. All 733 results are returned in a 32-bit register. For 16- and 8-bit 734 expressions, the upper 16/24 bits are arbitrary, so you should mask 735 or sign extend partial values if necessary. 736 */ 737 738 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 739 { 740 HReg r = iselIntExpr_R_wrk(env, e); 741 /* sanity checks ... */ 742 # if 0 743 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 744 # endif 745 vassert(hregClass(r) == HRcInt32); 746 vassert(hregIsVirtual(r)); 747 return r; 748 } 749 750 /* DO NOT CALL THIS DIRECTLY ! 
*/ 751 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 752 { 753 MatchInfo mi; 754 755 IRType ty = typeOfIRExpr(env->type_env,e); 756 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 757 758 switch (e->tag) { 759 760 /* --------- TEMP --------- */ 761 case Iex_RdTmp: { 762 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 763 } 764 765 /* --------- LOAD --------- */ 766 case Iex_Load: { 767 HReg dst = newVRegI(env); 768 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr ); 769 770 /* We can't handle big-endian loads, nor load-linked. */ 771 if (e->Iex.Load.end != Iend_LE) 772 goto irreducible; 773 774 if (ty == Ity_I32) { 775 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 776 X86RMI_Mem(amode), dst) ); 777 return dst; 778 } 779 if (ty == Ity_I16) { 780 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 781 return dst; 782 } 783 if (ty == Ity_I8) { 784 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 785 return dst; 786 } 787 break; 788 } 789 790 /* --------- TERNARY OP --------- */ 791 case Iex_Triop: { 792 IRTriop *triop = e->Iex.Triop.details; 793 /* C3210 flags following FPU partial remainder (fprem), both 794 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 795 if (triop->op == Iop_PRemC3210F64 796 || triop->op == Iop_PRem1C3210F64) { 797 HReg junk = newVRegF(env); 798 HReg dst = newVRegI(env); 799 HReg srcL = iselDblExpr(env, triop->arg2); 800 HReg srcR = iselDblExpr(env, triop->arg3); 801 /* XXXROUNDINGFIXME */ 802 /* set roundingmode here */ 803 addInstr(env, X86Instr_FpBinary( 804 e->Iex.Binop.op==Iop_PRemC3210F64 805 ? Xfp_PREM : Xfp_PREM1, 806 srcL,srcR,junk 807 )); 808 /* The previous pseudo-insn will have left the FPU's C3210 809 flags set correctly. So bag them. 
*/ 810 addInstr(env, X86Instr_FpStSW_AX()); 811 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 812 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst)); 813 return dst; 814 } 815 816 break; 817 } 818 819 /* --------- BINARY OP --------- */ 820 case Iex_Binop: { 821 X86AluOp aluOp; 822 X86ShiftOp shOp; 823 824 /* Pattern: Sub32(0,x) */ 825 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) { 826 HReg dst = newVRegI(env); 827 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); 828 addInstr(env, mk_iMOVsd_RR(reg,dst)); 829 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 830 return dst; 831 } 832 833 /* Is it an addition or logical style op? */ 834 switch (e->Iex.Binop.op) { 835 case Iop_Add8: case Iop_Add16: case Iop_Add32: 836 aluOp = Xalu_ADD; break; 837 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: 838 aluOp = Xalu_SUB; break; 839 case Iop_And8: case Iop_And16: case Iop_And32: 840 aluOp = Xalu_AND; break; 841 case Iop_Or8: case Iop_Or16: case Iop_Or32: 842 aluOp = Xalu_OR; break; 843 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: 844 aluOp = Xalu_XOR; break; 845 case Iop_Mul16: case Iop_Mul32: 846 aluOp = Xalu_MUL; break; 847 default: 848 aluOp = Xalu_INVALID; break; 849 } 850 /* For commutative ops we assume any literal 851 values are on the second operand. */ 852 if (aluOp != Xalu_INVALID) { 853 HReg dst = newVRegI(env); 854 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 855 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 856 addInstr(env, mk_iMOVsd_RR(reg,dst)); 857 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst)); 858 return dst; 859 } 860 /* Could do better here; forcing the first arg into a reg 861 isn't always clever. 
862 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)), 863 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32( 864 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32))) 865 movl 0xFFFFFFA0(%vr41),%vr107 866 movl 0xFFFFFFA4(%vr41),%vr108 867 movl %vr107,%vr106 868 xorl %vr108,%vr106 869 movl 0xFFFFFFA8(%vr41),%vr109 870 movl %vr106,%vr105 871 andl %vr109,%vr105 872 movl 0xFFFFFFA0(%vr41),%vr110 873 movl %vr105,%vr104 874 xorl %vr110,%vr104 875 movl %vr104,%vr70 876 */ 877 878 /* Perhaps a shift op? */ 879 switch (e->Iex.Binop.op) { 880 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 881 shOp = Xsh_SHL; break; 882 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 883 shOp = Xsh_SHR; break; 884 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 885 shOp = Xsh_SAR; break; 886 default: 887 shOp = Xsh_INVALID; break; 888 } 889 if (shOp != Xsh_INVALID) { 890 HReg dst = newVRegI(env); 891 892 /* regL = the value to be shifted */ 893 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 894 addInstr(env, mk_iMOVsd_RR(regL,dst)); 895 896 /* Do any necessary widening for 16/8 bit operands */ 897 switch (e->Iex.Binop.op) { 898 case Iop_Shr8: 899 addInstr(env, X86Instr_Alu32R( 900 Xalu_AND, X86RMI_Imm(0xFF), dst)); 901 break; 902 case Iop_Shr16: 903 addInstr(env, X86Instr_Alu32R( 904 Xalu_AND, X86RMI_Imm(0xFFFF), dst)); 905 break; 906 case Iop_Sar8: 907 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst)); 908 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst)); 909 break; 910 case Iop_Sar16: 911 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst)); 912 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst)); 913 break; 914 default: break; 915 } 916 917 /* Now consider the shift amount. If it's a literal, we 918 can do a much better job than the general case. 
*/ 919 if (e->Iex.Binop.arg2->tag == Iex_Const) { 920 /* assert that the IR is well-typed */ 921 Int nshift; 922 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 923 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 924 vassert(nshift >= 0); 925 if (nshift > 0) 926 /* Can't allow nshift==0 since that means %cl */ 927 addInstr(env, X86Instr_Sh32( shOp, nshift, dst )); 928 } else { 929 /* General case; we have to force the amount into %cl. */ 930 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 931 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX())); 932 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst)); 933 } 934 return dst; 935 } 936 937 /* Handle misc other ops. */ 938 939 if (e->Iex.Binop.op == Iop_Max32U) { 940 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 941 HReg dst = newVRegI(env); 942 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 943 addInstr(env, mk_iMOVsd_RR(src1,dst)); 944 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst)); 945 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst)); 946 return dst; 947 } 948 949 if (e->Iex.Binop.op == Iop_8HLto16) { 950 HReg hi8 = newVRegI(env); 951 HReg lo8 = newVRegI(env); 952 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 953 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 954 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 955 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 956 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8)); 957 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8)); 958 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8)); 959 return hi8; 960 } 961 962 if (e->Iex.Binop.op == Iop_16HLto32) { 963 HReg hi16 = newVRegI(env); 964 HReg lo16 = newVRegI(env); 965 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 966 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 967 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 968 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 969 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16)); 970 addInstr(env, X86Instr_Alu32R(Xalu_AND, 
X86RMI_Imm(0xFFFF), lo16)); 971 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16)); 972 return hi16; 973 } 974 975 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8 976 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) { 977 HReg a16 = newVRegI(env); 978 HReg b16 = newVRegI(env); 979 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 980 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 981 Int shift = (e->Iex.Binop.op == Iop_MullS8 982 || e->Iex.Binop.op == Iop_MullU8) 983 ? 24 : 16; 984 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8 985 || e->Iex.Binop.op == Iop_MullS16) 986 ? Xsh_SAR : Xsh_SHR; 987 988 addInstr(env, mk_iMOVsd_RR(a16s, a16)); 989 addInstr(env, mk_iMOVsd_RR(b16s, b16)); 990 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16)); 991 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16)); 992 addInstr(env, X86Instr_Sh32(shr_op, shift, a16)); 993 addInstr(env, X86Instr_Sh32(shr_op, shift, b16)); 994 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16)); 995 return b16; 996 } 997 998 if (e->Iex.Binop.op == Iop_CmpF64) { 999 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 1000 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 1001 HReg dst = newVRegI(env); 1002 addInstr(env, X86Instr_FpCmp(fL,fR,dst)); 1003 /* shift this right 8 bits so as to conform to CmpF64 1004 definition. */ 1005 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst)); 1006 return dst; 1007 } 1008 1009 if (e->Iex.Binop.op == Iop_F64toI32S 1010 || e->Iex.Binop.op == Iop_F64toI16S) { 1011 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4; 1012 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 1013 HReg dst = newVRegI(env); 1014 1015 /* Used several times ... */ 1016 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1017 1018 /* rf now holds the value to be converted, and rrm holds the 1019 rounding mode value, encoded as per the IRRoundingMode 1020 enum. The first thing to do is set the FPU's rounding 1021 mode accordingly. 
*/ 1022 1023 /* Create a space for the format conversion. */ 1024 /* subl $4, %esp */ 1025 sub_from_esp(env, 4); 1026 1027 /* Set host rounding mode */ 1028 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 1029 1030 /* gistw/l %rf, 0(%esp) */ 1031 addInstr(env, X86Instr_FpLdStI(False/*store*/, 1032 toUChar(sz), rf, zero_esp)); 1033 1034 if (sz == 2) { 1035 /* movzwl 0(%esp), %dst */ 1036 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst)); 1037 } else { 1038 /* movl 0(%esp), %dst */ 1039 vassert(sz == 4); 1040 addInstr(env, X86Instr_Alu32R( 1041 Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1042 } 1043 1044 /* Restore default FPU rounding. */ 1045 set_FPU_rounding_default( env ); 1046 1047 /* addl $4, %esp */ 1048 add_to_esp(env, 4); 1049 return dst; 1050 } 1051 1052 break; 1053 } 1054 1055 /* --------- UNARY OP --------- */ 1056 case Iex_Unop: { 1057 1058 /* 1Uto8(32to1(expr32)) */ 1059 if (e->Iex.Unop.op == Iop_1Uto8) { 1060 DECLARE_PATTERN(p_32to1_then_1Uto8); 1061 DEFINE_PATTERN(p_32to1_then_1Uto8, 1062 unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1063 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1064 IRExpr* expr32 = mi.bindee[0]; 1065 HReg dst = newVRegI(env); 1066 HReg src = iselIntExpr_R(env, expr32); 1067 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1068 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1069 X86RMI_Imm(1), dst)); 1070 return dst; 1071 } 1072 } 1073 1074 /* 8Uto32(LDle(expr32)) */ 1075 if (e->Iex.Unop.op == Iop_8Uto32) { 1076 DECLARE_PATTERN(p_LDle8_then_8Uto32); 1077 DEFINE_PATTERN(p_LDle8_then_8Uto32, 1078 unop(Iop_8Uto32, 1079 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1080 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1081 HReg dst = newVRegI(env); 1082 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1083 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1084 return dst; 1085 } 1086 } 1087 1088 /* 8Sto32(LDle(expr32)) */ 1089 if (e->Iex.Unop.op == Iop_8Sto32) { 1090 DECLARE_PATTERN(p_LDle8_then_8Sto32); 1091 DEFINE_PATTERN(p_LDle8_then_8Sto32, 1092 
unop(Iop_8Sto32, 1093 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1094 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1095 HReg dst = newVRegI(env); 1096 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1097 addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1098 return dst; 1099 } 1100 } 1101 1102 /* 16Uto32(LDle(expr32)) */ 1103 if (e->Iex.Unop.op == Iop_16Uto32) { 1104 DECLARE_PATTERN(p_LDle16_then_16Uto32); 1105 DEFINE_PATTERN(p_LDle16_then_16Uto32, 1106 unop(Iop_16Uto32, 1107 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1108 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1109 HReg dst = newVRegI(env); 1110 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1111 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1112 return dst; 1113 } 1114 } 1115 1116 /* 8Uto32(GET:I8) */ 1117 if (e->Iex.Unop.op == Iop_8Uto32) { 1118 if (e->Iex.Unop.arg->tag == Iex_Get) { 1119 HReg dst; 1120 X86AMode* amode; 1121 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1122 dst = newVRegI(env); 1123 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1124 hregX86_EBP()); 1125 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1126 return dst; 1127 } 1128 } 1129 1130 /* 16to32(GET:I16) */ 1131 if (e->Iex.Unop.op == Iop_16Uto32) { 1132 if (e->Iex.Unop.arg->tag == Iex_Get) { 1133 HReg dst; 1134 X86AMode* amode; 1135 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1136 dst = newVRegI(env); 1137 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1138 hregX86_EBP()); 1139 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1140 return dst; 1141 } 1142 } 1143 1144 switch (e->Iex.Unop.op) { 1145 case Iop_8Uto16: 1146 case Iop_8Uto32: 1147 case Iop_16Uto32: { 1148 HReg dst = newVRegI(env); 1149 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1150 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 
0xFFFF : 0xFF; 1151 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1152 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1153 X86RMI_Imm(mask), dst)); 1154 return dst; 1155 } 1156 case Iop_8Sto16: 1157 case Iop_8Sto32: 1158 case Iop_16Sto32: { 1159 HReg dst = newVRegI(env); 1160 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1161 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24; 1162 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1163 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst)); 1164 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst)); 1165 return dst; 1166 } 1167 case Iop_Not8: 1168 case Iop_Not16: 1169 case Iop_Not32: { 1170 HReg dst = newVRegI(env); 1171 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1172 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1173 addInstr(env, X86Instr_Unary32(Xun_NOT,dst)); 1174 return dst; 1175 } 1176 case Iop_64HIto32: { 1177 HReg rHi, rLo; 1178 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1179 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1180 } 1181 case Iop_64to32: { 1182 HReg rHi, rLo; 1183 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1184 return rLo; /* similar stupid comment to the above ... */ 1185 } 1186 case Iop_16HIto8: 1187 case Iop_32HIto16: { 1188 HReg dst = newVRegI(env); 1189 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1190 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16; 1191 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1192 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst)); 1193 return dst; 1194 } 1195 case Iop_1Uto32: 1196 case Iop_1Uto8: { 1197 HReg dst = newVRegI(env); 1198 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1199 addInstr(env, X86Instr_Set32(cond,dst)); 1200 return dst; 1201 } 1202 case Iop_1Sto8: 1203 case Iop_1Sto16: 1204 case Iop_1Sto32: { 1205 /* could do better than this, but for now ... 
*/ 1206 HReg dst = newVRegI(env); 1207 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1208 addInstr(env, X86Instr_Set32(cond,dst)); 1209 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1210 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1211 return dst; 1212 } 1213 case Iop_Ctz32: { 1214 /* Count trailing zeroes, implemented by x86 'bsfl' */ 1215 HReg dst = newVRegI(env); 1216 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1217 addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1218 return dst; 1219 } 1220 case Iop_Clz32: { 1221 /* Count leading zeroes. Do 'bsrl' to establish the index 1222 of the highest set bit, and subtract that value from 1223 31. */ 1224 HReg tmp = newVRegI(env); 1225 HReg dst = newVRegI(env); 1226 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1227 addInstr(env, X86Instr_Bsfr32(False,src,tmp)); 1228 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 1229 X86RMI_Imm(31), dst)); 1230 addInstr(env, X86Instr_Alu32R(Xalu_SUB, 1231 X86RMI_Reg(tmp), dst)); 1232 return dst; 1233 } 1234 1235 case Iop_CmpwNEZ32: { 1236 HReg dst = newVRegI(env); 1237 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1238 addInstr(env, mk_iMOVsd_RR(src,dst)); 1239 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 1240 addInstr(env, X86Instr_Alu32R(Xalu_OR, 1241 X86RMI_Reg(src), dst)); 1242 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1243 return dst; 1244 } 1245 case Iop_Left8: 1246 case Iop_Left16: 1247 case Iop_Left32: { 1248 HReg dst = newVRegI(env); 1249 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1250 addInstr(env, mk_iMOVsd_RR(src, dst)); 1251 addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); 1252 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); 1253 return dst; 1254 } 1255 1256 case Iop_V128to32: { 1257 HReg dst = newVRegI(env); 1258 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1259 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1260 sub_from_esp(env, 16); 1261 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1262 addInstr(env, X86Instr_Alu32R( 
Xalu_MOV, X86RMI_Mem(esp0), dst )); 1263 add_to_esp(env, 16); 1264 return dst; 1265 } 1266 1267 /* ReinterpF32asI32(e) */ 1268 /* Given an IEEE754 single, produce an I32 with the same bit 1269 pattern. Keep stack 8-aligned even though only using 4 1270 bytes. */ 1271 case Iop_ReinterpF32asI32: { 1272 HReg rf = iselFltExpr(env, e->Iex.Unop.arg); 1273 HReg dst = newVRegI(env); 1274 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1275 /* paranoia */ 1276 set_FPU_rounding_default(env); 1277 /* subl $8, %esp */ 1278 sub_from_esp(env, 8); 1279 /* gstF %rf, 0(%esp) */ 1280 addInstr(env, 1281 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp)); 1282 /* movl 0(%esp), %dst */ 1283 addInstr(env, 1284 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1285 /* addl $8, %esp */ 1286 add_to_esp(env, 8); 1287 return dst; 1288 } 1289 1290 case Iop_16to8: 1291 case Iop_32to8: 1292 case Iop_32to16: 1293 /* These are no-ops. */ 1294 return iselIntExpr_R(env, e->Iex.Unop.arg); 1295 1296 default: 1297 break; 1298 } 1299 break; 1300 } 1301 1302 /* --------- GET --------- */ 1303 case Iex_Get: { 1304 if (ty == Ity_I32) { 1305 HReg dst = newVRegI(env); 1306 addInstr(env, X86Instr_Alu32R( 1307 Xalu_MOV, 1308 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1309 hregX86_EBP())), 1310 dst)); 1311 return dst; 1312 } 1313 if (ty == Ity_I8 || ty == Ity_I16) { 1314 HReg dst = newVRegI(env); 1315 addInstr(env, X86Instr_LoadEX( 1316 toUChar(ty==Ity_I8 ? 
1 : 2), 1317 False, 1318 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1319 dst)); 1320 return dst; 1321 } 1322 break; 1323 } 1324 1325 case Iex_GetI: { 1326 X86AMode* am 1327 = genGuestArrayOffset( 1328 env, e->Iex.GetI.descr, 1329 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1330 HReg dst = newVRegI(env); 1331 if (ty == Ity_I8) { 1332 addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1333 return dst; 1334 } 1335 if (ty == Ity_I32) { 1336 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1337 return dst; 1338 } 1339 break; 1340 } 1341 1342 /* --------- CCALL --------- */ 1343 case Iex_CCall: { 1344 HReg dst = newVRegI(env); 1345 vassert(ty == e->Iex.CCall.retty); 1346 1347 /* be very restrictive for now. Only 32/64-bit ints allowed 1348 for args, and 32 bits for return type. */ 1349 if (e->Iex.CCall.retty != Ity_I32) 1350 goto irreducible; 1351 1352 /* Marshal args, do the call, clear stack. */ 1353 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 1354 1355 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1356 return dst; 1357 } 1358 1359 /* --------- LITERAL --------- */ 1360 /* 32/16/8-bit literals */ 1361 case Iex_Const: { 1362 X86RMI* rmi = iselIntExpr_RMI ( env, e ); 1363 HReg r = newVRegI(env); 1364 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r)); 1365 return r; 1366 } 1367 1368 /* --------- MULTIPLEX --------- */ 1369 case Iex_Mux0X: { 1370 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1371 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { 1372 X86RM* r8; 1373 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX); 1374 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0); 1375 HReg dst = newVRegI(env); 1376 addInstr(env, mk_iMOVsd_RR(rX,dst)); 1377 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 1378 addInstr(env, X86Instr_Test32(0xFF, r8)); 1379 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst)); 1380 return dst; 1381 } 1382 break; 1383 } 1384 1385 default: 1386 break; 1387 } /* switch (e->tag) */ 1388 1389 /* We get here 
      if no pattern matched. */
 irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

/* Sanity-check an amode produced by iselIntExpr_AMode_wrk: every
   register mentioned must be a 32-bit int register and, with the
   single exception of %ebp as an Xam_IR base (used for guest-state
   accesses), must be virtual, since register allocation has not yet
   happened. */
static Bool sane_AMode ( X86AMode* am )
{
   switch (am->tag) {
      case Xam_IR:
         /* base+imm: base must be virtual, or be %ebp itself */
         return 
            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
                    && (hregIsVirtual(am->Xam.IR.reg)
                        || am->Xam.IR.reg == hregX86_EBP()) );
      case Xam_IRRS:
         /* base + index*scale + imm: both regs must be virtual */
         return 
            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.base)
                    && hregClass(am->Xam.IRRS.index) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.index) );
      default:
        vpanic("sane_AMode: unknown x86 amode tag");
   }
}

/* Public entry point: compute an amode for 'e', with a sanity check
   wrapped around the worker. */
static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY !
*/
/* Worker for iselIntExpr_AMode.  Tries progressively simpler
   addressing-mode patterns, falling back to evaluating the whole
   expression into a register.  Pattern clauses are ordered from most
   to least specific; do not reorder them. */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 )
      => imm32(expr1, expr2, 1<<simm), for simm in {1,2,3} */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
       && e->Iex.Binop.arg1->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg1
                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      /* x86 SIB scaling supports only *2, *4, *8 */
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(imm32, r1, r2, shift);
      }
   }

   /* Add32(expr1, Shl32(expr2, imm)) => 0(expr1, expr2, 1<<imm) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add32(expr,i) => i(expr) */
   if (e->tag == Iex_Binop 
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return X86AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an X86RMI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */

/* Public entry point: wrapper around iselIntExpr_RMI_wrk which
   sanity-checks the returned operand (registers must be virtual
   32-bit ints; memory amodes must satisfy sane_AMode). */
static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Xrmi_Imm:
         return rmi;
      case Xrmi_Reg:
         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
         return rmi;
      case Xrmi_Mem:
         vassert(sane_AMode(rmi->Xrmi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY !
*/
/* Worker for iselIntExpr_RMI: prefer an immediate, then a memory
   operand (32-bit GET or little-endian 32-bit load), else evaluate
   into a register. */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         /* narrower constants are zero-extended into the imm32 field */
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
      return X86RMI_Imm(u);
   }

   /* special case: 32-bit GET (guest state slot, addressed off %ebp) */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                    hregX86_EBP()));
   }

   /* special case: 32-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I32 
       && e->Iex.Load.end == Iend_LE) {
      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return X86RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RMI_Reg(r);
   }
}


/* --------------------- RIs --------------------- */

/* Calculate an expression into an X86RI operand.  As with
   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */

/* Public entry point: wrapper around iselIntExpr_RI_wrk which
   sanity-checks the returned operand. */
static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   X86RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Xri_Imm:
         return ri;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY !
*/ 1581 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 1582 { 1583 IRType ty = typeOfIRExpr(env->type_env,e); 1584 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1585 1586 /* special case: immediate */ 1587 if (e->tag == Iex_Const) { 1588 UInt u; 1589 switch (e->Iex.Const.con->tag) { 1590 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1591 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1592 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1593 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1594 } 1595 return X86RI_Imm(u); 1596 } 1597 1598 /* default case: calculate into a register and return that */ 1599 { 1600 HReg r = iselIntExpr_R ( env, e ); 1601 return X86RI_Reg(r); 1602 } 1603 } 1604 1605 1606 /* --------------------- RMs --------------------- */ 1607 1608 /* Similarly, calculate an expression into an X86RM operand. As with 1609 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1610 1611 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 1612 { 1613 X86RM* rm = iselIntExpr_RM_wrk(env, e); 1614 /* sanity checks ... */ 1615 switch (rm->tag) { 1616 case Xrm_Reg: 1617 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32); 1618 vassert(hregIsVirtual(rm->Xrm.Reg.reg)); 1619 return rm; 1620 case Xrm_Mem: 1621 vassert(sane_AMode(rm->Xrm.Mem.am)); 1622 return rm; 1623 default: 1624 vpanic("iselIntExpr_RM: unknown x86 RM tag"); 1625 } 1626 } 1627 1628 /* DO NOT CALL THIS DIRECTLY ! 
*/
/* Worker for iselIntExpr_RM: prefer a memory operand for a 32-bit
   GET, else evaluate into a register. */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 32-bit GET (guest state slot, addressed off %ebp) */
   if (e->tag == Iex_Get && ty == Ity_I32) {
      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                   hregX86_EBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Test32 doesn't modify r32; so this is OK. */
      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
      return Xcc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True 
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
      /* xor r,r sets ZF=1; pick the condition that matches the
         constant's value */
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ?
Xcc_Z : Xcc_NZ;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition.
         X86 condition codes are encoded so that flipping bit 0 of a
         code inverts it -- assumed here; see X86CondCode. */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 32to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32to1) {
      /* only bit 0 is significant */
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(1,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(0xFF,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   /* CmpNEZ16(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, X86Instr_Test32(0xFFFF,rm));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(And32(x,y)) -- fold the And into the flag-setting op */
   {
      DECLARE_PATTERN(p_CmpNEZ32_And32);
      DEFINE_PATTERN(p_CmpNEZ32_And32,
                     unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
         HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg tmp = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(Or32(x,y)) -- likewise, fold the Or */
   {
      DECLARE_PATTERN(p_CmpNEZ32_Or32);
      DEFINE_PATTERN(p_CmpNEZ32_Or32,
                     unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
         HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg tmp = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ32(GET(..):I32) -- compare the guest-state slot in place */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ32
       && e->Iex.Unop.arg->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                 hregX86_EBP());
      addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
      return Xcc_NZ;
   }

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      X86RMI* rmi2 = X86RMI_Imm(0);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) -- OR all four 32-bit halves together */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg hi1, lo1, hi2, lo2;
         HReg tmp = newVRegI(env);
         iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
         addInstr(env, mk_iMOVsd_RR(hi1, tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
         iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
         return Xcc_NZ;
      }
   }

   /* CmpNEZ64(x) -- nonzero iff (hi|lo) is nonzero */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg hi, lo;
      HReg tmp = newVRegI(env);
      iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
      addInstr(env, mk_iMOVsd_RR(hi, tmp));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
      return Xcc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         /* comparison against zero: a single TEST suffices */
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
         }
      } else {
         /* general case: XOR the args and test the low 8 bits */
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg r = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 -- XOR the args and test the low 16 bits */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg r = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
         default: vpanic("iselCondCode(x86): CmpXX16");
      }
   }

   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
      Saves a "movl %eax, %tmp" compared to the default route.
   */
   if (e->tag == Iex_Binop 
       && e->Iex.Binop.op == Iop_CmpNE32
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U32);
      /* Marshal args, do the call. */
      doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
      /* result comes back in %eax; compare it against the constant */
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
                                    hregX86_EAX()));
      return Xcc_NZ;
   }

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
         case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
         case Iop_CmpLT32S: return Xcc_L;
         case Iop_CmpLT32U: return Xcc_B;
         case Iop_CmpLE32S: return Xcc_LE;
         case Iop_CmpLE32U: return Xcc_BE;
         default: vpanic("iselCondCode(x86): CmpXX32");
      }
   }

   /* CmpNE64 -- XOR both halves, OR the results; Z iff equal */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpEQ64)) {
      HReg hi1, hi2, lo1, lo2;
      HReg tHi = newVRegI(env);
      HReg tLo = newVRegI(env);
      iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
      iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
      addInstr(env, mk_iMOVsd_RR(hi1,
tHi));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
      addInstr(env, mk_iMOVsd_RR(lo1, tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
      switch (e->Iex.Binop.op) {
         case Iop_CmpNE64: return Xcc_NZ;
         case Iop_CmpEQ64: return Xcc_Z;
         default: vpanic("iselCondCode(x86): CmpXX64");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

/* Public entry point: wrapper around iselInt64Expr_wrk which
   sanity-checks the returned register pair (both halves must be
   virtual 32-bit int registers). */
static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   HWord fn = 0; /* helper fn for most SIMD64 stuff */
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);

   /* 64-bit literal */
   if (e->tag == Iex_Const) {
      ULong w64 = e->Iex.Const.con->Ico.U64;
      UInt wHi = toUInt(w64 >> 32);
      UInt wLo = toUInt(w64);
      HReg tLo = newVRegI(env);
      HReg tHi = newVRegI(env);
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (wLo == wHi) {
         /* Save a precious Int register in this special case.
*/ 1965 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1966 *rHi = tLo; 1967 *rLo = tLo; 1968 } else { 1969 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 1970 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 1971 *rHi = tHi; 1972 *rLo = tLo; 1973 } 1974 return; 1975 } 1976 1977 /* read 64-bit IRTemp */ 1978 if (e->tag == Iex_RdTmp) { 1979 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 1980 return; 1981 } 1982 1983 /* 64-bit load */ 1984 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 1985 HReg tLo, tHi; 1986 X86AMode *am0, *am4; 1987 vassert(e->Iex.Load.ty == Ity_I64); 1988 tLo = newVRegI(env); 1989 tHi = newVRegI(env); 1990 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr); 1991 am4 = advance4(am0); 1992 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 1993 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 1994 *rHi = tHi; 1995 *rLo = tLo; 1996 return; 1997 } 1998 1999 /* 64-bit GET */ 2000 if (e->tag == Iex_Get) { 2001 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 2002 X86AMode* am4 = advance4(am); 2003 HReg tLo = newVRegI(env); 2004 HReg tHi = newVRegI(env); 2005 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2006 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2007 *rHi = tHi; 2008 *rLo = tLo; 2009 return; 2010 } 2011 2012 /* 64-bit GETI */ 2013 if (e->tag == Iex_GetI) { 2014 X86AMode* am 2015 = genGuestArrayOffset( env, e->Iex.GetI.descr, 2016 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2017 X86AMode* am4 = advance4(am); 2018 HReg tLo = newVRegI(env); 2019 HReg tHi = newVRegI(env); 2020 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2021 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2022 *rHi = tHi; 2023 *rLo = tLo; 2024 return; 2025 } 2026 2027 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */ 2028 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) { 2029 X86RM* r8; 2030 HReg e0Lo, e0Hi; 
2031 HReg tLo = newVRegI(env); 2032 HReg tHi = newVRegI(env); 2033 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2034 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2035 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2036 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 2037 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2038 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2039 addInstr(env, X86Instr_Test32(0xFF, r8)); 2040 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi)); 2041 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo)); 2042 add_to_esp(env, 4); 2043 *rHi = tHi; 2044 *rLo = tLo; 2045 return; 2046 } 2047 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */ 2048 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) { 2049 X86RM* r8; 2050 HReg e0Lo, e0Hi; 2051 HReg tLo = newVRegI(env); 2052 HReg tHi = newVRegI(env); 2053 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2054 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX); 2055 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2056 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); 2057 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); 2058 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 2059 addInstr(env, X86Instr_Test32(0xFF, r8)); 2060 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi)); 2061 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo)); 2062 add_to_esp(env, 4); 2063 *rHi = tHi; 2064 *rLo = tLo; 2065 return; 2066 } 2067 2068 /* 64-bit Mux0X: Mux0X(g, expr, expr) */ 2069 if (e->tag == Iex_Mux0X) { 2070 X86RM* r8; 2071 HReg e0Lo, e0Hi, eXLo, eXHi; 2072 HReg tLo = newVRegI(env); 2073 HReg tHi = newVRegI(env); 2074 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); 2075 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX); 2076 addInstr(env, mk_iMOVsd_RR(eXHi, tHi)); 2077 addInstr(env, mk_iMOVsd_RR(eXLo, tLo)); 2078 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 2079 addInstr(env, X86Instr_Test32(0xFF, r8)); 2080 /* This assumes the first cmov32 doesn't trash the condition 2081 
codes, so they are still available for the second cmov32 */ 2082 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi)); 2083 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo)); 2084 *rHi = tHi; 2085 *rLo = tLo; 2086 return; 2087 } 2088 2089 /* --------- BINARY ops --------- */ 2090 if (e->tag == Iex_Binop) { 2091 switch (e->Iex.Binop.op) { 2092 /* 32 x 32 -> 64 multiply */ 2093 case Iop_MullU32: 2094 case Iop_MullS32: { 2095 /* get one operand into %eax, and the other into a R/M. 2096 Need to make an educated guess about which is better in 2097 which. */ 2098 HReg tLo = newVRegI(env); 2099 HReg tHi = newVRegI(env); 2100 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); 2101 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2102 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2103 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX())); 2104 addInstr(env, X86Instr_MulL(syned, rmLeft)); 2105 /* Result is now in EDX:EAX. Tell the caller. */ 2106 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2107 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2108 *rHi = tHi; 2109 *rLo = tLo; 2110 return; 2111 } 2112 2113 /* 64 x 32 -> (32(rem),32(div)) division */ 2114 case Iop_DivModU64to32: 2115 case Iop_DivModS64to32: { 2116 /* Get the 64-bit operand into edx:eax, and the other into 2117 any old R/M. 
*/ 2118 HReg sHi, sLo; 2119 HReg tLo = newVRegI(env); 2120 HReg tHi = newVRegI(env); 2121 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 2122 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2123 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2124 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX())); 2125 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX())); 2126 addInstr(env, X86Instr_Div(syned, rmRight)); 2127 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2128 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2129 *rHi = tHi; 2130 *rLo = tLo; 2131 return; 2132 } 2133 2134 /* Or64/And64/Xor64 */ 2135 case Iop_Or64: 2136 case Iop_And64: 2137 case Iop_Xor64: { 2138 HReg xLo, xHi, yLo, yHi; 2139 HReg tLo = newVRegI(env); 2140 HReg tHi = newVRegI(env); 2141 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2142 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND 2143 : Xalu_XOR; 2144 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2145 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2146 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2147 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2148 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2149 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2150 *rHi = tHi; 2151 *rLo = tLo; 2152 return; 2153 } 2154 2155 /* Add64/Sub64 */ 2156 case Iop_Add64: 2157 if (e->Iex.Binop.arg2->tag == Iex_Const) { 2158 /* special case Add64(e, const) */ 2159 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2160 UInt wHi = toUInt(w64 >> 32); 2161 UInt wLo = toUInt(w64); 2162 HReg tLo = newVRegI(env); 2163 HReg tHi = newVRegI(env); 2164 HReg xLo, xHi; 2165 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64); 2166 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2167 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2168 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2169 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo)); 2170 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi)); 2171 *rHi = tHi; 2172 *rLo = tLo; 2173 
return; 2174 } 2175 /* else fall through to the generic case */ 2176 case Iop_Sub64: { 2177 HReg xLo, xHi, yLo, yHi; 2178 HReg tLo = newVRegI(env); 2179 HReg tHi = newVRegI(env); 2180 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2181 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2182 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2183 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2184 if (e->Iex.Binop.op==Iop_Add64) { 2185 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2186 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2187 } else { 2188 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2189 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2190 } 2191 *rHi = tHi; 2192 *rLo = tLo; 2193 return; 2194 } 2195 2196 /* 32HLto64(e1,e2) */ 2197 case Iop_32HLto64: 2198 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2199 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2200 return; 2201 2202 /* 64-bit shifts */ 2203 case Iop_Shl64: { 2204 /* We use the same ingenious scheme as gcc. Put the value 2205 to be shifted into %hi:%lo, and the shift amount into 2206 %cl. Then (dsts on right, a la ATT syntax): 2207 2208 shldl %cl, %lo, %hi -- make %hi be right for the 2209 -- shift amt %cl % 32 2210 shll %cl, %lo -- make %lo be right for the 2211 -- shift amt %cl % 32 2212 2213 Now, if (shift amount % 64) is in the range 32 .. 
63, 2214 we have to do a fixup, which puts the result low half 2215 into the result high half, and zeroes the low half: 2216 2217 testl $32, %ecx 2218 2219 cmovnz %lo, %hi 2220 movl $0, %tmp -- sigh; need yet another reg 2221 cmovnz %tmp, %lo 2222 */ 2223 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2224 tLo = newVRegI(env); 2225 tHi = newVRegI(env); 2226 tTemp = newVRegI(env); 2227 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2228 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2229 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2230 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2231 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2232 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2233 and those regs are legitimately modifiable. */ 2234 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi)); 2235 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo)); 2236 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2237 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi)); 2238 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2239 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo)); 2240 *rHi = tHi; 2241 *rLo = tLo; 2242 return; 2243 } 2244 2245 case Iop_Shr64: { 2246 /* We use the same ingenious scheme as gcc. Put the value 2247 to be shifted into %hi:%lo, and the shift amount into 2248 %cl. Then: 2249 2250 shrdl %cl, %hi, %lo -- make %lo be right for the 2251 -- shift amt %cl % 32 2252 shrl %cl, %hi -- make %hi be right for the 2253 -- shift amt %cl % 32 2254 2255 Now, if (shift amount % 64) is in the range 32 .. 
63, 2256 we have to do a fixup, which puts the result high half 2257 into the result low half, and zeroes the high half: 2258 2259 testl $32, %ecx 2260 2261 cmovnz %hi, %lo 2262 movl $0, %tmp -- sigh; need yet another reg 2263 cmovnz %tmp, %hi 2264 */ 2265 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2266 tLo = newVRegI(env); 2267 tHi = newVRegI(env); 2268 tTemp = newVRegI(env); 2269 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2270 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2271 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2272 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2273 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2274 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2275 and those regs are legitimately modifiable. */ 2276 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo)); 2277 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi)); 2278 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2279 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo)); 2280 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2281 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi)); 2282 *rHi = tHi; 2283 *rLo = tLo; 2284 return; 2285 } 2286 2287 /* F64 -> I64 */ 2288 /* Sigh, this is an almost exact copy of the F64 -> I32/I16 2289 case. Unfortunately I see no easy way to avoid the 2290 duplication. */ 2291 case Iop_F64toI64S: { 2292 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2293 HReg tLo = newVRegI(env); 2294 HReg tHi = newVRegI(env); 2295 2296 /* Used several times ... */ 2297 /* Careful ... this sharing is only safe because 2298 zero_esp/four_esp do not hold any registers which the 2299 register allocator could attempt to swizzle later. */ 2300 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2301 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2302 2303 /* rf now holds the value to be converted, and rrm holds 2304 the rounding mode value, encoded as per the 2305 IRRoundingMode enum. 
The first thing to do is set the 2306 FPU's rounding mode accordingly. */ 2307 2308 /* Create a space for the format conversion. */ 2309 /* subl $8, %esp */ 2310 sub_from_esp(env, 8); 2311 2312 /* Set host rounding mode */ 2313 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2314 2315 /* gistll %rf, 0(%esp) */ 2316 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp)); 2317 2318 /* movl 0(%esp), %dstLo */ 2319 /* movl 4(%esp), %dstHi */ 2320 addInstr(env, X86Instr_Alu32R( 2321 Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2322 addInstr(env, X86Instr_Alu32R( 2323 Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2324 2325 /* Restore default FPU rounding. */ 2326 set_FPU_rounding_default( env ); 2327 2328 /* addl $8, %esp */ 2329 add_to_esp(env, 8); 2330 2331 *rHi = tHi; 2332 *rLo = tLo; 2333 return; 2334 } 2335 2336 case Iop_Add8x8: 2337 fn = (HWord)h_generic_calc_Add8x8; goto binnish; 2338 case Iop_Add16x4: 2339 fn = (HWord)h_generic_calc_Add16x4; goto binnish; 2340 case Iop_Add32x2: 2341 fn = (HWord)h_generic_calc_Add32x2; goto binnish; 2342 2343 case Iop_Avg8Ux8: 2344 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish; 2345 case Iop_Avg16Ux4: 2346 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish; 2347 2348 case Iop_CmpEQ8x8: 2349 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish; 2350 case Iop_CmpEQ16x4: 2351 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish; 2352 case Iop_CmpEQ32x2: 2353 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish; 2354 2355 case Iop_CmpGT8Sx8: 2356 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish; 2357 case Iop_CmpGT16Sx4: 2358 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish; 2359 case Iop_CmpGT32Sx2: 2360 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish; 2361 2362 case Iop_InterleaveHI8x8: 2363 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish; 2364 case Iop_InterleaveLO8x8: 2365 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish; 2366 case Iop_InterleaveHI16x4: 2367 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto 
binnish; 2368 case Iop_InterleaveLO16x4: 2369 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish; 2370 case Iop_InterleaveHI32x2: 2371 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; 2372 case Iop_InterleaveLO32x2: 2373 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; 2374 case Iop_CatOddLanes16x4: 2375 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; 2376 case Iop_CatEvenLanes16x4: 2377 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; 2378 case Iop_Perm8x8: 2379 fn = (HWord)h_generic_calc_Perm8x8; goto binnish; 2380 2381 case Iop_Max8Ux8: 2382 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; 2383 case Iop_Max16Sx4: 2384 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish; 2385 case Iop_Min8Ux8: 2386 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish; 2387 case Iop_Min16Sx4: 2388 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish; 2389 2390 case Iop_Mul16x4: 2391 fn = (HWord)h_generic_calc_Mul16x4; goto binnish; 2392 case Iop_Mul32x2: 2393 fn = (HWord)h_generic_calc_Mul32x2; goto binnish; 2394 case Iop_MulHi16Sx4: 2395 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; 2396 case Iop_MulHi16Ux4: 2397 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish; 2398 2399 case Iop_QAdd8Sx8: 2400 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish; 2401 case Iop_QAdd16Sx4: 2402 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish; 2403 case Iop_QAdd8Ux8: 2404 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish; 2405 case Iop_QAdd16Ux4: 2406 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; 2407 2408 case Iop_QNarrowBin32Sto16Sx4: 2409 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish; 2410 case Iop_QNarrowBin16Sto8Sx8: 2411 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish; 2412 case Iop_QNarrowBin16Sto8Ux8: 2413 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish; 2414 case Iop_NarrowBin16to8x8: 2415 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish; 2416 case Iop_NarrowBin32to16x4: 2417 fn = 
(HWord)h_generic_calc_NarrowBin32to16x4; goto binnish; 2418 2419 case Iop_QSub8Sx8: 2420 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; 2421 case Iop_QSub16Sx4: 2422 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish; 2423 case Iop_QSub8Ux8: 2424 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish; 2425 case Iop_QSub16Ux4: 2426 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish; 2427 2428 case Iop_Sub8x8: 2429 fn = (HWord)h_generic_calc_Sub8x8; goto binnish; 2430 case Iop_Sub16x4: 2431 fn = (HWord)h_generic_calc_Sub16x4; goto binnish; 2432 case Iop_Sub32x2: 2433 fn = (HWord)h_generic_calc_Sub32x2; goto binnish; 2434 2435 binnish: { 2436 /* Note: the following assumes all helpers are of 2437 signature 2438 ULong fn ( ULong, ULong ), and they are 2439 not marked as regparm functions. 2440 */ 2441 HReg xLo, xHi, yLo, yHi; 2442 HReg tLo = newVRegI(env); 2443 HReg tHi = newVRegI(env); 2444 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2445 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi))); 2446 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo))); 2447 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2448 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2449 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2450 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2451 add_to_esp(env, 4*4); 2452 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2453 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2454 *rHi = tHi; 2455 *rLo = tLo; 2456 return; 2457 } 2458 2459 case Iop_ShlN32x2: 2460 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; 2461 case Iop_ShlN16x4: 2462 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; 2463 case Iop_ShlN8x8: 2464 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; 2465 case Iop_ShrN32x2: 2466 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; 2467 case Iop_ShrN16x4: 2468 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty; 2469 case Iop_SarN32x2: 2470 fn = (HWord)h_generic_calc_SarN32x2; goto shifty; 2471 case Iop_SarN16x4: 2472 fn = 
(HWord)h_generic_calc_SarN16x4; goto shifty; 2473 case Iop_SarN8x8: 2474 fn = (HWord)h_generic_calc_SarN8x8; goto shifty; 2475 shifty: { 2476 /* Note: the following assumes all helpers are of 2477 signature 2478 ULong fn ( ULong, UInt ), and they are 2479 not marked as regparm functions. 2480 */ 2481 HReg xLo, xHi; 2482 HReg tLo = newVRegI(env); 2483 HReg tHi = newVRegI(env); 2484 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2485 addInstr(env, X86Instr_Push(y)); 2486 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2487 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2488 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2489 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2490 add_to_esp(env, 3*4); 2491 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2492 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2493 *rHi = tHi; 2494 *rLo = tLo; 2495 return; 2496 } 2497 2498 default: 2499 break; 2500 } 2501 } /* if (e->tag == Iex_Binop) */ 2502 2503 2504 /* --------- UNARY ops --------- */ 2505 if (e->tag == Iex_Unop) { 2506 switch (e->Iex.Unop.op) { 2507 2508 /* 32Sto64(e) */ 2509 case Iop_32Sto64: { 2510 HReg tLo = newVRegI(env); 2511 HReg tHi = newVRegI(env); 2512 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2513 addInstr(env, mk_iMOVsd_RR(src,tHi)); 2514 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2515 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi)); 2516 *rHi = tHi; 2517 *rLo = tLo; 2518 return; 2519 } 2520 2521 /* 32Uto64(e) */ 2522 case Iop_32Uto64: { 2523 HReg tLo = newVRegI(env); 2524 HReg tHi = newVRegI(env); 2525 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2526 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2527 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2528 *rHi = tHi; 2529 *rLo = tLo; 2530 return; 2531 } 2532 2533 /* 16Uto64(e) */ 2534 case Iop_16Uto64: { 2535 HReg tLo = newVRegI(env); 2536 HReg tHi = newVRegI(env); 2537 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2538 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2539 addInstr(env, 
X86Instr_Alu32R(Xalu_AND, 2540 X86RMI_Imm(0xFFFF), tLo)); 2541 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2542 *rHi = tHi; 2543 *rLo = tLo; 2544 return; 2545 } 2546 2547 /* V128{HI}to64 */ 2548 case Iop_V128HIto64: 2549 case Iop_V128to64: { 2550 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0; 2551 HReg tLo = newVRegI(env); 2552 HReg tHi = newVRegI(env); 2553 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 2554 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 2555 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP()); 2556 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP()); 2557 sub_from_esp(env, 16); 2558 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 2559 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2560 X86RMI_Mem(espLO), tLo )); 2561 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2562 X86RMI_Mem(espHI), tHi )); 2563 add_to_esp(env, 16); 2564 *rHi = tHi; 2565 *rLo = tLo; 2566 return; 2567 } 2568 2569 /* could do better than this, but for now ... */ 2570 case Iop_1Sto64: { 2571 HReg tLo = newVRegI(env); 2572 HReg tHi = newVRegI(env); 2573 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2574 addInstr(env, X86Instr_Set32(cond,tLo)); 2575 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo)); 2576 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo)); 2577 addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2578 *rHi = tHi; 2579 *rLo = tLo; 2580 return; 2581 } 2582 2583 /* Not64(e) */ 2584 case Iop_Not64: { 2585 HReg tLo = newVRegI(env); 2586 HReg tHi = newVRegI(env); 2587 HReg sHi, sLo; 2588 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2589 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2590 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2591 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi)); 2592 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo)); 2593 *rHi = tHi; 2594 *rLo = tLo; 2595 return; 2596 } 2597 2598 /* Left64(e) */ 2599 case Iop_Left64: { 2600 HReg yLo, yHi; 2601 HReg tLo = newVRegI(env); 2602 HReg tHi = newVRegI(env); 2603 /* yHi:yLo = arg */ 2604 iselInt64Expr(&yHi, &yLo, env, 
e->Iex.Unop.arg); 2605 /* tLo = 0 - yLo, and set carry */ 2606 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo)); 2607 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2608 /* tHi = 0 - yHi - carry */ 2609 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2610 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2611 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2612 back in, so as to give the final result 2613 tHi:tLo = arg | -arg. */ 2614 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); 2615 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); 2616 *rHi = tHi; 2617 *rLo = tLo; 2618 return; 2619 } 2620 2621 /* --- patterns rooted at: CmpwNEZ64 --- */ 2622 2623 /* CmpwNEZ64(e) */ 2624 case Iop_CmpwNEZ64: { 2625 2626 DECLARE_PATTERN(p_CmpwNEZ64_Or64); 2627 DEFINE_PATTERN(p_CmpwNEZ64_Or64, 2628 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); 2629 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { 2630 /* CmpwNEZ64(Or64(x,y)) */ 2631 HReg xHi,xLo,yHi,yLo; 2632 HReg xBoth = newVRegI(env); 2633 HReg merged = newVRegI(env); 2634 HReg tmp2 = newVRegI(env); 2635 2636 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); 2637 addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); 2638 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2639 X86RMI_Reg(xLo),xBoth)); 2640 2641 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); 2642 addInstr(env, mk_iMOVsd_RR(yHi,merged)); 2643 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2644 X86RMI_Reg(yLo),merged)); 2645 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2646 X86RMI_Reg(xBoth),merged)); 2647 2648 /* tmp2 = (merged | -merged) >>s 31 */ 2649 addInstr(env, mk_iMOVsd_RR(merged,tmp2)); 2650 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2651 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2652 X86RMI_Reg(merged), tmp2)); 2653 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2654 *rHi = tmp2; 2655 *rLo = tmp2; 2656 return; 2657 } else { 2658 /* CmpwNEZ64(e) */ 2659 HReg srcLo, srcHi; 2660 HReg tmp1 = newVRegI(env); 
2661 HReg tmp2 = newVRegI(env); 2662 /* srcHi:srcLo = arg */ 2663 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2664 /* tmp1 = srcHi | srcLo */ 2665 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); 2666 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2667 X86RMI_Reg(srcLo), tmp1)); 2668 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2669 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); 2670 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2671 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2672 X86RMI_Reg(tmp1), tmp2)); 2673 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2674 *rHi = tmp2; 2675 *rLo = tmp2; 2676 return; 2677 } 2678 } 2679 2680 /* ReinterpF64asI64(e) */ 2681 /* Given an IEEE754 double, produce an I64 with the same bit 2682 pattern. */ 2683 case Iop_ReinterpF64asI64: { 2684 HReg rf = iselDblExpr(env, e->Iex.Unop.arg); 2685 HReg tLo = newVRegI(env); 2686 HReg tHi = newVRegI(env); 2687 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2688 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2689 /* paranoia */ 2690 set_FPU_rounding_default(env); 2691 /* subl $8, %esp */ 2692 sub_from_esp(env, 8); 2693 /* gstD %rf, 0(%esp) */ 2694 addInstr(env, 2695 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp)); 2696 /* movl 0(%esp), %tLo */ 2697 addInstr(env, 2698 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2699 /* movl 4(%esp), %tHi */ 2700 addInstr(env, 2701 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2702 /* addl $8, %esp */ 2703 add_to_esp(env, 8); 2704 *rHi = tHi; 2705 *rLo = tLo; 2706 return; 2707 } 2708 2709 case Iop_CmpNEZ32x2: 2710 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish; 2711 case Iop_CmpNEZ16x4: 2712 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish; 2713 case Iop_CmpNEZ8x8: 2714 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish; 2715 unish: { 2716 /* Note: the following assumes all helpers are of 2717 signature 2718 ULong fn ( ULong ), and they are 2719 not marked as regparm functions. 
2720 */ 2721 HReg xLo, xHi; 2722 HReg tLo = newVRegI(env); 2723 HReg tHi = newVRegI(env); 2724 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 2725 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2726 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2727 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); 2728 add_to_esp(env, 2*4); 2729 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2730 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2731 *rHi = tHi; 2732 *rLo = tLo; 2733 return; 2734 } 2735 2736 default: 2737 break; 2738 } 2739 } /* if (e->tag == Iex_Unop) */ 2740 2741 2742 /* --------- CCALL --------- */ 2743 if (e->tag == Iex_CCall) { 2744 HReg tLo = newVRegI(env); 2745 HReg tHi = newVRegI(env); 2746 2747 /* Marshal args, do the call, clear stack. */ 2748 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args ); 2749 2750 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2751 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2752 *rHi = tHi; 2753 *rLo = tLo; 2754 return; 2755 } 2756 2757 ppIRExpr(e); 2758 vpanic("iselInt64Expr"); 2759 } 2760 2761 2762 /*---------------------------------------------------------*/ 2763 /*--- ISEL: Floating point expressions (32 bit) ---*/ 2764 /*---------------------------------------------------------*/ 2765 2766 /* Nothing interesting here; really just wrappers for 2767 64-bit stuff. 
*/

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* F32 values still live in 64-bit x87 registers here. */
   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr: computes the F32 expression 'e' into a
   fresh virtual FP register and returns it. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard FPU register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegF(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      /* arg1 carries the IR rounding mode for the narrowing */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, X86Instr_Fp64to32(src,dst));
      set_FPU_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
       /* Given an I32, produce an IEEE754 float with the same bit
          pattern: bounce the value through the stack. */
      HReg dst = newVRegF(env);
      X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
      /* paranoia */
      addInstr(env, X86Instr_Push(rmi));
      addInstr(env, X86Instr_FpLdSt(
                       True/*load*/, 4, dst,
                       X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 4);
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */

/* IEEE 754 formats.
   From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity-check the worker's result: a virtual FP register. */
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
/* Worker for iselDblExpr: computes the F64 expression 'e' into a
   fresh virtual FP register and returns it. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Materialise the literal by pushing its two 32-bit halves and
         doing an 8-byte FP load from the stack.  The union gives us
         the bit pattern without violating strict aliasing. */
      union { UInt u32x2[2]; ULong u64; Double f64; } u;
      HReg freg = newVRegF(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);
      vassert(sizeof(u.u32x2) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(x86): const");

      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
                                    X86AMode_IR(0, hregX86_ESP())));
      add_to_esp(env, 8);
      return freg;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      X86AMode* am;
      HReg res = newVRegF(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
      return res;
   }

   if (e->tag == Iex_Get) {
      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
                                  hregX86_EBP() );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      X86AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegF(env);
      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Triop) {
      X86FpOp fpop = Xfp_INVALID;
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {
         case Iop_AddF64:    fpop = Xfp_ADD; break;
         case Iop_SubF64:    fpop = Xfp_SUB; break;
         case Iop_MulF64:    fpop = Xfp_MUL; break;
         case Iop_DivF64:    fpop = Xfp_DIV; break;
         case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
         case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
         case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
         case Iop_AtanF64:   fpop = Xfp_ATAN; break;
         case Iop_PRemF64:   fpop = Xfp_PREM; break;
         case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res  = newVRegF(env);
         HReg srcL = iselDblExpr(env, triop->arg2);
         HReg srcR = iselDblExpr(env, triop->arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
         /* Non-arithmetic x87 ops can produce results outside the
            64-bit double range; force back to F64 precision. */
         if (fpop != Xfp_ADD && fpop != Xfp_SUB
             && fpop != Xfp_MUL && fpop != Xfp_DIV)
            roundToF64(env, res);
         return res;
      }
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg dst = newVRegF(env);

      /* rf now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      /* grndint %rf, %dst */
      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegF(env);
      HReg rHi,rLo;
      iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));

      /* Set host rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, X86Instr_FpLdStI(
                       True/*load*/, 8, dst,
                       X86AMode_IR(0, hregX86_ESP())));

      /* Restore default FPU rounding. */
      set_FPU_rounding_default( env );

      add_to_esp(env, 8);
      return dst;
   }

   if (e->tag == Iex_Binop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SinF64:  fpop = Xfp_SIN; break;
         case Iop_CosF64:  fpop = Xfp_COS; break;
         case Iop_TanF64:  fpop = Xfp_TAN; break;
         case Iop_2xm1F64: fpop = Xfp_2XM1; break;
         case Iop_SqrtF64: fpop = Xfp_SQRT; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         if (fpop != Xfp_SQRT
             && fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);
         return res;
      }
   }

   if (e->tag == Iex_Unop) {
      X86FpOp fpop = Xfp_INVALID;
      switch (e->Iex.Unop.op) {
         case Iop_NegF64:  fpop = Xfp_NEG; break;
         case Iop_AbsF64:  fpop = Xfp_ABS; break;
         default: break;
      }
      if (fpop != Xfp_INVALID) {
         HReg res = newVRegF(env);
         HReg src = iselDblExpr(env, e->Iex.Unop.arg);
         addInstr(env, X86Instr_FpUnary(fpop,src,res));
         /* NEG and ABS are exact, so no re-rounding is needed;
            this condition is always false here but kept for
            symmetry with the Binop case above. */
         if (fpop != Xfp_NEG && fpop != Xfp_ABS)
            roundToF64(env, res);
         return res;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_I32StoF64: {
            HReg dst = newVRegF(env);
            HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_FpLdStI(
                             True/*load*/, 4, dst,
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 4);
            return dst;
         }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            HReg dst = newVRegF(env);
            HReg rHi, rLo;
            iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
            /* paranoia */
            set_FPU_rounding_default(env);
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_FpLdSt(
                             True/*load*/, 8, dst,
                             X86AMode_IR(0, hregX86_ESP())));
            add_to_esp(env, 8);
            return dst;
         }
         case Iop_F32toF64: {
            /* this is a no-op: F32 is already held in a 64-bit
               x87 register */
            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
            return res;
         }
         default:
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_Mux0X) {
      if (ty == Ity_F64
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         X86RM* r8  = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
         HReg dst = newVRegF(env);
         /* dst = rX; then overwrite with r0 when cond == 0 */
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
         addInstr(env, X86Instr_Test32(0xFF, r8));
         addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
         return dst;
      }
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.
---*/ 3131 /*---------------------------------------------------------*/ 3132 3133 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3134 { 3135 HReg r = iselVecExpr_wrk( env, e ); 3136 # if 0 3137 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3138 # endif 3139 vassert(hregClass(r) == HRcVec128); 3140 vassert(hregIsVirtual(r)); 3141 return r; 3142 } 3143 3144 3145 /* DO NOT CALL THIS DIRECTLY */ 3146 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3147 { 3148 3149 # define REQUIRE_SSE1 \ 3150 do { if (env->hwcaps == 0/*baseline, no sse*/) \ 3151 goto vec_fail; \ 3152 } while (0) 3153 3154 # define REQUIRE_SSE2 \ 3155 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ 3156 goto vec_fail; \ 3157 } while (0) 3158 3159 # define SSE2_OR_ABOVE \ 3160 (env->hwcaps & VEX_HWCAPS_X86_SSE2) 3161 3162 HWord fn = 0; /* address of helper fn, if required */ 3163 MatchInfo mi; 3164 Bool arg1isEReg = False; 3165 X86SseOp op = Xsse_INVALID; 3166 IRType ty = typeOfIRExpr(env->type_env,e); 3167 vassert(e); 3168 vassert(ty == Ity_V128); 3169 3170 REQUIRE_SSE1; 3171 3172 if (e->tag == Iex_RdTmp) { 3173 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3174 } 3175 3176 if (e->tag == Iex_Get) { 3177 HReg dst = newVRegV(env); 3178 addInstr(env, X86Instr_SseLdSt( 3179 True/*load*/, 3180 dst, 3181 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()) 3182 ) 3183 ); 3184 return dst; 3185 } 3186 3187 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3188 HReg dst = newVRegV(env); 3189 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3190 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 3191 return dst; 3192 } 3193 3194 if (e->tag == Iex_Const) { 3195 HReg dst = newVRegV(env); 3196 vassert(e->Iex.Const.con->tag == Ico_V128); 3197 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst)); 3198 return dst; 3199 } 3200 3201 if (e->tag == Iex_Unop) { 3202 3203 if (SSE2_OR_ABOVE) { 3204 /* 64UtoV128(LDle:I64(addr)) */ 3205 DECLARE_PATTERN(p_zwiden_load64); 3206 
DEFINE_PATTERN(p_zwiden_load64, 3207 unop(Iop_64UtoV128, 3208 IRExpr_Load(Iend_LE,Ity_I64,bind(0)))); 3209 if (matchIRExpr(&mi, p_zwiden_load64, e)) { 3210 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]); 3211 HReg dst = newVRegV(env); 3212 addInstr(env, X86Instr_SseLdzLO(8, dst, am)); 3213 return dst; 3214 } 3215 } 3216 3217 switch (e->Iex.Unop.op) { 3218 3219 case Iop_NotV128: { 3220 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3221 return do_sse_Not128(env, arg); 3222 } 3223 3224 case Iop_CmpNEZ64x2: { 3225 /* We can use SSE2 instructions for this. */ 3226 /* Ideally, we want to do a 64Ix2 comparison against zero of 3227 the operand. Problem is no such insn exists. Solution 3228 therefore is to do a 32Ix4 comparison instead, and bitwise- 3229 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3230 let the not'd result of this initial comparison be a:b:c:d. 3231 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3232 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3233 giving the required result. 3234 3235 The required selection sequence is 2,3,0,1, which 3236 according to Intel's documentation means the pshufd 3237 literal value is 0xB1, that is, 3238 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3239 */ 3240 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3241 HReg tmp = newVRegV(env); 3242 HReg dst = newVRegV(env); 3243 REQUIRE_SSE2; 3244 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp)); 3245 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp)); 3246 tmp = do_sse_Not128(env, tmp); 3247 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst)); 3248 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst)); 3249 return dst; 3250 } 3251 3252 case Iop_CmpNEZ32x4: { 3253 /* Sigh, we have to generate lousy code since this has to 3254 work on SSE1 hosts */ 3255 /* basically, the idea is: for each lane: 3256 movl lane, %r ; negl %r (now CF = lane==0 ? 
0 : 1) 3257 sbbl %r, %r (now %r = 1Sto32(CF)) 3258 movl %r, lane 3259 */ 3260 Int i; 3261 X86AMode* am; 3262 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3263 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3264 HReg dst = newVRegV(env); 3265 HReg r32 = newVRegI(env); 3266 sub_from_esp(env, 16); 3267 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0)); 3268 for (i = 0; i < 4; i++) { 3269 am = X86AMode_IR(i*4, hregX86_ESP()); 3270 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32)); 3271 addInstr(env, X86Instr_Unary32(Xun_NEG, r32)); 3272 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32)); 3273 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am)); 3274 } 3275 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3276 add_to_esp(env, 16); 3277 return dst; 3278 } 3279 3280 case Iop_CmpNEZ8x16: 3281 case Iop_CmpNEZ16x8: { 3282 /* We can use SSE2 instructions for this. */ 3283 HReg arg; 3284 HReg vec0 = newVRegV(env); 3285 HReg vec1 = newVRegV(env); 3286 HReg dst = newVRegV(env); 3287 X86SseOp cmpOp 3288 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? 
Xsse_CMPEQ16 3289 : Xsse_CMPEQ8; 3290 REQUIRE_SSE2; 3291 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0)); 3292 addInstr(env, mk_vMOVsd_RR(vec0, vec1)); 3293 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1)); 3294 /* defer arg computation to here so as to give CMPEQF as long 3295 as possible to complete */ 3296 arg = iselVecExpr(env, e->Iex.Unop.arg); 3297 /* vec0 is all 0s; vec1 is all 1s */ 3298 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3299 /* 16x8 or 8x16 comparison == */ 3300 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst)); 3301 /* invert result */ 3302 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst)); 3303 return dst; 3304 } 3305 3306 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary; 3307 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary; 3308 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary; 3309 do_32Fx4_unary: 3310 { 3311 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3312 HReg dst = newVRegV(env); 3313 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst)); 3314 return dst; 3315 } 3316 3317 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary; 3318 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary; 3319 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary; 3320 do_64Fx2_unary: 3321 { 3322 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3323 HReg dst = newVRegV(env); 3324 REQUIRE_SSE2; 3325 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst)); 3326 return dst; 3327 } 3328 3329 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary; 3330 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary; 3331 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary; 3332 do_32F0x4_unary: 3333 { 3334 /* A bit subtle. We have to copy the arg to the result 3335 register first, because actually doing the SSE scalar insn 3336 leaves the upper 3/4 of the destination register 3337 unchanged. Whereas the required semantics of these 3338 primops is that the upper 3/4 is simply copied in from the 3339 argument. 
*/ 3340 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3341 HReg dst = newVRegV(env); 3342 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3343 addInstr(env, X86Instr_Sse32FLo(op, arg, dst)); 3344 return dst; 3345 } 3346 3347 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary; 3348 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary; 3349 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary; 3350 do_64F0x2_unary: 3351 { 3352 /* A bit subtle. We have to copy the arg to the result 3353 register first, because actually doing the SSE scalar insn 3354 leaves the upper half of the destination register 3355 unchanged. Whereas the required semantics of these 3356 primops is that the upper half is simply copied in from the 3357 argument. */ 3358 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3359 HReg dst = newVRegV(env); 3360 REQUIRE_SSE2; 3361 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3362 addInstr(env, X86Instr_Sse64FLo(op, arg, dst)); 3363 return dst; 3364 } 3365 3366 case Iop_32UtoV128: { 3367 HReg dst = newVRegV(env); 3368 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3369 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3370 addInstr(env, X86Instr_Push(rmi)); 3371 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0)); 3372 add_to_esp(env, 4); 3373 return dst; 3374 } 3375 3376 case Iop_64UtoV128: { 3377 HReg rHi, rLo; 3378 HReg dst = newVRegV(env); 3379 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3380 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); 3381 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3382 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3383 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0)); 3384 add_to_esp(env, 8); 3385 return dst; 3386 } 3387 3388 default: 3389 break; 3390 } /* switch (e->Iex.Unop.op) */ 3391 } /* if (e->tag == Iex_Unop) */ 3392 3393 if (e->tag == Iex_Binop) { 3394 switch (e->Iex.Binop.op) { 3395 3396 case Iop_SetV128lo32: { 3397 HReg dst = newVRegV(env); 3398 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3399 HReg srcI = 
iselIntExpr_R(env, e->Iex.Binop.arg2); 3400 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3401 sub_from_esp(env, 16); 3402 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3403 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0)); 3404 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3405 add_to_esp(env, 16); 3406 return dst; 3407 } 3408 3409 case Iop_SetV128lo64: { 3410 HReg dst = newVRegV(env); 3411 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3412 HReg srcIhi, srcIlo; 3413 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3414 X86AMode* esp4 = advance4(esp0); 3415 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2); 3416 sub_from_esp(env, 16); 3417 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3418 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0)); 3419 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4)); 3420 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3421 add_to_esp(env, 16); 3422 return dst; 3423 } 3424 3425 case Iop_64HLtoV128: { 3426 HReg r3, r2, r1, r0; 3427 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3428 X86AMode* esp4 = advance4(esp0); 3429 X86AMode* esp8 = advance4(esp4); 3430 X86AMode* esp12 = advance4(esp8); 3431 HReg dst = newVRegV(env); 3432 /* do this via the stack (easy, convenient, etc) */ 3433 sub_from_esp(env, 16); 3434 /* Do the less significant 64 bits */ 3435 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2); 3436 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0)); 3437 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4)); 3438 /* Do the more significant 64 bits */ 3439 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1); 3440 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8)); 3441 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12)); 3442 /* Fetch result back from stack. 
*/ 3443 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3444 add_to_esp(env, 16); 3445 return dst; 3446 } 3447 3448 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4; 3449 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4; 3450 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4; 3451 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4; 3452 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4; 3453 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4; 3454 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4; 3455 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4; 3456 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4; 3457 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4; 3458 do_32Fx4: 3459 { 3460 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3461 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3462 HReg dst = newVRegV(env); 3463 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3464 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); 3465 return dst; 3466 } 3467 3468 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2; 3469 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; 3470 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; 3471 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2; 3472 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2; 3473 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2; 3474 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2; 3475 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2; 3476 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2; 3477 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2; 3478 do_64Fx2: 3479 { 3480 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3481 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3482 HReg dst = newVRegV(env); 3483 REQUIRE_SSE2; 3484 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3485 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); 3486 return dst; 3487 } 3488 3489 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4; 3490 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4; 3491 case Iop_CmpLE32F0x4: 
op = Xsse_CMPLEF; goto do_32F0x4; 3492 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4; 3493 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4; 3494 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4; 3495 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4; 3496 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4; 3497 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4; 3498 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4; 3499 do_32F0x4: { 3500 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3501 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3502 HReg dst = newVRegV(env); 3503 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3504 addInstr(env, X86Instr_Sse32FLo(op, argR, dst)); 3505 return dst; 3506 } 3507 3508 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2; 3509 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2; 3510 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2; 3511 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2; 3512 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2; 3513 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2; 3514 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2; 3515 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2; 3516 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2; 3517 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2; 3518 do_64F0x2: { 3519 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3520 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3521 HReg dst = newVRegV(env); 3522 REQUIRE_SSE2; 3523 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3524 addInstr(env, X86Instr_Sse64FLo(op, argR, dst)); 3525 return dst; 3526 } 3527 3528 case Iop_QNarrowBin32Sto16Sx8: 3529 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3530 case Iop_QNarrowBin16Sto8Sx16: 3531 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3532 case Iop_QNarrowBin16Sto8Ux16: 3533 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3534 3535 case Iop_InterleaveHI8x16: 3536 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; 
3537 case Iop_InterleaveHI16x8: 3538 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3539 case Iop_InterleaveHI32x4: 3540 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3541 case Iop_InterleaveHI64x2: 3542 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3543 3544 case Iop_InterleaveLO8x16: 3545 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3546 case Iop_InterleaveLO16x8: 3547 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3548 case Iop_InterleaveLO32x4: 3549 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3550 case Iop_InterleaveLO64x2: 3551 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3552 3553 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg; 3554 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg; 3555 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg; 3556 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg; 3557 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg; 3558 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg; 3559 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg; 3560 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg; 3561 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg; 3562 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg; 3563 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg; 3564 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg; 3565 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg; 3566 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg; 3567 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg; 3568 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg; 3569 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg; 3570 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg; 3571 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg; 3572 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg; 3573 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg; 3574 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg; 3575 case Iop_Min8Ux16: op = Xsse_MIN8U; goto 
do_SseReRg; 3576 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg; 3577 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg; 3578 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg; 3579 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg; 3580 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg; 3581 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg; 3582 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg; 3583 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg; 3584 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg; 3585 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; 3586 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; 3587 do_SseReRg: { 3588 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3589 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3590 HReg dst = newVRegV(env); 3591 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR) 3592 REQUIRE_SSE2; 3593 if (arg1isEReg) { 3594 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3595 addInstr(env, X86Instr_SseReRg(op, arg1, dst)); 3596 } else { 3597 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3598 addInstr(env, X86Instr_SseReRg(op, arg2, dst)); 3599 } 3600 return dst; 3601 } 3602 3603 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift; 3604 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift; 3605 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift; 3606 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift; 3607 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift; 3608 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift; 3609 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift; 3610 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift; 3611 do_SseShift: { 3612 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3613 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3614 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3615 HReg ereg = newVRegV(env); 3616 HReg dst = newVRegV(env); 3617 REQUIRE_SSE2; 3618 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3619 addInstr(env, 
X86Instr_Push(X86RMI_Imm(0))); 3620 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3621 addInstr(env, X86Instr_Push(rmi)); 3622 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0)); 3623 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3624 addInstr(env, X86Instr_SseReRg(op, ereg, dst)); 3625 add_to_esp(env, 16); 3626 return dst; 3627 } 3628 3629 case Iop_NarrowBin32to16x8: 3630 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3631 goto do_SseAssistedBinary; 3632 case Iop_NarrowBin16to8x16: 3633 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3634 goto do_SseAssistedBinary; 3635 do_SseAssistedBinary: { 3636 /* As with the amd64 case (where this is copied from) we 3637 generate pretty bad code. */ 3638 vassert(fn != 0); 3639 HReg dst = newVRegV(env); 3640 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3641 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3642 HReg argp = newVRegI(env); 3643 /* subl $112, %esp -- make a space */ 3644 sub_from_esp(env, 112); 3645 /* leal 48(%esp), %r_argp -- point into it */ 3646 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()), 3647 argp)); 3648 /* andl $-16, %r_argp -- 16-align the pointer */ 3649 addInstr(env, X86Instr_Alu32R(Xalu_AND, 3650 X86RMI_Imm( ~(UInt)15 ), 3651 argp)); 3652 /* Prepare 3 arg regs: 3653 leal 0(%r_argp), %eax 3654 leal 16(%r_argp), %edx 3655 leal 32(%r_argp), %ecx 3656 */ 3657 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp), 3658 hregX86_EAX())); 3659 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp), 3660 hregX86_EDX())); 3661 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp), 3662 hregX86_ECX())); 3663 /* Store the two args, at (%edx) and (%ecx): 3664 movupd %argL, 0(%edx) 3665 movupd %argR, 0(%ecx) 3666 */ 3667 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL, 3668 X86AMode_IR(0, hregX86_EDX()))); 3669 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR, 3670 X86AMode_IR(0, hregX86_ECX()))); 3671 /* call the helper */ 3672 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 )); 3673 /* 
fetch the result from memory, using %r_argp, which the 3674 register allocator will keep alive across the call. */ 3675 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst, 3676 X86AMode_IR(0, argp))); 3677 /* and finally, clear the space */ 3678 add_to_esp(env, 112); 3679 return dst; 3680 } 3681 3682 default: 3683 break; 3684 } /* switch (e->Iex.Binop.op) */ 3685 } /* if (e->tag == Iex_Binop) */ 3686 3687 if (e->tag == Iex_Mux0X) { 3688 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); 3689 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); 3690 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); 3691 HReg dst = newVRegV(env); 3692 addInstr(env, mk_vMOVsd_RR(rX,dst)); 3693 addInstr(env, X86Instr_Test32(0xFF, r8)); 3694 addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst)); 3695 return dst; 3696 } 3697 3698 vec_fail: 3699 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n", 3700 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps)); 3701 ppIRExpr(e); 3702 vpanic("iselVecExpr_wrk"); 3703 3704 # undef REQUIRE_SSE1 3705 # undef REQUIRE_SSE2 3706 # undef SSE2_OR_ABOVE 3707 } 3708 3709 3710 /*---------------------------------------------------------*/ 3711 /*--- ISEL: Statements ---*/ 3712 /*---------------------------------------------------------*/ 3713 3714 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3715 { 3716 if (vex_traceflags & VEX_TRACE_VCODE) { 3717 vex_printf("\n-- "); 3718 ppIRStmt(stmt); 3719 vex_printf("\n"); 3720 } 3721 3722 switch (stmt->tag) { 3723 3724 /* --------- STORE --------- */ 3725 case Ist_Store: { 3726 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3727 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3728 IREndness end = stmt->Ist.Store.end; 3729 3730 if (tya != Ity_I32 || end != Iend_LE) 3731 goto stmt_fail; 3732 3733 if (tyd == Ity_I32) { 3734 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3735 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 3736 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am)); 3737 
return; 3738 } 3739 if (tyd == Ity_I8 || tyd == Ity_I16) { 3740 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3741 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 3742 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2), 3743 r,am )); 3744 return; 3745 } 3746 if (tyd == Ity_F64) { 3747 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3748 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 3749 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am)); 3750 return; 3751 } 3752 if (tyd == Ity_F32) { 3753 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3754 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 3755 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am)); 3756 return; 3757 } 3758 if (tyd == Ity_I64) { 3759 HReg vHi, vLo, rA; 3760 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data); 3761 rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 3762 addInstr(env, X86Instr_Alu32M( 3763 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA))); 3764 addInstr(env, X86Instr_Alu32M( 3765 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA))); 3766 return; 3767 } 3768 if (tyd == Ity_V128) { 3769 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3770 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 3771 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am)); 3772 return; 3773 } 3774 break; 3775 } 3776 3777 /* --------- PUT --------- */ 3778 case Ist_Put: { 3779 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3780 if (ty == Ity_I32) { 3781 /* We're going to write to memory, so compute the RHS into an 3782 X86RI. */ 3783 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 3784 addInstr(env, 3785 X86Instr_Alu32M( 3786 Xalu_MOV, 3787 ri, 3788 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP()) 3789 )); 3790 return; 3791 } 3792 if (ty == Ity_I8 || ty == Ity_I16) { 3793 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 3794 addInstr(env, X86Instr_Store( 3795 toUChar(ty==Ity_I8 ? 
1 : 2), 3796 r, 3797 X86AMode_IR(stmt->Ist.Put.offset, 3798 hregX86_EBP()))); 3799 return; 3800 } 3801 if (ty == Ity_I64) { 3802 HReg vHi, vLo; 3803 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3804 X86AMode* am4 = advance4(am); 3805 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data); 3806 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am )); 3807 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 )); 3808 return; 3809 } 3810 if (ty == Ity_V128) { 3811 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 3812 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3813 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am)); 3814 return; 3815 } 3816 if (ty == Ity_F32) { 3817 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 3818 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3819 set_FPU_rounding_default(env); /* paranoia */ 3820 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am )); 3821 return; 3822 } 3823 if (ty == Ity_F64) { 3824 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 3825 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3826 set_FPU_rounding_default(env); /* paranoia */ 3827 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am )); 3828 return; 3829 } 3830 break; 3831 } 3832 3833 /* --------- Indexed PUT --------- */ 3834 case Ist_PutI: { 3835 IRPutI *puti = stmt->Ist.PutI.details; 3836 3837 X86AMode* am 3838 = genGuestArrayOffset( 3839 env, puti->descr, 3840 puti->ix, puti->bias ); 3841 3842 IRType ty = typeOfIRExpr(env->type_env, puti->data); 3843 if (ty == Ity_F64) { 3844 HReg val = iselDblExpr(env, puti->data); 3845 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am )); 3846 return; 3847 } 3848 if (ty == Ity_I8) { 3849 HReg r = iselIntExpr_R(env, puti->data); 3850 addInstr(env, X86Instr_Store( 1, r, am )); 3851 return; 3852 } 3853 if (ty == Ity_I32) { 3854 HReg r = iselIntExpr_R(env, puti->data); 3855 addInstr(env, X86Instr_Alu32M( Xalu_MOV, 
X86RI_Reg(r), am )); 3856 return; 3857 } 3858 if (ty == Ity_I64) { 3859 HReg rHi, rLo; 3860 X86AMode* am4 = advance4(am); 3861 iselInt64Expr(&rHi, &rLo, env, puti->data); 3862 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am )); 3863 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 )); 3864 return; 3865 } 3866 break; 3867 } 3868 3869 /* --------- TMP --------- */ 3870 case Ist_WrTmp: { 3871 IRTemp tmp = stmt->Ist.WrTmp.tmp; 3872 IRType ty = typeOfIRTemp(env->type_env, tmp); 3873 3874 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..), 3875 compute it into an AMode and then use LEA. This usually 3876 produces fewer instructions, often because (for memcheck 3877 created IR) we get t = address-expression, (t is later used 3878 twice) and so doing this naturally turns address-expression 3879 back into an X86 amode. */ 3880 if (ty == Ity_I32 3881 && stmt->Ist.WrTmp.data->tag == Iex_Binop 3882 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) { 3883 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 3884 HReg dst = lookupIRTemp(env, tmp); 3885 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) { 3886 /* Hmm, iselIntExpr_AMode wimped out and just computed the 3887 value into a register. Just emit a normal reg-reg move 3888 so reg-alloc can coalesce it away in the usual way. 
*/ 3889 HReg src = am->Xam.IR.reg; 3890 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst)); 3891 } else { 3892 addInstr(env, X86Instr_Lea32(am,dst)); 3893 } 3894 return; 3895 } 3896 3897 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 3898 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 3899 HReg dst = lookupIRTemp(env, tmp); 3900 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst)); 3901 return; 3902 } 3903 if (ty == Ity_I64) { 3904 HReg rHi, rLo, dstHi, dstLo; 3905 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 3906 lookupIRTemp64( &dstHi, &dstLo, env, tmp); 3907 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 3908 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 3909 return; 3910 } 3911 if (ty == Ity_I1) { 3912 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 3913 HReg dst = lookupIRTemp(env, tmp); 3914 addInstr(env, X86Instr_Set32(cond, dst)); 3915 return; 3916 } 3917 if (ty == Ity_F64) { 3918 HReg dst = lookupIRTemp(env, tmp); 3919 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 3920 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 3921 return; 3922 } 3923 if (ty == Ity_F32) { 3924 HReg dst = lookupIRTemp(env, tmp); 3925 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 3926 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 3927 return; 3928 } 3929 if (ty == Ity_V128) { 3930 HReg dst = lookupIRTemp(env, tmp); 3931 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 3932 addInstr(env, mk_vMOVsd_RR(src,dst)); 3933 return; 3934 } 3935 break; 3936 } 3937 3938 /* --------- Call to DIRTY helper --------- */ 3939 case Ist_Dirty: { 3940 IRType retty; 3941 IRDirty* d = stmt->Ist.Dirty.details; 3942 Bool passBBP = False; 3943 3944 if (d->nFxState == 0) 3945 vassert(!d->needsBBP); 3946 3947 passBBP = toBool(d->nFxState > 0 && d->needsBBP); 3948 3949 /* Marshal args, do the call, clear stack. */ 3950 doHelperCall( env, passBBP, d->guard, d->cee, d->args ); 3951 3952 /* Now figure out what to do with the returned value, if any. 
*/ 3953 if (d->tmp == IRTemp_INVALID) 3954 /* No return value. Nothing to do. */ 3955 return; 3956 3957 retty = typeOfIRTemp(env->type_env, d->tmp); 3958 if (retty == Ity_I64) { 3959 HReg dstHi, dstLo; 3960 /* The returned value is in %edx:%eax. Park it in the 3961 register-pair associated with tmp. */ 3962 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp); 3963 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) ); 3964 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) ); 3965 return; 3966 } 3967 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) { 3968 /* The returned value is in %eax. Park it in the register 3969 associated with tmp. */ 3970 HReg dst = lookupIRTemp(env, d->tmp); 3971 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) ); 3972 return; 3973 } 3974 break; 3975 } 3976 3977 /* --------- MEM FENCE --------- */ 3978 case Ist_MBE: 3979 switch (stmt->Ist.MBE.event) { 3980 case Imbe_Fence: 3981 addInstr(env, X86Instr_MFence(env->hwcaps)); 3982 return; 3983 default: 3984 break; 3985 } 3986 break; 3987 3988 /* --------- ACAS --------- */ 3989 case Ist_CAS: 3990 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 3991 /* "normal" singleton CAS */ 3992 UChar sz; 3993 IRCAS* cas = stmt->Ist.CAS.details; 3994 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 3995 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 3996 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 3997 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 3998 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 3999 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4000 vassert(cas->expdHi == NULL); 4001 vassert(cas->dataHi == NULL); 4002 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4003 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 4004 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 4005 switch (ty) { 4006 case Ity_I32: sz = 4; break; 4007 case Ity_I16: sz = 2; break; 4008 case Ity_I8: sz = 1; break; 4009 default: goto unhandled_cas; 4010 } 4011 addInstr(env, X86Instr_ACAS(am, sz)); 
4012 addInstr(env, 4013 X86Instr_CMov32(Xcc_NZ, 4014 X86RM_Reg(hregX86_EAX()), rOldLo)); 4015 return; 4016 } else { 4017 /* double CAS */ 4018 IRCAS* cas = stmt->Ist.CAS.details; 4019 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4020 /* only 32-bit allowed in this case */ 4021 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 4022 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */ 4023 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 4024 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 4025 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4026 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 4027 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4028 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 4029 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4030 if (ty != Ity_I32) 4031 goto unhandled_cas; 4032 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 4033 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4034 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX())); 4035 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 4036 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX())); 4037 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 4038 addInstr(env, X86Instr_DACAS(am)); 4039 addInstr(env, 4040 X86Instr_CMov32(Xcc_NZ, 4041 X86RM_Reg(hregX86_EDX()), rOldHi)); 4042 addInstr(env, 4043 X86Instr_CMov32(Xcc_NZ, 4044 X86RM_Reg(hregX86_EAX()), rOldLo)); 4045 return; 4046 } 4047 unhandled_cas: 4048 break; 4049 4050 /* --------- INSTR MARK --------- */ 4051 /* Doesn't generate any executable code ... */ 4052 case Ist_IMark: 4053 return; 4054 4055 /* --------- NO-OP --------- */ 4056 /* Fairly self-explanatory, wouldn't you say? 
*/ 4057 case Ist_NoOp: 4058 return; 4059 4060 /* --------- EXIT --------- */ 4061 case Ist_Exit: { 4062 if (stmt->Ist.Exit.dst->tag != Ico_U32) 4063 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value"); 4064 4065 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 4066 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP, 4067 hregX86_EBP()); 4068 4069 /* Case: boring transfer to known address */ 4070 if (stmt->Ist.Exit.jk == Ijk_Boring) { 4071 if (env->chainingAllowed) { 4072 /* .. almost always true .. */ 4073 /* Skip the event check at the dst if this is a forwards 4074 edge. */ 4075 Bool toFastEP 4076 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; 4077 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 4078 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32, 4079 amEIP, cc, toFastEP)); 4080 } else { 4081 /* .. very occasionally .. */ 4082 /* We can't use chaining, so ask for an assisted transfer, 4083 as that's the only alternative that is allowable. */ 4084 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4085 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring)); 4086 } 4087 return; 4088 } 4089 4090 /* Case: assisted transfer to arbitrary address */ 4091 switch (stmt->Ist.Exit.jk) { 4092 /* Keep this list in sync with that in iselNext below */ 4093 case Ijk_ClientReq: 4094 case Ijk_EmWarn: 4095 case Ijk_MapFail: 4096 case Ijk_NoDecode: 4097 case Ijk_NoRedir: 4098 case Ijk_SigSEGV: 4099 case Ijk_SigTRAP: 4100 case Ijk_Sys_int128: 4101 case Ijk_Sys_int129: 4102 case Ijk_Sys_int130: 4103 case Ijk_Sys_sysenter: 4104 case Ijk_TInval: 4105 case Ijk_Yield: 4106 { 4107 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4108 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk)); 4109 return; 4110 } 4111 default: 4112 break; 4113 } 4114 4115 /* Do we ever expect to see any other kind? 
*/
      goto stmt_fail;
   }

   default: break;
   }
   /* No rule matched this statement: print it and give up. */
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

/* Select instructions for the end-of-block transfer: dispatch on the
   jump kind |jk| and on whether chaining is allowed, emitting one of
   XDirect (chained, known destination), XIndir (chained, computed
   destination) or XAssisted (unchained / requires run-time
   assistance).  |next| is the destination guest-IP expression;
   |offsIP| is the guest-state offset (off %ebp) of the IP slot,
   which is handed to the transfer instruction as an amode.
   Panics on any jump kind not listed below. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U32) > env->max_ga;
            /* debug-only trace of fast/slow entry point choice */
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_sysenter:
      case Ijk_TInval:
      case Ijk_Yield:
      {
         HReg r = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   /* Unhandled jump kind: dump the transfer and assert. */
   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code.
*/

HInstrArray* iselSB_X86 ( IRSB* bb,
                          VexArch arch_host,
                          VexArchInfo* archinfo_host,
                          VexAbiInfo* vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr64 max_ga )
{
   UInt hwcaps_host = archinfo_host->hwcaps;

   /* Sanity-check the target: must really be x86, with no hwcaps
      bits outside the set this selector understands, and a guest
      address range that fits in 32 bits. */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));
   vassert(sizeof(max_ga) == 8);
   vassert((max_ga >> 32) == 0);

   /* Build the selection environment: output array, the block's type
      environment, and the IRTemp -> vreg map (filled in below). */
   ISelEnv* env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr  = 0;
   env->code      = newHInstrArray();
   env->type_env  = bb->tyenv;
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* Give every IRTemp a suitably-kinded virtual register; 64-bit
      integer temps get a (lo,hi) pair of 32-bit vregs.  This mapping
      never changes once built. */
   Int vregNo = 0;
   Int i;
   for (i = 0; i < env->n_vregmap; i++) {
      HReg lo = INVALID_HREG;
      HReg hi = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:
            lo = mkHReg(vregNo++, HRcInt32, True);
            break;
         case Ity_I64:
            /* low half is numbered first, matching the original
               allocation order */
            lo = mkHReg(vregNo++, HRcInt32, True);
            hi = mkHReg(vregNo++, HRcInt32, True);
            break;
         case Ity_F32:
         case Ity_F64:
            lo = mkHReg(vregNo++, HRcFlt64, True);
            break;
         case Ity_V128:
            lo = mkHReg(vregNo++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = lo;
      env->vregmapHI[i] = hi;
   }
   env->vreg_ctr = vregNo;

   /* The very first instruction must be an event check. */
   X86AMode* amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   X86AMode* amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  The
      counter's address isn't known yet, so a zero placeholder is
      emitted; it must be patched before use, via
      LibVEX_patchProfCtr. */
   if (addProfInc)
      addInstr(env, X86Instr_ProfInc());

   /* Select code for each statement, then for the block exit. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* Record how many vregs we used, for the register allocator. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/