1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_isel.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex_ir.h" 38 #include "libvex.h" 39 40 #include "ir_match.h" 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "host_generic_regs.h" 44 #include "host_generic_simd64.h" 45 #include "host_generic_simd128.h" 46 #include "host_x86_defs.h" 47 48 /* TODO 21 Apr 2005: 49 50 -- (Really an assembler issue) don't emit CMov32 as a cmov 51 insn, since that's expensive on P4 and conditional branch 52 is cheaper if (as we expect) the condition is highly predictable 53 54 -- preserve xmm registers across function calls (by declaring them 55 as trashed by call insns) 56 57 -- preserve x87 ST stack discipline across function calls. Sigh. 58 59 -- Check doHelperCall: if a call is conditional, we cannot safely 60 compute any regparm args directly to registers. Hence, the 61 fast-regparm marshalling should be restricted to unconditional 62 calls only. 63 */ 64 65 /*---------------------------------------------------------*/ 66 /*--- x87 control word stuff ---*/ 67 /*---------------------------------------------------------*/ 68 69 /* Vex-generated code expects to run with the FPU set as follows: all 70 exceptions masked, round-to-nearest, precision = 53 bits. This 71 corresponds to a FPU control word value of 0x027F. 72 73 Similarly the SSE control word (%mxcsr) should be 0x1F80. 74 75 %fpucw and %mxcsr should have these values on entry to 76 Vex-generated code, and should those values should be 77 unchanged at exit. 
78 */ 79 80 #define DEFAULT_FPUCW 0x027F 81 82 /* debugging only, do not use */ 83 /* define DEFAULT_FPUCW 0x037F */ 84 85 86 /*---------------------------------------------------------*/ 87 /*--- misc helpers ---*/ 88 /*---------------------------------------------------------*/ 89 90 /* These are duplicated in guest-x86/toIR.c */ 91 static IRExpr* unop ( IROp op, IRExpr* a ) 92 { 93 return IRExpr_Unop(op, a); 94 } 95 96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 97 { 98 return IRExpr_Binop(op, a1, a2); 99 } 100 101 static IRExpr* bind ( Int binder ) 102 { 103 return IRExpr_Binder(binder); 104 } 105 106 static Bool isZeroU8 ( IRExpr* e ) 107 { 108 return e->tag == Iex_Const 109 && e->Iex.Const.con->tag == Ico_U8 110 && e->Iex.Const.con->Ico.U8 == 0; 111 } 112 113 static Bool isZeroU32 ( IRExpr* e ) 114 { 115 return e->tag == Iex_Const 116 && e->Iex.Const.con->tag == Ico_U32 117 && e->Iex.Const.con->Ico.U32 == 0; 118 } 119 120 //static Bool isZeroU64 ( IRExpr* e ) 121 //{ 122 // return e->tag == Iex_Const 123 // && e->Iex.Const.con->tag == Ico_U64 124 // && e->Iex.Const.con->Ico.U64 == 0ULL; 125 //} 126 127 128 /*---------------------------------------------------------*/ 129 /*--- ISelEnv ---*/ 130 /*---------------------------------------------------------*/ 131 132 /* This carries around: 133 134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we 135 might encounter. This is computed before insn selection starts, 136 and does not change. 137 138 - A mapping from IRTemp to HReg. This tells the insn selector 139 which virtual register(s) are associated with each IRTemp 140 temporary. This is computed before insn selection starts, and 141 does not change. We expect this mapping to map precisely the 142 same set of IRTemps as the type mapping does. 143 144 - vregmap holds the primary register for the IRTemp. 145 - vregmapHI is only used for 64-bit integer-typed 146 IRTemps. It holds the identity of a second 147 32-bit virtual HReg, which holds the high half 148 of the value. 149 150 - The code array, that is, the insns selected so far. 151 152 - A counter, for generating new virtual registers. 153 154 - The host subarchitecture we are selecting insns for. 155 This is set at the start and does not change. 156 157 - A Bool for indicating whether we may generate chain-me 158 instructions for control flow transfers, or whether we must use 159 XAssisted. 160 161 - The maximum guest address of any guest insn in this block. 162 Actually, the address of the highest-addressed byte from any insn 163 in this block. Is set at the start and does not change. This is 164 used for detecting jumps which are definitely forward-edges from 165 this block, and therefore can be made (chained) to the fast entry 166 point of the destination, thereby avoiding the destination's 167 event check. 168 169 Note, this is all (well, mostly) host-independent. 170 */ 171 172 typedef 173 struct { 174 /* Constant -- are set at the start and do not change. */ 175 IRTypeEnv* type_env; 176 177 HReg* vregmap; 178 HReg* vregmapHI; 179 Int n_vregmap; 180 181 UInt hwcaps; 182 183 Bool chainingAllowed; 184 Addr64 max_ga; 185 186 /* These are modified as we go along. 
*/ 187 HInstrArray* code; 188 Int vreg_ctr; 189 } 190 ISelEnv; 191 192 193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) 194 { 195 vassert(tmp >= 0); 196 vassert(tmp < env->n_vregmap); 197 return env->vregmap[tmp]; 198 } 199 200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp ) 201 { 202 vassert(tmp >= 0); 203 vassert(tmp < env->n_vregmap); 204 vassert(! hregIsInvalid(env->vregmapHI[tmp])); 205 *vrLO = env->vregmap[tmp]; 206 *vrHI = env->vregmapHI[tmp]; 207 } 208 209 static void addInstr ( ISelEnv* env, X86Instr* instr ) 210 { 211 addHInstr(env->code, instr); 212 if (vex_traceflags & VEX_TRACE_VCODE) { 213 ppX86Instr(instr, False); 214 vex_printf("\n"); 215 } 216 } 217 218 static HReg newVRegI ( ISelEnv* env ) 219 { 220 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/); 221 env->vreg_ctr++; 222 return reg; 223 } 224 225 static HReg newVRegF ( ISelEnv* env ) 226 { 227 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); 228 env->vreg_ctr++; 229 return reg; 230 } 231 232 static HReg newVRegV ( ISelEnv* env ) 233 { 234 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); 235 env->vreg_ctr++; 236 return reg; 237 } 238 239 240 /*---------------------------------------------------------*/ 241 /*--- ISEL: Forward declarations ---*/ 242 /*---------------------------------------------------------*/ 243 244 /* These are organised as iselXXX and iselXXX_wrk pairs. The 245 iselXXX_wrk do the real work, but are not to be called directly. 246 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then 247 checks that all returned registers are virtual. You should not 248 call the _wrk version directly. 249 */ 250 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ); 251 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ); 252 253 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ); 254 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ); 255 256 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ); 257 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ); 258 259 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); 260 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); 261 262 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ); 263 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ); 264 265 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 266 ISelEnv* env, IRExpr* e ); 267 static void iselInt64Expr ( HReg* rHi, HReg* rLo, 268 ISelEnv* env, IRExpr* e ); 269 270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ); 271 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ); 272 273 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); 274 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ); 275 276 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); 277 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); 278 279 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ); 280 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ); 281 282 283 /*---------------------------------------------------------*/ 284 /*--- ISEL: Misc helpers ---*/ 285 /*---------------------------------------------------------*/ 286 287 /* Make a int reg-reg move. */ 288 289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst ) 290 { 291 vassert(hregClass(src) == HRcInt32); 292 vassert(hregClass(dst) == HRcInt32); 293 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst); 294 } 295 296 297 /* Make a vector reg-reg move. 
*/ 298 299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst ) 300 { 301 vassert(hregClass(src) == HRcVec128); 302 vassert(hregClass(dst) == HRcVec128); 303 return X86Instr_SseReRg(Xsse_MOV, src, dst); 304 } 305 306 /* Advance/retreat %esp by n. */ 307 308 static void add_to_esp ( ISelEnv* env, Int n ) 309 { 310 vassert(n > 0 && n < 256 && (n%4) == 0); 311 addInstr(env, 312 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP())); 313 } 314 315 static void sub_from_esp ( ISelEnv* env, Int n ) 316 { 317 vassert(n > 0 && n < 256 && (n%4) == 0); 318 addInstr(env, 319 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP())); 320 } 321 322 323 /* Given an amode, return one which references 4 bytes further 324 along. */ 325 326 static X86AMode* advance4 ( X86AMode* am ) 327 { 328 X86AMode* am4 = dopyX86AMode(am); 329 switch (am4->tag) { 330 case Xam_IRRS: 331 am4->Xam.IRRS.imm += 4; break; 332 case Xam_IR: 333 am4->Xam.IR.imm += 4; break; 334 default: 335 vpanic("advance4(x86,host)"); 336 } 337 return am4; 338 } 339 340 341 /* Push an arg onto the host stack, in preparation for a call to a 342 helper function of some kind. Returns the number of 32-bit words 343 pushed. If we encounter an IRExpr_VECRET() then we expect that 344 r_vecRetAddr will be a valid register, that holds the relevant 345 address. 346 */ 347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr ) 348 { 349 if (UNLIKELY(arg->tag == Iex_VECRET)) { 350 vassert(0); //ATC 351 vassert(!hregIsInvalid(r_vecRetAddr)); 352 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr))); 353 return 1; 354 } 355 if (UNLIKELY(arg->tag == Iex_BBPTR)) { 356 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP()))); 357 return 1; 358 } 359 /* Else it's a "normal" expression. */ 360 IRType arg_ty = typeOfIRExpr(env->type_env, arg); 361 if (arg_ty == Ity_I32) { 362 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg))); 363 return 1; 364 } else 365 if (arg_ty == Ity_I64) { 366 HReg rHi, rLo; 367 iselInt64Expr(&rHi, &rLo, env, arg); 368 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 369 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 370 return 2; 371 } 372 ppIRExpr(arg); 373 vpanic("pushArg(x86): can't handle arg of this type"); 374 } 375 376 377 /* Complete the call to a helper function, by calling the 378 helper and clearing the args off the stack. */ 379 380 static 381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc, 382 IRCallee* cee, Int n_arg_ws, 383 RetLoc rloc ) 384 { 385 /* Complication. Need to decide which reg to use as the fn address 386 pointer, in a way that doesn't trash regparm-passed 387 parameters. */ 388 vassert(sizeof(void*) == 4); 389 390 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)), 391 cee->regparms, rloc)); 392 if (n_arg_ws > 0) 393 add_to_esp(env, 4*n_arg_ws); 394 } 395 396 397 /* Used only in doHelperCall. See big comment in doHelperCall re 398 handling of regparm args. This function figures out whether 399 evaluation of an expression might require use of a fixed register. 400 If in doubt return True (safe but suboptimal). 401 */ 402 static 403 Bool mightRequireFixedRegs ( IRExpr* e ) 404 { 405 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) { 406 // These are always "safe" -- either a copy of %esp in some 407 // arbitrary vreg, or a copy of %ebp, respectively. 408 return False; 409 } 410 /* Else it's a "normal" expression. 
*/ 411 switch (e->tag) { 412 case Iex_RdTmp: case Iex_Const: case Iex_Get: 413 return False; 414 default: 415 return True; 416 } 417 } 418 419 420 /* Do a complete function call. |guard| is a Ity_Bit expression 421 indicating whether or not the call happens. If guard==NULL, the 422 call is unconditional. |retloc| is set to indicate where the 423 return value is after the call. The caller (of this fn) must 424 generate code to add |stackAdjustAfterCall| to the stack pointer 425 after the call is done. */ 426 427 static 428 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, 429 /*OUT*/RetLoc* retloc, 430 ISelEnv* env, 431 IRExpr* guard, 432 IRCallee* cee, IRType retTy, IRExpr** args ) 433 { 434 X86CondCode cc; 435 HReg argregs[3]; 436 HReg tmpregs[3]; 437 Bool danger; 438 Int not_done_yet, n_args, n_arg_ws, stack_limit, 439 i, argreg, argregX; 440 441 /* Set default returns. We'll update them later if needed. */ 442 *stackAdjustAfterCall = 0; 443 *retloc = mk_RetLoc_INVALID(); 444 445 /* These are used for cross-checking that IR-level constraints on 446 the use of Iex_VECRET and Iex_BBPTR are observed. */ 447 UInt nVECRETs = 0; 448 UInt nBBPTRs = 0; 449 450 /* Marshal args for a call, do the call, and clear the stack. 451 Complexities to consider: 452 453 * The return type can be I{64,32,16,8} or V128. In the V128 454 case, it is expected that |args| will contain the special 455 node IRExpr_VECRET(), in which case this routine generates 456 code to allocate space on the stack for the vector return 457 value. Since we are not passing any scalars on the stack, it 458 is enough to preallocate the return space before marshalling 459 any arguments, in this case. 460 461 |args| may also contain IRExpr_BBPTR(), in which case the 462 value in %ebp is passed as the corresponding argument. 463 464 * If the callee claims regparmness of 1, 2 or 3, we must pass the 465 first 1, 2 or 3 args in registers (EAX, EDX, and ECX 466 respectively). To keep things relatively simple, only args of 467 type I32 may be passed as regparms -- just bomb out if anything 468 else turns up. Clearly this depends on the front ends not 469 trying to pass any other types as regparms. 470 */ 471 472 /* 16 Nov 2004: the regparm handling is complicated by the 473 following problem. 474 475 Consider a call two a function with two regparm parameters: 476 f(e1,e2). We need to compute e1 into %eax and e2 into %edx. 477 Suppose code is first generated to compute e1 into %eax. Then, 478 code is generated to compute e2 into %edx. Unfortunately, if 479 the latter code sequence uses %eax, it will trash the value of 480 e1 computed by the former sequence. This could happen if (for 481 example) e2 itself involved a function call. In the code below, 482 args are evaluated right-to-left, not left-to-right, but the 483 principle and the problem are the same. 484 485 One solution is to compute all regparm-bound args into vregs 486 first, and once they are all done, move them to the relevant 487 real regs. This always gives correct code, but it also gives 488 a bunch of vreg-to-rreg moves which are usually redundant but 489 are hard for the register allocator to get rid of. 490 491 A compromise is to first examine all regparm'd argument 492 expressions. If they are all so simple that it is clear 493 they will be evaluated without use of any fixed registers, 494 use the old compute-directly-to-fixed-target scheme. If not, 495 be safe and use the via-vregs scheme. 
496 497 Note this requires being able to examine an expression and 498 determine whether or not evaluation of it might use a fixed 499 register. That requires knowledge of how the rest of this 500 insn selector works. Currently just the following 3 are 501 regarded as safe -- hopefully they cover the majority of 502 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. 503 */ 504 vassert(cee->regparms >= 0 && cee->regparms <= 3); 505 506 /* Count the number of args and also the VECRETs */ 507 n_args = n_arg_ws = 0; 508 while (args[n_args]) { 509 IRExpr* arg = args[n_args]; 510 n_args++; 511 if (UNLIKELY(arg->tag == Iex_VECRET)) { 512 nVECRETs++; 513 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 514 nBBPTRs++; 515 } 516 } 517 518 /* If this fails, the IR is ill-formed */ 519 vassert(nBBPTRs == 0 || nBBPTRs == 1); 520 521 /* If we have a VECRET, allocate space on the stack for the return 522 value, and record the stack pointer after that. */ 523 HReg r_vecRetAddr = INVALID_HREG; 524 if (nVECRETs == 1) { 525 vassert(retTy == Ity_V128 || retTy == Ity_V256); 526 vassert(retTy != Ity_V256); // we don't handle that yet (if ever) 527 r_vecRetAddr = newVRegI(env); 528 sub_from_esp(env, 16); 529 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr )); 530 } else { 531 // If either of these fail, the IR is ill-formed 532 vassert(retTy != Ity_V128 && retTy != Ity_V256); 533 vassert(nVECRETs == 0); 534 } 535 536 not_done_yet = n_args; 537 538 stack_limit = cee->regparms; 539 540 /* ------ BEGIN marshall all arguments ------ */ 541 542 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */ 543 for (i = n_args-1; i >= stack_limit; i--) { 544 n_arg_ws += pushArg(env, args[i], r_vecRetAddr); 545 not_done_yet--; 546 } 547 548 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in 549 registers. */ 550 551 if (cee->regparms > 0) { 552 553 /* ------ BEGIN deal with regparms ------ */ 554 555 /* deal with regparms, not forgetting %ebp if needed. */ 556 argregs[0] = hregX86_EAX(); 557 argregs[1] = hregX86_EDX(); 558 argregs[2] = hregX86_ECX(); 559 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG; 560 561 argreg = cee->regparms; 562 563 /* In keeping with big comment above, detect potential danger 564 and use the via-vregs scheme if needed. */ 565 danger = False; 566 for (i = stack_limit-1; i >= 0; i--) { 567 if (mightRequireFixedRegs(args[i])) { 568 danger = True; 569 break; 570 } 571 } 572 573 if (danger) { 574 575 /* Move via temporaries */ 576 argregX = argreg; 577 for (i = stack_limit-1; i >= 0; i--) { 578 579 if (0) { 580 vex_printf("x86 host: register param is complex: "); 581 ppIRExpr(args[i]); 582 vex_printf("\n"); 583 } 584 585 IRExpr* arg = args[i]; 586 argreg--; 587 vassert(argreg >= 0); 588 if (UNLIKELY(arg->tag == Iex_VECRET)) { 589 vassert(0); //ATC 590 } 591 else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 592 vassert(0); //ATC 593 } else { 594 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32); 595 tmpregs[argreg] = iselIntExpr_R(env, arg); 596 } 597 not_done_yet--; 598 } 599 for (i = stack_limit-1; i >= 0; i--) { 600 argregX--; 601 vassert(argregX >= 0); 602 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) ); 603 } 604 605 } else { 606 /* It's safe to compute all regparm args directly into their 607 target registers. 
*/ 608 for (i = stack_limit-1; i >= 0; i--) { 609 IRExpr* arg = args[i]; 610 argreg--; 611 vassert(argreg >= 0); 612 if (UNLIKELY(arg->tag == Iex_VECRET)) { 613 vassert(!hregIsInvalid(r_vecRetAddr)); 614 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 615 X86RMI_Reg(r_vecRetAddr), 616 argregs[argreg])); 617 } 618 else if (UNLIKELY(arg->tag == Iex_BBPTR)) { 619 vassert(0); //ATC 620 } else { 621 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32); 622 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 623 iselIntExpr_RMI(env, arg), 624 argregs[argreg])); 625 } 626 not_done_yet--; 627 } 628 629 } 630 631 /* ------ END deal with regparms ------ */ 632 633 } 634 635 vassert(not_done_yet == 0); 636 637 /* ------ END marshall all arguments ------ */ 638 639 /* Now we can compute the condition. We can't do it earlier 640 because the argument computations could trash the condition 641 codes. Be a bit clever to handle the common case where the 642 guard is 1:Bit. */ 643 cc = Xcc_ALWAYS; 644 if (guard) { 645 if (guard->tag == Iex_Const 646 && guard->Iex.Const.con->tag == Ico_U1 647 && guard->Iex.Const.con->Ico.U1 == True) { 648 /* unconditional -- do nothing */ 649 } else { 650 cc = iselCondCode( env, guard ); 651 } 652 } 653 654 /* Do final checks, set the return values, and generate the call 655 instruction proper. */ 656 vassert(*stackAdjustAfterCall == 0); 657 vassert(is_RetLoc_INVALID(*retloc)); 658 switch (retTy) { 659 case Ity_INVALID: 660 /* Function doesn't return a value. */ 661 *retloc = mk_RetLoc_simple(RLPri_None); 662 break; 663 case Ity_I64: 664 *retloc = mk_RetLoc_simple(RLPri_2Int); 665 break; 666 case Ity_I32: case Ity_I16: case Ity_I8: 667 *retloc = mk_RetLoc_simple(RLPri_Int); 668 break; 669 case Ity_V128: 670 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); 671 *stackAdjustAfterCall = 16; 672 break; 673 case Ity_V256: 674 vassert(0); // ATC 675 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); 676 *stackAdjustAfterCall = 32; 677 break; 678 default: 679 /* IR can denote other possible return types, but we don't 680 handle those here. */ 681 vassert(0); 682 } 683 684 /* Finally, generate the call itself. This needs the *retloc value 685 set in the switch above, which is why it's at the end. */ 686 callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc ); 687 } 688 689 690 /* Given a guest-state array descriptor, an index expression and a 691 bias, generate an X86AMode holding the relevant guest state 692 offset. */ 693 694 static 695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr, 696 IRExpr* off, Int bias ) 697 { 698 HReg tmp, roff; 699 Int elemSz = sizeofIRType(descr->elemTy); 700 Int nElems = descr->nElems; 701 Int shift = 0; 702 703 /* throw out any cases not generated by an x86 front end. In 704 theory there might be a day where we need to handle them -- if 705 we ever run non-x86-guest on x86 host. */ 706 707 if (nElems != 8) 708 vpanic("genGuestArrayOffset(x86 host)(1)"); 709 710 switch (elemSz) { 711 case 1: shift = 0; break; 712 case 4: shift = 2; break; 713 case 8: shift = 3; break; 714 default: vpanic("genGuestArrayOffset(x86 host)(2)"); 715 } 716 717 /* Compute off into a reg, %off. Then return: 718 719 movl %off, %tmp 720 addl $bias, %tmp (if bias != 0) 721 andl %tmp, 7 722 ... base(%ebp, %tmp, shift) ... 
723 */ 724 tmp = newVRegI(env); 725 roff = iselIntExpr_R(env, off); 726 addInstr(env, mk_iMOVsd_RR(roff, tmp)); 727 if (bias != 0) { 728 addInstr(env, 729 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp)); 730 } 731 addInstr(env, 732 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp)); 733 return 734 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift ); 735 } 736 737 738 /* Mess with the FPU's rounding mode: set to the default rounding mode 739 (DEFAULT_FPUCW). */ 740 static 741 void set_FPU_rounding_default ( ISelEnv* env ) 742 { 743 /* pushl $DEFAULT_FPUCW 744 fldcw 0(%esp) 745 addl $4, %esp 746 */ 747 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 748 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); 749 addInstr(env, X86Instr_FpLdCW(zero_esp)); 750 add_to_esp(env, 4); 751 } 752 753 754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed 755 expression denoting a value in the range 0 .. 3, indicating a round 756 mode encoded as per type IRRoundingMode. Set the x87 FPU to have 757 the same rounding. 758 */ 759 static 760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode ) 761 { 762 HReg rrm = iselIntExpr_R(env, mode); 763 HReg rrm2 = newVRegI(env); 764 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 765 766 /* movl %rrm, %rrm2 767 andl $3, %rrm2 -- shouldn't be needed; paranoia 768 shll $10, %rrm2 769 orl $DEFAULT_FPUCW, %rrm2 770 pushl %rrm2 771 fldcw 0(%esp) 772 addl $4, %esp 773 */ 774 addInstr(env, mk_iMOVsd_RR(rrm, rrm2)); 775 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2)); 776 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2)); 777 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2)); 778 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2))); 779 addInstr(env, X86Instr_FpLdCW(zero_esp)); 780 add_to_esp(env, 4); 781 } 782 783 784 /* Generate !src into a new vector register, and be sure that the code 785 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy 786 way to do this. 787 */ 788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src ) 789 { 790 HReg dst = newVRegV(env); 791 /* Set dst to zero. If dst contains a NaN then all hell might 792 break loose after the comparison. So, first zero it. */ 793 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst)); 794 /* And now make it all 1s ... */ 795 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst)); 796 /* Finally, xor 'src' into it. */ 797 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst)); 798 /* Doesn't that just totally suck? */ 799 return dst; 800 } 801 802 803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used 804 after most non-simple FPU operations (simple = +, -, *, / and 805 sqrt). 806 807 This could be done a lot more efficiently if needed, by loading 808 zero and adding it to the value to be rounded (fldz ; faddp?). 809 */ 810 static void roundToF64 ( ISelEnv* env, HReg reg ) 811 { 812 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 813 sub_from_esp(env, 8); 814 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp)); 815 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp)); 816 add_to_esp(env, 8); 817 } 818 819 820 /*---------------------------------------------------------*/ 821 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/ 822 /*---------------------------------------------------------*/ 823 824 /* Select insns for an integer-typed expression, and add them to the 825 code list. Return a reg holding the result. This reg will be a 826 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. 
If you 827 want to modify it, ask for a new vreg, copy it in there, and modify 828 the copy. The register allocator will do its best to map both 829 vregs to the same real register, so the copies will often disappear 830 later in the game. 831 832 This should handle expressions of 32, 16 and 8-bit type. All 833 results are returned in a 32-bit register. For 16- and 8-bit 834 expressions, the upper 16/24 bits are arbitrary, so you should mask 835 or sign extend partial values if necessary. 836 */ 837 838 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) 839 { 840 HReg r = iselIntExpr_R_wrk(env, e); 841 /* sanity checks ... */ 842 # if 0 843 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 844 # endif 845 vassert(hregClass(r) == HRcInt32); 846 vassert(hregIsVirtual(r)); 847 return r; 848 } 849 850 /* DO NOT CALL THIS DIRECTLY ! */ 851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) 852 { 853 MatchInfo mi; 854 855 IRType ty = typeOfIRExpr(env->type_env,e); 856 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 857 858 switch (e->tag) { 859 860 /* --------- TEMP --------- */ 861 case Iex_RdTmp: { 862 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 863 } 864 865 /* --------- LOAD --------- */ 866 case Iex_Load: { 867 HReg dst = newVRegI(env); 868 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr ); 869 870 /* We can't handle big-endian loads, nor load-linked. */ 871 if (e->Iex.Load.end != Iend_LE) 872 goto irreducible; 873 874 if (ty == Ity_I32) { 875 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 876 X86RMI_Mem(amode), dst) ); 877 return dst; 878 } 879 if (ty == Ity_I16) { 880 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 881 return dst; 882 } 883 if (ty == Ity_I8) { 884 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 885 return dst; 886 } 887 break; 888 } 889 890 /* --------- TERNARY OP --------- */ 891 case Iex_Triop: { 892 IRTriop *triop = e->Iex.Triop.details; 893 /* C3210 flags following FPU partial remainder (fprem), both 894 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */ 895 if (triop->op == Iop_PRemC3210F64 896 || triop->op == Iop_PRem1C3210F64) { 897 HReg junk = newVRegF(env); 898 HReg dst = newVRegI(env); 899 HReg srcL = iselDblExpr(env, triop->arg2); 900 HReg srcR = iselDblExpr(env, triop->arg3); 901 /* XXXROUNDINGFIXME */ 902 /* set roundingmode here */ 903 addInstr(env, X86Instr_FpBinary( 904 e->Iex.Binop.op==Iop_PRemC3210F64 905 ? Xfp_PREM : Xfp_PREM1, 906 srcL,srcR,junk 907 )); 908 /* The previous pseudo-insn will have left the FPU's C3210 909 flags set correctly. So bag them. */ 910 addInstr(env, X86Instr_FpStSW_AX()); 911 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 912 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst)); 913 return dst; 914 } 915 916 break; 917 } 918 919 /* --------- BINARY OP --------- */ 920 case Iex_Binop: { 921 X86AluOp aluOp; 922 X86ShiftOp shOp; 923 924 /* Pattern: Sub32(0,x) */ 925 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) { 926 HReg dst = newVRegI(env); 927 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); 928 addInstr(env, mk_iMOVsd_RR(reg,dst)); 929 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 930 return dst; 931 } 932 933 /* Is it an addition or logical style op? 
*/ 934 switch (e->Iex.Binop.op) { 935 case Iop_Add8: case Iop_Add16: case Iop_Add32: 936 aluOp = Xalu_ADD; break; 937 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: 938 aluOp = Xalu_SUB; break; 939 case Iop_And8: case Iop_And16: case Iop_And32: 940 aluOp = Xalu_AND; break; 941 case Iop_Or8: case Iop_Or16: case Iop_Or32: 942 aluOp = Xalu_OR; break; 943 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: 944 aluOp = Xalu_XOR; break; 945 case Iop_Mul16: case Iop_Mul32: 946 aluOp = Xalu_MUL; break; 947 default: 948 aluOp = Xalu_INVALID; break; 949 } 950 /* For commutative ops we assume any literal 951 values are on the second operand. */ 952 if (aluOp != Xalu_INVALID) { 953 HReg dst = newVRegI(env); 954 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); 955 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 956 addInstr(env, mk_iMOVsd_RR(reg,dst)); 957 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst)); 958 return dst; 959 } 960 /* Could do better here; forcing the first arg into a reg 961 isn't always clever. 962 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)), 963 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32( 964 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32))) 965 movl 0xFFFFFFA0(%vr41),%vr107 966 movl 0xFFFFFFA4(%vr41),%vr108 967 movl %vr107,%vr106 968 xorl %vr108,%vr106 969 movl 0xFFFFFFA8(%vr41),%vr109 970 movl %vr106,%vr105 971 andl %vr109,%vr105 972 movl 0xFFFFFFA0(%vr41),%vr110 973 movl %vr105,%vr104 974 xorl %vr110,%vr104 975 movl %vr104,%vr70 976 */ 977 978 /* Perhaps a shift op? */ 979 switch (e->Iex.Binop.op) { 980 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8: 981 shOp = Xsh_SHL; break; 982 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 983 shOp = Xsh_SHR; break; 984 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 985 shOp = Xsh_SAR; break; 986 default: 987 shOp = Xsh_INVALID; break; 988 } 989 if (shOp != Xsh_INVALID) { 990 HReg dst = newVRegI(env); 991 992 /* regL = the value to be shifted */ 993 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); 994 addInstr(env, mk_iMOVsd_RR(regL,dst)); 995 996 /* Do any necessary widening for 16/8 bit operands */ 997 switch (e->Iex.Binop.op) { 998 case Iop_Shr8: 999 addInstr(env, X86Instr_Alu32R( 1000 Xalu_AND, X86RMI_Imm(0xFF), dst)); 1001 break; 1002 case Iop_Shr16: 1003 addInstr(env, X86Instr_Alu32R( 1004 Xalu_AND, X86RMI_Imm(0xFFFF), dst)); 1005 break; 1006 case Iop_Sar8: 1007 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst)); 1008 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst)); 1009 break; 1010 case Iop_Sar16: 1011 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst)); 1012 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst)); 1013 break; 1014 default: break; 1015 } 1016 1017 /* Now consider the shift amount. If it's a literal, we 1018 can do a much better job than the general case. */ 1019 if (e->Iex.Binop.arg2->tag == Iex_Const) { 1020 /* assert that the IR is well-typed */ 1021 Int nshift; 1022 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1023 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1024 vassert(nshift >= 0); 1025 if (nshift > 0) 1026 /* Can't allow nshift==0 since that means %cl */ 1027 addInstr(env, X86Instr_Sh32( shOp, nshift, dst )); 1028 } else { 1029 /* General case; we have to force the amount into %cl. */ 1030 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); 1031 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX())); 1032 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst)); 1033 } 1034 return dst; 1035 } 1036 1037 /* Handle misc other ops. 
*/ 1038 1039 if (e->Iex.Binop.op == Iop_Max32U) { 1040 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1041 HReg dst = newVRegI(env); 1042 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); 1043 addInstr(env, mk_iMOVsd_RR(src1,dst)); 1044 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst)); 1045 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst)); 1046 return dst; 1047 } 1048 1049 if (e->Iex.Binop.op == Iop_8HLto16) { 1050 HReg hi8 = newVRegI(env); 1051 HReg lo8 = newVRegI(env); 1052 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1053 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1054 addInstr(env, mk_iMOVsd_RR(hi8s, hi8)); 1055 addInstr(env, mk_iMOVsd_RR(lo8s, lo8)); 1056 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8)); 1057 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8)); 1058 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8)); 1059 return hi8; 1060 } 1061 1062 if (e->Iex.Binop.op == Iop_16HLto32) { 1063 HReg hi16 = newVRegI(env); 1064 HReg lo16 = newVRegI(env); 1065 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1066 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1067 addInstr(env, mk_iMOVsd_RR(hi16s, hi16)); 1068 addInstr(env, mk_iMOVsd_RR(lo16s, lo16)); 1069 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16)); 1070 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16)); 1071 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16)); 1072 return hi16; 1073 } 1074 1075 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8 1076 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) { 1077 HReg a16 = newVRegI(env); 1078 HReg b16 = newVRegI(env); 1079 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1); 1080 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2); 1081 Int shift = (e->Iex.Binop.op == Iop_MullS8 1082 || e->Iex.Binop.op == Iop_MullU8) 1083 ? 24 : 16; 1084 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8 1085 || e->Iex.Binop.op == Iop_MullS16) 1086 ? Xsh_SAR : Xsh_SHR; 1087 1088 addInstr(env, mk_iMOVsd_RR(a16s, a16)); 1089 addInstr(env, mk_iMOVsd_RR(b16s, b16)); 1090 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16)); 1091 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16)); 1092 addInstr(env, X86Instr_Sh32(shr_op, shift, a16)); 1093 addInstr(env, X86Instr_Sh32(shr_op, shift, b16)); 1094 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16)); 1095 return b16; 1096 } 1097 1098 if (e->Iex.Binop.op == Iop_CmpF64) { 1099 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1); 1100 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2); 1101 HReg dst = newVRegI(env); 1102 addInstr(env, X86Instr_FpCmp(fL,fR,dst)); 1103 /* shift this right 8 bits so as to conform to CmpF64 1104 definition. */ 1105 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst)); 1106 return dst; 1107 } 1108 1109 if (e->Iex.Binop.op == Iop_F64toI32S 1110 || e->Iex.Binop.op == Iop_F64toI16S) { 1111 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4; 1112 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 1113 HReg dst = newVRegI(env); 1114 1115 /* Used several times ... */ 1116 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1117 1118 /* rf now holds the value to be converted, and rrm holds the 1119 rounding mode value, encoded as per the IRRoundingMode 1120 enum. The first thing to do is set the FPU's rounding 1121 mode accordingly. */ 1122 1123 /* Create a space for the format conversion. 
*/ 1124 /* subl $4, %esp */ 1125 sub_from_esp(env, 4); 1126 1127 /* Set host rounding mode */ 1128 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 1129 1130 /* gistw/l %rf, 0(%esp) */ 1131 addInstr(env, X86Instr_FpLdStI(False/*store*/, 1132 toUChar(sz), rf, zero_esp)); 1133 1134 if (sz == 2) { 1135 /* movzwl 0(%esp), %dst */ 1136 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst)); 1137 } else { 1138 /* movl 0(%esp), %dst */ 1139 vassert(sz == 4); 1140 addInstr(env, X86Instr_Alu32R( 1141 Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1142 } 1143 1144 /* Restore default FPU rounding. */ 1145 set_FPU_rounding_default( env ); 1146 1147 /* addl $4, %esp */ 1148 add_to_esp(env, 4); 1149 return dst; 1150 } 1151 1152 break; 1153 } 1154 1155 /* --------- UNARY OP --------- */ 1156 case Iex_Unop: { 1157 1158 /* 1Uto8(32to1(expr32)) */ 1159 if (e->Iex.Unop.op == Iop_1Uto8) { 1160 DECLARE_PATTERN(p_32to1_then_1Uto8); 1161 DEFINE_PATTERN(p_32to1_then_1Uto8, 1162 unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); 1163 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { 1164 IRExpr* expr32 = mi.bindee[0]; 1165 HReg dst = newVRegI(env); 1166 HReg src = iselIntExpr_R(env, expr32); 1167 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1168 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1169 X86RMI_Imm(1), dst)); 1170 return dst; 1171 } 1172 } 1173 1174 /* 8Uto32(LDle(expr32)) */ 1175 if (e->Iex.Unop.op == Iop_8Uto32) { 1176 DECLARE_PATTERN(p_LDle8_then_8Uto32); 1177 DEFINE_PATTERN(p_LDle8_then_8Uto32, 1178 unop(Iop_8Uto32, 1179 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1180 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { 1181 HReg dst = newVRegI(env); 1182 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1183 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1184 return dst; 1185 } 1186 } 1187 1188 /* 8Sto32(LDle(expr32)) */ 1189 if (e->Iex.Unop.op == Iop_8Sto32) { 1190 DECLARE_PATTERN(p_LDle8_then_8Sto32); 1191 DEFINE_PATTERN(p_LDle8_then_8Sto32, 1192 unop(Iop_8Sto32, 1193 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); 1194 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { 1195 HReg dst = newVRegI(env); 1196 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1197 addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); 1198 return dst; 1199 } 1200 } 1201 1202 /* 16Uto32(LDle(expr32)) */ 1203 if (e->Iex.Unop.op == Iop_16Uto32) { 1204 DECLARE_PATTERN(p_LDle16_then_16Uto32); 1205 DEFINE_PATTERN(p_LDle16_then_16Uto32, 1206 unop(Iop_16Uto32, 1207 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) ); 1208 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) { 1209 HReg dst = newVRegI(env); 1210 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); 1211 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1212 return dst; 1213 } 1214 } 1215 1216 /* 8Uto32(GET:I8) */ 1217 if (e->Iex.Unop.op == Iop_8Uto32) { 1218 if (e->Iex.Unop.arg->tag == Iex_Get) { 1219 HReg dst; 1220 X86AMode* amode; 1221 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); 1222 dst = newVRegI(env); 1223 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1224 hregX86_EBP()); 1225 addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); 1226 return dst; 1227 } 1228 } 1229 1230 /* 16to32(GET:I16) */ 1231 if (e->Iex.Unop.op == Iop_16Uto32) { 1232 if (e->Iex.Unop.arg->tag == Iex_Get) { 1233 HReg dst; 1234 X86AMode* amode; 1235 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); 1236 dst = newVRegI(env); 1237 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1238 hregX86_EBP()); 1239 addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); 1240 return dst; 1241 } 1242 } 1243 1244 switch (e->Iex.Unop.op) { 1245 case 
Iop_8Uto16: 1246 case Iop_8Uto32: 1247 case Iop_16Uto32: { 1248 HReg dst = newVRegI(env); 1249 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1250 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF; 1251 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1252 addInstr(env, X86Instr_Alu32R(Xalu_AND, 1253 X86RMI_Imm(mask), dst)); 1254 return dst; 1255 } 1256 case Iop_8Sto16: 1257 case Iop_8Sto32: 1258 case Iop_16Sto32: { 1259 HReg dst = newVRegI(env); 1260 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1261 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24; 1262 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1263 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst)); 1264 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst)); 1265 return dst; 1266 } 1267 case Iop_Not8: 1268 case Iop_Not16: 1269 case Iop_Not32: { 1270 HReg dst = newVRegI(env); 1271 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1272 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1273 addInstr(env, X86Instr_Unary32(Xun_NOT,dst)); 1274 return dst; 1275 } 1276 case Iop_64HIto32: { 1277 HReg rHi, rLo; 1278 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1279 return rHi; /* and abandon rLo .. poor wee thing :-) */ 1280 } 1281 case Iop_64to32: { 1282 HReg rHi, rLo; 1283 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); 1284 return rLo; /* similar stupid comment to the above ... */ 1285 } 1286 case Iop_16HIto8: 1287 case Iop_32HIto16: { 1288 HReg dst = newVRegI(env); 1289 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1290 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16; 1291 addInstr(env, mk_iMOVsd_RR(src,dst) ); 1292 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst)); 1293 return dst; 1294 } 1295 case Iop_1Uto32: 1296 case Iop_1Uto8: { 1297 HReg dst = newVRegI(env); 1298 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1299 addInstr(env, X86Instr_Set32(cond,dst)); 1300 return dst; 1301 } 1302 case Iop_1Sto8: 1303 case Iop_1Sto16: 1304 case Iop_1Sto32: { 1305 /* could do better than this, but for now ... */ 1306 HReg dst = newVRegI(env); 1307 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 1308 addInstr(env, X86Instr_Set32(cond,dst)); 1309 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst)); 1310 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1311 return dst; 1312 } 1313 case Iop_Ctz32: { 1314 /* Count trailing zeroes, implemented by x86 'bsfl' */ 1315 HReg dst = newVRegI(env); 1316 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1317 addInstr(env, X86Instr_Bsfr32(True,src,dst)); 1318 return dst; 1319 } 1320 case Iop_Clz32: { 1321 /* Count leading zeroes. Do 'bsrl' to establish the index 1322 of the highest set bit, and subtract that value from 1323 31. 
*/ 1324 HReg tmp = newVRegI(env); 1325 HReg dst = newVRegI(env); 1326 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1327 addInstr(env, X86Instr_Bsfr32(False,src,tmp)); 1328 addInstr(env, X86Instr_Alu32R(Xalu_MOV, 1329 X86RMI_Imm(31), dst)); 1330 addInstr(env, X86Instr_Alu32R(Xalu_SUB, 1331 X86RMI_Reg(tmp), dst)); 1332 return dst; 1333 } 1334 1335 case Iop_CmpwNEZ32: { 1336 HReg dst = newVRegI(env); 1337 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1338 addInstr(env, mk_iMOVsd_RR(src,dst)); 1339 addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); 1340 addInstr(env, X86Instr_Alu32R(Xalu_OR, 1341 X86RMI_Reg(src), dst)); 1342 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); 1343 return dst; 1344 } 1345 case Iop_Left8: 1346 case Iop_Left16: 1347 case Iop_Left32: { 1348 HReg dst = newVRegI(env); 1349 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 1350 addInstr(env, mk_iMOVsd_RR(src, dst)); 1351 addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); 1352 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); 1353 return dst; 1354 } 1355 1356 case Iop_V128to32: { 1357 HReg dst = newVRegI(env); 1358 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 1359 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 1360 sub_from_esp(env, 16); 1361 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 1362 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst )); 1363 add_to_esp(env, 16); 1364 return dst; 1365 } 1366 1367 /* ReinterpF32asI32(e) */ 1368 /* Given an IEEE754 single, produce an I32 with the same bit 1369 pattern. Keep stack 8-aligned even though only using 4 1370 bytes. */ 1371 case Iop_ReinterpF32asI32: { 1372 HReg rf = iselFltExpr(env, e->Iex.Unop.arg); 1373 HReg dst = newVRegI(env); 1374 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 1375 /* paranoia */ 1376 set_FPU_rounding_default(env); 1377 /* subl $8, %esp */ 1378 sub_from_esp(env, 8); 1379 /* gstF %rf, 0(%esp) */ 1380 addInstr(env, 1381 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp)); 1382 /* movl 0(%esp), %dst */ 1383 addInstr(env, 1384 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst)); 1385 /* addl $8, %esp */ 1386 add_to_esp(env, 8); 1387 return dst; 1388 } 1389 1390 case Iop_16to8: 1391 case Iop_32to8: 1392 case Iop_32to16: 1393 /* These are no-ops. */ 1394 return iselIntExpr_R(env, e->Iex.Unop.arg); 1395 1396 case Iop_GetMSBs8x8: { 1397 /* Note: the following assumes the helper is of 1398 signature 1399 UInt fn ( ULong ), and is not a regparm fn. 1400 */ 1401 HReg xLo, xHi; 1402 HReg dst = newVRegI(env); 1403 HWord fn = (HWord)h_generic_calc_GetMSBs8x8; 1404 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 1405 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 1406 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 1407 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 1408 0, mk_RetLoc_simple(RLPri_Int) )); 1409 add_to_esp(env, 2*4); 1410 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1411 return dst; 1412 } 1413 1414 default: 1415 break; 1416 } 1417 break; 1418 } 1419 1420 /* --------- GET --------- */ 1421 case Iex_Get: { 1422 if (ty == Ity_I32) { 1423 HReg dst = newVRegI(env); 1424 addInstr(env, X86Instr_Alu32R( 1425 Xalu_MOV, 1426 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1427 hregX86_EBP())), 1428 dst)); 1429 return dst; 1430 } 1431 if (ty == Ity_I8 || ty == Ity_I16) { 1432 HReg dst = newVRegI(env); 1433 addInstr(env, X86Instr_LoadEX( 1434 toUChar(ty==Ity_I8 ? 
1 : 2), 1435 False, 1436 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()), 1437 dst)); 1438 return dst; 1439 } 1440 break; 1441 } 1442 1443 case Iex_GetI: { 1444 X86AMode* am 1445 = genGuestArrayOffset( 1446 env, e->Iex.GetI.descr, 1447 e->Iex.GetI.ix, e->Iex.GetI.bias ); 1448 HReg dst = newVRegI(env); 1449 if (ty == Ity_I8) { 1450 addInstr(env, X86Instr_LoadEX( 1, False, am, dst )); 1451 return dst; 1452 } 1453 if (ty == Ity_I32) { 1454 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst)); 1455 return dst; 1456 } 1457 break; 1458 } 1459 1460 /* --------- CCALL --------- */ 1461 case Iex_CCall: { 1462 HReg dst = newVRegI(env); 1463 vassert(ty == e->Iex.CCall.retty); 1464 1465 /* be very restrictive for now. Only 32/64-bit ints allowed for 1466 args, and 32 bits for return type. Don't forget to change 1467 the RetLoc if more return types are allowed in future. */ 1468 if (e->Iex.CCall.retty != Ity_I32) 1469 goto irreducible; 1470 1471 /* Marshal args, do the call, clear stack. */ 1472 UInt addToSp = 0; 1473 RetLoc rloc = mk_RetLoc_INVALID(); 1474 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1475 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args ); 1476 vassert(is_sane_RetLoc(rloc)); 1477 vassert(rloc.pri == RLPri_Int); 1478 vassert(addToSp == 0); 1479 1480 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); 1481 return dst; 1482 } 1483 1484 /* --------- LITERAL --------- */ 1485 /* 32/16/8-bit literals */ 1486 case Iex_Const: { 1487 X86RMI* rmi = iselIntExpr_RMI ( env, e ); 1488 HReg r = newVRegI(env); 1489 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r)); 1490 return r; 1491 } 1492 1493 /* --------- MULTIPLEX --------- */ 1494 case Iex_ITE: { // VFD 1495 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) 1496 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 1497 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); 1498 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse); 1499 HReg dst = newVRegI(env); 1500 addInstr(env, mk_iMOVsd_RR(r1,dst)); 1501 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 1502 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst)); 1503 return dst; 1504 } 1505 break; 1506 } 1507 1508 default: 1509 break; 1510 } /* switch (e->tag) */ 1511 1512 /* We get here if no pattern matched. */ 1513 irreducible: 1514 ppIRExpr(e); 1515 vpanic("iselIntExpr_R: cannot reduce tree"); 1516 } 1517 1518 1519 /*---------------------------------------------------------*/ 1520 /*--- ISEL: Integer expression auxiliaries ---*/ 1521 /*---------------------------------------------------------*/ 1522 1523 /* --------------------- AMODEs --------------------- */ 1524 1525 /* Return an AMode which computes the value of the specified 1526 expression, possibly also adding insns to the code list as a 1527 result. The expression may only be a 32-bit one. 
1528 */ 1529 1530 static Bool sane_AMode ( X86AMode* am ) 1531 { 1532 switch (am->tag) { 1533 case Xam_IR: 1534 return 1535 toBool( hregClass(am->Xam.IR.reg) == HRcInt32 1536 && (hregIsVirtual(am->Xam.IR.reg) 1537 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) ); 1538 case Xam_IRRS: 1539 return 1540 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32 1541 && hregIsVirtual(am->Xam.IRRS.base) 1542 && hregClass(am->Xam.IRRS.index) == HRcInt32 1543 && hregIsVirtual(am->Xam.IRRS.index) ); 1544 default: 1545 vpanic("sane_AMode: unknown x86 amode tag"); 1546 } 1547 } 1548 1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e ) 1550 { 1551 X86AMode* am = iselIntExpr_AMode_wrk(env, e); 1552 vassert(sane_AMode(am)); 1553 return am; 1554 } 1555 1556 /* DO NOT CALL THIS DIRECTLY ! */ 1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e ) 1558 { 1559 IRType ty = typeOfIRExpr(env->type_env,e); 1560 vassert(ty == Ity_I32); 1561 1562 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */ 1563 if (e->tag == Iex_Binop 1564 && e->Iex.Binop.op == Iop_Add32 1565 && e->Iex.Binop.arg2->tag == Iex_Const 1566 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32 1567 && e->Iex.Binop.arg1->tag == Iex_Binop 1568 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32 1569 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop 1570 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1571 && e->Iex.Binop.arg1 1572 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1573 && e->Iex.Binop.arg1 1574 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1575 UInt shift = e->Iex.Binop.arg1 1576 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1577 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1578 if (shift == 1 || shift == 2 || shift == 3) { 1579 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1); 1580 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1 1581 ->Iex.Binop.arg2->Iex.Binop.arg1 ); 1582 return X86AMode_IRRS(imm32, r1, r2, shift); 1583 } 1584 } 1585 1586 /* Add32(expr1, Shl32(expr2, imm)) */ 1587 if (e->tag == Iex_Binop 1588 && e->Iex.Binop.op == Iop_Add32 1589 && e->Iex.Binop.arg2->tag == Iex_Binop 1590 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32 1591 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const 1592 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) { 1593 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8; 1594 if (shift == 1 || shift == 2 || shift == 3) { 1595 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1596 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 ); 1597 return X86AMode_IRRS(0, r1, r2, shift); 1598 } 1599 } 1600 1601 /* Add32(expr,i) */ 1602 if (e->tag == Iex_Binop 1603 && e->Iex.Binop.op == Iop_Add32 1604 && e->Iex.Binop.arg2->tag == Iex_Const 1605 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { 1606 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1607 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1); 1608 } 1609 1610 /* Doesn't match anything in particular. Generate it into 1611 a register and use that. */ 1612 { 1613 HReg r1 = iselIntExpr_R(env, e); 1614 return X86AMode_IR(0, r1); 1615 } 1616 } 1617 1618 1619 /* --------------------- RMIs --------------------- */ 1620 1621 /* Similarly, calculate an expression into an X86RMI operand. As with 1622 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. 
*/ 1623 1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e ) 1625 { 1626 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e); 1627 /* sanity checks ... */ 1628 switch (rmi->tag) { 1629 case Xrmi_Imm: 1630 return rmi; 1631 case Xrmi_Reg: 1632 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32); 1633 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg)); 1634 return rmi; 1635 case Xrmi_Mem: 1636 vassert(sane_AMode(rmi->Xrmi.Mem.am)); 1637 return rmi; 1638 default: 1639 vpanic("iselIntExpr_RMI: unknown x86 RMI tag"); 1640 } 1641 } 1642 1643 /* DO NOT CALL THIS DIRECTLY ! */ 1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e ) 1645 { 1646 IRType ty = typeOfIRExpr(env->type_env,e); 1647 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1648 1649 /* special case: immediate */ 1650 if (e->tag == Iex_Const) { 1651 UInt u; 1652 switch (e->Iex.Const.con->tag) { 1653 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1654 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1655 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1656 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1657 } 1658 return X86RMI_Imm(u); 1659 } 1660 1661 /* special case: 32-bit GET */ 1662 if (e->tag == Iex_Get && ty == Ity_I32) { 1663 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset, 1664 hregX86_EBP())); 1665 } 1666 1667 /* special case: 32-bit load from memory */ 1668 if (e->tag == Iex_Load && ty == Ity_I32 1669 && e->Iex.Load.end == Iend_LE) { 1670 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 1671 return X86RMI_Mem(am); 1672 } 1673 1674 /* default case: calculate into a register and return that */ 1675 { 1676 HReg r = iselIntExpr_R ( env, e ); 1677 return X86RMI_Reg(r); 1678 } 1679 } 1680 1681 1682 /* --------------------- RIs --------------------- */ 1683 1684 /* Calculate an expression into an X86RI operand. As with 1685 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1686 1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e ) 1688 { 1689 X86RI* ri = iselIntExpr_RI_wrk(env, e); 1690 /* sanity checks ... */ 1691 switch (ri->tag) { 1692 case Xri_Imm: 1693 return ri; 1694 case Xri_Reg: 1695 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32); 1696 vassert(hregIsVirtual(ri->Xri.Reg.reg)); 1697 return ri; 1698 default: 1699 vpanic("iselIntExpr_RI: unknown x86 RI tag"); 1700 } 1701 } 1702 1703 /* DO NOT CALL THIS DIRECTLY ! */ 1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e ) 1705 { 1706 IRType ty = typeOfIRExpr(env->type_env,e); 1707 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1708 1709 /* special case: immediate */ 1710 if (e->tag == Iex_Const) { 1711 UInt u; 1712 switch (e->Iex.Const.con->tag) { 1713 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; 1714 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; 1715 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; 1716 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)"); 1717 } 1718 return X86RI_Imm(u); 1719 } 1720 1721 /* default case: calculate into a register and return that */ 1722 { 1723 HReg r = iselIntExpr_R ( env, e ); 1724 return X86RI_Reg(r); 1725 } 1726 } 1727 1728 1729 /* --------------------- RMs --------------------- */ 1730 1731 /* Similarly, calculate an expression into an X86RM operand. As with 1732 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */ 1733 1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e ) 1735 { 1736 X86RM* rm = iselIntExpr_RM_wrk(env, e); 1737 /* sanity checks ... 
*/ 1738 switch (rm->tag) { 1739 case Xrm_Reg: 1740 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32); 1741 vassert(hregIsVirtual(rm->Xrm.Reg.reg)); 1742 return rm; 1743 case Xrm_Mem: 1744 vassert(sane_AMode(rm->Xrm.Mem.am)); 1745 return rm; 1746 default: 1747 vpanic("iselIntExpr_RM: unknown x86 RM tag"); 1748 } 1749 } 1750 1751 /* DO NOT CALL THIS DIRECTLY ! */ 1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e ) 1753 { 1754 IRType ty = typeOfIRExpr(env->type_env,e); 1755 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); 1756 1757 /* special case: 32-bit GET */ 1758 if (e->tag == Iex_Get && ty == Ity_I32) { 1759 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset, 1760 hregX86_EBP())); 1761 } 1762 1763 /* special case: load from memory */ 1764 1765 /* default case: calculate into a register and return that */ 1766 { 1767 HReg r = iselIntExpr_R ( env, e ); 1768 return X86RM_Reg(r); 1769 } 1770 } 1771 1772 1773 /* --------------------- CONDCODE --------------------- */ 1774 1775 /* Generate code to evaluated a bit-typed expression, returning the 1776 condition code which would correspond when the expression would 1777 notionally have returned 1. */ 1778 1779 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) 1780 { 1781 /* Uh, there's nothing we can sanity check here, unfortunately. */ 1782 return iselCondCode_wrk(env,e); 1783 } 1784 1785 /* DO NOT CALL THIS DIRECTLY ! */ 1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) 1787 { 1788 MatchInfo mi; 1789 1790 vassert(e); 1791 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); 1792 1793 /* var */ 1794 if (e->tag == Iex_RdTmp) { 1795 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp); 1796 /* Test32 doesn't modify r32; so this is OK. */ 1797 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32))); 1798 return Xcc_NZ; 1799 } 1800 1801 /* Constant 1:Bit */ 1802 if (e->tag == Iex_Const) { 1803 HReg r; 1804 vassert(e->Iex.Const.con->tag == Ico_U1); 1805 vassert(e->Iex.Const.con->Ico.U1 == True 1806 || e->Iex.Const.con->Ico.U1 == False); 1807 r = newVRegI(env); 1808 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r)); 1809 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r)); 1810 return e->Iex.Const.con->Ico.U1 ? 
Xcc_Z : Xcc_NZ; 1811 } 1812 1813 /* Not1(e) */ 1814 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { 1815 /* Generate code for the arg, and negate the test condition */ 1816 return 1 ^ iselCondCode(env, e->Iex.Unop.arg); 1817 } 1818 1819 /* --- patterns rooted at: 32to1 --- */ 1820 1821 if (e->tag == Iex_Unop 1822 && e->Iex.Unop.op == Iop_32to1) { 1823 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1824 addInstr(env, X86Instr_Test32(1,rm)); 1825 return Xcc_NZ; 1826 } 1827 1828 /* --- patterns rooted at: CmpNEZ8 --- */ 1829 1830 /* CmpNEZ8(x) */ 1831 if (e->tag == Iex_Unop 1832 && e->Iex.Unop.op == Iop_CmpNEZ8) { 1833 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1834 addInstr(env, X86Instr_Test32(0xFF,rm)); 1835 return Xcc_NZ; 1836 } 1837 1838 /* --- patterns rooted at: CmpNEZ16 --- */ 1839 1840 /* CmpNEZ16(x) */ 1841 if (e->tag == Iex_Unop 1842 && e->Iex.Unop.op == Iop_CmpNEZ16) { 1843 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); 1844 addInstr(env, X86Instr_Test32(0xFFFF,rm)); 1845 return Xcc_NZ; 1846 } 1847 1848 /* --- patterns rooted at: CmpNEZ32 --- */ 1849 1850 /* CmpNEZ32(And32(x,y)) */ 1851 { 1852 DECLARE_PATTERN(p_CmpNEZ32_And32); 1853 DEFINE_PATTERN(p_CmpNEZ32_And32, 1854 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1)))); 1855 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) { 1856 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1857 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1858 HReg tmp = newVRegI(env); 1859 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1860 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp)); 1861 return Xcc_NZ; 1862 } 1863 } 1864 1865 /* CmpNEZ32(Or32(x,y)) */ 1866 { 1867 DECLARE_PATTERN(p_CmpNEZ32_Or32); 1868 DEFINE_PATTERN(p_CmpNEZ32_Or32, 1869 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1)))); 1870 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) { 1871 HReg r0 = iselIntExpr_R(env, mi.bindee[0]); 1872 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]); 1873 HReg tmp = newVRegI(env); 1874 addInstr(env, mk_iMOVsd_RR(r0, tmp)); 1875 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp)); 1876 return Xcc_NZ; 1877 } 1878 } 1879 1880 /* CmpNEZ32(GET(..):I32) */ 1881 if (e->tag == Iex_Unop 1882 && e->Iex.Unop.op == Iop_CmpNEZ32 1883 && e->Iex.Unop.arg->tag == Iex_Get) { 1884 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, 1885 hregX86_EBP()); 1886 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am)); 1887 return Xcc_NZ; 1888 } 1889 1890 /* CmpNEZ32(x) */ 1891 if (e->tag == Iex_Unop 1892 && e->Iex.Unop.op == Iop_CmpNEZ32) { 1893 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); 1894 X86RMI* rmi2 = X86RMI_Imm(0); 1895 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 1896 return Xcc_NZ; 1897 } 1898 1899 /* --- patterns rooted at: CmpNEZ64 --- */ 1900 1901 /* CmpNEZ64(Or64(x,y)) */ 1902 { 1903 DECLARE_PATTERN(p_CmpNEZ64_Or64); 1904 DEFINE_PATTERN(p_CmpNEZ64_Or64, 1905 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1)))); 1906 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) { 1907 HReg hi1, lo1, hi2, lo2; 1908 HReg tmp = newVRegI(env); 1909 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] ); 1910 addInstr(env, mk_iMOVsd_RR(hi1, tmp)); 1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp)); 1912 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] ); 1913 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp)); 1914 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp)); 1915 return Xcc_NZ; 1916 } 1917 } 1918 1919 /* CmpNEZ64(x) */ 1920 if (e->tag == Iex_Unop 1921 && e->Iex.Unop.op == Iop_CmpNEZ64) { 1922 HReg hi, lo; 1923 HReg tmp = 
newVRegI(env); 1924 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg ); 1925 addInstr(env, mk_iMOVsd_RR(hi, tmp)); 1926 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp)); 1927 return Xcc_NZ; 1928 } 1929 1930 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */ 1931 1932 /* CmpEQ8 / CmpNE8 */ 1933 if (e->tag == Iex_Binop 1934 && (e->Iex.Binop.op == Iop_CmpEQ8 1935 || e->Iex.Binop.op == Iop_CmpNE8 1936 || e->Iex.Binop.op == Iop_CasCmpEQ8 1937 || e->Iex.Binop.op == Iop_CasCmpNE8)) { 1938 if (isZeroU8(e->Iex.Binop.arg2)) { 1939 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1940 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1))); 1941 switch (e->Iex.Binop.op) { 1942 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1943 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1944 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)"); 1945 } 1946 } else { 1947 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1948 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1949 HReg r = newVRegI(env); 1950 addInstr(env, mk_iMOVsd_RR(r1,r)); 1951 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1952 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r))); 1953 switch (e->Iex.Binop.op) { 1954 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z; 1955 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ; 1956 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)"); 1957 } 1958 } 1959 } 1960 1961 /* CmpEQ16 / CmpNE16 */ 1962 if (e->tag == Iex_Binop 1963 && (e->Iex.Binop.op == Iop_CmpEQ16 1964 || e->Iex.Binop.op == Iop_CmpNE16 1965 || e->Iex.Binop.op == Iop_CasCmpEQ16 1966 || e->Iex.Binop.op == Iop_CasCmpNE16 1967 || e->Iex.Binop.op == Iop_ExpCmpNE16)) { 1968 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 1969 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 1970 HReg r = newVRegI(env); 1971 addInstr(env, mk_iMOVsd_RR(r1,r)); 1972 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); 1973 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r))); 1974 switch (e->Iex.Binop.op) { 1975 case Iop_CmpEQ16: case Iop_CasCmpEQ16: 1976 return Xcc_Z; 1977 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16: 1978 return Xcc_NZ; 1979 default: 1980 vpanic("iselCondCode(x86): CmpXX16"); 1981 } 1982 } 1983 1984 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation). 1985 Saves a "movl %eax, %tmp" compared to the default route. */ 1986 if (e->tag == Iex_Binop 1987 && e->Iex.Binop.op == Iop_CmpNE32 1988 && e->Iex.Binop.arg1->tag == Iex_CCall 1989 && e->Iex.Binop.arg2->tag == Iex_Const) { 1990 IRExpr* cal = e->Iex.Binop.arg1; 1991 IRExpr* con = e->Iex.Binop.arg2; 1992 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */ 1993 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */ 1994 vassert(con->Iex.Const.con->tag == Ico_U32); 1995 /* Marshal args, do the call. 
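The helper's I32 result comes back in %eax, so the CMP against the literal below can test %eax directly, with no intermediate vreg.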
*/ 1996 UInt addToSp = 0; 1997 RetLoc rloc = mk_RetLoc_INVALID(); 1998 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 1999 cal->Iex.CCall.cee, 2000 cal->Iex.CCall.retty, cal->Iex.CCall.args ); 2001 vassert(is_sane_RetLoc(rloc)); 2002 vassert(rloc.pri == RLPri_Int); 2003 vassert(addToSp == 0); 2004 /* */ 2005 addInstr(env, X86Instr_Alu32R(Xalu_CMP, 2006 X86RMI_Imm(con->Iex.Const.con->Ico.U32), 2007 hregX86_EAX())); 2008 return Xcc_NZ; 2009 } 2010 2011 /* Cmp*32*(x,y) */ 2012 if (e->tag == Iex_Binop 2013 && (e->Iex.Binop.op == Iop_CmpEQ32 2014 || e->Iex.Binop.op == Iop_CmpNE32 2015 || e->Iex.Binop.op == Iop_CmpLT32S 2016 || e->Iex.Binop.op == Iop_CmpLT32U 2017 || e->Iex.Binop.op == Iop_CmpLE32S 2018 || e->Iex.Binop.op == Iop_CmpLE32U 2019 || e->Iex.Binop.op == Iop_CasCmpEQ32 2020 || e->Iex.Binop.op == Iop_CasCmpNE32 2021 || e->Iex.Binop.op == Iop_ExpCmpNE32)) { 2022 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); 2023 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2024 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); 2025 switch (e->Iex.Binop.op) { 2026 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z; 2027 case Iop_CmpNE32: 2028 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ; 2029 case Iop_CmpLT32S: return Xcc_L; 2030 case Iop_CmpLT32U: return Xcc_B; 2031 case Iop_CmpLE32S: return Xcc_LE; 2032 case Iop_CmpLE32U: return Xcc_BE; 2033 default: vpanic("iselCondCode(x86): CmpXX32"); 2034 } 2035 } 2036 2037 /* CmpNE64 */ 2038 if (e->tag == Iex_Binop 2039 && (e->Iex.Binop.op == Iop_CmpNE64 2040 || e->Iex.Binop.op == Iop_CmpEQ64)) { 2041 HReg hi1, hi2, lo1, lo2; 2042 HReg tHi = newVRegI(env); 2043 HReg tLo = newVRegI(env); 2044 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 ); 2045 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 ); 2046 addInstr(env, mk_iMOVsd_RR(hi1, tHi)); 2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi)); 2048 addInstr(env, mk_iMOVsd_RR(lo1, tLo)); 2049 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo)); 2050 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo)); 2051 switch (e->Iex.Binop.op) { 2052 case Iop_CmpNE64: return Xcc_NZ; 2053 case Iop_CmpEQ64: return Xcc_Z; 2054 default: vpanic("iselCondCode(x86): CmpXX64"); 2055 } 2056 } 2057 2058 ppIRExpr(e); 2059 vpanic("iselCondCode"); 2060 } 2061 2062 2063 /*---------------------------------------------------------*/ 2064 /*--- ISEL: Integer expressions (64 bit) ---*/ 2065 /*---------------------------------------------------------*/ 2066 2067 /* Compute a 64-bit value into a register pair, which is returned as 2068 the first two parameters. As with iselIntExpr_R, these may be 2069 either real or virtual regs; in any case they must not be changed 2070 by subsequent code emitted by the caller. */ 2071 2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 2073 { 2074 iselInt64Expr_wrk(rHi, rLo, env, e); 2075 # if 0 2076 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2077 # endif 2078 vassert(hregClass(*rHi) == HRcInt32); 2079 vassert(hregIsVirtual(*rHi)); 2080 vassert(hregClass(*rLo) == HRcInt32); 2081 vassert(hregIsVirtual(*rLo)); 2082 } 2083 2084 /* DO NOT CALL THIS DIRECTLY ! 
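Always go via iselInt64Expr, which checks that both returned halves are virtual 32-bit integer registers.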
*/ 2085 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) 2086 { 2087 MatchInfo mi; 2088 HWord fn = 0; /* helper fn for most SIMD64 stuff */ 2089 vassert(e); 2090 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); 2091 2092 /* 64-bit literal */ 2093 if (e->tag == Iex_Const) { 2094 ULong w64 = e->Iex.Const.con->Ico.U64; 2095 UInt wHi = toUInt(w64 >> 32); 2096 UInt wLo = toUInt(w64); 2097 HReg tLo = newVRegI(env); 2098 HReg tHi = newVRegI(env); 2099 vassert(e->Iex.Const.con->tag == Ico_U64); 2100 if (wLo == wHi) { 2101 /* Save a precious Int register in this special case. */ 2102 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 2103 *rHi = tLo; 2104 *rLo = tLo; 2105 } else { 2106 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi)); 2107 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo)); 2108 *rHi = tHi; 2109 *rLo = tLo; 2110 } 2111 return; 2112 } 2113 2114 /* read 64-bit IRTemp */ 2115 if (e->tag == Iex_RdTmp) { 2116 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); 2117 return; 2118 } 2119 2120 /* 64-bit load */ 2121 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2122 HReg tLo, tHi; 2123 X86AMode *am0, *am4; 2124 vassert(e->Iex.Load.ty == Ity_I64); 2125 tLo = newVRegI(env); 2126 tHi = newVRegI(env); 2127 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr); 2128 am4 = advance4(am0); 2129 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo )); 2130 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2131 *rHi = tHi; 2132 *rLo = tLo; 2133 return; 2134 } 2135 2136 /* 64-bit GET */ 2137 if (e->tag == Iex_Get) { 2138 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()); 2139 X86AMode* am4 = advance4(am); 2140 HReg tLo = newVRegI(env); 2141 HReg tHi = newVRegI(env); 2142 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2143 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2144 *rHi = tHi; 2145 *rLo = tLo; 2146 return; 2147 } 2148 2149 /* 64-bit GETI */ 2150 if (e->tag == Iex_GetI) { 2151 X86AMode* am 2152 = genGuestArrayOffset( env, e->Iex.GetI.descr, 2153 e->Iex.GetI.ix, e->Iex.GetI.bias ); 2154 X86AMode* am4 = advance4(am); 2155 HReg tLo = newVRegI(env); 2156 HReg tHi = newVRegI(env); 2157 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo )); 2158 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi )); 2159 *rHi = tHi; 2160 *rLo = tLo; 2161 return; 2162 } 2163 2164 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD 2165 if (e->tag == Iex_ITE) { 2166 HReg e0Lo, e0Hi, e1Lo, e1Hi; 2167 HReg tLo = newVRegI(env); 2168 HReg tHi = newVRegI(env); 2169 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse); 2170 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue); 2171 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi)); 2172 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo)); 2173 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 2174 /* This assumes the first cmov32 doesn't trash the condition 2175 codes, so they are still available for the second cmov32 */ 2176 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi)); 2177 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo)); 2178 *rHi = tHi; 2179 *rLo = tLo; 2180 return; 2181 } 2182 2183 /* --------- BINARY ops --------- */ 2184 if (e->tag == Iex_Binop) { 2185 switch (e->Iex.Binop.op) { 2186 /* 32 x 32 -> 64 multiply */ 2187 case Iop_MullU32: 2188 case Iop_MullS32: { 2189 /* get one operand into %eax, and the other into a R/M. 2190 Need to make an educated guess about which is better in 2191 which. 
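As coded, arg1 becomes the R/M operand and arg2 goes to %eax; the multiply then leaves the 64-bit product in %edx:%eax, which is copied out to tHi:tLo.  Roughly:

         movl  <arg2>, %eax
         mull  <arg1>            (imull for MullS32)
         movl  %edx, tHi
         movl  %eax, tLo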
*/ 2192 HReg tLo = newVRegI(env); 2193 HReg tHi = newVRegI(env); 2194 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32); 2195 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1); 2196 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2); 2197 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX())); 2198 addInstr(env, X86Instr_MulL(syned, rmLeft)); 2199 /* Result is now in EDX:EAX. Tell the caller. */ 2200 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2201 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2202 *rHi = tHi; 2203 *rLo = tLo; 2204 return; 2205 } 2206 2207 /* 64 x 32 -> (32(rem),32(div)) division */ 2208 case Iop_DivModU64to32: 2209 case Iop_DivModS64to32: { 2210 /* Get the 64-bit operand into edx:eax, and the other into 2211 any old R/M. */ 2212 HReg sHi, sLo; 2213 HReg tLo = newVRegI(env); 2214 HReg tHi = newVRegI(env); 2215 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32); 2216 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2); 2217 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2218 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX())); 2219 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX())); 2220 addInstr(env, X86Instr_Div(syned, rmRight)); 2221 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2222 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2223 *rHi = tHi; 2224 *rLo = tLo; 2225 return; 2226 } 2227 2228 /* Or64/And64/Xor64 */ 2229 case Iop_Or64: 2230 case Iop_And64: 2231 case Iop_Xor64: { 2232 HReg xLo, xHi, yLo, yHi; 2233 HReg tLo = newVRegI(env); 2234 HReg tHi = newVRegI(env); 2235 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR 2236 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND 2237 : Xalu_XOR; 2238 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2239 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2240 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2241 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); 2242 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2243 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); 2244 *rHi = tHi; 2245 *rLo = tLo; 2246 return; 2247 } 2248 2249 /* Add64/Sub64 */ 2250 case Iop_Add64: 2251 if (e->Iex.Binop.arg2->tag == Iex_Const) { 2252 /* special case Add64(e, const) */ 2253 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2254 UInt wHi = toUInt(w64 >> 32); 2255 UInt wLo = toUInt(w64); 2256 HReg tLo = newVRegI(env); 2257 HReg tHi = newVRegI(env); 2258 HReg xLo, xHi; 2259 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64); 2260 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2261 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2262 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2263 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo)); 2264 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi)); 2265 *rHi = tHi; 2266 *rLo = tLo; 2267 return; 2268 } 2269 /* else fall through to the generic case */ 2270 case Iop_Sub64: { 2271 HReg xLo, xHi, yLo, yHi; 2272 HReg tLo = newVRegI(env); 2273 HReg tHi = newVRegI(env); 2274 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2275 addInstr(env, mk_iMOVsd_RR(xHi, tHi)); 2276 addInstr(env, mk_iMOVsd_RR(xLo, tLo)); 2277 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2278 if (e->Iex.Binop.op==Iop_Add64) { 2279 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo)); 2280 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi)); 2281 } else { 2282 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2283 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2284 } 2285 *rHi = tHi; 2286 *rLo = tLo; 2287 return; 2288 } 2289 2290 /* 
32HLto64(e1,e2) */ 2291 case Iop_32HLto64: 2292 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); 2293 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); 2294 return; 2295 2296 /* 64-bit shifts */ 2297 case Iop_Shl64: { 2298 /* We use the same ingenious scheme as gcc. Put the value 2299 to be shifted into %hi:%lo, and the shift amount into 2300 %cl. Then (dsts on right, a la ATT syntax): 2301 2302 shldl %cl, %lo, %hi -- make %hi be right for the 2303 -- shift amt %cl % 32 2304 shll %cl, %lo -- make %lo be right for the 2305 -- shift amt %cl % 32 2306 2307 Now, if (shift amount % 64) is in the range 32 .. 63, 2308 we have to do a fixup, which puts the result low half 2309 into the result high half, and zeroes the low half: 2310 2311 testl $32, %ecx 2312 2313 cmovnz %lo, %hi 2314 movl $0, %tmp -- sigh; need yet another reg 2315 cmovnz %tmp, %lo 2316 */ 2317 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2318 tLo = newVRegI(env); 2319 tHi = newVRegI(env); 2320 tTemp = newVRegI(env); 2321 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2322 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2323 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2324 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2325 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2326 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2327 and those regs are legitimately modifiable. */ 2328 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi)); 2329 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo)); 2330 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2331 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi)); 2332 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2333 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo)); 2334 *rHi = tHi; 2335 *rLo = tLo; 2336 return; 2337 } 2338 2339 case Iop_Shr64: { 2340 /* We use the same ingenious scheme as gcc. Put the value 2341 to be shifted into %hi:%lo, and the shift amount into 2342 %cl. Then: 2343 2344 shrdl %cl, %hi, %lo -- make %lo be right for the 2345 -- shift amt %cl % 32 2346 shrl %cl, %hi -- make %hi be right for the 2347 -- shift amt %cl % 32 2348 2349 Now, if (shift amount % 64) is in the range 32 .. 63, 2350 we have to do a fixup, which puts the result high half 2351 into the result low half, and zeroes the high half: 2352 2353 testl $32, %ecx 2354 2355 cmovnz %hi, %lo 2356 movl $0, %tmp -- sigh; need yet another reg 2357 cmovnz %tmp, %hi 2358 */ 2359 HReg rAmt, sHi, sLo, tHi, tLo, tTemp; 2360 tLo = newVRegI(env); 2361 tHi = newVRegI(env); 2362 tTemp = newVRegI(env); 2363 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2); 2364 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1); 2365 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX())); 2366 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2367 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2368 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo 2369 and those regs are legitimately modifiable. */ 2370 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo)); 2371 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi)); 2372 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX()))); 2373 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo)); 2374 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp)); 2375 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi)); 2376 *rHi = tHi; 2377 *rLo = tLo; 2378 return; 2379 } 2380 2381 /* F64 -> I64 */ 2382 /* Sigh, this is an almost exact copy of the F64 -> I32/I16 2383 case. Unfortunately I see no easy way to avoid the 2384 duplication. 
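Note the IR convention: arg1 is the IRRoundingMode, arg2 the F64 value to convert.  The value is written out with a 64-bit integer store (the gistll below) and the two 32-bit halves are then picked up from 0(%esp) and 4(%esp).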
*/ 2385 case Iop_F64toI64S: { 2386 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 2387 HReg tLo = newVRegI(env); 2388 HReg tHi = newVRegI(env); 2389 2390 /* Used several times ... */ 2391 /* Careful ... this sharing is only safe because 2392 zero_esp/four_esp do not hold any registers which the 2393 register allocator could attempt to swizzle later. */ 2394 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2395 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2396 2397 /* rf now holds the value to be converted, and rrm holds 2398 the rounding mode value, encoded as per the 2399 IRRoundingMode enum. The first thing to do is set the 2400 FPU's rounding mode accordingly. */ 2401 2402 /* Create a space for the format conversion. */ 2403 /* subl $8, %esp */ 2404 sub_from_esp(env, 8); 2405 2406 /* Set host rounding mode */ 2407 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2408 2409 /* gistll %rf, 0(%esp) */ 2410 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp)); 2411 2412 /* movl 0(%esp), %dstLo */ 2413 /* movl 4(%esp), %dstHi */ 2414 addInstr(env, X86Instr_Alu32R( 2415 Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2416 addInstr(env, X86Instr_Alu32R( 2417 Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2418 2419 /* Restore default FPU rounding. */ 2420 set_FPU_rounding_default( env ); 2421 2422 /* addl $8, %esp */ 2423 add_to_esp(env, 8); 2424 2425 *rHi = tHi; 2426 *rLo = tLo; 2427 return; 2428 } 2429 2430 case Iop_Add8x8: 2431 fn = (HWord)h_generic_calc_Add8x8; goto binnish; 2432 case Iop_Add16x4: 2433 fn = (HWord)h_generic_calc_Add16x4; goto binnish; 2434 case Iop_Add32x2: 2435 fn = (HWord)h_generic_calc_Add32x2; goto binnish; 2436 2437 case Iop_Avg8Ux8: 2438 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish; 2439 case Iop_Avg16Ux4: 2440 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish; 2441 2442 case Iop_CmpEQ8x8: 2443 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish; 2444 case Iop_CmpEQ16x4: 2445 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish; 2446 case Iop_CmpEQ32x2: 2447 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish; 2448 2449 case Iop_CmpGT8Sx8: 2450 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish; 2451 case Iop_CmpGT16Sx4: 2452 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish; 2453 case Iop_CmpGT32Sx2: 2454 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish; 2455 2456 case Iop_InterleaveHI8x8: 2457 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish; 2458 case Iop_InterleaveLO8x8: 2459 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish; 2460 case Iop_InterleaveHI16x4: 2461 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish; 2462 case Iop_InterleaveLO16x4: 2463 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish; 2464 case Iop_InterleaveHI32x2: 2465 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; 2466 case Iop_InterleaveLO32x2: 2467 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; 2468 case Iop_CatOddLanes16x4: 2469 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; 2470 case Iop_CatEvenLanes16x4: 2471 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; 2472 case Iop_Perm8x8: 2473 fn = (HWord)h_generic_calc_Perm8x8; goto binnish; 2474 2475 case Iop_Max8Ux8: 2476 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; 2477 case Iop_Max16Sx4: 2478 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish; 2479 case Iop_Min8Ux8: 2480 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish; 2481 case Iop_Min16Sx4: 2482 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish; 2483 2484 case Iop_Mul16x4: 2485 fn = 
(HWord)h_generic_calc_Mul16x4; goto binnish; 2486 case Iop_Mul32x2: 2487 fn = (HWord)h_generic_calc_Mul32x2; goto binnish; 2488 case Iop_MulHi16Sx4: 2489 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; 2490 case Iop_MulHi16Ux4: 2491 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish; 2492 2493 case Iop_QAdd8Sx8: 2494 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish; 2495 case Iop_QAdd16Sx4: 2496 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish; 2497 case Iop_QAdd8Ux8: 2498 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish; 2499 case Iop_QAdd16Ux4: 2500 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; 2501 2502 case Iop_QNarrowBin32Sto16Sx4: 2503 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish; 2504 case Iop_QNarrowBin16Sto8Sx8: 2505 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish; 2506 case Iop_QNarrowBin16Sto8Ux8: 2507 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish; 2508 case Iop_NarrowBin16to8x8: 2509 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish; 2510 case Iop_NarrowBin32to16x4: 2511 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish; 2512 2513 case Iop_QSub8Sx8: 2514 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; 2515 case Iop_QSub16Sx4: 2516 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish; 2517 case Iop_QSub8Ux8: 2518 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish; 2519 case Iop_QSub16Ux4: 2520 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish; 2521 2522 case Iop_Sub8x8: 2523 fn = (HWord)h_generic_calc_Sub8x8; goto binnish; 2524 case Iop_Sub16x4: 2525 fn = (HWord)h_generic_calc_Sub16x4; goto binnish; 2526 case Iop_Sub32x2: 2527 fn = (HWord)h_generic_calc_Sub32x2; goto binnish; 2528 2529 binnish: { 2530 /* Note: the following assumes all helpers are of 2531 signature 2532 ULong fn ( ULong, ULong ), and they are 2533 not marked as regparm functions. 2534 */ 2535 HReg xLo, xHi, yLo, yHi; 2536 HReg tLo = newVRegI(env); 2537 HReg tHi = newVRegI(env); 2538 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); 2539 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi))); 2540 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo))); 2541 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2542 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2543 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2544 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 2545 0, mk_RetLoc_simple(RLPri_2Int) )); 2546 add_to_esp(env, 4*4); 2547 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2548 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2549 *rHi = tHi; 2550 *rLo = tLo; 2551 return; 2552 } 2553 2554 case Iop_ShlN32x2: 2555 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; 2556 case Iop_ShlN16x4: 2557 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; 2558 case Iop_ShlN8x8: 2559 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; 2560 case Iop_ShrN32x2: 2561 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; 2562 case Iop_ShrN16x4: 2563 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty; 2564 case Iop_SarN32x2: 2565 fn = (HWord)h_generic_calc_SarN32x2; goto shifty; 2566 case Iop_SarN16x4: 2567 fn = (HWord)h_generic_calc_SarN16x4; goto shifty; 2568 case Iop_SarN8x8: 2569 fn = (HWord)h_generic_calc_SarN8x8; goto shifty; 2570 shifty: { 2571 /* Note: the following assumes all helpers are of 2572 signature 2573 ULong fn ( ULong, UInt ), and they are 2574 not marked as regparm functions. 
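So the call sequence below pushes the UInt shift amount first, then the ULong value as hi word then lo word (lo ends up at the lowest address), makes the call, pops the 12 bytes of args, and collects the result from %edx:%eax.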
2575 */ 2576 HReg xLo, xHi; 2577 HReg tLo = newVRegI(env); 2578 HReg tHi = newVRegI(env); 2579 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 2580 addInstr(env, X86Instr_Push(y)); 2581 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); 2582 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2583 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2584 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 2585 0, mk_RetLoc_simple(RLPri_2Int) )); 2586 add_to_esp(env, 3*4); 2587 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2588 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2589 *rHi = tHi; 2590 *rLo = tLo; 2591 return; 2592 } 2593 2594 default: 2595 break; 2596 } 2597 } /* if (e->tag == Iex_Binop) */ 2598 2599 2600 /* --------- UNARY ops --------- */ 2601 if (e->tag == Iex_Unop) { 2602 switch (e->Iex.Unop.op) { 2603 2604 /* 32Sto64(e) */ 2605 case Iop_32Sto64: { 2606 HReg tLo = newVRegI(env); 2607 HReg tHi = newVRegI(env); 2608 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2609 addInstr(env, mk_iMOVsd_RR(src,tHi)); 2610 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2611 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi)); 2612 *rHi = tHi; 2613 *rLo = tLo; 2614 return; 2615 } 2616 2617 /* 32Uto64(e) */ 2618 case Iop_32Uto64: { 2619 HReg tLo = newVRegI(env); 2620 HReg tHi = newVRegI(env); 2621 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2622 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2623 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2624 *rHi = tHi; 2625 *rLo = tLo; 2626 return; 2627 } 2628 2629 /* 16Uto64(e) */ 2630 case Iop_16Uto64: { 2631 HReg tLo = newVRegI(env); 2632 HReg tHi = newVRegI(env); 2633 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); 2634 addInstr(env, mk_iMOVsd_RR(src,tLo)); 2635 addInstr(env, X86Instr_Alu32R(Xalu_AND, 2636 X86RMI_Imm(0xFFFF), tLo)); 2637 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2638 *rHi = tHi; 2639 *rLo = tLo; 2640 return; 2641 } 2642 2643 /* V128{HI}to64 */ 2644 case Iop_V128HIto64: 2645 case Iop_V128to64: { 2646 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0; 2647 HReg tLo = newVRegI(env); 2648 HReg tHi = newVRegI(env); 2649 HReg vec = iselVecExpr(env, e->Iex.Unop.arg); 2650 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 2651 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP()); 2652 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP()); 2653 sub_from_esp(env, 16); 2654 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0)); 2655 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2656 X86RMI_Mem(espLO), tLo )); 2657 addInstr(env, X86Instr_Alu32R( Xalu_MOV, 2658 X86RMI_Mem(espHI), tHi )); 2659 add_to_esp(env, 16); 2660 *rHi = tHi; 2661 *rLo = tLo; 2662 return; 2663 } 2664 2665 /* could do better than this, but for now ... 
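the sequence below materialises the condition as 0 or 1 with Set32, shifts it up to bit 31 and arithmetically back down again, giving 0 or 0xFFFFFFFF in the low word, and then copies that to the high word -- that is, 1Sto64 done the slow way.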
*/ 2666 case Iop_1Sto64: { 2667 HReg tLo = newVRegI(env); 2668 HReg tHi = newVRegI(env); 2669 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg); 2670 addInstr(env, X86Instr_Set32(cond,tLo)); 2671 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo)); 2672 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo)); 2673 addInstr(env, mk_iMOVsd_RR(tLo, tHi)); 2674 *rHi = tHi; 2675 *rLo = tLo; 2676 return; 2677 } 2678 2679 /* Not64(e) */ 2680 case Iop_Not64: { 2681 HReg tLo = newVRegI(env); 2682 HReg tHi = newVRegI(env); 2683 HReg sHi, sLo; 2684 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg); 2685 addInstr(env, mk_iMOVsd_RR(sHi, tHi)); 2686 addInstr(env, mk_iMOVsd_RR(sLo, tLo)); 2687 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi)); 2688 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo)); 2689 *rHi = tHi; 2690 *rLo = tLo; 2691 return; 2692 } 2693 2694 /* Left64(e) */ 2695 case Iop_Left64: { 2696 HReg yLo, yHi; 2697 HReg tLo = newVRegI(env); 2698 HReg tHi = newVRegI(env); 2699 /* yHi:yLo = arg */ 2700 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); 2701 /* tLo = 0 - yLo, and set carry */ 2702 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo)); 2703 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo)); 2704 /* tHi = 0 - yHi - carry */ 2705 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); 2706 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); 2707 /* So now we have tHi:tLo = -arg. To finish off, or 'arg' 2708 back in, so as to give the final result 2709 tHi:tLo = arg | -arg. */ 2710 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); 2711 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); 2712 *rHi = tHi; 2713 *rLo = tLo; 2714 return; 2715 } 2716 2717 /* --- patterns rooted at: CmpwNEZ64 --- */ 2718 2719 /* CmpwNEZ64(e) */ 2720 case Iop_CmpwNEZ64: { 2721 2722 DECLARE_PATTERN(p_CmpwNEZ64_Or64); 2723 DEFINE_PATTERN(p_CmpwNEZ64_Or64, 2724 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); 2725 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { 2726 /* CmpwNEZ64(Or64(x,y)) */ 2727 HReg xHi,xLo,yHi,yLo; 2728 HReg xBoth = newVRegI(env); 2729 HReg merged = newVRegI(env); 2730 HReg tmp2 = newVRegI(env); 2731 2732 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); 2733 addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); 2734 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2735 X86RMI_Reg(xLo),xBoth)); 2736 2737 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); 2738 addInstr(env, mk_iMOVsd_RR(yHi,merged)); 2739 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2740 X86RMI_Reg(yLo),merged)); 2741 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2742 X86RMI_Reg(xBoth),merged)); 2743 2744 /* tmp2 = (merged | -merged) >>s 31 */ 2745 addInstr(env, mk_iMOVsd_RR(merged,tmp2)); 2746 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2747 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2748 X86RMI_Reg(merged), tmp2)); 2749 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); 2750 *rHi = tmp2; 2751 *rLo = tmp2; 2752 return; 2753 } else { 2754 /* CmpwNEZ64(e) */ 2755 HReg srcLo, srcHi; 2756 HReg tmp1 = newVRegI(env); 2757 HReg tmp2 = newVRegI(env); 2758 /* srcHi:srcLo = arg */ 2759 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); 2760 /* tmp1 = srcHi | srcLo */ 2761 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); 2762 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2763 X86RMI_Reg(srcLo), tmp1)); 2764 /* tmp2 = (tmp1 | -tmp1) >>s 31 */ 2765 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); 2766 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); 2767 addInstr(env, X86Instr_Alu32R(Xalu_OR, 2768 X86RMI_Reg(tmp1), tmp2)); 2769 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, 
tmp2)); 2770 *rHi = tmp2; 2771 *rLo = tmp2; 2772 return; 2773 } 2774 } 2775 2776 /* ReinterpF64asI64(e) */ 2777 /* Given an IEEE754 double, produce an I64 with the same bit 2778 pattern. */ 2779 case Iop_ReinterpF64asI64: { 2780 HReg rf = iselDblExpr(env, e->Iex.Unop.arg); 2781 HReg tLo = newVRegI(env); 2782 HReg tHi = newVRegI(env); 2783 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); 2784 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP()); 2785 /* paranoia */ 2786 set_FPU_rounding_default(env); 2787 /* subl $8, %esp */ 2788 sub_from_esp(env, 8); 2789 /* gstD %rf, 0(%esp) */ 2790 addInstr(env, 2791 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp)); 2792 /* movl 0(%esp), %tLo */ 2793 addInstr(env, 2794 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo)); 2795 /* movl 4(%esp), %tHi */ 2796 addInstr(env, 2797 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi)); 2798 /* addl $8, %esp */ 2799 add_to_esp(env, 8); 2800 *rHi = tHi; 2801 *rLo = tLo; 2802 return; 2803 } 2804 2805 case Iop_CmpNEZ32x2: 2806 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish; 2807 case Iop_CmpNEZ16x4: 2808 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish; 2809 case Iop_CmpNEZ8x8: 2810 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish; 2811 unish: { 2812 /* Note: the following assumes all helpers are of 2813 signature 2814 ULong fn ( ULong ), and they are 2815 not marked as regparm functions. 2816 */ 2817 HReg xLo, xHi; 2818 HReg tLo = newVRegI(env); 2819 HReg tHi = newVRegI(env); 2820 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); 2821 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); 2822 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); 2823 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 2824 0, mk_RetLoc_simple(RLPri_2Int) )); 2825 add_to_esp(env, 2*4); 2826 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2827 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2828 *rHi = tHi; 2829 *rLo = tLo; 2830 return; 2831 } 2832 2833 default: 2834 break; 2835 } 2836 } /* if (e->tag == Iex_Unop) */ 2837 2838 2839 /* --------- CCALL --------- */ 2840 if (e->tag == Iex_CCall) { 2841 HReg tLo = newVRegI(env); 2842 HReg tHi = newVRegI(env); 2843 2844 /* Marshal args, do the call, clear stack. */ 2845 UInt addToSp = 0; 2846 RetLoc rloc = mk_RetLoc_INVALID(); 2847 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, 2848 e->Iex.CCall.cee, 2849 e->Iex.CCall.retty, e->Iex.CCall.args ); 2850 vassert(is_sane_RetLoc(rloc)); 2851 vassert(rloc.pri == RLPri_2Int); 2852 vassert(addToSp == 0); 2853 /* */ 2854 2855 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi)); 2856 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo)); 2857 *rHi = tHi; 2858 *rLo = tLo; 2859 return; 2860 } 2861 2862 ppIRExpr(e); 2863 vpanic("iselInt64Expr"); 2864 } 2865 2866 2867 /*---------------------------------------------------------*/ 2868 /*--- ISEL: Floating point expressions (32 bit) ---*/ 2869 /*---------------------------------------------------------*/ 2870 2871 /* Nothing interesting here; really just wrappers for 2872 64-bit stuff. 
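F32 values live in the same HRcFlt64 x87-style registers as F64s; only the 4-byte loads and stores (FpLdSt with size 4) distinguish the 32-bit cases.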
*/ 2873 2874 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) 2875 { 2876 HReg r = iselFltExpr_wrk( env, e ); 2877 # if 0 2878 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2879 # endif 2880 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */ 2881 vassert(hregIsVirtual(r)); 2882 return r; 2883 } 2884 2885 /* DO NOT CALL THIS DIRECTLY */ 2886 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) 2887 { 2888 IRType ty = typeOfIRExpr(env->type_env,e); 2889 vassert(ty == Ity_F32); 2890 2891 if (e->tag == Iex_RdTmp) { 2892 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 2893 } 2894 2895 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 2896 X86AMode* am; 2897 HReg res = newVRegF(env); 2898 vassert(e->Iex.Load.ty == Ity_F32); 2899 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 2900 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am)); 2901 return res; 2902 } 2903 2904 if (e->tag == Iex_Binop 2905 && e->Iex.Binop.op == Iop_F64toF32) { 2906 /* Although the result is still held in a standard FPU register, 2907 we need to round it to reflect the loss of accuracy/range 2908 entailed in casting it to a 32-bit float. */ 2909 HReg dst = newVRegF(env); 2910 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 2911 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2912 addInstr(env, X86Instr_Fp64to32(src,dst)); 2913 set_FPU_rounding_default( env ); 2914 return dst; 2915 } 2916 2917 if (e->tag == Iex_Get) { 2918 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 2919 hregX86_EBP() ); 2920 HReg res = newVRegF(env); 2921 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am )); 2922 return res; 2923 } 2924 2925 if (e->tag == Iex_Unop 2926 && e->Iex.Unop.op == Iop_ReinterpI32asF32) { 2927 /* Given an I32, produce an IEEE754 float with the same bit 2928 pattern. */ 2929 HReg dst = newVRegF(env); 2930 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 2931 /* paranoia */ 2932 addInstr(env, X86Instr_Push(rmi)); 2933 addInstr(env, X86Instr_FpLdSt( 2934 True/*load*/, 4, dst, 2935 X86AMode_IR(0, hregX86_ESP()))); 2936 add_to_esp(env, 4); 2937 return dst; 2938 } 2939 2940 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { 2941 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2); 2942 HReg dst = newVRegF(env); 2943 2944 /* rf now holds the value to be rounded. The first thing to do 2945 is set the FPU's rounding mode accordingly. */ 2946 2947 /* Set host rounding mode */ 2948 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 2949 2950 /* grndint %rf, %dst */ 2951 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 2952 2953 /* Restore default FPU rounding. */ 2954 set_FPU_rounding_default( env ); 2955 2956 return dst; 2957 } 2958 2959 ppIRExpr(e); 2960 vpanic("iselFltExpr_wrk"); 2961 } 2962 2963 2964 /*---------------------------------------------------------*/ 2965 /*--- ISEL: Floating point expressions (64 bit) ---*/ 2966 /*---------------------------------------------------------*/ 2967 2968 /* Compute a 64-bit floating point value into a register, the identity 2969 of which is returned. As with iselIntExpr_R, the reg may be either 2970 real or virtual; in any case it must not be changed by subsequent 2971 code emitted by the caller. */ 2972 2973 /* IEEE 754 formats. 
From http://www.freesoft.org/CIE/RFC/1832/32.htm: 2974 2975 Type S (1 bit) E (11 bits) F (52 bits) 2976 ---- --------- ----------- ----------- 2977 signalling NaN u 2047 (max) .0uuuuu---u 2978 (with at least 2979 one 1 bit) 2980 quiet NaN u 2047 (max) .1uuuuu---u 2981 2982 negative infinity 1 2047 (max) .000000---0 2983 2984 positive infinity 0 2047 (max) .000000---0 2985 2986 negative zero 1 0 .000000---0 2987 2988 positive zero 0 0 .000000---0 2989 */ 2990 2991 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) 2992 { 2993 HReg r = iselDblExpr_wrk( env, e ); 2994 # if 0 2995 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 2996 # endif 2997 vassert(hregClass(r) == HRcFlt64); 2998 vassert(hregIsVirtual(r)); 2999 return r; 3000 } 3001 3002 /* DO NOT CALL THIS DIRECTLY */ 3003 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) 3004 { 3005 IRType ty = typeOfIRExpr(env->type_env,e); 3006 vassert(e); 3007 vassert(ty == Ity_F64); 3008 3009 if (e->tag == Iex_RdTmp) { 3010 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3011 } 3012 3013 if (e->tag == Iex_Const) { 3014 union { UInt u32x2[2]; ULong u64; Double f64; } u; 3015 HReg freg = newVRegF(env); 3016 vassert(sizeof(u) == 8); 3017 vassert(sizeof(u.u64) == 8); 3018 vassert(sizeof(u.f64) == 8); 3019 vassert(sizeof(u.u32x2) == 8); 3020 3021 if (e->Iex.Const.con->tag == Ico_F64) { 3022 u.f64 = e->Iex.Const.con->Ico.F64; 3023 } 3024 else if (e->Iex.Const.con->tag == Ico_F64i) { 3025 u.u64 = e->Iex.Const.con->Ico.F64i; 3026 } 3027 else 3028 vpanic("iselDblExpr(x86): const"); 3029 3030 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1]))); 3031 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0]))); 3032 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg, 3033 X86AMode_IR(0, hregX86_ESP()))); 3034 add_to_esp(env, 8); 3035 return freg; 3036 } 3037 3038 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3039 X86AMode* am; 3040 HReg res = newVRegF(env); 3041 vassert(e->Iex.Load.ty == Ity_F64); 3042 am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3043 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am)); 3044 return res; 3045 } 3046 3047 if (e->tag == Iex_Get) { 3048 X86AMode* am = X86AMode_IR( e->Iex.Get.offset, 3049 hregX86_EBP() ); 3050 HReg res = newVRegF(env); 3051 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 3052 return res; 3053 } 3054 3055 if (e->tag == Iex_GetI) { 3056 X86AMode* am 3057 = genGuestArrayOffset( 3058 env, e->Iex.GetI.descr, 3059 e->Iex.GetI.ix, e->Iex.GetI.bias ); 3060 HReg res = newVRegF(env); 3061 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am )); 3062 return res; 3063 } 3064 3065 if (e->tag == Iex_Triop) { 3066 X86FpOp fpop = Xfp_INVALID; 3067 IRTriop *triop = e->Iex.Triop.details; 3068 switch (triop->op) { 3069 case Iop_AddF64: fpop = Xfp_ADD; break; 3070 case Iop_SubF64: fpop = Xfp_SUB; break; 3071 case Iop_MulF64: fpop = Xfp_MUL; break; 3072 case Iop_DivF64: fpop = Xfp_DIV; break; 3073 case Iop_ScaleF64: fpop = Xfp_SCALE; break; 3074 case Iop_Yl2xF64: fpop = Xfp_YL2X; break; 3075 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break; 3076 case Iop_AtanF64: fpop = Xfp_ATAN; break; 3077 case Iop_PRemF64: fpop = Xfp_PREM; break; 3078 case Iop_PRem1F64: fpop = Xfp_PREM1; break; 3079 default: break; 3080 } 3081 if (fpop != Xfp_INVALID) { 3082 HReg res = newVRegF(env); 3083 HReg srcL = iselDblExpr(env, triop->arg2); 3084 HReg srcR = iselDblExpr(env, triop->arg3); 3085 /* XXXROUNDINGFIXME */ 3086 /* set roundingmode here */ 3087 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res)); 3088 if (fpop != Xfp_ADD && 
fpop != Xfp_SUB 3089 && fpop != Xfp_MUL && fpop != Xfp_DIV) 3090 roundToF64(env, res); 3091 return res; 3092 } 3093 } 3094 3095 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) { 3096 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2); 3097 HReg dst = newVRegF(env); 3098 3099 /* rf now holds the value to be rounded. The first thing to do 3100 is set the FPU's rounding mode accordingly. */ 3101 3102 /* Set host rounding mode */ 3103 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 3104 3105 /* grndint %rf, %dst */ 3106 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); 3107 3108 /* Restore default FPU rounding. */ 3109 set_FPU_rounding_default( env ); 3110 3111 return dst; 3112 } 3113 3114 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) { 3115 HReg dst = newVRegF(env); 3116 HReg rHi,rLo; 3117 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2); 3118 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3119 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3120 3121 /* Set host rounding mode */ 3122 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); 3123 3124 addInstr(env, X86Instr_FpLdStI( 3125 True/*load*/, 8, dst, 3126 X86AMode_IR(0, hregX86_ESP()))); 3127 3128 /* Restore default FPU rounding. */ 3129 set_FPU_rounding_default( env ); 3130 3131 add_to_esp(env, 8); 3132 return dst; 3133 } 3134 3135 if (e->tag == Iex_Binop) { 3136 X86FpOp fpop = Xfp_INVALID; 3137 switch (e->Iex.Binop.op) { 3138 case Iop_SinF64: fpop = Xfp_SIN; break; 3139 case Iop_CosF64: fpop = Xfp_COS; break; 3140 case Iop_TanF64: fpop = Xfp_TAN; break; 3141 case Iop_2xm1F64: fpop = Xfp_2XM1; break; 3142 case Iop_SqrtF64: fpop = Xfp_SQRT; break; 3143 default: break; 3144 } 3145 if (fpop != Xfp_INVALID) { 3146 HReg res = newVRegF(env); 3147 HReg src = iselDblExpr(env, e->Iex.Binop.arg2); 3148 /* XXXROUNDINGFIXME */ 3149 /* set roundingmode here */ 3150 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition 3151 codes. I don't think that matters, since this insn 3152 selector never generates such an instruction intervening 3153 between an flag-setting instruction and a flag-using 3154 instruction. */ 3155 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3156 if (fpop != Xfp_SQRT 3157 && fpop != Xfp_NEG && fpop != Xfp_ABS) 3158 roundToF64(env, res); 3159 return res; 3160 } 3161 } 3162 3163 if (e->tag == Iex_Unop) { 3164 X86FpOp fpop = Xfp_INVALID; 3165 switch (e->Iex.Unop.op) { 3166 case Iop_NegF64: fpop = Xfp_NEG; break; 3167 case Iop_AbsF64: fpop = Xfp_ABS; break; 3168 default: break; 3169 } 3170 if (fpop != Xfp_INVALID) { 3171 HReg res = newVRegF(env); 3172 HReg src = iselDblExpr(env, e->Iex.Unop.arg); 3173 addInstr(env, X86Instr_FpUnary(fpop,src,res)); 3174 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS, 3175 but might need to do that for other unary ops. */ 3176 return res; 3177 } 3178 } 3179 3180 if (e->tag == Iex_Unop) { 3181 switch (e->Iex.Unop.op) { 3182 case Iop_I32StoF64: { 3183 HReg dst = newVRegF(env); 3184 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg); 3185 addInstr(env, X86Instr_Push(X86RMI_Reg(ri))); 3186 set_FPU_rounding_default(env); 3187 addInstr(env, X86Instr_FpLdStI( 3188 True/*load*/, 4, dst, 3189 X86AMode_IR(0, hregX86_ESP()))); 3190 add_to_esp(env, 4); 3191 return dst; 3192 } 3193 case Iop_ReinterpI64asF64: { 3194 /* Given an I64, produce an IEEE754 double with the same 3195 bit pattern. 
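Done by pushing the high word then the low word (so the low word lands at the lower address) and doing an 8-byte FP load from 0(%esp).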
*/ 3196 HReg dst = newVRegF(env); 3197 HReg rHi, rLo; 3198 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg); 3199 /* paranoia */ 3200 set_FPU_rounding_default(env); 3201 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3202 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3203 addInstr(env, X86Instr_FpLdSt( 3204 True/*load*/, 8, dst, 3205 X86AMode_IR(0, hregX86_ESP()))); 3206 add_to_esp(env, 8); 3207 return dst; 3208 } 3209 case Iop_F32toF64: { 3210 /* this is a no-op */ 3211 HReg res = iselFltExpr(env, e->Iex.Unop.arg); 3212 return res; 3213 } 3214 default: 3215 break; 3216 } 3217 } 3218 3219 /* --------- MULTIPLEX --------- */ 3220 if (e->tag == Iex_ITE) { // VFD 3221 if (ty == Ity_F64 3222 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { 3223 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); 3224 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); 3225 HReg dst = newVRegF(env); 3226 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst)); 3227 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3228 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst)); 3229 return dst; 3230 } 3231 } 3232 3233 ppIRExpr(e); 3234 vpanic("iselDblExpr_wrk"); 3235 } 3236 3237 3238 /*---------------------------------------------------------*/ 3239 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/ 3240 /*---------------------------------------------------------*/ 3241 3242 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) 3243 { 3244 HReg r = iselVecExpr_wrk( env, e ); 3245 # if 0 3246 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3247 # endif 3248 vassert(hregClass(r) == HRcVec128); 3249 vassert(hregIsVirtual(r)); 3250 return r; 3251 } 3252 3253 3254 /* DO NOT CALL THIS DIRECTLY */ 3255 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) 3256 { 3257 3258 # define REQUIRE_SSE1 \ 3259 do { if (env->hwcaps == 0/*baseline, no sse*/ \ 3260 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \ 3261 goto vec_fail; \ 3262 } while (0) 3263 3264 # define REQUIRE_SSE2 \ 3265 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ 3266 goto vec_fail; \ 3267 } while (0) 3268 3269 # define SSE2_OR_ABOVE \ 3270 (env->hwcaps & VEX_HWCAPS_X86_SSE2) 3271 3272 HWord fn = 0; /* address of helper fn, if required */ 3273 MatchInfo mi; 3274 Bool arg1isEReg = False; 3275 X86SseOp op = Xsse_INVALID; 3276 IRType ty = typeOfIRExpr(env->type_env,e); 3277 vassert(e); 3278 vassert(ty == Ity_V128); 3279 3280 REQUIRE_SSE1; 3281 3282 if (e->tag == Iex_RdTmp) { 3283 return lookupIRTemp(env, e->Iex.RdTmp.tmp); 3284 } 3285 3286 if (e->tag == Iex_Get) { 3287 HReg dst = newVRegV(env); 3288 addInstr(env, X86Instr_SseLdSt( 3289 True/*load*/, 3290 dst, 3291 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP()) 3292 ) 3293 ); 3294 return dst; 3295 } 3296 3297 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { 3298 HReg dst = newVRegV(env); 3299 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr); 3300 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 3301 return dst; 3302 } 3303 3304 if (e->tag == Iex_Const) { 3305 HReg dst = newVRegV(env); 3306 vassert(e->Iex.Const.con->tag == Ico_V128); 3307 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst)); 3308 return dst; 3309 } 3310 3311 if (e->tag == Iex_Unop) { 3312 3313 if (SSE2_OR_ABOVE) { 3314 /* 64UtoV128(LDle:I64(addr)) */ 3315 DECLARE_PATTERN(p_zwiden_load64); 3316 DEFINE_PATTERN(p_zwiden_load64, 3317 unop(Iop_64UtoV128, 3318 IRExpr_Load(Iend_LE,Ity_I64,bind(0)))); 3319 if (matchIRExpr(&mi, p_zwiden_load64, e)) { 3320 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]); 3321 
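/* SseLdzLO(8, ..) below does an 8-byte load straight from memory into
                 the low half of dst, zero-filling the rest, so the 64UtoV128
                 widening needs no separate zeroing step. */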
HReg dst = newVRegV(env); 3322 addInstr(env, X86Instr_SseLdzLO(8, dst, am)); 3323 return dst; 3324 } 3325 } 3326 3327 switch (e->Iex.Unop.op) { 3328 3329 case Iop_NotV128: { 3330 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3331 return do_sse_Not128(env, arg); 3332 } 3333 3334 case Iop_CmpNEZ64x2: { 3335 /* We can use SSE2 instructions for this. */ 3336 /* Ideally, we want to do a 64Ix2 comparison against zero of 3337 the operand. Problem is no such insn exists. Solution 3338 therefore is to do a 32Ix4 comparison instead, and bitwise- 3339 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and 3340 let the not'd result of this initial comparison be a:b:c:d. 3341 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use 3342 pshufd to create a value b:a:d:c, and OR that with a:b:c:d, 3343 giving the required result. 3344 3345 The required selection sequence is 2,3,0,1, which 3346 according to Intel's documentation means the pshufd 3347 literal value is 0xB1, that is, 3348 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0) 3349 */ 3350 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3351 HReg tmp = newVRegV(env); 3352 HReg dst = newVRegV(env); 3353 REQUIRE_SSE2; 3354 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp)); 3355 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp)); 3356 tmp = do_sse_Not128(env, tmp); 3357 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst)); 3358 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst)); 3359 return dst; 3360 } 3361 3362 case Iop_CmpNEZ32x4: { 3363 /* Sigh, we have to generate lousy code since this has to 3364 work on SSE1 hosts */ 3365 /* basically, the idea is: for each lane: 3366 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1) 3367 sbbl %r, %r (now %r = 1Sto32(CF)) 3368 movl %r, lane 3369 */ 3370 Int i; 3371 X86AMode* am; 3372 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3373 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3374 HReg dst = newVRegV(env); 3375 HReg r32 = newVRegI(env); 3376 sub_from_esp(env, 16); 3377 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0)); 3378 for (i = 0; i < 4; i++) { 3379 am = X86AMode_IR(i*4, hregX86_ESP()); 3380 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32)); 3381 addInstr(env, X86Instr_Unary32(Xun_NEG, r32)); 3382 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32)); 3383 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am)); 3384 } 3385 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3386 add_to_esp(env, 16); 3387 return dst; 3388 } 3389 3390 case Iop_CmpNEZ8x16: 3391 case Iop_CmpNEZ16x8: { 3392 /* We can use SSE2 instructions for this. */ 3393 HReg arg; 3394 HReg vec0 = newVRegV(env); 3395 HReg vec1 = newVRegV(env); 3396 HReg dst = newVRegV(env); 3397 X86SseOp cmpOp 3398 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? 
Xsse_CMPEQ16 3399 : Xsse_CMPEQ8; 3400 REQUIRE_SSE2; 3401 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0)); 3402 addInstr(env, mk_vMOVsd_RR(vec0, vec1)); 3403 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1)); 3404 /* defer arg computation to here so as to give CMPEQF as long 3405 as possible to complete */ 3406 arg = iselVecExpr(env, e->Iex.Unop.arg); 3407 /* vec0 is all 0s; vec1 is all 1s */ 3408 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3409 /* 16x8 or 8x16 comparison == */ 3410 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst)); 3411 /* invert result */ 3412 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst)); 3413 return dst; 3414 } 3415 3416 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary; 3417 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary; 3418 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary; 3419 do_32Fx4_unary: 3420 { 3421 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3422 HReg dst = newVRegV(env); 3423 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst)); 3424 return dst; 3425 } 3426 3427 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary; 3428 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary; 3429 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary; 3430 do_64Fx2_unary: 3431 { 3432 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3433 HReg dst = newVRegV(env); 3434 REQUIRE_SSE2; 3435 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst)); 3436 return dst; 3437 } 3438 3439 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary; 3440 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary; 3441 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary; 3442 do_32F0x4_unary: 3443 { 3444 /* A bit subtle. We have to copy the arg to the result 3445 register first, because actually doing the SSE scalar insn 3446 leaves the upper 3/4 of the destination register 3447 unchanged. Whereas the required semantics of these 3448 primops is that the upper 3/4 is simply copied in from the 3449 argument. */ 3450 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3451 HReg dst = newVRegV(env); 3452 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3453 addInstr(env, X86Instr_Sse32FLo(op, arg, dst)); 3454 return dst; 3455 } 3456 3457 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary; 3458 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary; 3459 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary; 3460 do_64F0x2_unary: 3461 { 3462 /* A bit subtle. We have to copy the arg to the result 3463 register first, because actually doing the SSE scalar insn 3464 leaves the upper half of the destination register 3465 unchanged. Whereas the required semantics of these 3466 primops is that the upper half is simply copied in from the 3467 argument. 
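Hence the mk_vMOVsd_RR(arg, dst) below, before the scalar op itself.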
*/ 3468 HReg arg = iselVecExpr(env, e->Iex.Unop.arg); 3469 HReg dst = newVRegV(env); 3470 REQUIRE_SSE2; 3471 addInstr(env, mk_vMOVsd_RR(arg, dst)); 3472 addInstr(env, X86Instr_Sse64FLo(op, arg, dst)); 3473 return dst; 3474 } 3475 3476 case Iop_32UtoV128: { 3477 HReg dst = newVRegV(env); 3478 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3479 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg); 3480 addInstr(env, X86Instr_Push(rmi)); 3481 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0)); 3482 add_to_esp(env, 4); 3483 return dst; 3484 } 3485 3486 case Iop_64UtoV128: { 3487 HReg rHi, rLo; 3488 HReg dst = newVRegV(env); 3489 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3490 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); 3491 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi))); 3492 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo))); 3493 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0)); 3494 add_to_esp(env, 8); 3495 return dst; 3496 } 3497 3498 default: 3499 break; 3500 } /* switch (e->Iex.Unop.op) */ 3501 } /* if (e->tag == Iex_Unop) */ 3502 3503 if (e->tag == Iex_Binop) { 3504 switch (e->Iex.Binop.op) { 3505 3506 case Iop_SetV128lo32: { 3507 HReg dst = newVRegV(env); 3508 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3509 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); 3510 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3511 sub_from_esp(env, 16); 3512 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3513 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0)); 3514 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3515 add_to_esp(env, 16); 3516 return dst; 3517 } 3518 3519 case Iop_SetV128lo64: { 3520 HReg dst = newVRegV(env); 3521 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1); 3522 HReg srcIhi, srcIlo; 3523 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3524 X86AMode* esp4 = advance4(esp0); 3525 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2); 3526 sub_from_esp(env, 16); 3527 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0)); 3528 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0)); 3529 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4)); 3530 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3531 add_to_esp(env, 16); 3532 return dst; 3533 } 3534 3535 case Iop_64HLtoV128: { 3536 HReg r3, r2, r1, r0; 3537 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3538 X86AMode* esp4 = advance4(esp0); 3539 X86AMode* esp8 = advance4(esp4); 3540 X86AMode* esp12 = advance4(esp8); 3541 HReg dst = newVRegV(env); 3542 /* do this via the stack (easy, convenient, etc) */ 3543 sub_from_esp(env, 16); 3544 /* Do the less significant 64 bits */ 3545 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2); 3546 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0)); 3547 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4)); 3548 /* Do the more significant 64 bits */ 3549 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1); 3550 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8)); 3551 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12)); 3552 /* Fetch result back from stack. 
*/ 3553 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0)); 3554 add_to_esp(env, 16); 3555 return dst; 3556 } 3557 3558 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4; 3559 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4; 3560 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4; 3561 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4; 3562 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4; 3563 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4; 3564 do_32Fx4: 3565 { 3566 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3567 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3568 HReg dst = newVRegV(env); 3569 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3570 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); 3571 return dst; 3572 } 3573 3574 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2; 3575 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; 3576 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; 3577 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2; 3578 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2; 3579 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2; 3580 do_64Fx2: 3581 { 3582 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3583 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3584 HReg dst = newVRegV(env); 3585 REQUIRE_SSE2; 3586 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3587 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); 3588 return dst; 3589 } 3590 3591 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4; 3592 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4; 3593 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4; 3594 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4; 3595 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4; 3596 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4; 3597 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4; 3598 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4; 3599 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4; 3600 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4; 3601 do_32F0x4: { 3602 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3603 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3604 HReg dst = newVRegV(env); 3605 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3606 addInstr(env, X86Instr_Sse32FLo(op, argR, dst)); 3607 return dst; 3608 } 3609 3610 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2; 3611 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2; 3612 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2; 3613 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2; 3614 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2; 3615 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2; 3616 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2; 3617 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2; 3618 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2; 3619 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2; 3620 do_64F0x2: { 3621 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3622 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3623 HReg dst = newVRegV(env); 3624 REQUIRE_SSE2; 3625 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3626 addInstr(env, X86Instr_Sse64FLo(op, argR, dst)); 3627 return dst; 3628 } 3629 3630 case Iop_QNarrowBin32Sto16Sx8: 3631 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; 3632 case Iop_QNarrowBin16Sto8Sx16: 3633 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; 3634 case Iop_QNarrowBin16Sto8Ux16: 3635 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; 3636 3637 case Iop_InterleaveHI8x16: 3638 op = Xsse_UNPCKHB; arg1isEReg = True; goto 
do_SseReRg; 3639 case Iop_InterleaveHI16x8: 3640 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg; 3641 case Iop_InterleaveHI32x4: 3642 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; 3643 case Iop_InterleaveHI64x2: 3644 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; 3645 3646 case Iop_InterleaveLO8x16: 3647 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; 3648 case Iop_InterleaveLO16x8: 3649 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg; 3650 case Iop_InterleaveLO32x4: 3651 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg; 3652 case Iop_InterleaveLO64x2: 3653 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg; 3654 3655 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg; 3656 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg; 3657 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg; 3658 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg; 3659 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg; 3660 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg; 3661 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg; 3662 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg; 3663 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg; 3664 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg; 3665 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg; 3666 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg; 3667 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg; 3668 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg; 3669 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg; 3670 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg; 3671 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg; 3672 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg; 3673 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg; 3674 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg; 3675 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg; 3676 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg; 3677 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg; 3678 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg; 3679 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg; 3680 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg; 3681 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg; 3682 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg; 3683 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg; 3684 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg; 3685 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg; 3686 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg; 3687 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; 3688 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; 3689 do_SseReRg: { 3690 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); 3691 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); 3692 HReg dst = newVRegV(env); 3693 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR) 3694 REQUIRE_SSE2; 3695 if (arg1isEReg) { 3696 addInstr(env, mk_vMOVsd_RR(arg2, dst)); 3697 addInstr(env, X86Instr_SseReRg(op, arg1, dst)); 3698 } else { 3699 addInstr(env, mk_vMOVsd_RR(arg1, dst)); 3700 addInstr(env, X86Instr_SseReRg(op, arg2, dst)); 3701 } 3702 return dst; 3703 } 3704 3705 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift; 3706 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift; 3707 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift; 3708 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift; 3709 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift; 3710 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift; 3711 case Iop_ShrN32x4: 
op = Xsse_SHR32; goto do_SseShift; 3712 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift; 3713 do_SseShift: { 3714 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); 3715 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); 3716 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); 3717 HReg ereg = newVRegV(env); 3718 HReg dst = newVRegV(env); 3719 REQUIRE_SSE2; 3720 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3721 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3722 addInstr(env, X86Instr_Push(X86RMI_Imm(0))); 3723 addInstr(env, X86Instr_Push(rmi)); 3724 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0)); 3725 addInstr(env, mk_vMOVsd_RR(greg, dst)); 3726 addInstr(env, X86Instr_SseReRg(op, ereg, dst)); 3727 add_to_esp(env, 16); 3728 return dst; 3729 } 3730 3731 case Iop_NarrowBin32to16x8: 3732 fn = (HWord)h_generic_calc_NarrowBin32to16x8; 3733 goto do_SseAssistedBinary; 3734 case Iop_NarrowBin16to8x16: 3735 fn = (HWord)h_generic_calc_NarrowBin16to8x16; 3736 goto do_SseAssistedBinary; 3737 do_SseAssistedBinary: { 3738 /* As with the amd64 case (where this is copied from) we 3739 generate pretty bad code. */ 3740 vassert(fn != 0); 3741 HReg dst = newVRegV(env); 3742 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1); 3743 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2); 3744 HReg argp = newVRegI(env); 3745 /* subl $112, %esp -- make a space */ 3746 sub_from_esp(env, 112); 3747 /* leal 48(%esp), %r_argp -- point into it */ 3748 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()), 3749 argp)); 3750 /* andl $-16, %r_argp -- 16-align the pointer */ 3751 addInstr(env, X86Instr_Alu32R(Xalu_AND, 3752 X86RMI_Imm( ~(UInt)15 ), 3753 argp)); 3754 /* Prepare 3 arg regs: 3755 leal 0(%r_argp), %eax 3756 leal 16(%r_argp), %edx 3757 leal 32(%r_argp), %ecx 3758 */ 3759 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp), 3760 hregX86_EAX())); 3761 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp), 3762 hregX86_EDX())); 3763 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp), 3764 hregX86_ECX())); 3765 /* Store the two args, at (%edx) and (%ecx): 3766 movupd %argL, 0(%edx) 3767 movupd %argR, 0(%ecx) 3768 */ 3769 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL, 3770 X86AMode_IR(0, hregX86_EDX()))); 3771 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR, 3772 X86AMode_IR(0, hregX86_ECX()))); 3773 /* call the helper */ 3774 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3775 3, mk_RetLoc_simple(RLPri_None) )); 3776 /* fetch the result from memory, using %r_argp, which the 3777 register allocator will keep alive across the call. 
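            That is (sketch): the helper has written its V128 result
            to 0(%r_argp), so we do
               movupd 0(%r_argp), %r_dst
               addl   $112, %esp
            to reload the result and then release the scratch area.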
*/ 3778 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst, 3779 X86AMode_IR(0, argp))); 3780 /* and finally, clear the space */ 3781 add_to_esp(env, 112); 3782 return dst; 3783 } 3784 3785 default: 3786 break; 3787 } /* switch (e->Iex.Binop.op) */ 3788 } /* if (e->tag == Iex_Binop) */ 3789 3790 3791 if (e->tag == Iex_Triop) { 3792 IRTriop *triop = e->Iex.Triop.details; 3793 switch (triop->op) { 3794 3795 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm; 3796 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm; 3797 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm; 3798 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm; 3799 do_32Fx4_w_rm: 3800 { 3801 HReg argL = iselVecExpr(env, triop->arg2); 3802 HReg argR = iselVecExpr(env, triop->arg3); 3803 HReg dst = newVRegV(env); 3804 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3805 /* XXXROUNDINGFIXME */ 3806 /* set roundingmode here */ 3807 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); 3808 return dst; 3809 } 3810 3811 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm; 3812 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm; 3813 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm; 3814 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm; 3815 do_64Fx2_w_rm: 3816 { 3817 HReg argL = iselVecExpr(env, triop->arg2); 3818 HReg argR = iselVecExpr(env, triop->arg3); 3819 HReg dst = newVRegV(env); 3820 REQUIRE_SSE2; 3821 addInstr(env, mk_vMOVsd_RR(argL, dst)); 3822 /* XXXROUNDINGFIXME */ 3823 /* set roundingmode here */ 3824 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); 3825 return dst; 3826 } 3827 3828 default: 3829 break; 3830 } /* switch (triop->op) */ 3831 } /* if (e->tag == Iex_Triop) */ 3832 3833 3834 if (e->tag == Iex_ITE) { // VFD 3835 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue); 3836 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse); 3837 HReg dst = newVRegV(env); 3838 addInstr(env, mk_vMOVsd_RR(r1,dst)); 3839 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond); 3840 addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst)); 3841 return dst; 3842 } 3843 3844 vec_fail: 3845 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n", 3846 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps)); 3847 ppIRExpr(e); 3848 vpanic("iselVecExpr_wrk"); 3849 3850 # undef REQUIRE_SSE1 3851 # undef REQUIRE_SSE2 3852 # undef SSE2_OR_ABOVE 3853 } 3854 3855 3856 /*---------------------------------------------------------*/ 3857 /*--- ISEL: Statements ---*/ 3858 /*---------------------------------------------------------*/ 3859 3860 static void iselStmt ( ISelEnv* env, IRStmt* stmt ) 3861 { 3862 if (vex_traceflags & VEX_TRACE_VCODE) { 3863 vex_printf("\n-- "); 3864 ppIRStmt(stmt); 3865 vex_printf("\n"); 3866 } 3867 3868 switch (stmt->tag) { 3869 3870 /* --------- STORE --------- */ 3871 case Ist_Store: { 3872 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); 3873 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); 3874 IREndness end = stmt->Ist.Store.end; 3875 3876 if (tya != Ity_I32 || end != Iend_LE) 3877 goto stmt_fail; 3878 3879 if (tyd == Ity_I32) { 3880 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3881 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data); 3882 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am)); 3883 return; 3884 } 3885 if (tyd == Ity_I8 || tyd == Ity_I16) { 3886 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3887 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data); 3888 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 
1 : 2), 3889 r,am )); 3890 return; 3891 } 3892 if (tyd == Ity_F64) { 3893 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3894 HReg r = iselDblExpr(env, stmt->Ist.Store.data); 3895 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am)); 3896 return; 3897 } 3898 if (tyd == Ity_F32) { 3899 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3900 HReg r = iselFltExpr(env, stmt->Ist.Store.data); 3901 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am)); 3902 return; 3903 } 3904 if (tyd == Ity_I64) { 3905 HReg vHi, vLo, rA; 3906 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data); 3907 rA = iselIntExpr_R(env, stmt->Ist.Store.addr); 3908 addInstr(env, X86Instr_Alu32M( 3909 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA))); 3910 addInstr(env, X86Instr_Alu32M( 3911 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA))); 3912 return; 3913 } 3914 if (tyd == Ity_V128) { 3915 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr); 3916 HReg r = iselVecExpr(env, stmt->Ist.Store.data); 3917 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am)); 3918 return; 3919 } 3920 break; 3921 } 3922 3923 /* --------- PUT --------- */ 3924 case Ist_Put: { 3925 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); 3926 if (ty == Ity_I32) { 3927 /* We're going to write to memory, so compute the RHS into an 3928 X86RI. */ 3929 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data); 3930 addInstr(env, 3931 X86Instr_Alu32M( 3932 Xalu_MOV, 3933 ri, 3934 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP()) 3935 )); 3936 return; 3937 } 3938 if (ty == Ity_I8 || ty == Ity_I16) { 3939 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data); 3940 addInstr(env, X86Instr_Store( 3941 toUChar(ty==Ity_I8 ? 1 : 2), 3942 r, 3943 X86AMode_IR(stmt->Ist.Put.offset, 3944 hregX86_EBP()))); 3945 return; 3946 } 3947 if (ty == Ity_I64) { 3948 HReg vHi, vLo; 3949 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3950 X86AMode* am4 = advance4(am); 3951 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data); 3952 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am )); 3953 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 )); 3954 return; 3955 } 3956 if (ty == Ity_V128) { 3957 HReg vec = iselVecExpr(env, stmt->Ist.Put.data); 3958 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3959 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am)); 3960 return; 3961 } 3962 if (ty == Ity_F32) { 3963 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data); 3964 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3965 set_FPU_rounding_default(env); /* paranoia */ 3966 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am )); 3967 return; 3968 } 3969 if (ty == Ity_F64) { 3970 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data); 3971 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP()); 3972 set_FPU_rounding_default(env); /* paranoia */ 3973 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am )); 3974 return; 3975 } 3976 break; 3977 } 3978 3979 /* --------- Indexed PUT --------- */ 3980 case Ist_PutI: { 3981 IRPutI *puti = stmt->Ist.PutI.details; 3982 3983 X86AMode* am 3984 = genGuestArrayOffset( 3985 env, puti->descr, 3986 puti->ix, puti->bias ); 3987 3988 IRType ty = typeOfIRExpr(env->type_env, puti->data); 3989 if (ty == Ity_F64) { 3990 HReg val = iselDblExpr(env, puti->data); 3991 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am )); 3992 return; 3993 } 3994 if (ty == Ity_I8) { 3995 HReg r = iselIntExpr_R(env, puti->data); 3996 addInstr(env, X86Instr_Store( 1, r, am 
)); 3997 return; 3998 } 3999 if (ty == Ity_I32) { 4000 HReg r = iselIntExpr_R(env, puti->data); 4001 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am )); 4002 return; 4003 } 4004 if (ty == Ity_I64) { 4005 HReg rHi, rLo; 4006 X86AMode* am4 = advance4(am); 4007 iselInt64Expr(&rHi, &rLo, env, puti->data); 4008 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am )); 4009 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 )); 4010 return; 4011 } 4012 break; 4013 } 4014 4015 /* --------- TMP --------- */ 4016 case Ist_WrTmp: { 4017 IRTemp tmp = stmt->Ist.WrTmp.tmp; 4018 IRType ty = typeOfIRTemp(env->type_env, tmp); 4019 4020 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..), 4021 compute it into an AMode and then use LEA. This usually 4022 produces fewer instructions, often because (for memcheck 4023 created IR) we get t = address-expression, (t is later used 4024 twice) and so doing this naturally turns address-expression 4025 back into an X86 amode. */ 4026 if (ty == Ity_I32 4027 && stmt->Ist.WrTmp.data->tag == Iex_Binop 4028 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) { 4029 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data); 4030 HReg dst = lookupIRTemp(env, tmp); 4031 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) { 4032 /* Hmm, iselIntExpr_AMode wimped out and just computed the 4033 value into a register. Just emit a normal reg-reg move 4034 so reg-alloc can coalesce it away in the usual way. */ 4035 HReg src = am->Xam.IR.reg; 4036 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst)); 4037 } else { 4038 addInstr(env, X86Instr_Lea32(am,dst)); 4039 } 4040 return; 4041 } 4042 4043 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { 4044 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data); 4045 HReg dst = lookupIRTemp(env, tmp); 4046 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst)); 4047 return; 4048 } 4049 if (ty == Ity_I64) { 4050 HReg rHi, rLo, dstHi, dstLo; 4051 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); 4052 lookupIRTemp64( &dstHi, &dstLo, env, tmp); 4053 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) ); 4054 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) ); 4055 return; 4056 } 4057 if (ty == Ity_I1) { 4058 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data); 4059 HReg dst = lookupIRTemp(env, tmp); 4060 addInstr(env, X86Instr_Set32(cond, dst)); 4061 return; 4062 } 4063 if (ty == Ity_F64) { 4064 HReg dst = lookupIRTemp(env, tmp); 4065 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); 4066 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 4067 return; 4068 } 4069 if (ty == Ity_F32) { 4070 HReg dst = lookupIRTemp(env, tmp); 4071 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); 4072 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst)); 4073 return; 4074 } 4075 if (ty == Ity_V128) { 4076 HReg dst = lookupIRTemp(env, tmp); 4077 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data); 4078 addInstr(env, mk_vMOVsd_RR(src,dst)); 4079 return; 4080 } 4081 break; 4082 } 4083 4084 /* --------- Call to DIRTY helper --------- */ 4085 case Ist_Dirty: { 4086 IRDirty* d = stmt->Ist.Dirty.details; 4087 4088 /* Figure out the return type, if any. 
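            In outline, the cases handled below are: no return value
            at all; an integer result, which comes back in %eax
            (I8/I16/I32) or %edx:%eax (I64); and a V128 result, which
            doHelperCall arranges to be left on the stack and which
            is reloaded from there.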
*/
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break;
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to
            0x555..555 if this is a conditional call that returns a
            value and the call is skipped. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID: {
               /* No return value.  Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            }
            case Ity_I32: case Ity_I16: case Ity_I8: {
               /* The returned value is in %eax.  Park it in the
                  register associated with tmp. */
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
               return;
            }
            case Ity_I64: {
               /* The returned value is in %edx:%eax.  Park it in the
                  register-pair associated with tmp. */
               vassert(rloc.pri == RLPri_2Int);
               vassert(addToSp == 0);
               HReg dstHi, dstLo;
               lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and rloc tells
                  us where.  Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  doHelperCall.
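                  A sketch of what is emitted for this case:
                     <128-bit SSE load of dst from rloc.spOff(%esp)>
                     addl $addToSp, %esp
                  (illustrative only; the actual load is whatever
                  X86Instr_SseLdSt assembles to).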
*/ 4147 vassert(rloc.pri == RLPri_V128SpRel); 4148 vassert(addToSp >= 16); 4149 HReg dst = lookupIRTemp(env, d->tmp); 4150 X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP()); 4151 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am )); 4152 add_to_esp(env, addToSp); 4153 return; 4154 } 4155 default: 4156 /*NOTREACHED*/ 4157 vassert(0); 4158 } 4159 break; 4160 } 4161 4162 /* --------- MEM FENCE --------- */ 4163 case Ist_MBE: 4164 switch (stmt->Ist.MBE.event) { 4165 case Imbe_Fence: 4166 addInstr(env, X86Instr_MFence(env->hwcaps)); 4167 return; 4168 default: 4169 break; 4170 } 4171 break; 4172 4173 /* --------- ACAS --------- */ 4174 case Ist_CAS: 4175 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { 4176 /* "normal" singleton CAS */ 4177 UChar sz; 4178 IRCAS* cas = stmt->Ist.CAS.details; 4179 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4180 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 4181 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 4182 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4183 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4184 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4185 vassert(cas->expdHi == NULL); 4186 vassert(cas->dataHi == NULL); 4187 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4188 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 4189 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 4190 switch (ty) { 4191 case Ity_I32: sz = 4; break; 4192 case Ity_I16: sz = 2; break; 4193 case Ity_I8: sz = 1; break; 4194 default: goto unhandled_cas; 4195 } 4196 addInstr(env, X86Instr_ACAS(am, sz)); 4197 addInstr(env, 4198 X86Instr_CMov32(Xcc_NZ, 4199 X86RM_Reg(hregX86_EAX()), rOldLo)); 4200 return; 4201 } else { 4202 /* double CAS */ 4203 IRCAS* cas = stmt->Ist.CAS.details; 4204 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); 4205 /* only 32-bit allowed in this case */ 4206 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */ 4207 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */ 4208 X86AMode* am = iselIntExpr_AMode(env, cas->addr); 4209 HReg rDataHi = iselIntExpr_R(env, cas->dataHi); 4210 HReg rDataLo = iselIntExpr_R(env, cas->dataLo); 4211 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi); 4212 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo); 4213 HReg rOldHi = lookupIRTemp(env, cas->oldHi); 4214 HReg rOldLo = lookupIRTemp(env, cas->oldLo); 4215 if (ty != Ity_I32) 4216 goto unhandled_cas; 4217 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi)); 4218 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo)); 4219 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX())); 4220 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX())); 4221 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX())); 4222 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX())); 4223 addInstr(env, X86Instr_DACAS(am)); 4224 addInstr(env, 4225 X86Instr_CMov32(Xcc_NZ, 4226 X86RM_Reg(hregX86_EDX()), rOldHi)); 4227 addInstr(env, 4228 X86Instr_CMov32(Xcc_NZ, 4229 X86RM_Reg(hregX86_EAX()), rOldLo)); 4230 return; 4231 } 4232 unhandled_cas: 4233 break; 4234 4235 /* --------- INSTR MARK --------- */ 4236 /* Doesn't generate any executable code ... */ 4237 case Ist_IMark: 4238 return; 4239 4240 /* --------- NO-OP --------- */ 4241 /* Fairly self-explanatory, wouldn't you say? 
*/ 4242 case Ist_NoOp: 4243 return; 4244 4245 /* --------- EXIT --------- */ 4246 case Ist_Exit: { 4247 if (stmt->Ist.Exit.dst->tag != Ico_U32) 4248 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value"); 4249 4250 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); 4251 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP, 4252 hregX86_EBP()); 4253 4254 /* Case: boring transfer to known address */ 4255 if (stmt->Ist.Exit.jk == Ijk_Boring) { 4256 if (env->chainingAllowed) { 4257 /* .. almost always true .. */ 4258 /* Skip the event check at the dst if this is a forwards 4259 edge. */ 4260 Bool toFastEP 4261 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; 4262 if (0) vex_printf("%s", toFastEP ? "Y" : ","); 4263 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32, 4264 amEIP, cc, toFastEP)); 4265 } else { 4266 /* .. very occasionally .. */ 4267 /* We can't use chaining, so ask for an assisted transfer, 4268 as that's the only alternative that is allowable. */ 4269 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4270 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring)); 4271 } 4272 return; 4273 } 4274 4275 /* Case: assisted transfer to arbitrary address */ 4276 switch (stmt->Ist.Exit.jk) { 4277 /* Keep this list in sync with that in iselNext below */ 4278 case Ijk_ClientReq: 4279 case Ijk_EmWarn: 4280 case Ijk_MapFail: 4281 case Ijk_NoDecode: 4282 case Ijk_NoRedir: 4283 case Ijk_SigSEGV: 4284 case Ijk_SigTRAP: 4285 case Ijk_Sys_int128: 4286 case Ijk_Sys_int129: 4287 case Ijk_Sys_int130: 4288 case Ijk_Sys_syscall: 4289 case Ijk_Sys_sysenter: 4290 case Ijk_InvalICache: 4291 case Ijk_Yield: 4292 { 4293 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); 4294 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk)); 4295 return; 4296 } 4297 default: 4298 break; 4299 } 4300 4301 /* Do we ever expect to see any other kind? */ 4302 goto stmt_fail; 4303 } 4304 4305 default: break; 4306 } 4307 stmt_fail: 4308 ppIRStmt(stmt); 4309 vpanic("iselStmt"); 4310 } 4311 4312 4313 /*---------------------------------------------------------*/ 4314 /*--- ISEL: Basic block terminators (Nexts) ---*/ 4315 /*---------------------------------------------------------*/ 4316 4317 static void iselNext ( ISelEnv* env, 4318 IRExpr* next, IRJumpKind jk, Int offsIP ) 4319 { 4320 if (vex_traceflags & VEX_TRACE_VCODE) { 4321 vex_printf( "\n-- PUT(%d) = ", offsIP); 4322 ppIRExpr( next ); 4323 vex_printf( "; exit-"); 4324 ppIRJumpKind(jk); 4325 vex_printf( "\n"); 4326 } 4327 4328 /* Case: boring transfer to known address */ 4329 if (next->tag == Iex_Const) { 4330 IRConst* cdst = next->Iex.Const.con; 4331 vassert(cdst->tag == Ico_U32); 4332 if (jk == Ijk_Boring || jk == Ijk_Call) { 4333 /* Boring transfer to known address */ 4334 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); 4335 if (env->chainingAllowed) { 4336 /* .. almost always true .. */ 4337 /* Skip the event check at the dst if this is a forwards 4338 edge. */ 4339 Bool toFastEP 4340 = ((Addr64)cdst->Ico.U32) > env->max_ga; 4341 if (0) vex_printf("%s", toFastEP ? "X" : "."); 4342 addInstr(env, X86Instr_XDirect(cdst->Ico.U32, 4343 amEIP, Xcc_ALWAYS, 4344 toFastEP)); 4345 } else { 4346 /* .. very occasionally .. */ 4347 /* We can't use chaining, so ask for an assisted transfer, 4348 as that's the only alternative that is allowable. 
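               (Background: an XDirect transfer emits a direct jump
               which the chaining machinery can later patch to go
               straight to the destination block, whereas an XAssisted
               transfer always returns to the run-time dispatcher,
               handing it both the destination and the jump kind.
               Hence XAssisted is the safe fallback whenever chaining
               is disallowed.)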
*/ 4349 HReg r = iselIntExpr_R(env, next); 4350 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, 4351 Ijk_Boring)); 4352 } 4353 return; 4354 } 4355 } 4356 4357 /* Case: call/return (==boring) transfer to any address */ 4358 switch (jk) { 4359 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { 4360 HReg r = iselIntExpr_R(env, next); 4361 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); 4362 if (env->chainingAllowed) { 4363 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS)); 4364 } else { 4365 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, 4366 Ijk_Boring)); 4367 } 4368 return; 4369 } 4370 default: 4371 break; 4372 } 4373 4374 /* Case: assisted transfer to arbitrary address */ 4375 switch (jk) { 4376 /* Keep this list in sync with that for Ist_Exit above */ 4377 case Ijk_ClientReq: 4378 case Ijk_EmWarn: 4379 case Ijk_MapFail: 4380 case Ijk_NoDecode: 4381 case Ijk_NoRedir: 4382 case Ijk_SigSEGV: 4383 case Ijk_SigTRAP: 4384 case Ijk_Sys_int128: 4385 case Ijk_Sys_int129: 4386 case Ijk_Sys_int130: 4387 case Ijk_Sys_syscall: 4388 case Ijk_Sys_sysenter: 4389 case Ijk_InvalICache: 4390 case Ijk_Yield: 4391 { 4392 HReg r = iselIntExpr_R(env, next); 4393 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); 4394 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk)); 4395 return; 4396 } 4397 default: 4398 break; 4399 } 4400 4401 vex_printf( "\n-- PUT(%d) = ", offsIP); 4402 ppIRExpr( next ); 4403 vex_printf( "; exit-"); 4404 ppIRJumpKind(jk); 4405 vex_printf( "\n"); 4406 vassert(0); // are we expecting any other kind? 4407 } 4408 4409 4410 /*---------------------------------------------------------*/ 4411 /*--- Insn selector top-level ---*/ 4412 /*---------------------------------------------------------*/ 4413 4414 /* Translate an entire SB to x86 code. */ 4415 4416 HInstrArray* iselSB_X86 ( IRSB* bb, 4417 VexArch arch_host, 4418 VexArchInfo* archinfo_host, 4419 VexAbiInfo* vbi/*UNUSED*/, 4420 Int offs_Host_EvC_Counter, 4421 Int offs_Host_EvC_FailAddr, 4422 Bool chainingAllowed, 4423 Bool addProfInc, 4424 Addr64 max_ga ) 4425 { 4426 Int i, j; 4427 HReg hreg, hregHI; 4428 ISelEnv* env; 4429 UInt hwcaps_host = archinfo_host->hwcaps; 4430 X86AMode *amCounter, *amFailAddr; 4431 4432 /* sanity ... */ 4433 vassert(arch_host == VexArchX86); 4434 vassert(0 == (hwcaps_host 4435 & ~(VEX_HWCAPS_X86_MMXEXT 4436 | VEX_HWCAPS_X86_SSE1 4437 | VEX_HWCAPS_X86_SSE2 4438 | VEX_HWCAPS_X86_SSE3 4439 | VEX_HWCAPS_X86_LZCNT))); 4440 vassert(sizeof(max_ga) == 8); 4441 vassert((max_ga >> 32) == 0); 4442 4443 /* Make up an initial environment to use. */ 4444 env = LibVEX_Alloc(sizeof(ISelEnv)); 4445 env->vreg_ctr = 0; 4446 4447 /* Set up output code array. */ 4448 env->code = newHInstrArray(); 4449 4450 /* Copy BB's type env. */ 4451 env->type_env = bb->tyenv; 4452 4453 /* Make up an IRTemp -> virtual HReg mapping. This doesn't 4454 change as we go along. */ 4455 env->n_vregmap = bb->tyenv->types_used; 4456 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4457 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); 4458 4459 /* and finally ... */ 4460 env->chainingAllowed = chainingAllowed; 4461 env->hwcaps = hwcaps_host; 4462 env->max_ga = max_ga; 4463 4464 /* For each IR temporary, allocate a suitably-kinded virtual 4465 register. 
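      Specifically, as per the switch below: Ity_I1/I8/I16/I32 each
      get one 32-bit integer vreg; Ity_I64 gets a pair, low half in
      vregmap[] and high half in vregmapHI[]; Ity_F32/F64 get one
      64-bit float vreg; and Ity_V128 gets one 128-bit vector vreg.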
*/
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt32, True);
                        hregHI = mkHReg(j++, HRcInt32, True); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/