1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_defs.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2015 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 #include "libvex_trc_values.h" 39 40 #include "main_util.h" 41 #include "host_generic_regs.h" 42 #include "host_x86_defs.h" 43 44 45 /* --------- Registers. --------- */ 46 47 const RRegUniverse* getRRegUniverse_X86 ( void ) 48 { 49 /* The real-register universe is a big constant, so we just want to 50 initialise it once. */ 51 static RRegUniverse rRegUniverse_X86; 52 static Bool rRegUniverse_X86_initted = False; 53 54 /* Handy shorthand, nothing more */ 55 RRegUniverse* ru = &rRegUniverse_X86; 56 57 /* This isn't thread-safe. Sigh. */ 58 if (LIKELY(rRegUniverse_X86_initted)) 59 return ru; 60 61 RRegUniverse__init(ru); 62 63 /* Add the registers. The initial segment of this array must be 64 those available for allocation by reg-alloc, and those that 65 follow are not available for allocation. */ 66 ru->regs[ru->size++] = hregX86_EAX(); 67 ru->regs[ru->size++] = hregX86_EBX(); 68 ru->regs[ru->size++] = hregX86_ECX(); 69 ru->regs[ru->size++] = hregX86_EDX(); 70 ru->regs[ru->size++] = hregX86_ESI(); 71 ru->regs[ru->size++] = hregX86_EDI(); 72 ru->regs[ru->size++] = hregX86_FAKE0(); 73 ru->regs[ru->size++] = hregX86_FAKE1(); 74 ru->regs[ru->size++] = hregX86_FAKE2(); 75 ru->regs[ru->size++] = hregX86_FAKE3(); 76 ru->regs[ru->size++] = hregX86_FAKE4(); 77 ru->regs[ru->size++] = hregX86_FAKE5(); 78 ru->regs[ru->size++] = hregX86_XMM0(); 79 ru->regs[ru->size++] = hregX86_XMM1(); 80 ru->regs[ru->size++] = hregX86_XMM2(); 81 ru->regs[ru->size++] = hregX86_XMM3(); 82 ru->regs[ru->size++] = hregX86_XMM4(); 83 ru->regs[ru->size++] = hregX86_XMM5(); 84 ru->regs[ru->size++] = hregX86_XMM6(); 85 ru->regs[ru->size++] = hregX86_XMM7(); 86 ru->allocable = ru->size; 87 /* And other regs, not available to the allocator. */ 88 ru->regs[ru->size++] = hregX86_ESP(); 89 ru->regs[ru->size++] = hregX86_EBP(); 90 91 rRegUniverse_X86_initted = True; 92 93 RRegUniverse__check_is_sane(ru); 94 return ru; 95 } 96 97 98 void ppHRegX86 ( HReg reg ) 99 { 100 Int r; 101 static const HChar* ireg32_names[8] 102 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; 103 /* Be generic for all virtual regs. 
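      (The ireg32_names table just above is listed in Intel encoding order,
      0=%eax, 1=%ecx, 2=%edx, 3=%ebx, 4=%esp, 5=%ebp, 6=%esi, 7=%edi, so
      hregEncoding() can be used directly as the index below.)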
*/ 104 if (hregIsVirtual(reg)) { 105 ppHReg(reg); 106 return; 107 } 108 /* But specific for real regs. */ 109 switch (hregClass(reg)) { 110 case HRcInt32: 111 r = hregEncoding(reg); 112 vassert(r >= 0 && r < 8); 113 vex_printf("%s", ireg32_names[r]); 114 return; 115 case HRcFlt64: 116 r = hregEncoding(reg); 117 vassert(r >= 0 && r < 6); 118 vex_printf("%%fake%d", r); 119 return; 120 case HRcVec128: 121 r = hregEncoding(reg); 122 vassert(r >= 0 && r < 8); 123 vex_printf("%%xmm%d", r); 124 return; 125 default: 126 vpanic("ppHRegX86"); 127 } 128 } 129 130 131 /* --------- Condition codes, Intel encoding. --------- */ 132 133 const HChar* showX86CondCode ( X86CondCode cond ) 134 { 135 switch (cond) { 136 case Xcc_O: return "o"; 137 case Xcc_NO: return "no"; 138 case Xcc_B: return "b"; 139 case Xcc_NB: return "nb"; 140 case Xcc_Z: return "z"; 141 case Xcc_NZ: return "nz"; 142 case Xcc_BE: return "be"; 143 case Xcc_NBE: return "nbe"; 144 case Xcc_S: return "s"; 145 case Xcc_NS: return "ns"; 146 case Xcc_P: return "p"; 147 case Xcc_NP: return "np"; 148 case Xcc_L: return "l"; 149 case Xcc_NL: return "nl"; 150 case Xcc_LE: return "le"; 151 case Xcc_NLE: return "nle"; 152 case Xcc_ALWAYS: return "ALWAYS"; 153 default: vpanic("ppX86CondCode"); 154 } 155 } 156 157 158 /* --------- X86AMode: memory address expressions. --------- */ 159 160 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) { 161 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode)); 162 am->tag = Xam_IR; 163 am->Xam.IR.imm = imm32; 164 am->Xam.IR.reg = reg; 165 return am; 166 } 167 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { 168 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode)); 169 am->tag = Xam_IRRS; 170 am->Xam.IRRS.imm = imm32; 171 am->Xam.IRRS.base = base; 172 am->Xam.IRRS.index = indEx; 173 am->Xam.IRRS.shift = shift; 174 vassert(shift >= 0 && shift <= 3); 175 return am; 176 } 177 178 X86AMode* dopyX86AMode ( X86AMode* am ) { 179 switch (am->tag) { 180 case Xam_IR: 181 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); 182 case Xam_IRRS: 183 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, 184 am->Xam.IRRS.index, am->Xam.IRRS.shift ); 185 default: 186 vpanic("dopyX86AMode"); 187 } 188 } 189 190 void ppX86AMode ( X86AMode* am ) { 191 switch (am->tag) { 192 case Xam_IR: 193 if (am->Xam.IR.imm == 0) 194 vex_printf("("); 195 else 196 vex_printf("0x%x(", am->Xam.IR.imm); 197 ppHRegX86(am->Xam.IR.reg); 198 vex_printf(")"); 199 return; 200 case Xam_IRRS: 201 vex_printf("0x%x(", am->Xam.IRRS.imm); 202 ppHRegX86(am->Xam.IRRS.base); 203 vex_printf(","); 204 ppHRegX86(am->Xam.IRRS.index); 205 vex_printf(",%d)", 1 << am->Xam.IRRS.shift); 206 return; 207 default: 208 vpanic("ppX86AMode"); 209 } 210 } 211 212 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) { 213 switch (am->tag) { 214 case Xam_IR: 215 addHRegUse(u, HRmRead, am->Xam.IR.reg); 216 return; 217 case Xam_IRRS: 218 addHRegUse(u, HRmRead, am->Xam.IRRS.base); 219 addHRegUse(u, HRmRead, am->Xam.IRRS.index); 220 return; 221 default: 222 vpanic("addRegUsage_X86AMode"); 223 } 224 } 225 226 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) { 227 switch (am->tag) { 228 case Xam_IR: 229 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg); 230 return; 231 case Xam_IRRS: 232 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base); 233 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index); 234 return; 235 default: 236 vpanic("mapRegs_X86AMode"); 237 } 238 } 239 240 /* --------- Operand, which can be reg, immediate or 
memory. --------- */ 241 242 X86RMI* X86RMI_Imm ( UInt imm32 ) { 243 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 244 op->tag = Xrmi_Imm; 245 op->Xrmi.Imm.imm32 = imm32; 246 return op; 247 } 248 X86RMI* X86RMI_Reg ( HReg reg ) { 249 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 250 op->tag = Xrmi_Reg; 251 op->Xrmi.Reg.reg = reg; 252 return op; 253 } 254 X86RMI* X86RMI_Mem ( X86AMode* am ) { 255 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 256 op->tag = Xrmi_Mem; 257 op->Xrmi.Mem.am = am; 258 return op; 259 } 260 261 void ppX86RMI ( X86RMI* op ) { 262 switch (op->tag) { 263 case Xrmi_Imm: 264 vex_printf("$0x%x", op->Xrmi.Imm.imm32); 265 return; 266 case Xrmi_Reg: 267 ppHRegX86(op->Xrmi.Reg.reg); 268 return; 269 case Xrmi_Mem: 270 ppX86AMode(op->Xrmi.Mem.am); 271 return; 272 default: 273 vpanic("ppX86RMI"); 274 } 275 } 276 277 /* An X86RMI can only be used in a "read" context (what would it mean 278 to write or modify a literal?) and so we enumerate its registers 279 accordingly. */ 280 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) { 281 switch (op->tag) { 282 case Xrmi_Imm: 283 return; 284 case Xrmi_Reg: 285 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg); 286 return; 287 case Xrmi_Mem: 288 addRegUsage_X86AMode(u, op->Xrmi.Mem.am); 289 return; 290 default: 291 vpanic("addRegUsage_X86RMI"); 292 } 293 } 294 295 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) { 296 switch (op->tag) { 297 case Xrmi_Imm: 298 return; 299 case Xrmi_Reg: 300 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg); 301 return; 302 case Xrmi_Mem: 303 mapRegs_X86AMode(m, op->Xrmi.Mem.am); 304 return; 305 default: 306 vpanic("mapRegs_X86RMI"); 307 } 308 } 309 310 311 /* --------- Operand, which can be reg or immediate only. --------- */ 312 313 X86RI* X86RI_Imm ( UInt imm32 ) { 314 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI)); 315 op->tag = Xri_Imm; 316 op->Xri.Imm.imm32 = imm32; 317 return op; 318 } 319 X86RI* X86RI_Reg ( HReg reg ) { 320 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI)); 321 op->tag = Xri_Reg; 322 op->Xri.Reg.reg = reg; 323 return op; 324 } 325 326 void ppX86RI ( X86RI* op ) { 327 switch (op->tag) { 328 case Xri_Imm: 329 vex_printf("$0x%x", op->Xri.Imm.imm32); 330 return; 331 case Xri_Reg: 332 ppHRegX86(op->Xri.Reg.reg); 333 return; 334 default: 335 vpanic("ppX86RI"); 336 } 337 } 338 339 /* An X86RI can only be used in a "read" context (what would it mean 340 to write or modify a literal?) and so we enumerate its registers 341 accordingly. */ 342 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) { 343 switch (op->tag) { 344 case Xri_Imm: 345 return; 346 case Xri_Reg: 347 addHRegUse(u, HRmRead, op->Xri.Reg.reg); 348 return; 349 default: 350 vpanic("addRegUsage_X86RI"); 351 } 352 } 353 354 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) { 355 switch (op->tag) { 356 case Xri_Imm: 357 return; 358 case Xri_Reg: 359 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg); 360 return; 361 default: 362 vpanic("mapRegs_X86RI"); 363 } 364 } 365 366 367 /* --------- Operand, which can be reg or memory only. 
--------- */ 368 369 X86RM* X86RM_Reg ( HReg reg ) { 370 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM)); 371 op->tag = Xrm_Reg; 372 op->Xrm.Reg.reg = reg; 373 return op; 374 } 375 X86RM* X86RM_Mem ( X86AMode* am ) { 376 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM)); 377 op->tag = Xrm_Mem; 378 op->Xrm.Mem.am = am; 379 return op; 380 } 381 382 void ppX86RM ( X86RM* op ) { 383 switch (op->tag) { 384 case Xrm_Mem: 385 ppX86AMode(op->Xrm.Mem.am); 386 return; 387 case Xrm_Reg: 388 ppHRegX86(op->Xrm.Reg.reg); 389 return; 390 default: 391 vpanic("ppX86RM"); 392 } 393 } 394 395 /* Because an X86RM can be both a source or destination operand, we 396 have to supply a mode -- pertaining to the operand as a whole -- 397 indicating how it's being used. */ 398 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) { 399 switch (op->tag) { 400 case Xrm_Mem: 401 /* Memory is read, written or modified. So we just want to 402 know the regs read by the amode. */ 403 addRegUsage_X86AMode(u, op->Xrm.Mem.am); 404 return; 405 case Xrm_Reg: 406 /* reg is read, written or modified. Add it in the 407 appropriate way. */ 408 addHRegUse(u, mode, op->Xrm.Reg.reg); 409 return; 410 default: 411 vpanic("addRegUsage_X86RM"); 412 } 413 } 414 415 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op ) 416 { 417 switch (op->tag) { 418 case Xrm_Mem: 419 mapRegs_X86AMode(m, op->Xrm.Mem.am); 420 return; 421 case Xrm_Reg: 422 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg); 423 return; 424 default: 425 vpanic("mapRegs_X86RM"); 426 } 427 } 428 429 430 /* --------- Instructions. --------- */ 431 432 const HChar* showX86UnaryOp ( X86UnaryOp op ) { 433 switch (op) { 434 case Xun_NOT: return "not"; 435 case Xun_NEG: return "neg"; 436 default: vpanic("showX86UnaryOp"); 437 } 438 } 439 440 const HChar* showX86AluOp ( X86AluOp op ) { 441 switch (op) { 442 case Xalu_MOV: return "mov"; 443 case Xalu_CMP: return "cmp"; 444 case Xalu_ADD: return "add"; 445 case Xalu_SUB: return "sub"; 446 case Xalu_ADC: return "adc"; 447 case Xalu_SBB: return "sbb"; 448 case Xalu_AND: return "and"; 449 case Xalu_OR: return "or"; 450 case Xalu_XOR: return "xor"; 451 case Xalu_MUL: return "mul"; 452 default: vpanic("showX86AluOp"); 453 } 454 } 455 456 const HChar* showX86ShiftOp ( X86ShiftOp op ) { 457 switch (op) { 458 case Xsh_SHL: return "shl"; 459 case Xsh_SHR: return "shr"; 460 case Xsh_SAR: return "sar"; 461 default: vpanic("showX86ShiftOp"); 462 } 463 } 464 465 const HChar* showX86FpOp ( X86FpOp op ) { 466 switch (op) { 467 case Xfp_ADD: return "add"; 468 case Xfp_SUB: return "sub"; 469 case Xfp_MUL: return "mul"; 470 case Xfp_DIV: return "div"; 471 case Xfp_SCALE: return "scale"; 472 case Xfp_ATAN: return "atan"; 473 case Xfp_YL2X: return "yl2x"; 474 case Xfp_YL2XP1: return "yl2xp1"; 475 case Xfp_PREM: return "prem"; 476 case Xfp_PREM1: return "prem1"; 477 case Xfp_SQRT: return "sqrt"; 478 case Xfp_ABS: return "abs"; 479 case Xfp_NEG: return "chs"; 480 case Xfp_MOV: return "mov"; 481 case Xfp_SIN: return "sin"; 482 case Xfp_COS: return "cos"; 483 case Xfp_TAN: return "tan"; 484 case Xfp_ROUND: return "round"; 485 case Xfp_2XM1: return "2xm1"; 486 default: vpanic("showX86FpOp"); 487 } 488 } 489 490 const HChar* showX86SseOp ( X86SseOp op ) { 491 switch (op) { 492 case Xsse_MOV: return "mov(?!)"; 493 case Xsse_ADDF: return "add"; 494 case Xsse_SUBF: return "sub"; 495 case Xsse_MULF: return "mul"; 496 case Xsse_DIVF: return "div"; 497 case Xsse_MAXF: return "max"; 498 case Xsse_MINF: return "min"; 499 case Xsse_CMPEQF: return 
"cmpFeq"; 500 case Xsse_CMPLTF: return "cmpFlt"; 501 case Xsse_CMPLEF: return "cmpFle"; 502 case Xsse_CMPUNF: return "cmpFun"; 503 case Xsse_RCPF: return "rcp"; 504 case Xsse_RSQRTF: return "rsqrt"; 505 case Xsse_SQRTF: return "sqrt"; 506 case Xsse_AND: return "and"; 507 case Xsse_OR: return "or"; 508 case Xsse_XOR: return "xor"; 509 case Xsse_ANDN: return "andn"; 510 case Xsse_ADD8: return "paddb"; 511 case Xsse_ADD16: return "paddw"; 512 case Xsse_ADD32: return "paddd"; 513 case Xsse_ADD64: return "paddq"; 514 case Xsse_QADD8U: return "paddusb"; 515 case Xsse_QADD16U: return "paddusw"; 516 case Xsse_QADD8S: return "paddsb"; 517 case Xsse_QADD16S: return "paddsw"; 518 case Xsse_SUB8: return "psubb"; 519 case Xsse_SUB16: return "psubw"; 520 case Xsse_SUB32: return "psubd"; 521 case Xsse_SUB64: return "psubq"; 522 case Xsse_QSUB8U: return "psubusb"; 523 case Xsse_QSUB16U: return "psubusw"; 524 case Xsse_QSUB8S: return "psubsb"; 525 case Xsse_QSUB16S: return "psubsw"; 526 case Xsse_MUL16: return "pmullw"; 527 case Xsse_MULHI16U: return "pmulhuw"; 528 case Xsse_MULHI16S: return "pmulhw"; 529 case Xsse_AVG8U: return "pavgb"; 530 case Xsse_AVG16U: return "pavgw"; 531 case Xsse_MAX16S: return "pmaxw"; 532 case Xsse_MAX8U: return "pmaxub"; 533 case Xsse_MIN16S: return "pminw"; 534 case Xsse_MIN8U: return "pminub"; 535 case Xsse_CMPEQ8: return "pcmpeqb"; 536 case Xsse_CMPEQ16: return "pcmpeqw"; 537 case Xsse_CMPEQ32: return "pcmpeqd"; 538 case Xsse_CMPGT8S: return "pcmpgtb"; 539 case Xsse_CMPGT16S: return "pcmpgtw"; 540 case Xsse_CMPGT32S: return "pcmpgtd"; 541 case Xsse_SHL16: return "psllw"; 542 case Xsse_SHL32: return "pslld"; 543 case Xsse_SHL64: return "psllq"; 544 case Xsse_SHR16: return "psrlw"; 545 case Xsse_SHR32: return "psrld"; 546 case Xsse_SHR64: return "psrlq"; 547 case Xsse_SAR16: return "psraw"; 548 case Xsse_SAR32: return "psrad"; 549 case Xsse_PACKSSD: return "packssdw"; 550 case Xsse_PACKSSW: return "packsswb"; 551 case Xsse_PACKUSW: return "packuswb"; 552 case Xsse_UNPCKHB: return "punpckhb"; 553 case Xsse_UNPCKHW: return "punpckhw"; 554 case Xsse_UNPCKHD: return "punpckhd"; 555 case Xsse_UNPCKHQ: return "punpckhq"; 556 case Xsse_UNPCKLB: return "punpcklb"; 557 case Xsse_UNPCKLW: return "punpcklw"; 558 case Xsse_UNPCKLD: return "punpckld"; 559 case Xsse_UNPCKLQ: return "punpcklq"; 560 default: vpanic("showX86SseOp"); 561 } 562 } 563 564 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { 565 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 566 i->tag = Xin_Alu32R; 567 i->Xin.Alu32R.op = op; 568 i->Xin.Alu32R.src = src; 569 i->Xin.Alu32R.dst = dst; 570 return i; 571 } 572 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) { 573 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 574 i->tag = Xin_Alu32M; 575 i->Xin.Alu32M.op = op; 576 i->Xin.Alu32M.src = src; 577 i->Xin.Alu32M.dst = dst; 578 vassert(op != Xalu_MUL); 579 return i; 580 } 581 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) { 582 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 583 i->tag = Xin_Sh32; 584 i->Xin.Sh32.op = op; 585 i->Xin.Sh32.src = src; 586 i->Xin.Sh32.dst = dst; 587 return i; 588 } 589 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) { 590 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 591 i->tag = Xin_Test32; 592 i->Xin.Test32.imm32 = imm32; 593 i->Xin.Test32.dst = dst; 594 return i; 595 } 596 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { 597 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 598 i->tag = Xin_Unary32; 599 
i->Xin.Unary32.op = op; 600 i->Xin.Unary32.dst = dst; 601 return i; 602 } 603 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { 604 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 605 i->tag = Xin_Lea32; 606 i->Xin.Lea32.am = am; 607 i->Xin.Lea32.dst = dst; 608 return i; 609 } 610 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { 611 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 612 i->tag = Xin_MulL; 613 i->Xin.MulL.syned = syned; 614 i->Xin.MulL.src = src; 615 return i; 616 } 617 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) { 618 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 619 i->tag = Xin_Div; 620 i->Xin.Div.syned = syned; 621 i->Xin.Div.src = src; 622 return i; 623 } 624 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) { 625 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 626 i->tag = Xin_Sh3232; 627 i->Xin.Sh3232.op = op; 628 i->Xin.Sh3232.amt = amt; 629 i->Xin.Sh3232.src = src; 630 i->Xin.Sh3232.dst = dst; 631 vassert(op == Xsh_SHL || op == Xsh_SHR); 632 return i; 633 } 634 X86Instr* X86Instr_Push( X86RMI* src ) { 635 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 636 i->tag = Xin_Push; 637 i->Xin.Push.src = src; 638 return i; 639 } 640 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms, 641 RetLoc rloc ) { 642 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 643 i->tag = Xin_Call; 644 i->Xin.Call.cond = cond; 645 i->Xin.Call.target = target; 646 i->Xin.Call.regparms = regparms; 647 i->Xin.Call.rloc = rloc; 648 vassert(regparms >= 0 && regparms <= 3); 649 vassert(is_sane_RetLoc(rloc)); 650 return i; 651 } 652 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, 653 X86CondCode cond, Bool toFastEP ) { 654 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 655 i->tag = Xin_XDirect; 656 i->Xin.XDirect.dstGA = dstGA; 657 i->Xin.XDirect.amEIP = amEIP; 658 i->Xin.XDirect.cond = cond; 659 i->Xin.XDirect.toFastEP = toFastEP; 660 return i; 661 } 662 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, 663 X86CondCode cond ) { 664 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 665 i->tag = Xin_XIndir; 666 i->Xin.XIndir.dstGA = dstGA; 667 i->Xin.XIndir.amEIP = amEIP; 668 i->Xin.XIndir.cond = cond; 669 return i; 670 } 671 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, 672 X86CondCode cond, IRJumpKind jk ) { 673 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 674 i->tag = Xin_XAssisted; 675 i->Xin.XAssisted.dstGA = dstGA; 676 i->Xin.XAssisted.amEIP = amEIP; 677 i->Xin.XAssisted.cond = cond; 678 i->Xin.XAssisted.jk = jk; 679 return i; 680 } 681 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) { 682 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 683 i->tag = Xin_CMov32; 684 i->Xin.CMov32.cond = cond; 685 i->Xin.CMov32.src = src; 686 i->Xin.CMov32.dst = dst; 687 vassert(cond != Xcc_ALWAYS); 688 return i; 689 } 690 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, 691 X86AMode* src, HReg dst ) { 692 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 693 i->tag = Xin_LoadEX; 694 i->Xin.LoadEX.szSmall = szSmall; 695 i->Xin.LoadEX.syned = syned; 696 i->Xin.LoadEX.src = src; 697 i->Xin.LoadEX.dst = dst; 698 vassert(szSmall == 1 || szSmall == 2); 699 return i; 700 } 701 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { 702 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 703 i->tag = Xin_Store; 704 i->Xin.Store.sz = sz; 705 i->Xin.Store.src = src; 706 i->Xin.Store.dst = dst; 707 vassert(sz == 1 || sz == 2); 708 return i; 709 
} 710 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) { 711 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 712 i->tag = Xin_Set32; 713 i->Xin.Set32.cond = cond; 714 i->Xin.Set32.dst = dst; 715 return i; 716 } 717 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) { 718 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 719 i->tag = Xin_Bsfr32; 720 i->Xin.Bsfr32.isFwds = isFwds; 721 i->Xin.Bsfr32.src = src; 722 i->Xin.Bsfr32.dst = dst; 723 return i; 724 } 725 X86Instr* X86Instr_MFence ( UInt hwcaps ) { 726 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 727 i->tag = Xin_MFence; 728 i->Xin.MFence.hwcaps = hwcaps; 729 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT 730 |VEX_HWCAPS_X86_SSE1 731 |VEX_HWCAPS_X86_SSE2 732 |VEX_HWCAPS_X86_SSE3 733 |VEX_HWCAPS_X86_LZCNT))); 734 return i; 735 } 736 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) { 737 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 738 i->tag = Xin_ACAS; 739 i->Xin.ACAS.addr = addr; 740 i->Xin.ACAS.sz = sz; 741 vassert(sz == 4 || sz == 2 || sz == 1); 742 return i; 743 } 744 X86Instr* X86Instr_DACAS ( X86AMode* addr ) { 745 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 746 i->tag = Xin_DACAS; 747 i->Xin.DACAS.addr = addr; 748 return i; 749 } 750 751 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) { 752 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 753 i->tag = Xin_FpUnary; 754 i->Xin.FpUnary.op = op; 755 i->Xin.FpUnary.src = src; 756 i->Xin.FpUnary.dst = dst; 757 return i; 758 } 759 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) { 760 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 761 i->tag = Xin_FpBinary; 762 i->Xin.FpBinary.op = op; 763 i->Xin.FpBinary.srcL = srcL; 764 i->Xin.FpBinary.srcR = srcR; 765 i->Xin.FpBinary.dst = dst; 766 return i; 767 } 768 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) { 769 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 770 i->tag = Xin_FpLdSt; 771 i->Xin.FpLdSt.isLoad = isLoad; 772 i->Xin.FpLdSt.sz = sz; 773 i->Xin.FpLdSt.reg = reg; 774 i->Xin.FpLdSt.addr = addr; 775 vassert(sz == 4 || sz == 8 || sz == 10); 776 return i; 777 } 778 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, 779 HReg reg, X86AMode* addr ) { 780 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 781 i->tag = Xin_FpLdStI; 782 i->Xin.FpLdStI.isLoad = isLoad; 783 i->Xin.FpLdStI.sz = sz; 784 i->Xin.FpLdStI.reg = reg; 785 i->Xin.FpLdStI.addr = addr; 786 vassert(sz == 2 || sz == 4 || sz == 8); 787 return i; 788 } 789 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) { 790 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 791 i->tag = Xin_Fp64to32; 792 i->Xin.Fp64to32.src = src; 793 i->Xin.Fp64to32.dst = dst; 794 return i; 795 } 796 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) { 797 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 798 i->tag = Xin_FpCMov; 799 i->Xin.FpCMov.cond = cond; 800 i->Xin.FpCMov.src = src; 801 i->Xin.FpCMov.dst = dst; 802 vassert(cond != Xcc_ALWAYS); 803 return i; 804 } 805 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) { 806 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 807 i->tag = Xin_FpLdCW; 808 i->Xin.FpLdCW.addr = addr; 809 return i; 810 } 811 X86Instr* X86Instr_FpStSW_AX ( void ) { 812 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 813 i->tag = Xin_FpStSW_AX; 814 return i; 815 } 816 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) { 817 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 818 i->tag = Xin_FpCmp; 
819 i->Xin.FpCmp.srcL = srcL; 820 i->Xin.FpCmp.srcR = srcR; 821 i->Xin.FpCmp.dst = dst; 822 return i; 823 } 824 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) { 825 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 826 i->tag = Xin_SseConst; 827 i->Xin.SseConst.con = con; 828 i->Xin.SseConst.dst = dst; 829 vassert(hregClass(dst) == HRcVec128); 830 return i; 831 } 832 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) { 833 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 834 i->tag = Xin_SseLdSt; 835 i->Xin.SseLdSt.isLoad = isLoad; 836 i->Xin.SseLdSt.reg = reg; 837 i->Xin.SseLdSt.addr = addr; 838 return i; 839 } 840 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr ) 841 { 842 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 843 i->tag = Xin_SseLdzLO; 844 i->Xin.SseLdzLO.sz = toUChar(sz); 845 i->Xin.SseLdzLO.reg = reg; 846 i->Xin.SseLdzLO.addr = addr; 847 vassert(sz == 4 || sz == 8); 848 return i; 849 } 850 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) { 851 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 852 i->tag = Xin_Sse32Fx4; 853 i->Xin.Sse32Fx4.op = op; 854 i->Xin.Sse32Fx4.src = src; 855 i->Xin.Sse32Fx4.dst = dst; 856 vassert(op != Xsse_MOV); 857 return i; 858 } 859 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) { 860 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 861 i->tag = Xin_Sse32FLo; 862 i->Xin.Sse32FLo.op = op; 863 i->Xin.Sse32FLo.src = src; 864 i->Xin.Sse32FLo.dst = dst; 865 vassert(op != Xsse_MOV); 866 return i; 867 } 868 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) { 869 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 870 i->tag = Xin_Sse64Fx2; 871 i->Xin.Sse64Fx2.op = op; 872 i->Xin.Sse64Fx2.src = src; 873 i->Xin.Sse64Fx2.dst = dst; 874 vassert(op != Xsse_MOV); 875 return i; 876 } 877 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) { 878 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 879 i->tag = Xin_Sse64FLo; 880 i->Xin.Sse64FLo.op = op; 881 i->Xin.Sse64FLo.src = src; 882 i->Xin.Sse64FLo.dst = dst; 883 vassert(op != Xsse_MOV); 884 return i; 885 } 886 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) { 887 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 888 i->tag = Xin_SseReRg; 889 i->Xin.SseReRg.op = op; 890 i->Xin.SseReRg.src = re; 891 i->Xin.SseReRg.dst = rg; 892 return i; 893 } 894 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) { 895 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 896 i->tag = Xin_SseCMov; 897 i->Xin.SseCMov.cond = cond; 898 i->Xin.SseCMov.src = src; 899 i->Xin.SseCMov.dst = dst; 900 vassert(cond != Xcc_ALWAYS); 901 return i; 902 } 903 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) { 904 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 905 i->tag = Xin_SseShuf; 906 i->Xin.SseShuf.order = order; 907 i->Xin.SseShuf.src = src; 908 i->Xin.SseShuf.dst = dst; 909 vassert(order >= 0 && order <= 0xFF); 910 return i; 911 } 912 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, 913 X86AMode* amFailAddr ) { 914 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 915 i->tag = Xin_EvCheck; 916 i->Xin.EvCheck.amCounter = amCounter; 917 i->Xin.EvCheck.amFailAddr = amFailAddr; 918 return i; 919 } 920 X86Instr* X86Instr_ProfInc ( void ) { 921 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 922 i->tag = Xin_ProfInc; 923 return i; 924 } 925 926 void ppX86Instr ( const X86Instr* i, Bool mode64 ) { 927 vassert(mode64 == False); 928 switch (i->tag) { 929 case 
Xin_Alu32R: 930 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op)); 931 ppX86RMI(i->Xin.Alu32R.src); 932 vex_printf(","); 933 ppHRegX86(i->Xin.Alu32R.dst); 934 return; 935 case Xin_Alu32M: 936 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op)); 937 ppX86RI(i->Xin.Alu32M.src); 938 vex_printf(","); 939 ppX86AMode(i->Xin.Alu32M.dst); 940 return; 941 case Xin_Sh32: 942 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op)); 943 if (i->Xin.Sh32.src == 0) 944 vex_printf("%%cl,"); 945 else 946 vex_printf("$%d,", (Int)i->Xin.Sh32.src); 947 ppHRegX86(i->Xin.Sh32.dst); 948 return; 949 case Xin_Test32: 950 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32); 951 ppX86RM(i->Xin.Test32.dst); 952 return; 953 case Xin_Unary32: 954 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op)); 955 ppHRegX86(i->Xin.Unary32.dst); 956 return; 957 case Xin_Lea32: 958 vex_printf("leal "); 959 ppX86AMode(i->Xin.Lea32.am); 960 vex_printf(","); 961 ppHRegX86(i->Xin.Lea32.dst); 962 return; 963 case Xin_MulL: 964 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u'); 965 ppX86RM(i->Xin.MulL.src); 966 return; 967 case Xin_Div: 968 vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u'); 969 ppX86RM(i->Xin.Div.src); 970 return; 971 case Xin_Sh3232: 972 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op)); 973 if (i->Xin.Sh3232.amt == 0) 974 vex_printf(" %%cl,"); 975 else 976 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt); 977 ppHRegX86(i->Xin.Sh3232.src); 978 vex_printf(","); 979 ppHRegX86(i->Xin.Sh3232.dst); 980 return; 981 case Xin_Push: 982 vex_printf("pushl "); 983 ppX86RMI(i->Xin.Push.src); 984 return; 985 case Xin_Call: 986 vex_printf("call%s[%d,", 987 i->Xin.Call.cond==Xcc_ALWAYS 988 ? "" : showX86CondCode(i->Xin.Call.cond), 989 i->Xin.Call.regparms); 990 ppRetLoc(i->Xin.Call.rloc); 991 vex_printf("] 0x%x", i->Xin.Call.target); 992 break; 993 case Xin_XDirect: 994 vex_printf("(xDirect) "); 995 vex_printf("if (%%eflags.%s) { ", 996 showX86CondCode(i->Xin.XDirect.cond)); 997 vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA); 998 ppX86AMode(i->Xin.XDirect.amEIP); 999 vex_printf("; "); 1000 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }", 1001 i->Xin.XDirect.toFastEP ? "fast" : "slow"); 1002 return; 1003 case Xin_XIndir: 1004 vex_printf("(xIndir) "); 1005 vex_printf("if (%%eflags.%s) { movl ", 1006 showX86CondCode(i->Xin.XIndir.cond)); 1007 ppHRegX86(i->Xin.XIndir.dstGA); 1008 vex_printf(","); 1009 ppX86AMode(i->Xin.XIndir.amEIP); 1010 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }"); 1011 return; 1012 case Xin_XAssisted: 1013 vex_printf("(xAssisted) "); 1014 vex_printf("if (%%eflags.%s) { ", 1015 showX86CondCode(i->Xin.XAssisted.cond)); 1016 vex_printf("movl "); 1017 ppHRegX86(i->Xin.XAssisted.dstGA); 1018 vex_printf(","); 1019 ppX86AMode(i->Xin.XAssisted.amEIP); 1020 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp", 1021 (Int)i->Xin.XAssisted.jk); 1022 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }"); 1023 return; 1024 case Xin_CMov32: 1025 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond)); 1026 ppX86RM(i->Xin.CMov32.src); 1027 vex_printf(","); 1028 ppHRegX86(i->Xin.CMov32.dst); 1029 return; 1030 case Xin_LoadEX: 1031 vex_printf("mov%c%cl ", 1032 i->Xin.LoadEX.syned ? 's' : 'z', 1033 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w'); 1034 ppX86AMode(i->Xin.LoadEX.src); 1035 vex_printf(","); 1036 ppHRegX86(i->Xin.LoadEX.dst); 1037 return; 1038 case Xin_Store: 1039 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 
'b' : 'w'); 1040 ppHRegX86(i->Xin.Store.src); 1041 vex_printf(","); 1042 ppX86AMode(i->Xin.Store.dst); 1043 return; 1044 case Xin_Set32: 1045 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond)); 1046 ppHRegX86(i->Xin.Set32.dst); 1047 return; 1048 case Xin_Bsfr32: 1049 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r'); 1050 ppHRegX86(i->Xin.Bsfr32.src); 1051 vex_printf(","); 1052 ppHRegX86(i->Xin.Bsfr32.dst); 1053 return; 1054 case Xin_MFence: 1055 vex_printf("mfence(%s)", 1056 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps)); 1057 return; 1058 case Xin_ACAS: 1059 vex_printf("lock cmpxchg%c ", 1060 i->Xin.ACAS.sz==1 ? 'b' 1061 : i->Xin.ACAS.sz==2 ? 'w' : 'l'); 1062 vex_printf("{%%eax->%%ebx},"); 1063 ppX86AMode(i->Xin.ACAS.addr); 1064 return; 1065 case Xin_DACAS: 1066 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},"); 1067 ppX86AMode(i->Xin.DACAS.addr); 1068 return; 1069 case Xin_FpUnary: 1070 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op)); 1071 ppHRegX86(i->Xin.FpUnary.src); 1072 vex_printf(","); 1073 ppHRegX86(i->Xin.FpUnary.dst); 1074 break; 1075 case Xin_FpBinary: 1076 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op)); 1077 ppHRegX86(i->Xin.FpBinary.srcL); 1078 vex_printf(","); 1079 ppHRegX86(i->Xin.FpBinary.srcR); 1080 vex_printf(","); 1081 ppHRegX86(i->Xin.FpBinary.dst); 1082 break; 1083 case Xin_FpLdSt: 1084 if (i->Xin.FpLdSt.isLoad) { 1085 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T' 1086 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); 1087 ppX86AMode(i->Xin.FpLdSt.addr); 1088 vex_printf(", "); 1089 ppHRegX86(i->Xin.FpLdSt.reg); 1090 } else { 1091 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T' 1092 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); 1093 ppHRegX86(i->Xin.FpLdSt.reg); 1094 vex_printf(", "); 1095 ppX86AMode(i->Xin.FpLdSt.addr); 1096 } 1097 return; 1098 case Xin_FpLdStI: 1099 if (i->Xin.FpLdStI.isLoad) { 1100 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1101 i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1102 ppX86AMode(i->Xin.FpLdStI.addr); 1103 vex_printf(", "); 1104 ppHRegX86(i->Xin.FpLdStI.reg); 1105 } else { 1106 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1107 i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1108 ppHRegX86(i->Xin.FpLdStI.reg); 1109 vex_printf(", "); 1110 ppX86AMode(i->Xin.FpLdStI.addr); 1111 } 1112 return; 1113 case Xin_Fp64to32: 1114 vex_printf("gdtof "); 1115 ppHRegX86(i->Xin.Fp64to32.src); 1116 vex_printf(","); 1117 ppHRegX86(i->Xin.Fp64to32.dst); 1118 return; 1119 case Xin_FpCMov: 1120 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond)); 1121 ppHRegX86(i->Xin.FpCMov.src); 1122 vex_printf(","); 1123 ppHRegX86(i->Xin.FpCMov.dst); 1124 return; 1125 case Xin_FpLdCW: 1126 vex_printf("fldcw "); 1127 ppX86AMode(i->Xin.FpLdCW.addr); 1128 return; 1129 case Xin_FpStSW_AX: 1130 vex_printf("fstsw %%ax"); 1131 return; 1132 case Xin_FpCmp: 1133 vex_printf("gcmp "); 1134 ppHRegX86(i->Xin.FpCmp.srcL); 1135 vex_printf(","); 1136 ppHRegX86(i->Xin.FpCmp.srcR); 1137 vex_printf(","); 1138 ppHRegX86(i->Xin.FpCmp.dst); 1139 break; 1140 case Xin_SseConst: 1141 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); 1142 ppHRegX86(i->Xin.SseConst.dst); 1143 break; 1144 case Xin_SseLdSt: 1145 vex_printf("movups "); 1146 if (i->Xin.SseLdSt.isLoad) { 1147 ppX86AMode(i->Xin.SseLdSt.addr); 1148 vex_printf(","); 1149 ppHRegX86(i->Xin.SseLdSt.reg); 1150 } else { 1151 ppHRegX86(i->Xin.SseLdSt.reg); 1152 vex_printf(","); 1153 ppX86AMode(i->Xin.SseLdSt.addr); 1154 } 1155 return; 1156 case Xin_SseLdzLO: 1157 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? 
"s" : "d"); 1158 ppX86AMode(i->Xin.SseLdzLO.addr); 1159 vex_printf(","); 1160 ppHRegX86(i->Xin.SseLdzLO.reg); 1161 return; 1162 case Xin_Sse32Fx4: 1163 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op)); 1164 ppHRegX86(i->Xin.Sse32Fx4.src); 1165 vex_printf(","); 1166 ppHRegX86(i->Xin.Sse32Fx4.dst); 1167 return; 1168 case Xin_Sse32FLo: 1169 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op)); 1170 ppHRegX86(i->Xin.Sse32FLo.src); 1171 vex_printf(","); 1172 ppHRegX86(i->Xin.Sse32FLo.dst); 1173 return; 1174 case Xin_Sse64Fx2: 1175 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op)); 1176 ppHRegX86(i->Xin.Sse64Fx2.src); 1177 vex_printf(","); 1178 ppHRegX86(i->Xin.Sse64Fx2.dst); 1179 return; 1180 case Xin_Sse64FLo: 1181 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op)); 1182 ppHRegX86(i->Xin.Sse64FLo.src); 1183 vex_printf(","); 1184 ppHRegX86(i->Xin.Sse64FLo.dst); 1185 return; 1186 case Xin_SseReRg: 1187 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op)); 1188 ppHRegX86(i->Xin.SseReRg.src); 1189 vex_printf(","); 1190 ppHRegX86(i->Xin.SseReRg.dst); 1191 return; 1192 case Xin_SseCMov: 1193 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond)); 1194 ppHRegX86(i->Xin.SseCMov.src); 1195 vex_printf(","); 1196 ppHRegX86(i->Xin.SseCMov.dst); 1197 return; 1198 case Xin_SseShuf: 1199 vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order); 1200 ppHRegX86(i->Xin.SseShuf.src); 1201 vex_printf(","); 1202 ppHRegX86(i->Xin.SseShuf.dst); 1203 return; 1204 case Xin_EvCheck: 1205 vex_printf("(evCheck) decl "); 1206 ppX86AMode(i->Xin.EvCheck.amCounter); 1207 vex_printf("; jns nofail; jmp *"); 1208 ppX86AMode(i->Xin.EvCheck.amFailAddr); 1209 vex_printf("; nofail:"); 1210 return; 1211 case Xin_ProfInc: 1212 vex_printf("(profInc) addl $1,NotKnownYet; " 1213 "adcl $0,NotKnownYet+4"); 1214 return; 1215 default: 1216 vpanic("ppX86Instr"); 1217 } 1218 } 1219 1220 /* --------- Helpers for register allocation. 
--------- */ 1221 1222 void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) 1223 { 1224 Bool unary; 1225 vassert(mode64 == False); 1226 initHRegUsage(u); 1227 switch (i->tag) { 1228 case Xin_Alu32R: 1229 addRegUsage_X86RMI(u, i->Xin.Alu32R.src); 1230 if (i->Xin.Alu32R.op == Xalu_MOV) { 1231 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst); 1232 return; 1233 } 1234 if (i->Xin.Alu32R.op == Xalu_CMP) { 1235 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst); 1236 return; 1237 } 1238 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst); 1239 return; 1240 case Xin_Alu32M: 1241 addRegUsage_X86RI(u, i->Xin.Alu32M.src); 1242 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst); 1243 return; 1244 case Xin_Sh32: 1245 addHRegUse(u, HRmModify, i->Xin.Sh32.dst); 1246 if (i->Xin.Sh32.src == 0) 1247 addHRegUse(u, HRmRead, hregX86_ECX()); 1248 return; 1249 case Xin_Test32: 1250 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead); 1251 return; 1252 case Xin_Unary32: 1253 addHRegUse(u, HRmModify, i->Xin.Unary32.dst); 1254 return; 1255 case Xin_Lea32: 1256 addRegUsage_X86AMode(u, i->Xin.Lea32.am); 1257 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst); 1258 return; 1259 case Xin_MulL: 1260 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead); 1261 addHRegUse(u, HRmModify, hregX86_EAX()); 1262 addHRegUse(u, HRmWrite, hregX86_EDX()); 1263 return; 1264 case Xin_Div: 1265 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead); 1266 addHRegUse(u, HRmModify, hregX86_EAX()); 1267 addHRegUse(u, HRmModify, hregX86_EDX()); 1268 return; 1269 case Xin_Sh3232: 1270 addHRegUse(u, HRmRead, i->Xin.Sh3232.src); 1271 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); 1272 if (i->Xin.Sh3232.amt == 0) 1273 addHRegUse(u, HRmRead, hregX86_ECX()); 1274 return; 1275 case Xin_Push: 1276 addRegUsage_X86RMI(u, i->Xin.Push.src); 1277 addHRegUse(u, HRmModify, hregX86_ESP()); 1278 return; 1279 case Xin_Call: 1280 /* This is a bit subtle. */ 1281 /* First off, claim it trashes all the caller-saved regs 1282 which fall within the register allocator's jurisdiction. 1283 These I believe to be %eax %ecx %edx and all the xmm 1284 registers. */ 1285 addHRegUse(u, HRmWrite, hregX86_EAX()); 1286 addHRegUse(u, HRmWrite, hregX86_ECX()); 1287 addHRegUse(u, HRmWrite, hregX86_EDX()); 1288 addHRegUse(u, HRmWrite, hregX86_XMM0()); 1289 addHRegUse(u, HRmWrite, hregX86_XMM1()); 1290 addHRegUse(u, HRmWrite, hregX86_XMM2()); 1291 addHRegUse(u, HRmWrite, hregX86_XMM3()); 1292 addHRegUse(u, HRmWrite, hregX86_XMM4()); 1293 addHRegUse(u, HRmWrite, hregX86_XMM5()); 1294 addHRegUse(u, HRmWrite, hregX86_XMM6()); 1295 addHRegUse(u, HRmWrite, hregX86_XMM7()); 1296 /* Now we have to state any parameter-carrying registers 1297 which might be read. This depends on the regparmness. */ 1298 switch (i->Xin.Call.regparms) { 1299 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/ 1300 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/ 1301 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break; 1302 case 0: break; 1303 default: vpanic("getRegUsage_X86Instr:Call:regparms"); 1304 } 1305 /* Finally, there is the issue that the insn trashes a 1306 register because the literal target address has to be 1307 loaded into a register. Fortunately, for the 0/1/2 1308 regparm case, we can use EAX, EDX and ECX respectively, so 1309 this does not cause any further damage. For the 3-regparm 1310 case, we'll have to choose another register arbitrarily -- 1311 since A, D and C are used for parameters -- and so we might 1312 as well choose EDI. 
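         Since EDI is in the allocatable set, declaring the write here
         (done just below) is what tells the allocator that any value it
         had in EDI is clobbered by such a call.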
*/ 1313 if (i->Xin.Call.regparms == 3) 1314 addHRegUse(u, HRmWrite, hregX86_EDI()); 1315 /* Upshot of this is that the assembler really must observe 1316 the here-stated convention of which register to use as an 1317 address temporary, depending on the regparmness: 0==EAX, 1318 1==EDX, 2==ECX, 3==EDI. */ 1319 return; 1320 /* XDirect/XIndir/XAssisted are also a bit subtle. They 1321 conditionally exit the block. Hence we only need to list (1) 1322 the registers that they read, and (2) the registers that they 1323 write in the case where the block is not exited. (2) is 1324 empty, hence only (1) is relevant here. */ 1325 case Xin_XDirect: 1326 addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP); 1327 return; 1328 case Xin_XIndir: 1329 addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA); 1330 addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP); 1331 return; 1332 case Xin_XAssisted: 1333 addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA); 1334 addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP); 1335 return; 1336 case Xin_CMov32: 1337 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead); 1338 addHRegUse(u, HRmModify, i->Xin.CMov32.dst); 1339 return; 1340 case Xin_LoadEX: 1341 addRegUsage_X86AMode(u, i->Xin.LoadEX.src); 1342 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst); 1343 return; 1344 case Xin_Store: 1345 addHRegUse(u, HRmRead, i->Xin.Store.src); 1346 addRegUsage_X86AMode(u, i->Xin.Store.dst); 1347 return; 1348 case Xin_Set32: 1349 addHRegUse(u, HRmWrite, i->Xin.Set32.dst); 1350 return; 1351 case Xin_Bsfr32: 1352 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src); 1353 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst); 1354 return; 1355 case Xin_MFence: 1356 return; 1357 case Xin_ACAS: 1358 addRegUsage_X86AMode(u, i->Xin.ACAS.addr); 1359 addHRegUse(u, HRmRead, hregX86_EBX()); 1360 addHRegUse(u, HRmModify, hregX86_EAX()); 1361 return; 1362 case Xin_DACAS: 1363 addRegUsage_X86AMode(u, i->Xin.DACAS.addr); 1364 addHRegUse(u, HRmRead, hregX86_ECX()); 1365 addHRegUse(u, HRmRead, hregX86_EBX()); 1366 addHRegUse(u, HRmModify, hregX86_EDX()); 1367 addHRegUse(u, HRmModify, hregX86_EAX()); 1368 return; 1369 case Xin_FpUnary: 1370 addHRegUse(u, HRmRead, i->Xin.FpUnary.src); 1371 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); 1372 return; 1373 case Xin_FpBinary: 1374 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); 1375 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); 1376 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); 1377 return; 1378 case Xin_FpLdSt: 1379 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr); 1380 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, 1381 i->Xin.FpLdSt.reg); 1382 return; 1383 case Xin_FpLdStI: 1384 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr); 1385 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, 1386 i->Xin.FpLdStI.reg); 1387 return; 1388 case Xin_Fp64to32: 1389 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); 1390 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); 1391 return; 1392 case Xin_FpCMov: 1393 addHRegUse(u, HRmRead, i->Xin.FpCMov.src); 1394 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); 1395 return; 1396 case Xin_FpLdCW: 1397 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr); 1398 return; 1399 case Xin_FpStSW_AX: 1400 addHRegUse(u, HRmWrite, hregX86_EAX()); 1401 return; 1402 case Xin_FpCmp: 1403 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL); 1404 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR); 1405 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst); 1406 addHRegUse(u, HRmWrite, hregX86_EAX()); 1407 return; 1408 case Xin_SseLdSt: 1409 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr); 1410 addHRegUse(u, i->Xin.SseLdSt.isLoad ? 
HRmWrite : HRmRead, 1411 i->Xin.SseLdSt.reg); 1412 return; 1413 case Xin_SseLdzLO: 1414 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr); 1415 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg); 1416 return; 1417 case Xin_SseConst: 1418 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); 1419 return; 1420 case Xin_Sse32Fx4: 1421 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV); 1422 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF 1423 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF 1424 || i->Xin.Sse32Fx4.op == Xsse_SQRTF ); 1425 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src); 1426 addHRegUse(u, unary ? HRmWrite : HRmModify, 1427 i->Xin.Sse32Fx4.dst); 1428 return; 1429 case Xin_Sse32FLo: 1430 vassert(i->Xin.Sse32FLo.op != Xsse_MOV); 1431 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF 1432 || i->Xin.Sse32FLo.op == Xsse_RSQRTF 1433 || i->Xin.Sse32FLo.op == Xsse_SQRTF ); 1434 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src); 1435 addHRegUse(u, unary ? HRmWrite : HRmModify, 1436 i->Xin.Sse32FLo.dst); 1437 return; 1438 case Xin_Sse64Fx2: 1439 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV); 1440 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF 1441 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF 1442 || i->Xin.Sse64Fx2.op == Xsse_SQRTF ); 1443 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src); 1444 addHRegUse(u, unary ? HRmWrite : HRmModify, 1445 i->Xin.Sse64Fx2.dst); 1446 return; 1447 case Xin_Sse64FLo: 1448 vassert(i->Xin.Sse64FLo.op != Xsse_MOV); 1449 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF 1450 || i->Xin.Sse64FLo.op == Xsse_RSQRTF 1451 || i->Xin.Sse64FLo.op == Xsse_SQRTF ); 1452 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src); 1453 addHRegUse(u, unary ? HRmWrite : HRmModify, 1454 i->Xin.Sse64FLo.dst); 1455 return; 1456 case Xin_SseReRg: 1457 if (i->Xin.SseReRg.op == Xsse_XOR 1458 && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) { 1459 /* reg-alloc needs to understand 'xor r,r' as a write of r */ 1460 /* (as opposed to a rite of passage :-) */ 1461 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst); 1462 } else { 1463 addHRegUse(u, HRmRead, i->Xin.SseReRg.src); 1464 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV 1465 ? HRmWrite : HRmModify, 1466 i->Xin.SseReRg.dst); 1467 } 1468 return; 1469 case Xin_SseCMov: 1470 addHRegUse(u, HRmRead, i->Xin.SseCMov.src); 1471 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst); 1472 return; 1473 case Xin_SseShuf: 1474 addHRegUse(u, HRmRead, i->Xin.SseShuf.src); 1475 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst); 1476 return; 1477 case Xin_EvCheck: 1478 /* We expect both amodes only to mention %ebp, so this is in 1479 fact pointless, since %ebp isn't allocatable, but anyway.. */ 1480 addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter); 1481 addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr); 1482 return; 1483 case Xin_ProfInc: 1484 /* does not use any registers. 
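            The 64-bit counter it bumps is at an address that is not yet
            known here, which is why ppX86Instr above prints it as
            NotKnownYet.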
*/ 1485 return; 1486 default: 1487 ppX86Instr(i, False); 1488 vpanic("getRegUsage_X86Instr"); 1489 } 1490 } 1491 1492 /* local helper */ 1493 static void mapReg( HRegRemap* m, HReg* r ) 1494 { 1495 *r = lookupHRegRemap(m, *r); 1496 } 1497 1498 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) 1499 { 1500 vassert(mode64 == False); 1501 switch (i->tag) { 1502 case Xin_Alu32R: 1503 mapRegs_X86RMI(m, i->Xin.Alu32R.src); 1504 mapReg(m, &i->Xin.Alu32R.dst); 1505 return; 1506 case Xin_Alu32M: 1507 mapRegs_X86RI(m, i->Xin.Alu32M.src); 1508 mapRegs_X86AMode(m, i->Xin.Alu32M.dst); 1509 return; 1510 case Xin_Sh32: 1511 mapReg(m, &i->Xin.Sh32.dst); 1512 return; 1513 case Xin_Test32: 1514 mapRegs_X86RM(m, i->Xin.Test32.dst); 1515 return; 1516 case Xin_Unary32: 1517 mapReg(m, &i->Xin.Unary32.dst); 1518 return; 1519 case Xin_Lea32: 1520 mapRegs_X86AMode(m, i->Xin.Lea32.am); 1521 mapReg(m, &i->Xin.Lea32.dst); 1522 return; 1523 case Xin_MulL: 1524 mapRegs_X86RM(m, i->Xin.MulL.src); 1525 return; 1526 case Xin_Div: 1527 mapRegs_X86RM(m, i->Xin.Div.src); 1528 return; 1529 case Xin_Sh3232: 1530 mapReg(m, &i->Xin.Sh3232.src); 1531 mapReg(m, &i->Xin.Sh3232.dst); 1532 return; 1533 case Xin_Push: 1534 mapRegs_X86RMI(m, i->Xin.Push.src); 1535 return; 1536 case Xin_Call: 1537 return; 1538 case Xin_XDirect: 1539 mapRegs_X86AMode(m, i->Xin.XDirect.amEIP); 1540 return; 1541 case Xin_XIndir: 1542 mapReg(m, &i->Xin.XIndir.dstGA); 1543 mapRegs_X86AMode(m, i->Xin.XIndir.amEIP); 1544 return; 1545 case Xin_XAssisted: 1546 mapReg(m, &i->Xin.XAssisted.dstGA); 1547 mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP); 1548 return; 1549 case Xin_CMov32: 1550 mapRegs_X86RM(m, i->Xin.CMov32.src); 1551 mapReg(m, &i->Xin.CMov32.dst); 1552 return; 1553 case Xin_LoadEX: 1554 mapRegs_X86AMode(m, i->Xin.LoadEX.src); 1555 mapReg(m, &i->Xin.LoadEX.dst); 1556 return; 1557 case Xin_Store: 1558 mapReg(m, &i->Xin.Store.src); 1559 mapRegs_X86AMode(m, i->Xin.Store.dst); 1560 return; 1561 case Xin_Set32: 1562 mapReg(m, &i->Xin.Set32.dst); 1563 return; 1564 case Xin_Bsfr32: 1565 mapReg(m, &i->Xin.Bsfr32.src); 1566 mapReg(m, &i->Xin.Bsfr32.dst); 1567 return; 1568 case Xin_MFence: 1569 return; 1570 case Xin_ACAS: 1571 mapRegs_X86AMode(m, i->Xin.ACAS.addr); 1572 return; 1573 case Xin_DACAS: 1574 mapRegs_X86AMode(m, i->Xin.DACAS.addr); 1575 return; 1576 case Xin_FpUnary: 1577 mapReg(m, &i->Xin.FpUnary.src); 1578 mapReg(m, &i->Xin.FpUnary.dst); 1579 return; 1580 case Xin_FpBinary: 1581 mapReg(m, &i->Xin.FpBinary.srcL); 1582 mapReg(m, &i->Xin.FpBinary.srcR); 1583 mapReg(m, &i->Xin.FpBinary.dst); 1584 return; 1585 case Xin_FpLdSt: 1586 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr); 1587 mapReg(m, &i->Xin.FpLdSt.reg); 1588 return; 1589 case Xin_FpLdStI: 1590 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr); 1591 mapReg(m, &i->Xin.FpLdStI.reg); 1592 return; 1593 case Xin_Fp64to32: 1594 mapReg(m, &i->Xin.Fp64to32.src); 1595 mapReg(m, &i->Xin.Fp64to32.dst); 1596 return; 1597 case Xin_FpCMov: 1598 mapReg(m, &i->Xin.FpCMov.src); 1599 mapReg(m, &i->Xin.FpCMov.dst); 1600 return; 1601 case Xin_FpLdCW: 1602 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr); 1603 return; 1604 case Xin_FpStSW_AX: 1605 return; 1606 case Xin_FpCmp: 1607 mapReg(m, &i->Xin.FpCmp.srcL); 1608 mapReg(m, &i->Xin.FpCmp.srcR); 1609 mapReg(m, &i->Xin.FpCmp.dst); 1610 return; 1611 case Xin_SseConst: 1612 mapReg(m, &i->Xin.SseConst.dst); 1613 return; 1614 case Xin_SseLdSt: 1615 mapReg(m, &i->Xin.SseLdSt.reg); 1616 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr); 1617 break; 1618 case Xin_SseLdzLO: 1619 mapReg(m, 
&i->Xin.SseLdzLO.reg); 1620 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr); 1621 break; 1622 case Xin_Sse32Fx4: 1623 mapReg(m, &i->Xin.Sse32Fx4.src); 1624 mapReg(m, &i->Xin.Sse32Fx4.dst); 1625 return; 1626 case Xin_Sse32FLo: 1627 mapReg(m, &i->Xin.Sse32FLo.src); 1628 mapReg(m, &i->Xin.Sse32FLo.dst); 1629 return; 1630 case Xin_Sse64Fx2: 1631 mapReg(m, &i->Xin.Sse64Fx2.src); 1632 mapReg(m, &i->Xin.Sse64Fx2.dst); 1633 return; 1634 case Xin_Sse64FLo: 1635 mapReg(m, &i->Xin.Sse64FLo.src); 1636 mapReg(m, &i->Xin.Sse64FLo.dst); 1637 return; 1638 case Xin_SseReRg: 1639 mapReg(m, &i->Xin.SseReRg.src); 1640 mapReg(m, &i->Xin.SseReRg.dst); 1641 return; 1642 case Xin_SseCMov: 1643 mapReg(m, &i->Xin.SseCMov.src); 1644 mapReg(m, &i->Xin.SseCMov.dst); 1645 return; 1646 case Xin_SseShuf: 1647 mapReg(m, &i->Xin.SseShuf.src); 1648 mapReg(m, &i->Xin.SseShuf.dst); 1649 return; 1650 case Xin_EvCheck: 1651 /* We expect both amodes only to mention %ebp, so this is in 1652 fact pointless, since %ebp isn't allocatable, but anyway.. */ 1653 mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter); 1654 mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr); 1655 return; 1656 case Xin_ProfInc: 1657 /* does not use any registers. */ 1658 return; 1659 1660 default: 1661 ppX86Instr(i, mode64); 1662 vpanic("mapRegs_X86Instr"); 1663 } 1664 } 1665 1666 /* Figure out if i represents a reg-reg move, and if so assign the 1667 source and destination to *src and *dst. If in doubt say No. Used 1668 by the register allocator to do move coalescing. 1669 */ 1670 Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst ) 1671 { 1672 /* Moves between integer regs */ 1673 if (i->tag == Xin_Alu32R) { 1674 if (i->Xin.Alu32R.op != Xalu_MOV) 1675 return False; 1676 if (i->Xin.Alu32R.src->tag != Xrmi_Reg) 1677 return False; 1678 *src = i->Xin.Alu32R.src->Xrmi.Reg.reg; 1679 *dst = i->Xin.Alu32R.dst; 1680 return True; 1681 } 1682 /* Moves between FP regs */ 1683 if (i->tag == Xin_FpUnary) { 1684 if (i->Xin.FpUnary.op != Xfp_MOV) 1685 return False; 1686 *src = i->Xin.FpUnary.src; 1687 *dst = i->Xin.FpUnary.dst; 1688 return True; 1689 } 1690 if (i->tag == Xin_SseReRg) { 1691 if (i->Xin.SseReRg.op != Xsse_MOV) 1692 return False; 1693 *src = i->Xin.SseReRg.src; 1694 *dst = i->Xin.SseReRg.dst; 1695 return True; 1696 } 1697 return False; 1698 } 1699 1700 1701 /* Generate x86 spill/reload instructions under the direction of the 1702 register allocator. Note it's critical these don't write the 1703 condition codes. 
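   All three register classes below are parked in an %ebp-relative slot
   using plain moves (movl, fld/fstp, movups), none of which touch
   %eflags.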
*/ 1704 1705 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1706 HReg rreg, Int offsetB, Bool mode64 ) 1707 { 1708 X86AMode* am; 1709 vassert(offsetB >= 0); 1710 vassert(!hregIsVirtual(rreg)); 1711 vassert(mode64 == False); 1712 *i1 = *i2 = NULL; 1713 am = X86AMode_IR(offsetB, hregX86_EBP()); 1714 switch (hregClass(rreg)) { 1715 case HRcInt32: 1716 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am ); 1717 return; 1718 case HRcFlt64: 1719 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am ); 1720 return; 1721 case HRcVec128: 1722 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am ); 1723 return; 1724 default: 1725 ppHRegClass(hregClass(rreg)); 1726 vpanic("genSpill_X86: unimplemented regclass"); 1727 } 1728 } 1729 1730 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1731 HReg rreg, Int offsetB, Bool mode64 ) 1732 { 1733 X86AMode* am; 1734 vassert(offsetB >= 0); 1735 vassert(!hregIsVirtual(rreg)); 1736 vassert(mode64 == False); 1737 *i1 = *i2 = NULL; 1738 am = X86AMode_IR(offsetB, hregX86_EBP()); 1739 switch (hregClass(rreg)) { 1740 case HRcInt32: 1741 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg ); 1742 return; 1743 case HRcFlt64: 1744 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am ); 1745 return; 1746 case HRcVec128: 1747 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am ); 1748 return; 1749 default: 1750 ppHRegClass(hregClass(rreg)); 1751 vpanic("genReload_X86: unimplemented regclass"); 1752 } 1753 } 1754 1755 /* The given instruction reads the specified vreg exactly once, and 1756 that vreg is currently located at the given spill offset. If 1757 possible, return a variant of the instruction to one which instead 1758 references the spill slot directly. */ 1759 1760 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off ) 1761 { 1762 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ 1763 1764 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg 1765 Convert to: src=RMI_Mem, dst=Reg 1766 */ 1767 if (i->tag == Xin_Alu32R 1768 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR 1769 || i->Xin.Alu32R.op == Xalu_XOR) 1770 && i->Xin.Alu32R.src->tag == Xrmi_Reg 1771 && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) { 1772 vassert(! sameHReg(i->Xin.Alu32R.dst, vreg)); 1773 return X86Instr_Alu32R( 1774 i->Xin.Alu32R.op, 1775 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())), 1776 i->Xin.Alu32R.dst 1777 ); 1778 } 1779 1780 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg 1781 Convert to: src=RI_Imm, dst=Mem 1782 */ 1783 if (i->tag == Xin_Alu32R 1784 && (i->Xin.Alu32R.op == Xalu_CMP) 1785 && i->Xin.Alu32R.src->tag == Xrmi_Imm 1786 && sameHReg(i->Xin.Alu32R.dst, vreg)) { 1787 return X86Instr_Alu32M( 1788 i->Xin.Alu32R.op, 1789 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ), 1790 X86AMode_IR( spill_off, hregX86_EBP()) 1791 ); 1792 } 1793 1794 /* Deal with form: Push(RMI_Reg) 1795 Convert to: Push(RMI_Mem) 1796 */ 1797 if (i->tag == Xin_Push 1798 && i->Xin.Push.src->tag == Xrmi_Reg 1799 && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) { 1800 return X86Instr_Push( 1801 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())) 1802 ); 1803 } 1804 1805 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src 1806 Convert to CMov32(RM_Mem, dst) */ 1807 if (i->tag == Xin_CMov32 1808 && i->Xin.CMov32.src->tag == Xrm_Reg 1809 && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) { 1810 vassert(! 
sameHReg(i->Xin.CMov32.dst, vreg)); 1811 return X86Instr_CMov32( 1812 i->Xin.CMov32.cond, 1813 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )), 1814 i->Xin.CMov32.dst 1815 ); 1816 } 1817 1818 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */ 1819 if (i->tag == Xin_Test32 1820 && i->Xin.Test32.dst->tag == Xrm_Reg 1821 && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) { 1822 return X86Instr_Test32( 1823 i->Xin.Test32.imm32, 1824 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) ) 1825 ); 1826 } 1827 1828 return NULL; 1829 } 1830 1831 1832 /* --------- The x86 assembler (bleh.) --------- */ 1833 1834 inline static UInt iregEnc ( HReg r ) 1835 { 1836 UInt n; 1837 vassert(hregClass(r) == HRcInt32); 1838 vassert(!hregIsVirtual(r)); 1839 n = hregEncoding(r); 1840 vassert(n <= 7); 1841 return n; 1842 } 1843 1844 inline static UInt fregEnc ( HReg r ) 1845 { 1846 UInt n; 1847 vassert(hregClass(r) == HRcFlt64); 1848 vassert(!hregIsVirtual(r)); 1849 n = hregEncoding(r); 1850 vassert(n <= 5); 1851 return n; 1852 } 1853 1854 inline static UInt vregEnc ( HReg r ) 1855 { 1856 UInt n; 1857 vassert(hregClass(r) == HRcVec128); 1858 vassert(!hregIsVirtual(r)); 1859 n = hregEncoding(r); 1860 vassert(n <= 7); 1861 return n; 1862 } 1863 1864 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem ) 1865 { 1866 vassert(mod < 4); 1867 vassert((reg|regmem) < 8); 1868 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) ); 1869 } 1870 1871 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase ) 1872 { 1873 vassert(shift < 4); 1874 vassert((regindex|regbase) < 8); 1875 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) ); 1876 } 1877 1878 static UChar* emit32 ( UChar* p, UInt w32 ) 1879 { 1880 *p++ = toUChar( w32 & 0x000000FF); 1881 *p++ = toUChar((w32 >> 8) & 0x000000FF); 1882 *p++ = toUChar((w32 >> 16) & 0x000000FF); 1883 *p++ = toUChar((w32 >> 24) & 0x000000FF); 1884 return p; 1885 } 1886 1887 /* Does a sign-extend of the lowest 8 bits give 1888 the original number? */ 1889 static Bool fits8bits ( UInt w32 ) 1890 { 1891 Int i32 = (Int)w32; 1892 return toBool(i32 == ((Int)(w32 << 24) >> 24)); 1893 } 1894 1895 1896 /* Forming mod-reg-rm bytes and scale-index-base bytes. 1897 1898 greg, 0(ereg) | ereg != ESP && ereg != EBP 1899 = 00 greg ereg 1900 1901 greg, d8(ereg) | ereg != ESP 1902 = 01 greg ereg, d8 1903 1904 greg, d32(ereg) | ereg != ESP 1905 = 10 greg ereg, d32 1906 1907 greg, d8(%esp) = 01 greg 100, 0x24, d8 1908 1909 ----------------------------------------------- 1910 1911 greg, d8(base,index,scale) 1912 | index != ESP 1913 = 01 greg 100, scale index base, d8 1914 1915 greg, d32(base,index,scale) 1916 | index != ESP 1917 = 10 greg 100, scale index base, d32 1918 */ 1919 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am ) 1920 { 1921 if (am->tag == Xam_IR) { 1922 if (am->Xam.IR.imm == 0 1923 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP()) 1924 && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) { 1925 *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg)); 1926 return p; 1927 } 1928 if (fits8bits(am->Xam.IR.imm) 1929 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) { 1930 *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg)); 1931 *p++ = toUChar(am->Xam.IR.imm & 0xFF); 1932 return p; 1933 } 1934 if (! 
sameHReg(am->Xam.IR.reg, hregX86_ESP())) { 1935 *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg)); 1936 p = emit32(p, am->Xam.IR.imm); 1937 return p; 1938 } 1939 if (sameHReg(am->Xam.IR.reg, hregX86_ESP()) 1940 && fits8bits(am->Xam.IR.imm)) { 1941 *p++ = mkModRegRM(1, gregEnc, 4); 1942 *p++ = 0x24; 1943 *p++ = toUChar(am->Xam.IR.imm & 0xFF); 1944 return p; 1945 } 1946 ppX86AMode(am); 1947 vpanic("doAMode_M: can't emit amode IR"); 1948 /*NOTREACHED*/ 1949 } 1950 if (am->tag == Xam_IRRS) { 1951 if (fits8bits(am->Xam.IRRS.imm) 1952 && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { 1953 *p++ = mkModRegRM(1, gregEnc, 4); 1954 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index), 1955 iregEnc(am->Xam.IRRS.base)); 1956 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF); 1957 return p; 1958 } 1959 if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { 1960 *p++ = mkModRegRM(2, gregEnc, 4); 1961 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index), 1962 iregEnc(am->Xam.IRRS.base)); 1963 p = emit32(p, am->Xam.IRRS.imm); 1964 return p; 1965 } 1966 ppX86AMode(am); 1967 vpanic("doAMode_M: can't emit amode IRRS"); 1968 /*NOTREACHED*/ 1969 } 1970 vpanic("doAMode_M: unknown amode"); 1971 /*NOTREACHED*/ 1972 } 1973 1974 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am ) 1975 { 1976 return doAMode_M__wrk(p, iregEnc(greg), am); 1977 } 1978 1979 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am ) 1980 { 1981 vassert(gregEnc < 8); 1982 return doAMode_M__wrk(p, gregEnc, am); 1983 } 1984 1985 1986 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ 1987 inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc ) 1988 { 1989 *p++ = mkModRegRM(3, gregEnc, eregEnc); 1990 return p; 1991 } 1992 1993 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) 1994 { 1995 return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg)); 1996 } 1997 1998 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg ) 1999 { 2000 vassert(gregEnc < 8); 2001 return doAMode_R__wrk(p, gregEnc, iregEnc(ereg)); 2002 } 2003 2004 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc ) 2005 { 2006 vassert( (gregEnc|eregEnc) < 8); 2007 return doAMode_R__wrk(p, gregEnc, eregEnc); 2008 } 2009 2010 2011 /* Emit ffree %st(7) */ 2012 static UChar* do_ffree_st7 ( UChar* p ) 2013 { 2014 *p++ = 0xDD; 2015 *p++ = 0xC7; 2016 return p; 2017 } 2018 2019 /* Emit fstp %st(i), 1 <= i <= 7 */ 2020 static UChar* do_fstp_st ( UChar* p, Int i ) 2021 { 2022 vassert(1 <= i && i <= 7); 2023 *p++ = 0xDD; 2024 *p++ = toUChar(0xD8+i); 2025 return p; 2026 } 2027 2028 /* Emit fld %st(i), 0 <= i <= 6 */ 2029 static UChar* do_fld_st ( UChar* p, Int i ) 2030 { 2031 vassert(0 <= i && i <= 6); 2032 *p++ = 0xD9; 2033 *p++ = toUChar(0xC0+i); 2034 return p; 2035 } 2036 2037 /* Emit f<op> %st(0) */ 2038 static UChar* do_fop1_st ( UChar* p, X86FpOp op ) 2039 { 2040 switch (op) { 2041 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; 2042 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; 2043 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 2044 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 2045 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 2046 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 2047 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 2048 case Xfp_MOV: break; 2049 case Xfp_TAN: 2050 /* fptan pushes 1.0 on the FP stack, except when the argument 2051 is out of range. Hence we have to do the instruction, 2052 then inspect C2 to see if there is an out of range 2053 condition. 
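(C2 is bit 10 of the x87 status word, which is why the code below does fnstsw %ax and then testw $0x400,%ax.)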
If there is, we skip the fincstp that is used 2054 by the in-range case to get rid of this extra 1.0 2055 value. */ 2056 p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */ 2057 *p++ = 0xD9; *p++ = 0xF2; // fptan 2058 *p++ = 0x50; // pushl %eax 2059 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax 2060 *p++ = 0x66; *p++ = 0xA9; 2061 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax 2062 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp 2063 *p++ = 0xD9; *p++ = 0xF7; // fincstp 2064 *p++ = 0x58; // after_fincstp: popl %eax 2065 break; 2066 default: 2067 vpanic("do_fop1_st: unknown op"); 2068 } 2069 return p; 2070 } 2071 2072 /* Emit f<op> %st(i), 1 <= i <= 5 */ 2073 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i ) 2074 { 2075 Int subopc; 2076 switch (op) { 2077 case Xfp_ADD: subopc = 0; break; 2078 case Xfp_SUB: subopc = 4; break; 2079 case Xfp_MUL: subopc = 1; break; 2080 case Xfp_DIV: subopc = 6; break; 2081 default: vpanic("do_fop2_st: unknown op"); 2082 } 2083 *p++ = 0xD8; 2084 p = doAMode_R_enc_enc(p, subopc, i); 2085 return p; 2086 } 2087 2088 /* Push a 32-bit word on the stack. The word depends on tags[3:0]; 2089 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[]. 2090 */ 2091 static UChar* push_word_from_tags ( UChar* p, UShort tags ) 2092 { 2093 UInt w; 2094 vassert(0 == (tags & ~0xF)); 2095 if (tags == 0) { 2096 /* pushl $0x00000000 */ 2097 *p++ = 0x6A; 2098 *p++ = 0x00; 2099 } 2100 else 2101 /* pushl $0xFFFFFFFF */ 2102 if (tags == 0xF) { 2103 *p++ = 0x6A; 2104 *p++ = 0xFF; 2105 } else { 2106 vassert(0); /* awaiting test case */ 2107 w = 0; 2108 if (tags & 1) w |= 0x000000FF; 2109 if (tags & 2) w |= 0x0000FF00; 2110 if (tags & 4) w |= 0x00FF0000; 2111 if (tags & 8) w |= 0xFF000000; 2112 *p++ = 0x68; 2113 p = emit32(p, w); 2114 } 2115 return p; 2116 } 2117 2118 /* Emit an instruction into buf and return the number of bytes used. 2119 Note that buf is not the insn's final place, and therefore it is 2120 imperative to emit position-independent code. If the emitted 2121 instruction was a profiler inc, set *is_profInc to True, else 2122 leave it unchanged. 
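The caller must supply at least 32 bytes of buffer space, and no single instruction emitted here may occupy more than 32 bytes; both limits are asserted below.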
*/ 2123 2124 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, 2125 UChar* buf, Int nbuf, const X86Instr* i, 2126 Bool mode64, VexEndness endness_host, 2127 const void* disp_cp_chain_me_to_slowEP, 2128 const void* disp_cp_chain_me_to_fastEP, 2129 const void* disp_cp_xindir, 2130 const void* disp_cp_xassisted ) 2131 { 2132 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2133 2134 UInt xtra; 2135 UChar* p = &buf[0]; 2136 UChar* ptmp; 2137 vassert(nbuf >= 32); 2138 vassert(mode64 == False); 2139 2140 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */ 2141 2142 switch (i->tag) { 2143 2144 case Xin_Alu32R: 2145 /* Deal specially with MOV */ 2146 if (i->Xin.Alu32R.op == Xalu_MOV) { 2147 switch (i->Xin.Alu32R.src->tag) { 2148 case Xrmi_Imm: 2149 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst)); 2150 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2151 goto done; 2152 case Xrmi_Reg: 2153 *p++ = 0x89; 2154 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2155 i->Xin.Alu32R.dst); 2156 goto done; 2157 case Xrmi_Mem: 2158 *p++ = 0x8B; 2159 p = doAMode_M(p, i->Xin.Alu32R.dst, 2160 i->Xin.Alu32R.src->Xrmi.Mem.am); 2161 goto done; 2162 default: 2163 goto bad; 2164 } 2165 } 2166 /* MUL */ 2167 if (i->Xin.Alu32R.op == Xalu_MUL) { 2168 switch (i->Xin.Alu32R.src->tag) { 2169 case Xrmi_Reg: 2170 *p++ = 0x0F; 2171 *p++ = 0xAF; 2172 p = doAMode_R(p, i->Xin.Alu32R.dst, 2173 i->Xin.Alu32R.src->Xrmi.Reg.reg); 2174 goto done; 2175 case Xrmi_Mem: 2176 *p++ = 0x0F; 2177 *p++ = 0xAF; 2178 p = doAMode_M(p, i->Xin.Alu32R.dst, 2179 i->Xin.Alu32R.src->Xrmi.Mem.am); 2180 goto done; 2181 case Xrmi_Imm: 2182 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2183 *p++ = 0x6B; 2184 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2185 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2186 } else { 2187 *p++ = 0x69; 2188 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2189 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2190 } 2191 goto done; 2192 default: 2193 goto bad; 2194 } 2195 } 2196 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2197 opc = opc_rr = subopc_imm = opc_imma = 0; 2198 switch (i->Xin.Alu32R.op) { 2199 case Xalu_ADC: opc = 0x13; opc_rr = 0x11; 2200 subopc_imm = 2; opc_imma = 0x15; break; 2201 case Xalu_ADD: opc = 0x03; opc_rr = 0x01; 2202 subopc_imm = 0; opc_imma = 0x05; break; 2203 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29; 2204 subopc_imm = 5; opc_imma = 0x2D; break; 2205 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19; 2206 subopc_imm = 3; opc_imma = 0x1D; break; 2207 case Xalu_AND: opc = 0x23; opc_rr = 0x21; 2208 subopc_imm = 4; opc_imma = 0x25; break; 2209 case Xalu_XOR: opc = 0x33; opc_rr = 0x31; 2210 subopc_imm = 6; opc_imma = 0x35; break; 2211 case Xalu_OR: opc = 0x0B; opc_rr = 0x09; 2212 subopc_imm = 1; opc_imma = 0x0D; break; 2213 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39; 2214 subopc_imm = 7; opc_imma = 0x3D; break; 2215 default: goto bad; 2216 } 2217 switch (i->Xin.Alu32R.src->tag) { 2218 case Xrmi_Imm: 2219 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX()) 2220 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2221 *p++ = toUChar(opc_imma); 2222 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2223 } else 2224 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2225 *p++ = 0x83; 2226 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst); 2227 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2228 } else { 2229 *p++ = 0x81; 2230 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst); 2231 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 
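/* Note: the 0x83 forms take a sign-extended imm8 and the 0x81 forms a full imm32; the opc_imma variant used above is the shorter %eax-only encoding, which needs no ModRM byte. */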
2232 } 2233 goto done; 2234 case Xrmi_Reg: 2235 *p++ = toUChar(opc_rr); 2236 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2237 i->Xin.Alu32R.dst); 2238 goto done; 2239 case Xrmi_Mem: 2240 *p++ = toUChar(opc); 2241 p = doAMode_M(p, i->Xin.Alu32R.dst, 2242 i->Xin.Alu32R.src->Xrmi.Mem.am); 2243 goto done; 2244 default: 2245 goto bad; 2246 } 2247 break; 2248 2249 case Xin_Alu32M: 2250 /* Deal specially with MOV */ 2251 if (i->Xin.Alu32M.op == Xalu_MOV) { 2252 switch (i->Xin.Alu32M.src->tag) { 2253 case Xri_Reg: 2254 *p++ = 0x89; 2255 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2256 i->Xin.Alu32M.dst); 2257 goto done; 2258 case Xri_Imm: 2259 *p++ = 0xC7; 2260 p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst); 2261 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2262 goto done; 2263 default: 2264 goto bad; 2265 } 2266 } 2267 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not 2268 allowed here. */ 2269 opc = subopc_imm = opc_imma = 0; 2270 switch (i->Xin.Alu32M.op) { 2271 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; 2272 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; 2273 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break; 2274 default: goto bad; 2275 } 2276 switch (i->Xin.Alu32M.src->tag) { 2277 case Xri_Reg: 2278 *p++ = toUChar(opc); 2279 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2280 i->Xin.Alu32M.dst); 2281 goto done; 2282 case Xri_Imm: 2283 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { 2284 *p++ = 0x83; 2285 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst); 2286 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32); 2287 goto done; 2288 } else { 2289 *p++ = 0x81; 2290 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst); 2291 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2292 goto done; 2293 } 2294 default: 2295 goto bad; 2296 } 2297 break; 2298 2299 case Xin_Sh32: 2300 opc_cl = opc_imm = subopc = 0; 2301 switch (i->Xin.Sh32.op) { 2302 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2303 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2304 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2305 default: goto bad; 2306 } 2307 if (i->Xin.Sh32.src == 0) { 2308 *p++ = toUChar(opc_cl); 2309 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst); 2310 } else { 2311 *p++ = toUChar(opc_imm); 2312 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst); 2313 *p++ = (UChar)(i->Xin.Sh32.src); 2314 } 2315 goto done; 2316 2317 case Xin_Test32: 2318 if (i->Xin.Test32.dst->tag == Xrm_Reg) { 2319 /* testl $imm32, %reg */ 2320 *p++ = 0xF7; 2321 p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg); 2322 p = emit32(p, i->Xin.Test32.imm32); 2323 goto done; 2324 } else { 2325 /* testl $imm32, amode */ 2326 *p++ = 0xF7; 2327 p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am); 2328 p = emit32(p, i->Xin.Test32.imm32); 2329 goto done; 2330 } 2331 2332 case Xin_Unary32: 2333 if (i->Xin.Unary32.op == Xun_NOT) { 2334 *p++ = 0xF7; 2335 p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst); 2336 goto done; 2337 } 2338 if (i->Xin.Unary32.op == Xun_NEG) { 2339 *p++ = 0xF7; 2340 p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst); 2341 goto done; 2342 } 2343 break; 2344 2345 case Xin_Lea32: 2346 *p++ = 0x8D; 2347 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am); 2348 goto done; 2349 2350 case Xin_MulL: 2351 subopc = i->Xin.MulL.syned ? 
5 : 4; 2352 *p++ = 0xF7; 2353 switch (i->Xin.MulL.src->tag) { 2354 case Xrm_Mem: 2355 p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am); 2356 goto done; 2357 case Xrm_Reg: 2358 p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg); 2359 goto done; 2360 default: 2361 goto bad; 2362 } 2363 break; 2364 2365 case Xin_Div: 2366 subopc = i->Xin.Div.syned ? 7 : 6; 2367 *p++ = 0xF7; 2368 switch (i->Xin.Div.src->tag) { 2369 case Xrm_Mem: 2370 p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am); 2371 goto done; 2372 case Xrm_Reg: 2373 p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg); 2374 goto done; 2375 default: 2376 goto bad; 2377 } 2378 break; 2379 2380 case Xin_Sh3232: 2381 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); 2382 if (i->Xin.Sh3232.amt == 0) { 2383 /* shldl/shrdl by %cl */ 2384 *p++ = 0x0F; 2385 if (i->Xin.Sh3232.op == Xsh_SHL) { 2386 *p++ = 0xA5; 2387 } else { 2388 *p++ = 0xAD; 2389 } 2390 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); 2391 goto done; 2392 } 2393 break; 2394 2395 case Xin_Push: 2396 switch (i->Xin.Push.src->tag) { 2397 case Xrmi_Mem: 2398 *p++ = 0xFF; 2399 p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am); 2400 goto done; 2401 case Xrmi_Imm: 2402 *p++ = 0x68; 2403 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32); 2404 goto done; 2405 case Xrmi_Reg: 2406 *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg)); 2407 goto done; 2408 default: 2409 goto bad; 2410 } 2411 2412 case Xin_Call: 2413 if (i->Xin.Call.cond != Xcc_ALWAYS 2414 && i->Xin.Call.rloc.pri != RLPri_None) { 2415 /* The call might not happen (it isn't unconditional) and it 2416 returns a result. In this case we will need to generate a 2417 control flow diamond to put 0x555..555 in the return 2418 register(s) in the case where the call doesn't happen. If 2419 this ever becomes necessary, maybe copy code from the ARM 2420 equivalent. Until that day, just give up. */ 2421 goto bad; 2422 } 2423 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above 2424 for explanation of this. */ 2425 switch (i->Xin.Call.regparms) { 2426 case 0: irno = iregEnc(hregX86_EAX()); break; 2427 case 1: irno = iregEnc(hregX86_EDX()); break; 2428 case 2: irno = iregEnc(hregX86_ECX()); break; 2429 case 3: irno = iregEnc(hregX86_EDI()); break; 2430 default: vpanic(" emit_X86Instr:call:regparms"); 2431 } 2432 /* jump over the following two insns if the condition does not 2433 hold */ 2434 if (i->Xin.Call.cond != Xcc_ALWAYS) { 2435 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1))); 2436 *p++ = 0x07; /* 7 bytes in the next two insns */ 2437 } 2438 /* movl $target, %tmp */ 2439 *p++ = toUChar(0xB8 + irno); 2440 p = emit32(p, i->Xin.Call.target); 2441 /* call *%tmp */ 2442 *p++ = 0xFF; 2443 *p++ = toUChar(0xD0 + irno); 2444 goto done; 2445 2446 case Xin_XDirect: { 2447 /* NB: what goes on here has to be very closely coordinated with the 2448 chainXDirect_X86 and unchainXDirect_X86 below. */ 2449 /* We're generating chain-me requests here, so we need to be 2450 sure this is actually allowed -- no-redir translations can't 2451 use chain-me's. Hence: */ 2452 vassert(disp_cp_chain_me_to_slowEP != NULL); 2453 vassert(disp_cp_chain_me_to_fastEP != NULL); 2454 2455 /* Use ptmp for backpatching conditional jumps. */ 2456 ptmp = NULL; 2457 2458 /* First off, if this is conditional, create a conditional 2459 jump over the rest of it. 
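The jump is emitted with the sense of the condition inverted (opcode byte 0x70 + (cond ^ 1), e.g. jnz, 0x75, for a Z condition) and an 8-bit displacement which is backpatched once the length of the skipped code is known.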
*/ 2460 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2461 /* jmp fwds if !condition */ 2462 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1))); 2463 ptmp = p; /* fill in this bit later */ 2464 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2465 } 2466 2467 /* Update the guest EIP. */ 2468 /* movl $dstGA, amEIP */ 2469 *p++ = 0xC7; 2470 p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP); 2471 p = emit32(p, i->Xin.XDirect.dstGA); 2472 2473 /* --- FIRST PATCHABLE BYTE follows --- */ 2474 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling 2475 to) backs up the return address, so as to find the address of 2476 the first patchable byte. So: don't change the length of the 2477 two instructions below. */ 2478 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */ 2479 *p++ = 0xBA; 2480 const void* disp_cp_chain_me 2481 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP 2482 : disp_cp_chain_me_to_slowEP; 2483 p = emit32(p, (UInt)(Addr)disp_cp_chain_me); 2484 /* call *%edx */ 2485 *p++ = 0xFF; 2486 *p++ = 0xD2; 2487 /* --- END of PATCHABLE BYTES --- */ 2488 2489 /* Fix up the conditional jump, if there was one. */ 2490 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2491 Int delta = p - ptmp; 2492 vassert(delta > 0 && delta < 40); 2493 *ptmp = toUChar(delta-1); 2494 } 2495 goto done; 2496 } 2497 2498 case Xin_XIndir: { 2499 /* We're generating transfers that could lead indirectly to a 2500 chain-me, so we need to be sure this is actually allowed -- 2501 no-redir translations are not allowed to reach normal 2502 translations without going through the scheduler. That means 2503 no XDirects or XIndirs out from no-redir translations. 2504 Hence: */ 2505 vassert(disp_cp_xindir != NULL); 2506 2507 /* Use ptmp for backpatching conditional jumps. */ 2508 ptmp = NULL; 2509 2510 /* First off, if this is conditional, create a conditional 2511 jump over the rest of it. */ 2512 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2513 /* jmp fwds if !condition */ 2514 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1))); 2515 ptmp = p; /* fill in this bit later */ 2516 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2517 } 2518 2519 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2520 *p++ = 0x89; 2521 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2522 2523 /* movl $disp_indir, %edx */ 2524 *p++ = 0xBA; 2525 p = emit32(p, (UInt)(Addr)disp_cp_xindir); 2526 /* jmp *%edx */ 2527 *p++ = 0xFF; 2528 *p++ = 0xE2; 2529 2530 /* Fix up the conditional jump, if there was one. */ 2531 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2532 Int delta = p - ptmp; 2533 vassert(delta > 0 && delta < 40); 2534 *ptmp = toUChar(delta-1); 2535 } 2536 goto done; 2537 } 2538 2539 case Xin_XAssisted: { 2540 /* Use ptmp for backpatching conditional jumps. */ 2541 ptmp = NULL; 2542 2543 /* First off, if this is conditional, create a conditional 2544 jump over the rest of it. */ 2545 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2546 /* jmp fwds if !condition */ 2547 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1))); 2548 ptmp = p; /* fill in this bit later */ 2549 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2550 } 2551 2552 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2553 *p++ = 0x89; 2554 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2555 /* movl $magic_number, %ebp. 
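The magic number is the VEX_TRC_JMP_* value selected by the switch below; 0xBD is movl $imm32,%ebp, so the dispatcher can read the reason code straight out of %ebp.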
*/ 2556 UInt trcval = 0; 2557 switch (i->Xin.XAssisted.jk) { 2558 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; 2559 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; 2560 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; 2561 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break; 2562 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break; 2563 case Ijk_Sys_int145: trcval = VEX_TRC_JMP_SYS_INT145; break; 2564 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break; 2565 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break; 2566 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; 2567 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; 2568 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; 2569 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; 2570 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; 2571 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; 2572 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; 2573 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; 2574 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; 2575 /* We don't expect to see the following being assisted. */ 2576 case Ijk_Ret: 2577 case Ijk_Call: 2578 /* fallthrough */ 2579 default: 2580 ppIRJumpKind(i->Xin.XAssisted.jk); 2581 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind"); 2582 } 2583 vassert(trcval != 0); 2584 *p++ = 0xBD; 2585 p = emit32(p, trcval); 2586 2587 /* movl $disp_indir, %edx */ 2588 *p++ = 0xBA; 2589 p = emit32(p, (UInt)(Addr)disp_cp_xassisted); 2590 /* jmp *%edx */ 2591 *p++ = 0xFF; 2592 *p++ = 0xE2; 2593 2594 /* Fix up the conditional jump, if there was one. */ 2595 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2596 Int delta = p - ptmp; 2597 vassert(delta > 0 && delta < 40); 2598 *ptmp = toUChar(delta-1); 2599 } 2600 goto done; 2601 } 2602 2603 case Xin_CMov32: 2604 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS); 2605 2606 /* This generates cmov, which is illegal on P54/P55. */ 2607 /* 2608 *p++ = 0x0F; 2609 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond)); 2610 if (i->Xin.CMov32.src->tag == Xrm_Reg) { 2611 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg); 2612 goto done; 2613 } 2614 if (i->Xin.CMov32.src->tag == Xrm_Mem) { 2615 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am); 2616 goto done; 2617 } 2618 */ 2619 2620 /* Alternative version which works on any x86 variant. */ 2621 /* jmp fwds if !condition */ 2622 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1)); 2623 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2624 ptmp = p; 2625 2626 switch (i->Xin.CMov32.src->tag) { 2627 case Xrm_Reg: 2628 /* Big sigh. This is movl E -> G ... */ 2629 *p++ = 0x89; 2630 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg, 2631 i->Xin.CMov32.dst); 2632 2633 break; 2634 case Xrm_Mem: 2635 /* ... whereas this is movl G -> E. That's why the args 2636 to doAMode_R appear to be the wrong way round in the 2637 Xrm_Reg case. */ 2638 *p++ = 0x8B; 2639 p = doAMode_M(p, i->Xin.CMov32.dst, 2640 i->Xin.CMov32.src->Xrm.Mem.am); 2641 break; 2642 default: 2643 goto bad; 2644 } 2645 /* Fill in the jump offset. 
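ptmp was left pointing just past the displacement byte, so p - ptmp is exactly the number of bytes the conditional jump has to skip.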
*/
2646 *(ptmp-1) = toUChar(p - ptmp);
2647 goto done;
2648
2649 break;
2650
2651 case Xin_LoadEX:
2652 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2653 /* movzbl */
2654 *p++ = 0x0F;
2655 *p++ = 0xB6;
2656 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2657 goto done;
2658 }
2659 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2660 /* movzwl */
2661 *p++ = 0x0F;
2662 *p++ = 0xB7;
2663 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2664 goto done;
2665 }
2666 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2667 /* movsbl */
2668 *p++ = 0x0F;
2669 *p++ = 0xBE;
2670 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2671 goto done;
2672 }
2673 break;
2674
2675 case Xin_Set32:
2676 /* Make the destination register be 1 or 0, depending on whether
2677 the relevant condition holds. We have to dodge and weave
2678 when the destination is %esi or %edi as we cannot directly
2679 emit the native 'setb %reg' for those. Further complication:
2680 the top 24 bits of the destination should be forced to zero,
2681 but doing 'xor %r,%r' kills the flag(s) we are about to read.
2682 Sigh. So start off by moving $0 into the dest. */
2683
2684 /* Do we need to swap in %eax? */
2685 if (iregEnc(i->Xin.Set32.dst) >= 4) {
2686 /* xchg %eax, %dst */
2687 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2688 /* movl $0, %eax */
2689 *p++ = toUChar(0xB8 + iregEnc(hregX86_EAX()));
2690 p = emit32(p, 0);
2691 /* setb lo8(%eax) */
2692 *p++ = 0x0F;
2693 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2694 p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
2695 /* xchg %eax, %dst */
2696 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2697 } else {
2698 /* movl $0, %dst */
2699 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
2700 p = emit32(p, 0);
2701 /* setb lo8(%dst) */
2702 *p++ = 0x0F;
2703 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2704 p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
2705 }
2706 goto done;
2707
2708 case Xin_Bsfr32:
2709 *p++ = 0x0F;
2710 if (i->Xin.Bsfr32.isFwds) {
2711 *p++ = 0xBC;
2712 } else {
2713 *p++ = 0xBD;
2714 }
2715 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2716 goto done;
2717
2718 case Xin_MFence:
2719 /* see comment in hdefs.h re this insn */
2720 if (0) vex_printf("EMIT FENCE\n");
2721 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2722 |VEX_HWCAPS_X86_SSE2)) {
2723 /* mfence */
2724 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2725 goto done;
2726 }
2727 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
2728 /* sfence */
2729 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2730 /* lock addl $0,0(%esp) */
2731 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2732 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2733 goto done;
2734 }
2735 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2736 /* lock addl $0,0(%esp) */
2737 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2738 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2739 goto done;
2740 }
2741 vpanic("emit_X86Instr:mfence:hwcaps");
2742 /*NOTREACHED*/
2743 break;
2744
2745 case Xin_ACAS:
2746 /* lock */
2747 *p++ = 0xF0;
2748 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
2749 in %ebx. The new-value register is hardwired to be %ebx
2750 since letting it be any integer register gives the problem
2751 that %sil and %dil are unaddressable on x86 and hence we
2752 would have to resort to the same kind of trickery as with
2753 byte-sized Xin.Store, just below.
Given that this isn't 2754 performance critical, it is simpler just to force the 2755 register operand to %ebx (could equally be %ecx or %edx). 2756 (Although %ebx is more consistent with cmpxchg8b.) */ 2757 if (i->Xin.ACAS.sz == 2) *p++ = 0x66; 2758 *p++ = 0x0F; 2759 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 2760 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr); 2761 goto done; 2762 2763 case Xin_DACAS: 2764 /* lock */ 2765 *p++ = 0xF0; 2766 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value 2767 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so 2768 aren't encoded in the insn. */ 2769 *p++ = 0x0F; 2770 *p++ = 0xC7; 2771 p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr); 2772 goto done; 2773 2774 case Xin_Store: 2775 if (i->Xin.Store.sz == 2) { 2776 /* This case, at least, is simple, given that we can 2777 reference the low 16 bits of any integer register. */ 2778 *p++ = 0x66; 2779 *p++ = 0x89; 2780 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2781 goto done; 2782 } 2783 2784 if (i->Xin.Store.sz == 1) { 2785 /* We have to do complex dodging and weaving if src is not 2786 the low 8 bits of %eax/%ebx/%ecx/%edx. */ 2787 if (iregEnc(i->Xin.Store.src) < 4) { 2788 /* we're OK, can do it directly */ 2789 *p++ = 0x88; 2790 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2791 goto done; 2792 } else { 2793 /* Bleh. This means the source is %edi or %esi. Since 2794 the address mode can only mention three registers, at 2795 least one of %eax/%ebx/%ecx/%edx must be available to 2796 temporarily swap the source into, so the store can 2797 happen. So we have to look at the regs mentioned 2798 in the amode. */ 2799 HReg swap = INVALID_HREG; 2800 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(), 2801 ecx = hregX86_ECX(), edx = hregX86_EDX(); 2802 HRegUsage u; 2803 initHRegUsage(&u); 2804 addRegUsage_X86AMode(&u, i->Xin.Store.dst); 2805 /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; } 2806 else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; } 2807 else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; } 2808 else if (! HRegUsage__contains(&u, edx)) { swap = edx; } 2809 vassert(! hregIsInvalid(swap)); 2810 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2811 *p++ = 0x87; 2812 p = doAMode_R(p, i->Xin.Store.src, swap); 2813 /* movb lo8{%swap}, (dst) */ 2814 *p++ = 0x88; 2815 p = doAMode_M(p, swap, i->Xin.Store.dst); 2816 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2817 *p++ = 0x87; 2818 p = doAMode_R(p, i->Xin.Store.src, swap); 2819 goto done; 2820 } 2821 } /* if (i->Xin.Store.sz == 1) */ 2822 break; 2823 2824 case Xin_FpUnary: 2825 /* gop %src, %dst 2826 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) 2827 */ 2828 p = do_ffree_st7(p); 2829 p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src)); 2830 p = do_fop1_st(p, i->Xin.FpUnary.op); 2831 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst)); 2832 goto done; 2833 2834 case Xin_FpBinary: 2835 if (i->Xin.FpBinary.op == Xfp_YL2X 2836 || i->Xin.FpBinary.op == Xfp_YL2XP1) { 2837 /* Have to do this specially. */ 2838 /* ffree %st7 ; fld %st(srcL) ; 2839 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ 2840 p = do_ffree_st7(p); 2841 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2842 p = do_ffree_st7(p); 2843 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR)); 2844 *p++ = 0xD9; 2845 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 
0xF1 : 0xF9); 2846 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2847 goto done; 2848 } 2849 if (i->Xin.FpBinary.op == Xfp_ATAN) { 2850 /* Have to do this specially. */ 2851 /* ffree %st7 ; fld %st(srcL) ; 2852 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ 2853 p = do_ffree_st7(p); 2854 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2855 p = do_ffree_st7(p); 2856 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR)); 2857 *p++ = 0xD9; *p++ = 0xF3; 2858 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2859 goto done; 2860 } 2861 if (i->Xin.FpBinary.op == Xfp_PREM 2862 || i->Xin.FpBinary.op == Xfp_PREM1 2863 || i->Xin.FpBinary.op == Xfp_SCALE) { 2864 /* Have to do this specially. */ 2865 /* ffree %st7 ; fld %st(srcR) ; 2866 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; 2867 fincstp ; ffree %st7 */ 2868 p = do_ffree_st7(p); 2869 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR)); 2870 p = do_ffree_st7(p); 2871 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL)); 2872 *p++ = 0xD9; 2873 switch (i->Xin.FpBinary.op) { 2874 case Xfp_PREM: *p++ = 0xF8; break; 2875 case Xfp_PREM1: *p++ = 0xF5; break; 2876 case Xfp_SCALE: *p++ = 0xFD; break; 2877 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)"); 2878 } 2879 p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst)); 2880 *p++ = 0xD9; *p++ = 0xF7; 2881 p = do_ffree_st7(p); 2882 goto done; 2883 } 2884 /* General case */ 2885 /* gop %srcL, %srcR, %dst 2886 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) 2887 */ 2888 p = do_ffree_st7(p); 2889 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2890 p = do_fop2_st(p, i->Xin.FpBinary.op, 2891 1+fregEnc(i->Xin.FpBinary.srcR)); 2892 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2893 goto done; 2894 2895 case Xin_FpLdSt: 2896 if (i->Xin.FpLdSt.isLoad) { 2897 /* Load from memory into %fakeN. 2898 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) 2899 */ 2900 p = do_ffree_st7(p); 2901 switch (i->Xin.FpLdSt.sz) { 2902 case 4: 2903 *p++ = 0xD9; 2904 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr); 2905 break; 2906 case 8: 2907 *p++ = 0xDD; 2908 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr); 2909 break; 2910 case 10: 2911 *p++ = 0xDB; 2912 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr); 2913 break; 2914 default: 2915 vpanic("emitX86Instr(FpLdSt,load)"); 2916 } 2917 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg)); 2918 goto done; 2919 } else { 2920 /* Store from %fakeN into memory. 2921 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode 2922 */ 2923 p = do_ffree_st7(p); 2924 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg)); 2925 switch (i->Xin.FpLdSt.sz) { 2926 case 4: 2927 *p++ = 0xD9; 2928 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr); 2929 break; 2930 case 8: 2931 *p++ = 0xDD; 2932 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr); 2933 break; 2934 case 10: 2935 *p++ = 0xDB; 2936 p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr); 2937 break; 2938 default: 2939 vpanic("emitX86Instr(FpLdSt,store)"); 2940 } 2941 goto done; 2942 } 2943 break; 2944 2945 case Xin_FpLdStI: 2946 if (i->Xin.FpLdStI.isLoad) { 2947 /* Load from memory into %fakeN, converting from an int. 
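(fildl is encoded as DB /0 and fildll as DF /5; the 16-bit fildw form, DF /0, is not expected here and is guarded by a vassert below.)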
2948 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) 2949 */ 2950 switch (i->Xin.FpLdStI.sz) { 2951 case 8: opc = 0xDF; subopc_imm = 5; break; 2952 case 4: opc = 0xDB; subopc_imm = 0; break; 2953 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; 2954 default: vpanic("emitX86Instr(Xin_FpLdStI-load)"); 2955 } 2956 p = do_ffree_st7(p); 2957 *p++ = toUChar(opc); 2958 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr); 2959 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg)); 2960 goto done; 2961 } else { 2962 /* Store from %fakeN into memory, converting to an int. 2963 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode 2964 */ 2965 switch (i->Xin.FpLdStI.sz) { 2966 case 8: opc = 0xDF; subopc_imm = 7; break; 2967 case 4: opc = 0xDB; subopc_imm = 3; break; 2968 case 2: opc = 0xDF; subopc_imm = 3; break; 2969 default: vpanic("emitX86Instr(Xin_FpLdStI-store)"); 2970 } 2971 p = do_ffree_st7(p); 2972 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg)); 2973 *p++ = toUChar(opc); 2974 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr); 2975 goto done; 2976 } 2977 break; 2978 2979 case Xin_Fp64to32: 2980 /* ffree %st7 ; fld %st(src) */ 2981 p = do_ffree_st7(p); 2982 p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src)); 2983 /* subl $4, %esp */ 2984 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; 2985 /* fstps (%esp) */ 2986 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; 2987 /* flds (%esp) */ 2988 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; 2989 /* addl $4, %esp */ 2990 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; 2991 /* fstp %st(1+dst) */ 2992 p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst)); 2993 goto done; 2994 2995 case Xin_FpCMov: 2996 /* jmp fwds if !condition */ 2997 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1)); 2998 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2999 ptmp = p; 3000 3001 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ 3002 p = do_ffree_st7(p); 3003 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src)); 3004 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst)); 3005 3006 /* Fill in the jump offset. 
*/ 3007 *(ptmp-1) = toUChar(p - ptmp); 3008 goto done; 3009 3010 case Xin_FpLdCW: 3011 *p++ = 0xD9; 3012 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr); 3013 goto done; 3014 3015 case Xin_FpStSW_AX: 3016 /* note, this emits fnstsw %ax, not fstsw %ax */ 3017 *p++ = 0xDF; 3018 *p++ = 0xE0; 3019 goto done; 3020 3021 case Xin_FpCmp: 3022 /* gcmp %fL, %fR, %dst 3023 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; 3024 fnstsw %ax ; movl %eax, %dst 3025 */ 3026 /* ffree %st7 */ 3027 p = do_ffree_st7(p); 3028 /* fpush %fL */ 3029 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL)); 3030 /* fucomp %(fR+1) */ 3031 *p++ = 0xDD; 3032 *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR)))); 3033 /* fnstsw %ax */ 3034 *p++ = 0xDF; 3035 *p++ = 0xE0; 3036 /* movl %eax, %dst */ 3037 *p++ = 0x89; 3038 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst); 3039 goto done; 3040 3041 case Xin_SseConst: { 3042 UShort con = i->Xin.SseConst.con; 3043 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF)); 3044 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF)); 3045 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF)); 3046 p = push_word_from_tags(p, toUShort(con & 0xF)); 3047 /* movl (%esp), %xmm-dst */ 3048 *p++ = 0x0F; 3049 *p++ = 0x10; 3050 *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst))); 3051 *p++ = 0x24; 3052 /* addl $16, %esp */ 3053 *p++ = 0x83; 3054 *p++ = 0xC4; 3055 *p++ = 0x10; 3056 goto done; 3057 } 3058 3059 case Xin_SseLdSt: 3060 *p++ = 0x0F; 3061 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11); 3062 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr); 3063 goto done; 3064 3065 case Xin_SseLdzLO: 3066 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8); 3067 /* movs[sd] amode, %xmm-dst */ 3068 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 
0xF3 : 0xF2); 3069 *p++ = 0x0F; 3070 *p++ = 0x10; 3071 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr); 3072 goto done; 3073 3074 case Xin_Sse32Fx4: 3075 xtra = 0; 3076 *p++ = 0x0F; 3077 switch (i->Xin.Sse32Fx4.op) { 3078 case Xsse_ADDF: *p++ = 0x58; break; 3079 case Xsse_DIVF: *p++ = 0x5E; break; 3080 case Xsse_MAXF: *p++ = 0x5F; break; 3081 case Xsse_MINF: *p++ = 0x5D; break; 3082 case Xsse_MULF: *p++ = 0x59; break; 3083 case Xsse_RCPF: *p++ = 0x53; break; 3084 case Xsse_RSQRTF: *p++ = 0x52; break; 3085 case Xsse_SQRTF: *p++ = 0x51; break; 3086 case Xsse_SUBF: *p++ = 0x5C; break; 3087 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3088 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3089 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3090 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3091 default: goto bad; 3092 } 3093 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst), 3094 vregEnc(i->Xin.Sse32Fx4.src) ); 3095 if (xtra & 0x100) 3096 *p++ = toUChar(xtra & 0xFF); 3097 goto done; 3098 3099 case Xin_Sse64Fx2: 3100 xtra = 0; 3101 *p++ = 0x66; 3102 *p++ = 0x0F; 3103 switch (i->Xin.Sse64Fx2.op) { 3104 case Xsse_ADDF: *p++ = 0x58; break; 3105 case Xsse_DIVF: *p++ = 0x5E; break; 3106 case Xsse_MAXF: *p++ = 0x5F; break; 3107 case Xsse_MINF: *p++ = 0x5D; break; 3108 case Xsse_MULF: *p++ = 0x59; break; 3109 case Xsse_RCPF: *p++ = 0x53; break; 3110 case Xsse_RSQRTF: *p++ = 0x52; break; 3111 case Xsse_SQRTF: *p++ = 0x51; break; 3112 case Xsse_SUBF: *p++ = 0x5C; break; 3113 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3114 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3115 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3116 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3117 default: goto bad; 3118 } 3119 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst), 3120 vregEnc(i->Xin.Sse64Fx2.src) ); 3121 if (xtra & 0x100) 3122 *p++ = toUChar(xtra & 0xFF); 3123 goto done; 3124 3125 case Xin_Sse32FLo: 3126 xtra = 0; 3127 *p++ = 0xF3; 3128 *p++ = 0x0F; 3129 switch (i->Xin.Sse32FLo.op) { 3130 case Xsse_ADDF: *p++ = 0x58; break; 3131 case Xsse_DIVF: *p++ = 0x5E; break; 3132 case Xsse_MAXF: *p++ = 0x5F; break; 3133 case Xsse_MINF: *p++ = 0x5D; break; 3134 case Xsse_MULF: *p++ = 0x59; break; 3135 case Xsse_RCPF: *p++ = 0x53; break; 3136 case Xsse_RSQRTF: *p++ = 0x52; break; 3137 case Xsse_SQRTF: *p++ = 0x51; break; 3138 case Xsse_SUBF: *p++ = 0x5C; break; 3139 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3140 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3141 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3142 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3143 default: goto bad; 3144 } 3145 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst), 3146 vregEnc(i->Xin.Sse32FLo.src) ); 3147 if (xtra & 0x100) 3148 *p++ = toUChar(xtra & 0xFF); 3149 goto done; 3150 3151 case Xin_Sse64FLo: 3152 xtra = 0; 3153 *p++ = 0xF2; 3154 *p++ = 0x0F; 3155 switch (i->Xin.Sse64FLo.op) { 3156 case Xsse_ADDF: *p++ = 0x58; break; 3157 case Xsse_DIVF: *p++ = 0x5E; break; 3158 case Xsse_MAXF: *p++ = 0x5F; break; 3159 case Xsse_MINF: *p++ = 0x5D; break; 3160 case Xsse_MULF: *p++ = 0x59; break; 3161 case Xsse_RCPF: *p++ = 0x53; break; 3162 case Xsse_RSQRTF: *p++ = 0x52; break; 3163 case Xsse_SQRTF: *p++ = 0x51; break; 3164 case Xsse_SUBF: *p++ = 0x5C; break; 3165 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3166 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3167 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3168 case 
Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3169 default: goto bad; 3170 } 3171 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst), 3172 vregEnc(i->Xin.Sse64FLo.src) ); 3173 if (xtra & 0x100) 3174 *p++ = toUChar(xtra & 0xFF); 3175 goto done; 3176 3177 case Xin_SseReRg: 3178 # define XX(_n) *p++ = (_n) 3179 switch (i->Xin.SseReRg.op) { 3180 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break; 3181 case Xsse_OR: XX(0x0F); XX(0x56); break; 3182 case Xsse_XOR: XX(0x0F); XX(0x57); break; 3183 case Xsse_AND: XX(0x0F); XX(0x54); break; 3184 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break; 3185 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break; 3186 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break; 3187 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break; 3188 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break; 3189 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break; 3190 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break; 3191 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break; 3192 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break; 3193 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break; 3194 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break; 3195 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break; 3196 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break; 3197 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break; 3198 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break; 3199 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break; 3200 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break; 3201 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break; 3202 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break; 3203 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break; 3204 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break; 3205 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break; 3206 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break; 3207 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break; 3208 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break; 3209 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break; 3210 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break; 3211 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break; 3212 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break; 3213 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break; 3214 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break; 3215 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break; 3216 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break; 3217 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break; 3218 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break; 3219 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break; 3220 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break; 3221 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break; 3222 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break; 3223 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break; 3224 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break; 3225 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break; 3226 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break; 3227 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break; 3228 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break; 3229 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break; 3230 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break; 3231 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break; 3232 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break; 3233 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break; 3234 
default: goto bad;
3235 }
3236 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
3237 vregEnc(i->Xin.SseReRg.src) );
3238 # undef XX
3239 goto done;
3240
3241 case Xin_SseCMov:
3242 /* jmp fwds if !condition */
3243 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3244 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3245 ptmp = p;
3246
3247 /* movaps %src, %dst */
3248 *p++ = 0x0F;
3249 *p++ = 0x28;
3250 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
3251 vregEnc(i->Xin.SseCMov.src) );
3252
3253 /* Fill in the jump offset. */
3254 *(ptmp-1) = toUChar(p - ptmp);
3255 goto done;
3256
3257 case Xin_SseShuf:
3258 *p++ = 0x66;
3259 *p++ = 0x0F;
3260 *p++ = 0x70;
3261 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
3262 vregEnc(i->Xin.SseShuf.src) );
3263 *p++ = (UChar)(i->Xin.SseShuf.order);
3264 goto done;
3265
3266 case Xin_EvCheck: {
3267 /* We generate:
3268 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
3269 (2 bytes) jns nofail expected taken
3270 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
3271 nofail:
3272 */
3273 /* This is heavily asserted re instruction lengths. It needs to
3274 be. If we get given unexpected forms of .amCounter or
3275 .amFailAddr -- basically, anything that's not of the form
3276 uimm7(%ebp) -- they are likely to fail. */
3277 /* Note also that after the decl we must be very careful not to
3278 read the carry flag, else we get a partial flags stall.
3279 js/jns avoids that, though. */
3280 UChar* p0 = p;
3281 /* --- decl 4(%ebp) --- */
3282 /* "1" because there's no register in this encoding;
3283 instead the register field is used as a sub opcode. The
3284 encoding for "decl r/m32" is FF /1, hence the "1". */
3285 *p++ = 0xFF;
3286 p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
3287 vassert(p - p0 == 3);
3288 /* --- jns nofail --- */
3289 *p++ = 0x79;
3290 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3291 vassert(p - p0 == 5);
3292 /* --- jmp* 0(%ebp) --- */
3293 /* The encoding is FF /4. */
3294 *p++ = 0xFF;
3295 p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
3296 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3297 /* And crosscheck .. */
3298 vassert(evCheckSzB_X86() == 8);
3299 goto done;
3300 }
3301
3302 case Xin_ProfInc: {
3303 /* We generate addl $1,NotKnownYet
3304 adcl $0,NotKnownYet+4
3305 in the expectation that a later call to LibVEX_patchProfCtr
3306 will be used to fill in the immediate fields once the right
3307 value is known.
3308 83 05 00 00 00 00 01
3309 83 15 00 00 00 00 00
3310 */
3311 *p++ = 0x83; *p++ = 0x05;
3312 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3313 *p++ = 0x01;
3314 *p++ = 0x83; *p++ = 0x15;
3315 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3316 *p++ = 0x00;
3317 /* Tell the caller .. */
3318 vassert(!(*is_profInc));
3319 *is_profInc = True;
3320 goto done;
3321 }
3322
3323 default:
3324 goto bad;
3325 }
3326
3327 bad:
3328 ppX86Instr(i, mode64);
3329 vpanic("emit_X86Instr");
3330 /*NOTREACHED*/
3331
3332 done:
3333 vassert(p - &buf[0] <= 32);
3334 return p - &buf[0];
3335 }
3336
3337
3338 /* How big is an event check? See case for Xin_EvCheck in
3339 emit_X86Instr just above. That crosschecks what this returns, so
3340 we can tell if we're inconsistent. */
3341 Int evCheckSzB_X86 (void)
3342 {
3343 return 8;
3344 }
3345
3346
3347 /* NB: what goes on here has to be very closely coordinated with the
3348 emitInstr case for XDirect, above.
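In particular, XDirect ends with movl $disp_cp_chain_me,%edx ; call *%edx -- bytes BA <imm32> FF D2, 7 in all -- and chaining rewrites exactly those 7 bytes in place.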
*/ 3349 VexInvalRange chainXDirect_X86 ( VexEndness endness_host, 3350 void* place_to_chain, 3351 const void* disp_cp_chain_me_EXPECTED, 3352 const void* place_to_jump_to ) 3353 { 3354 vassert(endness_host == VexEndnessLE); 3355 3356 /* What we're expecting to see is: 3357 movl $disp_cp_chain_me_EXPECTED, %edx 3358 call *%edx 3359 viz 3360 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3361 FF D2 3362 */ 3363 UChar* p = (UChar*)place_to_chain; 3364 vassert(p[0] == 0xBA); 3365 vassert(read_misaligned_UInt_LE(&p[1]) 3366 == (UInt)(Addr)disp_cp_chain_me_EXPECTED); 3367 vassert(p[5] == 0xFF); 3368 vassert(p[6] == 0xD2); 3369 /* And what we want to change it to is: 3370 jmp disp32 where disp32 is relative to the next insn 3371 ud2; 3372 viz 3373 E9 <4 bytes == disp32> 3374 0F 0B 3375 The replacement has the same length as the original. 3376 */ 3377 /* This is the delta we need to put into a JMP d32 insn. It's 3378 relative to the start of the next insn, hence the -5. */ 3379 Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5; 3380 3381 /* And make the modifications. */ 3382 p[0] = 0xE9; 3383 write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta); 3384 p[5] = 0x0F; p[6] = 0x0B; 3385 /* sanity check on the delta -- top 32 are all 0 or all 1 */ 3386 delta >>= 32; 3387 vassert(delta == 0LL || delta == -1LL); 3388 VexInvalRange vir = { (HWord)place_to_chain, 7 }; 3389 return vir; 3390 } 3391 3392 3393 /* NB: what goes on here has to be very closely coordinated with the 3394 emitInstr case for XDirect, above. */ 3395 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host, 3396 void* place_to_unchain, 3397 const void* place_to_jump_to_EXPECTED, 3398 const void* disp_cp_chain_me ) 3399 { 3400 vassert(endness_host == VexEndnessLE); 3401 3402 /* What we're expecting to see is: 3403 jmp d32 3404 ud2; 3405 viz 3406 E9 <4 bytes == disp32> 3407 0F 0B 3408 */ 3409 UChar* p = (UChar*)place_to_unchain; 3410 Bool valid = False; 3411 if (p[0] == 0xE9 3412 && p[5] == 0x0F && p[6] == 0x0B) { 3413 /* Check the offset is right. */ 3414 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]); 3415 if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) { 3416 valid = True; 3417 if (0) 3418 vex_printf("QQQ unchainXDirect_X86: found valid\n"); 3419 } 3420 } 3421 vassert(valid); 3422 /* And what we want to change it to is: 3423 movl $disp_cp_chain_me, %edx 3424 call *%edx 3425 viz 3426 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3427 FF D2 3428 So it's the same length (convenient, huh). 3429 */ 3430 p[0] = 0xBA; 3431 write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me); 3432 p[5] = 0xFF; 3433 p[6] = 0xD2; 3434 VexInvalRange vir = { (HWord)place_to_unchain, 7 }; 3435 return vir; 3436 } 3437 3438 3439 /* Patch the counter address into a profile inc point, as previously 3440 created by the Xin_ProfInc case for emit_X86Instr. 
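That case emits addl $1,<addr> ; adcl $0,<addr+4> with zeroed address fields; here we write the counter's address into bytes 2..5 and the address of its upper half into bytes 9..12.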
*/ 3441 VexInvalRange patchProfInc_X86 ( VexEndness endness_host, 3442 void* place_to_patch, 3443 const ULong* location_of_counter ) 3444 { 3445 vassert(endness_host == VexEndnessLE); 3446 vassert(sizeof(ULong*) == 4); 3447 UChar* p = (UChar*)place_to_patch; 3448 vassert(p[0] == 0x83); 3449 vassert(p[1] == 0x05); 3450 vassert(p[2] == 0x00); 3451 vassert(p[3] == 0x00); 3452 vassert(p[4] == 0x00); 3453 vassert(p[5] == 0x00); 3454 vassert(p[6] == 0x01); 3455 vassert(p[7] == 0x83); 3456 vassert(p[8] == 0x15); 3457 vassert(p[9] == 0x00); 3458 vassert(p[10] == 0x00); 3459 vassert(p[11] == 0x00); 3460 vassert(p[12] == 0x00); 3461 vassert(p[13] == 0x00); 3462 UInt imm32 = (UInt)(Addr)location_of_counter; 3463 p[2] = imm32 & 0xFF; imm32 >>= 8; 3464 p[3] = imm32 & 0xFF; imm32 >>= 8; 3465 p[4] = imm32 & 0xFF; imm32 >>= 8; 3466 p[5] = imm32 & 0xFF; 3467 imm32 = 4 + (UInt)(Addr)location_of_counter; 3468 p[9] = imm32 & 0xFF; imm32 >>= 8; 3469 p[10] = imm32 & 0xFF; imm32 >>= 8; 3470 p[11] = imm32 & 0xFF; imm32 >>= 8; 3471 p[12] = imm32 & 0xFF; 3472 VexInvalRange vir = { (HWord)place_to_patch, 14 }; 3473 return vir; 3474 } 3475 3476 3477 /*---------------------------------------------------------------*/ 3478 /*--- end host_x86_defs.c ---*/ 3479 /*---------------------------------------------------------------*/ 3480