1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_defs.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 #include "libvex_trc_values.h" 39 40 #include "main_util.h" 41 #include "host_generic_regs.h" 42 #include "host_x86_defs.h" 43 44 45 /* --------- Registers. --------- */ 46 47 const RRegUniverse* getRRegUniverse_X86 ( void ) 48 { 49 /* The real-register universe is a big constant, so we just want to 50 initialise it once. */ 51 static RRegUniverse rRegUniverse_X86; 52 static Bool rRegUniverse_X86_initted = False; 53 54 /* Handy shorthand, nothing more */ 55 RRegUniverse* ru = &rRegUniverse_X86; 56 57 /* This isn't thread-safe. Sigh. 
*/ 58 if (LIKELY(rRegUniverse_X86_initted)) 59 return ru; 60 61 RRegUniverse__init(ru); 62 63 /* Add the registers. The initial segment of this array must be 64 those available for allocation by reg-alloc, and those that 65 follow are not available for allocation. */ 66 ru->regs[ru->size++] = hregX86_EAX(); 67 ru->regs[ru->size++] = hregX86_EBX(); 68 ru->regs[ru->size++] = hregX86_ECX(); 69 ru->regs[ru->size++] = hregX86_EDX(); 70 ru->regs[ru->size++] = hregX86_ESI(); 71 ru->regs[ru->size++] = hregX86_EDI(); 72 ru->regs[ru->size++] = hregX86_FAKE0(); 73 ru->regs[ru->size++] = hregX86_FAKE1(); 74 ru->regs[ru->size++] = hregX86_FAKE2(); 75 ru->regs[ru->size++] = hregX86_FAKE3(); 76 ru->regs[ru->size++] = hregX86_FAKE4(); 77 ru->regs[ru->size++] = hregX86_FAKE5(); 78 ru->regs[ru->size++] = hregX86_XMM0(); 79 ru->regs[ru->size++] = hregX86_XMM1(); 80 ru->regs[ru->size++] = hregX86_XMM2(); 81 ru->regs[ru->size++] = hregX86_XMM3(); 82 ru->regs[ru->size++] = hregX86_XMM4(); 83 ru->regs[ru->size++] = hregX86_XMM5(); 84 ru->regs[ru->size++] = hregX86_XMM6(); 85 ru->regs[ru->size++] = hregX86_XMM7(); 86 ru->allocable = ru->size; 87 /* And other regs, not available to the allocator. */ 88 ru->regs[ru->size++] = hregX86_ESP(); 89 ru->regs[ru->size++] = hregX86_EBP(); 90 91 rRegUniverse_X86_initted = True; 92 93 RRegUniverse__check_is_sane(ru); 94 return ru; 95 } 96 97 98 void ppHRegX86 ( HReg reg ) 99 { 100 Int r; 101 static const HChar* ireg32_names[8] 102 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; 103 /* Be generic for all virtual regs. */ 104 if (hregIsVirtual(reg)) { 105 ppHReg(reg); 106 return; 107 } 108 /* But specific for real regs. 
*/ 109 switch (hregClass(reg)) { 110 case HRcInt32: 111 r = hregEncoding(reg); 112 vassert(r >= 0 && r < 8); 113 vex_printf("%s", ireg32_names[r]); 114 return; 115 case HRcFlt64: 116 r = hregEncoding(reg); 117 vassert(r >= 0 && r < 6); 118 vex_printf("%%fake%d", r); 119 return; 120 case HRcVec128: 121 r = hregEncoding(reg); 122 vassert(r >= 0 && r < 8); 123 vex_printf("%%xmm%d", r); 124 return; 125 default: 126 vpanic("ppHRegX86"); 127 } 128 } 129 130 131 /* --------- Condition codes, Intel encoding. --------- */ 132 133 const HChar* showX86CondCode ( X86CondCode cond ) 134 { 135 switch (cond) { 136 case Xcc_O: return "o"; 137 case Xcc_NO: return "no"; 138 case Xcc_B: return "b"; 139 case Xcc_NB: return "nb"; 140 case Xcc_Z: return "z"; 141 case Xcc_NZ: return "nz"; 142 case Xcc_BE: return "be"; 143 case Xcc_NBE: return "nbe"; 144 case Xcc_S: return "s"; 145 case Xcc_NS: return "ns"; 146 case Xcc_P: return "p"; 147 case Xcc_NP: return "np"; 148 case Xcc_L: return "l"; 149 case Xcc_NL: return "nl"; 150 case Xcc_LE: return "le"; 151 case Xcc_NLE: return "nle"; 152 case Xcc_ALWAYS: return "ALWAYS"; 153 default: vpanic("ppX86CondCode"); 154 } 155 } 156 157 158 /* --------- X86AMode: memory address expressions. 
--------- */ 159 160 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) { 161 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode)); 162 am->tag = Xam_IR; 163 am->Xam.IR.imm = imm32; 164 am->Xam.IR.reg = reg; 165 return am; 166 } 167 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { 168 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode)); 169 am->tag = Xam_IRRS; 170 am->Xam.IRRS.imm = imm32; 171 am->Xam.IRRS.base = base; 172 am->Xam.IRRS.index = indEx; 173 am->Xam.IRRS.shift = shift; 174 vassert(shift >= 0 && shift <= 3); 175 return am; 176 } 177 178 X86AMode* dopyX86AMode ( X86AMode* am ) { 179 switch (am->tag) { 180 case Xam_IR: 181 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); 182 case Xam_IRRS: 183 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, 184 am->Xam.IRRS.index, am->Xam.IRRS.shift ); 185 default: 186 vpanic("dopyX86AMode"); 187 } 188 } 189 190 void ppX86AMode ( X86AMode* am ) { 191 switch (am->tag) { 192 case Xam_IR: 193 if (am->Xam.IR.imm == 0) 194 vex_printf("("); 195 else 196 vex_printf("0x%x(", am->Xam.IR.imm); 197 ppHRegX86(am->Xam.IR.reg); 198 vex_printf(")"); 199 return; 200 case Xam_IRRS: 201 vex_printf("0x%x(", am->Xam.IRRS.imm); 202 ppHRegX86(am->Xam.IRRS.base); 203 vex_printf(","); 204 ppHRegX86(am->Xam.IRRS.index); 205 vex_printf(",%d)", 1 << am->Xam.IRRS.shift); 206 return; 207 default: 208 vpanic("ppX86AMode"); 209 } 210 } 211 212 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) { 213 switch (am->tag) { 214 case Xam_IR: 215 addHRegUse(u, HRmRead, am->Xam.IR.reg); 216 return; 217 case Xam_IRRS: 218 addHRegUse(u, HRmRead, am->Xam.IRRS.base); 219 addHRegUse(u, HRmRead, am->Xam.IRRS.index); 220 return; 221 default: 222 vpanic("addRegUsage_X86AMode"); 223 } 224 } 225 226 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) { 227 switch (am->tag) { 228 case Xam_IR: 229 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg); 230 return; 231 case Xam_IRRS: 232 am->Xam.IRRS.base = 
lookupHRegRemap(m, am->Xam.IRRS.base); 233 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index); 234 return; 235 default: 236 vpanic("mapRegs_X86AMode"); 237 } 238 } 239 240 /* --------- Operand, which can be reg, immediate or memory. --------- */ 241 242 X86RMI* X86RMI_Imm ( UInt imm32 ) { 243 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 244 op->tag = Xrmi_Imm; 245 op->Xrmi.Imm.imm32 = imm32; 246 return op; 247 } 248 X86RMI* X86RMI_Reg ( HReg reg ) { 249 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 250 op->tag = Xrmi_Reg; 251 op->Xrmi.Reg.reg = reg; 252 return op; 253 } 254 X86RMI* X86RMI_Mem ( X86AMode* am ) { 255 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI)); 256 op->tag = Xrmi_Mem; 257 op->Xrmi.Mem.am = am; 258 return op; 259 } 260 261 void ppX86RMI ( X86RMI* op ) { 262 switch (op->tag) { 263 case Xrmi_Imm: 264 vex_printf("$0x%x", op->Xrmi.Imm.imm32); 265 return; 266 case Xrmi_Reg: 267 ppHRegX86(op->Xrmi.Reg.reg); 268 return; 269 case Xrmi_Mem: 270 ppX86AMode(op->Xrmi.Mem.am); 271 return; 272 default: 273 vpanic("ppX86RMI"); 274 } 275 } 276 277 /* An X86RMI can only be used in a "read" context (what would it mean 278 to write or modify a literal?) and so we enumerate its registers 279 accordingly. */ 280 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) { 281 switch (op->tag) { 282 case Xrmi_Imm: 283 return; 284 case Xrmi_Reg: 285 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg); 286 return; 287 case Xrmi_Mem: 288 addRegUsage_X86AMode(u, op->Xrmi.Mem.am); 289 return; 290 default: 291 vpanic("addRegUsage_X86RMI"); 292 } 293 } 294 295 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) { 296 switch (op->tag) { 297 case Xrmi_Imm: 298 return; 299 case Xrmi_Reg: 300 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg); 301 return; 302 case Xrmi_Mem: 303 mapRegs_X86AMode(m, op->Xrmi.Mem.am); 304 return; 305 default: 306 vpanic("mapRegs_X86RMI"); 307 } 308 } 309 310 311 /* --------- Operand, which can be reg or immediate only. 
--------- */ 312 313 X86RI* X86RI_Imm ( UInt imm32 ) { 314 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI)); 315 op->tag = Xri_Imm; 316 op->Xri.Imm.imm32 = imm32; 317 return op; 318 } 319 X86RI* X86RI_Reg ( HReg reg ) { 320 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI)); 321 op->tag = Xri_Reg; 322 op->Xri.Reg.reg = reg; 323 return op; 324 } 325 326 void ppX86RI ( X86RI* op ) { 327 switch (op->tag) { 328 case Xri_Imm: 329 vex_printf("$0x%x", op->Xri.Imm.imm32); 330 return; 331 case Xri_Reg: 332 ppHRegX86(op->Xri.Reg.reg); 333 return; 334 default: 335 vpanic("ppX86RI"); 336 } 337 } 338 339 /* An X86RI can only be used in a "read" context (what would it mean 340 to write or modify a literal?) and so we enumerate its registers 341 accordingly. */ 342 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) { 343 switch (op->tag) { 344 case Xri_Imm: 345 return; 346 case Xri_Reg: 347 addHRegUse(u, HRmRead, op->Xri.Reg.reg); 348 return; 349 default: 350 vpanic("addRegUsage_X86RI"); 351 } 352 } 353 354 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) { 355 switch (op->tag) { 356 case Xri_Imm: 357 return; 358 case Xri_Reg: 359 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg); 360 return; 361 default: 362 vpanic("mapRegs_X86RI"); 363 } 364 } 365 366 367 /* --------- Operand, which can be reg or memory only. 
--------- */ 368 369 X86RM* X86RM_Reg ( HReg reg ) { 370 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM)); 371 op->tag = Xrm_Reg; 372 op->Xrm.Reg.reg = reg; 373 return op; 374 } 375 X86RM* X86RM_Mem ( X86AMode* am ) { 376 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM)); 377 op->tag = Xrm_Mem; 378 op->Xrm.Mem.am = am; 379 return op; 380 } 381 382 void ppX86RM ( X86RM* op ) { 383 switch (op->tag) { 384 case Xrm_Mem: 385 ppX86AMode(op->Xrm.Mem.am); 386 return; 387 case Xrm_Reg: 388 ppHRegX86(op->Xrm.Reg.reg); 389 return; 390 default: 391 vpanic("ppX86RM"); 392 } 393 } 394 395 /* Because an X86RM can be both a source or destination operand, we 396 have to supply a mode -- pertaining to the operand as a whole -- 397 indicating how it's being used. */ 398 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) { 399 switch (op->tag) { 400 case Xrm_Mem: 401 /* Memory is read, written or modified. So we just want to 402 know the regs read by the amode. */ 403 addRegUsage_X86AMode(u, op->Xrm.Mem.am); 404 return; 405 case Xrm_Reg: 406 /* reg is read, written or modified. Add it in the 407 appropriate way. */ 408 addHRegUse(u, mode, op->Xrm.Reg.reg); 409 return; 410 default: 411 vpanic("addRegUsage_X86RM"); 412 } 413 } 414 415 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op ) 416 { 417 switch (op->tag) { 418 case Xrm_Mem: 419 mapRegs_X86AMode(m, op->Xrm.Mem.am); 420 return; 421 case Xrm_Reg: 422 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg); 423 return; 424 default: 425 vpanic("mapRegs_X86RM"); 426 } 427 } 428 429 430 /* --------- Instructions. 
--------- */ 431 432 const HChar* showX86UnaryOp ( X86UnaryOp op ) { 433 switch (op) { 434 case Xun_NOT: return "not"; 435 case Xun_NEG: return "neg"; 436 default: vpanic("showX86UnaryOp"); 437 } 438 } 439 440 const HChar* showX86AluOp ( X86AluOp op ) { 441 switch (op) { 442 case Xalu_MOV: return "mov"; 443 case Xalu_CMP: return "cmp"; 444 case Xalu_ADD: return "add"; 445 case Xalu_SUB: return "sub"; 446 case Xalu_ADC: return "adc"; 447 case Xalu_SBB: return "sbb"; 448 case Xalu_AND: return "and"; 449 case Xalu_OR: return "or"; 450 case Xalu_XOR: return "xor"; 451 case Xalu_MUL: return "mul"; 452 default: vpanic("showX86AluOp"); 453 } 454 } 455 456 const HChar* showX86ShiftOp ( X86ShiftOp op ) { 457 switch (op) { 458 case Xsh_SHL: return "shl"; 459 case Xsh_SHR: return "shr"; 460 case Xsh_SAR: return "sar"; 461 default: vpanic("showX86ShiftOp"); 462 } 463 } 464 465 const HChar* showX86FpOp ( X86FpOp op ) { 466 switch (op) { 467 case Xfp_ADD: return "add"; 468 case Xfp_SUB: return "sub"; 469 case Xfp_MUL: return "mul"; 470 case Xfp_DIV: return "div"; 471 case Xfp_SCALE: return "scale"; 472 case Xfp_ATAN: return "atan"; 473 case Xfp_YL2X: return "yl2x"; 474 case Xfp_YL2XP1: return "yl2xp1"; 475 case Xfp_PREM: return "prem"; 476 case Xfp_PREM1: return "prem1"; 477 case Xfp_SQRT: return "sqrt"; 478 case Xfp_ABS: return "abs"; 479 case Xfp_NEG: return "chs"; 480 case Xfp_MOV: return "mov"; 481 case Xfp_SIN: return "sin"; 482 case Xfp_COS: return "cos"; 483 case Xfp_TAN: return "tan"; 484 case Xfp_ROUND: return "round"; 485 case Xfp_2XM1: return "2xm1"; 486 default: vpanic("showX86FpOp"); 487 } 488 } 489 490 const HChar* showX86SseOp ( X86SseOp op ) { 491 switch (op) { 492 case Xsse_MOV: return "mov(?!)"; 493 case Xsse_ADDF: return "add"; 494 case Xsse_SUBF: return "sub"; 495 case Xsse_MULF: return "mul"; 496 case Xsse_DIVF: return "div"; 497 case Xsse_MAXF: return "max"; 498 case Xsse_MINF: return "min"; 499 case Xsse_CMPEQF: return "cmpFeq"; 500 case Xsse_CMPLTF: 
return "cmpFlt"; 501 case Xsse_CMPLEF: return "cmpFle"; 502 case Xsse_CMPUNF: return "cmpFun"; 503 case Xsse_RCPF: return "rcp"; 504 case Xsse_RSQRTF: return "rsqrt"; 505 case Xsse_SQRTF: return "sqrt"; 506 case Xsse_AND: return "and"; 507 case Xsse_OR: return "or"; 508 case Xsse_XOR: return "xor"; 509 case Xsse_ANDN: return "andn"; 510 case Xsse_ADD8: return "paddb"; 511 case Xsse_ADD16: return "paddw"; 512 case Xsse_ADD32: return "paddd"; 513 case Xsse_ADD64: return "paddq"; 514 case Xsse_QADD8U: return "paddusb"; 515 case Xsse_QADD16U: return "paddusw"; 516 case Xsse_QADD8S: return "paddsb"; 517 case Xsse_QADD16S: return "paddsw"; 518 case Xsse_SUB8: return "psubb"; 519 case Xsse_SUB16: return "psubw"; 520 case Xsse_SUB32: return "psubd"; 521 case Xsse_SUB64: return "psubq"; 522 case Xsse_QSUB8U: return "psubusb"; 523 case Xsse_QSUB16U: return "psubusw"; 524 case Xsse_QSUB8S: return "psubsb"; 525 case Xsse_QSUB16S: return "psubsw"; 526 case Xsse_MUL16: return "pmullw"; 527 case Xsse_MULHI16U: return "pmulhuw"; 528 case Xsse_MULHI16S: return "pmulhw"; 529 case Xsse_AVG8U: return "pavgb"; 530 case Xsse_AVG16U: return "pavgw"; 531 case Xsse_MAX16S: return "pmaxw"; 532 case Xsse_MAX8U: return "pmaxub"; 533 case Xsse_MIN16S: return "pminw"; 534 case Xsse_MIN8U: return "pminub"; 535 case Xsse_CMPEQ8: return "pcmpeqb"; 536 case Xsse_CMPEQ16: return "pcmpeqw"; 537 case Xsse_CMPEQ32: return "pcmpeqd"; 538 case Xsse_CMPGT8S: return "pcmpgtb"; 539 case Xsse_CMPGT16S: return "pcmpgtw"; 540 case Xsse_CMPGT32S: return "pcmpgtd"; 541 case Xsse_SHL16: return "psllw"; 542 case Xsse_SHL32: return "pslld"; 543 case Xsse_SHL64: return "psllq"; 544 case Xsse_SHR16: return "psrlw"; 545 case Xsse_SHR32: return "psrld"; 546 case Xsse_SHR64: return "psrlq"; 547 case Xsse_SAR16: return "psraw"; 548 case Xsse_SAR32: return "psrad"; 549 case Xsse_PACKSSD: return "packssdw"; 550 case Xsse_PACKSSW: return "packsswb"; 551 case Xsse_PACKUSW: return "packuswb"; 552 case Xsse_UNPCKHB: return 
"punpckhb"; 553 case Xsse_UNPCKHW: return "punpckhw"; 554 case Xsse_UNPCKHD: return "punpckhd"; 555 case Xsse_UNPCKHQ: return "punpckhq"; 556 case Xsse_UNPCKLB: return "punpcklb"; 557 case Xsse_UNPCKLW: return "punpcklw"; 558 case Xsse_UNPCKLD: return "punpckld"; 559 case Xsse_UNPCKLQ: return "punpcklq"; 560 default: vpanic("showX86SseOp"); 561 } 562 } 563 564 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { 565 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 566 i->tag = Xin_Alu32R; 567 i->Xin.Alu32R.op = op; 568 i->Xin.Alu32R.src = src; 569 i->Xin.Alu32R.dst = dst; 570 return i; 571 } 572 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) { 573 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 574 i->tag = Xin_Alu32M; 575 i->Xin.Alu32M.op = op; 576 i->Xin.Alu32M.src = src; 577 i->Xin.Alu32M.dst = dst; 578 vassert(op != Xalu_MUL); 579 return i; 580 } 581 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) { 582 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 583 i->tag = Xin_Sh32; 584 i->Xin.Sh32.op = op; 585 i->Xin.Sh32.src = src; 586 i->Xin.Sh32.dst = dst; 587 return i; 588 } 589 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) { 590 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 591 i->tag = Xin_Test32; 592 i->Xin.Test32.imm32 = imm32; 593 i->Xin.Test32.dst = dst; 594 return i; 595 } 596 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { 597 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 598 i->tag = Xin_Unary32; 599 i->Xin.Unary32.op = op; 600 i->Xin.Unary32.dst = dst; 601 return i; 602 } 603 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { 604 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 605 i->tag = Xin_Lea32; 606 i->Xin.Lea32.am = am; 607 i->Xin.Lea32.dst = dst; 608 return i; 609 } 610 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { 611 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 612 i->tag = Xin_MulL; 613 i->Xin.MulL.syned = syned; 614 
i->Xin.MulL.src = src; 615 return i; 616 } 617 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) { 618 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 619 i->tag = Xin_Div; 620 i->Xin.Div.syned = syned; 621 i->Xin.Div.src = src; 622 return i; 623 } 624 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) { 625 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 626 i->tag = Xin_Sh3232; 627 i->Xin.Sh3232.op = op; 628 i->Xin.Sh3232.amt = amt; 629 i->Xin.Sh3232.src = src; 630 i->Xin.Sh3232.dst = dst; 631 vassert(op == Xsh_SHL || op == Xsh_SHR); 632 return i; 633 } 634 X86Instr* X86Instr_Push( X86RMI* src ) { 635 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 636 i->tag = Xin_Push; 637 i->Xin.Push.src = src; 638 return i; 639 } 640 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms, 641 RetLoc rloc ) { 642 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 643 i->tag = Xin_Call; 644 i->Xin.Call.cond = cond; 645 i->Xin.Call.target = target; 646 i->Xin.Call.regparms = regparms; 647 i->Xin.Call.rloc = rloc; 648 vassert(regparms >= 0 && regparms <= 3); 649 vassert(is_sane_RetLoc(rloc)); 650 return i; 651 } 652 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, 653 X86CondCode cond, Bool toFastEP ) { 654 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 655 i->tag = Xin_XDirect; 656 i->Xin.XDirect.dstGA = dstGA; 657 i->Xin.XDirect.amEIP = amEIP; 658 i->Xin.XDirect.cond = cond; 659 i->Xin.XDirect.toFastEP = toFastEP; 660 return i; 661 } 662 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, 663 X86CondCode cond ) { 664 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 665 i->tag = Xin_XIndir; 666 i->Xin.XIndir.dstGA = dstGA; 667 i->Xin.XIndir.amEIP = amEIP; 668 i->Xin.XIndir.cond = cond; 669 return i; 670 } 671 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, 672 X86CondCode cond, IRJumpKind jk ) { 673 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 674 i->tag = Xin_XAssisted; 675 
i->Xin.XAssisted.dstGA = dstGA; 676 i->Xin.XAssisted.amEIP = amEIP; 677 i->Xin.XAssisted.cond = cond; 678 i->Xin.XAssisted.jk = jk; 679 return i; 680 } 681 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) { 682 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 683 i->tag = Xin_CMov32; 684 i->Xin.CMov32.cond = cond; 685 i->Xin.CMov32.src = src; 686 i->Xin.CMov32.dst = dst; 687 vassert(cond != Xcc_ALWAYS); 688 return i; 689 } 690 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, 691 X86AMode* src, HReg dst ) { 692 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 693 i->tag = Xin_LoadEX; 694 i->Xin.LoadEX.szSmall = szSmall; 695 i->Xin.LoadEX.syned = syned; 696 i->Xin.LoadEX.src = src; 697 i->Xin.LoadEX.dst = dst; 698 vassert(szSmall == 1 || szSmall == 2); 699 return i; 700 } 701 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { 702 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 703 i->tag = Xin_Store; 704 i->Xin.Store.sz = sz; 705 i->Xin.Store.src = src; 706 i->Xin.Store.dst = dst; 707 vassert(sz == 1 || sz == 2); 708 return i; 709 } 710 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) { 711 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 712 i->tag = Xin_Set32; 713 i->Xin.Set32.cond = cond; 714 i->Xin.Set32.dst = dst; 715 return i; 716 } 717 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) { 718 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 719 i->tag = Xin_Bsfr32; 720 i->Xin.Bsfr32.isFwds = isFwds; 721 i->Xin.Bsfr32.src = src; 722 i->Xin.Bsfr32.dst = dst; 723 return i; 724 } 725 X86Instr* X86Instr_MFence ( UInt hwcaps ) { 726 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 727 i->tag = Xin_MFence; 728 i->Xin.MFence.hwcaps = hwcaps; 729 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT 730 |VEX_HWCAPS_X86_SSE1 731 |VEX_HWCAPS_X86_SSE2 732 |VEX_HWCAPS_X86_SSE3 733 |VEX_HWCAPS_X86_LZCNT))); 734 return i; 735 } 736 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) { 737 X86Instr* i 
= LibVEX_Alloc_inline(sizeof(X86Instr)); 738 i->tag = Xin_ACAS; 739 i->Xin.ACAS.addr = addr; 740 i->Xin.ACAS.sz = sz; 741 vassert(sz == 4 || sz == 2 || sz == 1); 742 return i; 743 } 744 X86Instr* X86Instr_DACAS ( X86AMode* addr ) { 745 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 746 i->tag = Xin_DACAS; 747 i->Xin.DACAS.addr = addr; 748 return i; 749 } 750 751 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) { 752 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 753 i->tag = Xin_FpUnary; 754 i->Xin.FpUnary.op = op; 755 i->Xin.FpUnary.src = src; 756 i->Xin.FpUnary.dst = dst; 757 return i; 758 } 759 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) { 760 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 761 i->tag = Xin_FpBinary; 762 i->Xin.FpBinary.op = op; 763 i->Xin.FpBinary.srcL = srcL; 764 i->Xin.FpBinary.srcR = srcR; 765 i->Xin.FpBinary.dst = dst; 766 return i; 767 } 768 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) { 769 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 770 i->tag = Xin_FpLdSt; 771 i->Xin.FpLdSt.isLoad = isLoad; 772 i->Xin.FpLdSt.sz = sz; 773 i->Xin.FpLdSt.reg = reg; 774 i->Xin.FpLdSt.addr = addr; 775 vassert(sz == 4 || sz == 8 || sz == 10); 776 return i; 777 } 778 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, 779 HReg reg, X86AMode* addr ) { 780 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 781 i->tag = Xin_FpLdStI; 782 i->Xin.FpLdStI.isLoad = isLoad; 783 i->Xin.FpLdStI.sz = sz; 784 i->Xin.FpLdStI.reg = reg; 785 i->Xin.FpLdStI.addr = addr; 786 vassert(sz == 2 || sz == 4 || sz == 8); 787 return i; 788 } 789 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) { 790 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 791 i->tag = Xin_Fp64to32; 792 i->Xin.Fp64to32.src = src; 793 i->Xin.Fp64to32.dst = dst; 794 return i; 795 } 796 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) { 797 X86Instr* i = 
LibVEX_Alloc_inline(sizeof(X86Instr)); 798 i->tag = Xin_FpCMov; 799 i->Xin.FpCMov.cond = cond; 800 i->Xin.FpCMov.src = src; 801 i->Xin.FpCMov.dst = dst; 802 vassert(cond != Xcc_ALWAYS); 803 return i; 804 } 805 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) { 806 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 807 i->tag = Xin_FpLdCW; 808 i->Xin.FpLdCW.addr = addr; 809 return i; 810 } 811 X86Instr* X86Instr_FpStSW_AX ( void ) { 812 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 813 i->tag = Xin_FpStSW_AX; 814 return i; 815 } 816 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) { 817 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 818 i->tag = Xin_FpCmp; 819 i->Xin.FpCmp.srcL = srcL; 820 i->Xin.FpCmp.srcR = srcR; 821 i->Xin.FpCmp.dst = dst; 822 return i; 823 } 824 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) { 825 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 826 i->tag = Xin_SseConst; 827 i->Xin.SseConst.con = con; 828 i->Xin.SseConst.dst = dst; 829 vassert(hregClass(dst) == HRcVec128); 830 return i; 831 } 832 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) { 833 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 834 i->tag = Xin_SseLdSt; 835 i->Xin.SseLdSt.isLoad = isLoad; 836 i->Xin.SseLdSt.reg = reg; 837 i->Xin.SseLdSt.addr = addr; 838 return i; 839 } 840 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr ) 841 { 842 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 843 i->tag = Xin_SseLdzLO; 844 i->Xin.SseLdzLO.sz = toUChar(sz); 845 i->Xin.SseLdzLO.reg = reg; 846 i->Xin.SseLdzLO.addr = addr; 847 vassert(sz == 4 || sz == 8); 848 return i; 849 } 850 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) { 851 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 852 i->tag = Xin_Sse32Fx4; 853 i->Xin.Sse32Fx4.op = op; 854 i->Xin.Sse32Fx4.src = src; 855 i->Xin.Sse32Fx4.dst = dst; 856 vassert(op != Xsse_MOV); 857 return i; 858 } 859 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg 
src, HReg dst ) { 860 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 861 i->tag = Xin_Sse32FLo; 862 i->Xin.Sse32FLo.op = op; 863 i->Xin.Sse32FLo.src = src; 864 i->Xin.Sse32FLo.dst = dst; 865 vassert(op != Xsse_MOV); 866 return i; 867 } 868 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) { 869 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 870 i->tag = Xin_Sse64Fx2; 871 i->Xin.Sse64Fx2.op = op; 872 i->Xin.Sse64Fx2.src = src; 873 i->Xin.Sse64Fx2.dst = dst; 874 vassert(op != Xsse_MOV); 875 return i; 876 } 877 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) { 878 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 879 i->tag = Xin_Sse64FLo; 880 i->Xin.Sse64FLo.op = op; 881 i->Xin.Sse64FLo.src = src; 882 i->Xin.Sse64FLo.dst = dst; 883 vassert(op != Xsse_MOV); 884 return i; 885 } 886 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) { 887 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 888 i->tag = Xin_SseReRg; 889 i->Xin.SseReRg.op = op; 890 i->Xin.SseReRg.src = re; 891 i->Xin.SseReRg.dst = rg; 892 return i; 893 } 894 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) { 895 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 896 i->tag = Xin_SseCMov; 897 i->Xin.SseCMov.cond = cond; 898 i->Xin.SseCMov.src = src; 899 i->Xin.SseCMov.dst = dst; 900 vassert(cond != Xcc_ALWAYS); 901 return i; 902 } 903 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) { 904 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 905 i->tag = Xin_SseShuf; 906 i->Xin.SseShuf.order = order; 907 i->Xin.SseShuf.src = src; 908 i->Xin.SseShuf.dst = dst; 909 vassert(order >= 0 && order <= 0xFF); 910 return i; 911 } 912 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, 913 X86AMode* amFailAddr ) { 914 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); 915 i->tag = Xin_EvCheck; 916 i->Xin.EvCheck.amCounter = amCounter; 917 i->Xin.EvCheck.amFailAddr = amFailAddr; 918 return i; 919 } 920 X86Instr* X86Instr_ProfInc ( 
void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_ProfInc;
   return i;
}

/* Pretty-print an X86Instr to the VEX log.  mode64 must be False:
   this is the 32-bit x86 backend.  NOTE(review): most cases 'return'
   while a few (Xin_Call, Xin_FpUnary, Xin_FpBinary, Xin_FpCmp,
   Xin_SseConst) 'break'; both are equivalent here because the switch
   is the last statement of the function. */
void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         /* shift amount of 0 encodes "amount is in %cl" */
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         /* amt of 0 encodes "amount is in %cl", as for Sh32 */
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                    : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         /* 'T' = 10-byte (80-bit) extended, 'D' = 8-byte, 'F' = 4-byte */
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         /* The counter address is patched in later, hence
            "NotKnownYet". */
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation.
--------- */

/* Fill in *u with the register-usage pattern (read / write / modify)
   of instruction i, for the register allocator.  mode64 must be
   False.  Implicit fixed-register uses (e.g. %eax:%edx for mul/div,
   %cl for variable shifts) are reported explicitly here. */
void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            /* MOV overwrites dst without reading it */
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            /* CMP only reads dst; result goes to the flags */
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         /* shift amount 0 means "use %cl" */
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         /* mull: reads %eax, writes %edx:%eax */
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         /* divl: reads and writes both %edx and %eax */
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         /* amt 0 means "use %cl" */
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         /* lock cmpxchg: %eax holds expected/old value, %ebx the new */
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         /* lock cmpxchg8b: %edx:%eax expected, %ecx:%ebx new */
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         /* comparison result is transferred via %eax (fstsw/sahf idiom) */
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         /* unary ops overwrite dst; binary ops read-modify it */
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                          ? HRmWrite : HRmModify,
                          i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         /* We expect both amodes only to mention %ebp, so this is in
            fact pointless, since %ebp isn't allocatable, but anyway.. */
         addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
         addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
         return;
      case Xin_ProfInc:
         /* does not use any registers. */
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}

/* local helper: remap one register in place */
static void mapReg( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

/* Apply the vreg->rreg mapping m to every register field of i,
   in place.  mode64 must be False. */
void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         mapReg(m, &i->Xin.Sh3232.src);
         mapReg(m, &i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         mapRegs_X86RMI(m, i->Xin.Push.src);
         return;
      case Xin_Call:
         /* only fixed registers involved; nothing to remap */
         return;
      case Xin_XDirect:
         mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         mapReg(m, &i->Xin.XIndir.dstGA);
         mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         mapReg(m, &i->Xin.XAssisted.dstGA);
         mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         mapRegs_X86RM(m, i->Xin.CMov32.src);
         mapReg(m, &i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         mapRegs_X86AMode(m, i->Xin.LoadEX.src);
         mapReg(m, &i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         mapReg(m, &i->Xin.Store.src);
         mapRegs_X86AMode(m, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         mapReg(m, &i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         mapReg(m, &i->Xin.Bsfr32.src);
         mapReg(m, &i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         mapRegs_X86AMode(m, i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         mapRegs_X86AMode(m, i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         mapReg(m, &i->Xin.FpUnary.src);
         mapReg(m, &i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         mapReg(m, &i->Xin.FpBinary.srcL);
         mapReg(m, &i->Xin.FpBinary.srcR);
         mapReg(m, &i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
         mapReg(m, &i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
         mapReg(m, &i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         mapReg(m, &i->Xin.Fp64to32.src);
         mapReg(m, &i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         mapReg(m, &i->Xin.FpCMov.src);
         mapReg(m, &i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         return;
      case Xin_FpCmp:
         mapReg(m, &i->Xin.FpCmp.srcL);
         mapReg(m, &i->Xin.FpCmp.srcR);
         mapReg(m, &i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         mapReg(m, &i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         mapReg(m, &i->Xin.SseLdSt.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         break;
      case Xin_SseLdzLO:
         mapReg(m, &i->Xin.SseLdzLO.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         break;
      case Xin_Sse32Fx4:
         mapReg(m, &i->Xin.Sse32Fx4.src);
         mapReg(m, &i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         mapReg(m, &i->Xin.Sse32FLo.src);
         mapReg(m, &i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         mapReg(m, &i->Xin.Sse64Fx2.src);
         mapReg(m, &i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         mapReg(m, &i->Xin.Sse64FLo.src);
         mapReg(m, &i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         mapReg(m, &i->Xin.SseReRg.src);
         mapReg(m, &i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         mapReg(m, &i->Xin.SseCMov.src);
         mapReg(m, &i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         mapReg(m, &i->Xin.SseShuf.src);
         mapReg(m, &i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         /* We expect both amodes only to mention %ebp, so this is in
            fact pointless, since %ebp isn't allocatable, but anyway.. */
         mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
         mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
         return;
      case Xin_ProfInc:
         /* does not use any registers. */
         return;

      default:
         ppX86Instr(i, mode64);
         vpanic("mapRegs_X86Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Xin_Alu32R) {
      if (i->Xin.Alu32R.op != Xalu_MOV)
         return False;
      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
         return False;
      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
      *dst = i->Xin.Alu32R.dst;
      return True;
   }
   /* Moves between FP regs */
   if (i->tag == Xin_FpUnary) {
      if (i->Xin.FpUnary.op != Xfp_MOV)
         return False;
      *src = i->Xin.FpUnary.src;
      *dst = i->Xin.FpUnary.dst;
      return True;
   }
   /* Moves between SSE regs */
   if (i->tag == Xin_SseReRg) {
      if (i->Xin.SseReRg.op != Xsse_MOV)
         return False;
      *src = i->Xin.SseReRg.src;
      *dst = i->Xin.SseReRg.dst;
      return True;
   }
   return False;
}


/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

/* Emit at most two instructions (*i1, *i2; unused slots left NULL)
   that store real register rreg to the spill slot at offsetB from
   %ebp.  The spill area is addressed off %ebp. */
void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         /* stored as 80-bit (10-byte) extended, to avoid losing
            precision across the spill */
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

/* Counterpart of genSpill_X86: reload rreg from the spill slot at
   offsetB from %ebp. */
void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction to one which instead
   references the spill slot directly.  Returns NULL if no such
   transformation applies. */

X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && sameHReg(i->Xin.Alu32R.dst, vreg)) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}


/* --------- The x86 assembler (bleh.)
--------- */ 1833 1834 inline static UInt iregEnc ( HReg r ) 1835 { 1836 UInt n; 1837 vassert(hregClass(r) == HRcInt32); 1838 vassert(!hregIsVirtual(r)); 1839 n = hregEncoding(r); 1840 vassert(n <= 7); 1841 return n; 1842 } 1843 1844 inline static UInt fregEnc ( HReg r ) 1845 { 1846 UInt n; 1847 vassert(hregClass(r) == HRcFlt64); 1848 vassert(!hregIsVirtual(r)); 1849 n = hregEncoding(r); 1850 vassert(n <= 5); 1851 return n; 1852 } 1853 1854 inline static UInt vregEnc ( HReg r ) 1855 { 1856 UInt n; 1857 vassert(hregClass(r) == HRcVec128); 1858 vassert(!hregIsVirtual(r)); 1859 n = hregEncoding(r); 1860 vassert(n <= 7); 1861 return n; 1862 } 1863 1864 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem ) 1865 { 1866 vassert(mod < 4); 1867 vassert((reg|regmem) < 8); 1868 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) ); 1869 } 1870 1871 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase ) 1872 { 1873 vassert(shift < 4); 1874 vassert((regindex|regbase) < 8); 1875 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) ); 1876 } 1877 1878 static UChar* emit32 ( UChar* p, UInt w32 ) 1879 { 1880 *p++ = toUChar( w32 & 0x000000FF); 1881 *p++ = toUChar((w32 >> 8) & 0x000000FF); 1882 *p++ = toUChar((w32 >> 16) & 0x000000FF); 1883 *p++ = toUChar((w32 >> 24) & 0x000000FF); 1884 return p; 1885 } 1886 1887 /* Does a sign-extend of the lowest 8 bits give 1888 the original number? */ 1889 static Bool fits8bits ( UInt w32 ) 1890 { 1891 Int i32 = (Int)w32; 1892 return toBool(i32 == ((Int)(w32 << 24) >> 24)); 1893 } 1894 1895 1896 /* Forming mod-reg-rm bytes and scale-index-base bytes. 

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

   -----------------------------------------------

     greg,  d8(base,index,scale)
            |  index != ESP
            =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
            |  index != ESP
            =  10 greg 100, scale index base, d32
*/
/* Emit the mod-reg-rm byte plus any SIB/displacement bytes which
   encode the amode 'am', with the reg field of the mod-reg-rm byte
   holding the 3-bit encoding 'gregEnc'.  Returns the advanced output
   pointer.  The ESP/EBP exclusions in the table above exist because,
   in mod-reg-rm encoding, r/m = ESP escapes to a SIB byte, and
   mod=00 with r/m = EBP denotes a bare d32 instead. */
static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      /* 0(ereg): no displacement byte needed */
      if (am->Xam.IR.imm == 0
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
         *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
         return p;
      }
      /* d8(ereg) */
      if (fits8bits(am->Xam.IR.imm)
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      /* d32(ereg) */
      if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      /* d8(%esp): needs the 0x24 SIB escape byte */
      if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      /* d32(%esp) is deliberately not handled */
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      /* d8(base,index,scale) */
      if (fits8bits(am->Xam.IRRS.imm)
          && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      /* d32(base,index,scale) */
      if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      /* index == ESP cannot be encoded in a SIB byte */
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}

/* As doAMode_M__wrk, taking the reg field from real int reg 'greg'. */
static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   return doAMode_M__wrk(p, iregEnc(greg), am);
}

/* As doAMode_M__wrk, taking the reg field as a raw 3-bit encoding
   (used when it holds an opcode extension rather than a register). */
static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
{
   vassert(gregEnc < 8);
   return doAMode_M__wrk(p, gregEnc, am);
}


/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   *p++ = mkModRegRM(3, gregEnc, eregEnc);
   return p;
}

/* Register-register form, both fields from real int registers. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
}

/* Register-register form; reg field is a raw 3-bit encoding
   (typically an opcode extension), rm field from a real int reg. */
static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
{
   vassert(gregEnc < 8);
   return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
}

/* Register-register form with both fields as raw 3-bit encodings. */
static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   vassert( (gregEnc|eregEnc) < 8);
   return doAMode_R__wrk(p, gregEnc, eregEnc);
}


/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:    break;
      case Xfp_TAN:
         /* fptan pushes 1.0 on the FP stack, except when the argument
            is out of range.  Hence we have to do the instruction,
            then inspect C2 to see if there is an out of range
            condition.  If there is, we skip the fincstp that is used
            by the in-range case to get rid of this extra 1.0
            value. */
         p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
         *p++ = 0xD9; *p++ = 0xF2; // fptan
         *p++ = 0x50;              // pushl %eax
         *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
         *p++ = 0x66; *p++ = 0xA9;
         *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
         *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
         *p++ = 0xD9; *p++ = 0xF7; // fincstp
         *p++ = 0x58;              // after_fincstp: popl %eax
         break;
      default:
         vpanic("do_fop1_st: unknown op");
   }
   return p;
}

/* Emit f<op> %st(i), 1 <= i <= 5.  The bound on i is enforced by the
   (subopc|i) < 8 assertion inside doAMode_R_enc_enc. */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p = doAMode_R_enc_enc(p, subopc, i);
   return p;
}

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
2090 */ 2091 static UChar* push_word_from_tags ( UChar* p, UShort tags ) 2092 { 2093 UInt w; 2094 vassert(0 == (tags & ~0xF)); 2095 if (tags == 0) { 2096 /* pushl $0x00000000 */ 2097 *p++ = 0x6A; 2098 *p++ = 0x00; 2099 } 2100 else 2101 /* pushl $0xFFFFFFFF */ 2102 if (tags == 0xF) { 2103 *p++ = 0x6A; 2104 *p++ = 0xFF; 2105 } else { 2106 vassert(0); /* awaiting test case */ 2107 w = 0; 2108 if (tags & 1) w |= 0x000000FF; 2109 if (tags & 2) w |= 0x0000FF00; 2110 if (tags & 4) w |= 0x00FF0000; 2111 if (tags & 8) w |= 0xFF000000; 2112 *p++ = 0x68; 2113 p = emit32(p, w); 2114 } 2115 return p; 2116 } 2117 2118 /* Emit an instruction into buf and return the number of bytes used. 2119 Note that buf is not the insn's final place, and therefore it is 2120 imperative to emit position-independent code. If the emitted 2121 instruction was a profiler inc, set *is_profInc to True, else 2122 leave it unchanged. */ 2123 2124 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, 2125 UChar* buf, Int nbuf, const X86Instr* i, 2126 Bool mode64, VexEndness endness_host, 2127 const void* disp_cp_chain_me_to_slowEP, 2128 const void* disp_cp_chain_me_to_fastEP, 2129 const void* disp_cp_xindir, 2130 const void* disp_cp_xassisted ) 2131 { 2132 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2133 2134 UInt xtra; 2135 UChar* p = &buf[0]; 2136 UChar* ptmp; 2137 vassert(nbuf >= 32); 2138 vassert(mode64 == False); 2139 2140 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */ 2141 2142 switch (i->tag) { 2143 2144 case Xin_Alu32R: 2145 /* Deal specially with MOV */ 2146 if (i->Xin.Alu32R.op == Xalu_MOV) { 2147 switch (i->Xin.Alu32R.src->tag) { 2148 case Xrmi_Imm: 2149 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst)); 2150 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2151 goto done; 2152 case Xrmi_Reg: 2153 *p++ = 0x89; 2154 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2155 i->Xin.Alu32R.dst); 2156 goto done; 2157 case Xrmi_Mem: 2158 *p++ = 0x8B; 2159 p = 
doAMode_M(p, i->Xin.Alu32R.dst, 2160 i->Xin.Alu32R.src->Xrmi.Mem.am); 2161 goto done; 2162 default: 2163 goto bad; 2164 } 2165 } 2166 /* MUL */ 2167 if (i->Xin.Alu32R.op == Xalu_MUL) { 2168 switch (i->Xin.Alu32R.src->tag) { 2169 case Xrmi_Reg: 2170 *p++ = 0x0F; 2171 *p++ = 0xAF; 2172 p = doAMode_R(p, i->Xin.Alu32R.dst, 2173 i->Xin.Alu32R.src->Xrmi.Reg.reg); 2174 goto done; 2175 case Xrmi_Mem: 2176 *p++ = 0x0F; 2177 *p++ = 0xAF; 2178 p = doAMode_M(p, i->Xin.Alu32R.dst, 2179 i->Xin.Alu32R.src->Xrmi.Mem.am); 2180 goto done; 2181 case Xrmi_Imm: 2182 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2183 *p++ = 0x6B; 2184 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2185 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2186 } else { 2187 *p++ = 0x69; 2188 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2189 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2190 } 2191 goto done; 2192 default: 2193 goto bad; 2194 } 2195 } 2196 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2197 opc = opc_rr = subopc_imm = opc_imma = 0; 2198 switch (i->Xin.Alu32R.op) { 2199 case Xalu_ADC: opc = 0x13; opc_rr = 0x11; 2200 subopc_imm = 2; opc_imma = 0x15; break; 2201 case Xalu_ADD: opc = 0x03; opc_rr = 0x01; 2202 subopc_imm = 0; opc_imma = 0x05; break; 2203 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29; 2204 subopc_imm = 5; opc_imma = 0x2D; break; 2205 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19; 2206 subopc_imm = 3; opc_imma = 0x1D; break; 2207 case Xalu_AND: opc = 0x23; opc_rr = 0x21; 2208 subopc_imm = 4; opc_imma = 0x25; break; 2209 case Xalu_XOR: opc = 0x33; opc_rr = 0x31; 2210 subopc_imm = 6; opc_imma = 0x35; break; 2211 case Xalu_OR: opc = 0x0B; opc_rr = 0x09; 2212 subopc_imm = 1; opc_imma = 0x0D; break; 2213 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39; 2214 subopc_imm = 7; opc_imma = 0x3D; break; 2215 default: goto bad; 2216 } 2217 switch (i->Xin.Alu32R.src->tag) { 2218 case Xrmi_Imm: 2219 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX()) 2220 && 
!fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2221 *p++ = toUChar(opc_imma); 2222 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2223 } else 2224 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2225 *p++ = 0x83; 2226 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst); 2227 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2228 } else { 2229 *p++ = 0x81; 2230 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst); 2231 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2232 } 2233 goto done; 2234 case Xrmi_Reg: 2235 *p++ = toUChar(opc_rr); 2236 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2237 i->Xin.Alu32R.dst); 2238 goto done; 2239 case Xrmi_Mem: 2240 *p++ = toUChar(opc); 2241 p = doAMode_M(p, i->Xin.Alu32R.dst, 2242 i->Xin.Alu32R.src->Xrmi.Mem.am); 2243 goto done; 2244 default: 2245 goto bad; 2246 } 2247 break; 2248 2249 case Xin_Alu32M: 2250 /* Deal specially with MOV */ 2251 if (i->Xin.Alu32M.op == Xalu_MOV) { 2252 switch (i->Xin.Alu32M.src->tag) { 2253 case Xri_Reg: 2254 *p++ = 0x89; 2255 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2256 i->Xin.Alu32M.dst); 2257 goto done; 2258 case Xri_Imm: 2259 *p++ = 0xC7; 2260 p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst); 2261 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2262 goto done; 2263 default: 2264 goto bad; 2265 } 2266 } 2267 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not 2268 allowed here. 
*/ 2269 opc = subopc_imm = opc_imma = 0; 2270 switch (i->Xin.Alu32M.op) { 2271 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; 2272 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; 2273 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break; 2274 default: goto bad; 2275 } 2276 switch (i->Xin.Alu32M.src->tag) { 2277 case Xri_Reg: 2278 *p++ = toUChar(opc); 2279 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2280 i->Xin.Alu32M.dst); 2281 goto done; 2282 case Xri_Imm: 2283 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { 2284 *p++ = 0x83; 2285 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst); 2286 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32); 2287 goto done; 2288 } else { 2289 *p++ = 0x81; 2290 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst); 2291 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2292 goto done; 2293 } 2294 default: 2295 goto bad; 2296 } 2297 break; 2298 2299 case Xin_Sh32: 2300 opc_cl = opc_imm = subopc = 0; 2301 switch (i->Xin.Sh32.op) { 2302 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2303 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2304 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2305 default: goto bad; 2306 } 2307 if (i->Xin.Sh32.src == 0) { 2308 *p++ = toUChar(opc_cl); 2309 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst); 2310 } else { 2311 *p++ = toUChar(opc_imm); 2312 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst); 2313 *p++ = (UChar)(i->Xin.Sh32.src); 2314 } 2315 goto done; 2316 2317 case Xin_Test32: 2318 if (i->Xin.Test32.dst->tag == Xrm_Reg) { 2319 /* testl $imm32, %reg */ 2320 *p++ = 0xF7; 2321 p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg); 2322 p = emit32(p, i->Xin.Test32.imm32); 2323 goto done; 2324 } else { 2325 /* testl $imm32, amode */ 2326 *p++ = 0xF7; 2327 p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am); 2328 p = emit32(p, i->Xin.Test32.imm32); 2329 goto done; 2330 } 2331 2332 case Xin_Unary32: 2333 if (i->Xin.Unary32.op == 
Xun_NOT) { 2334 *p++ = 0xF7; 2335 p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst); 2336 goto done; 2337 } 2338 if (i->Xin.Unary32.op == Xun_NEG) { 2339 *p++ = 0xF7; 2340 p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst); 2341 goto done; 2342 } 2343 break; 2344 2345 case Xin_Lea32: 2346 *p++ = 0x8D; 2347 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am); 2348 goto done; 2349 2350 case Xin_MulL: 2351 subopc = i->Xin.MulL.syned ? 5 : 4; 2352 *p++ = 0xF7; 2353 switch (i->Xin.MulL.src->tag) { 2354 case Xrm_Mem: 2355 p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am); 2356 goto done; 2357 case Xrm_Reg: 2358 p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg); 2359 goto done; 2360 default: 2361 goto bad; 2362 } 2363 break; 2364 2365 case Xin_Div: 2366 subopc = i->Xin.Div.syned ? 7 : 6; 2367 *p++ = 0xF7; 2368 switch (i->Xin.Div.src->tag) { 2369 case Xrm_Mem: 2370 p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am); 2371 goto done; 2372 case Xrm_Reg: 2373 p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg); 2374 goto done; 2375 default: 2376 goto bad; 2377 } 2378 break; 2379 2380 case Xin_Sh3232: 2381 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); 2382 if (i->Xin.Sh3232.amt == 0) { 2383 /* shldl/shrdl by %cl */ 2384 *p++ = 0x0F; 2385 if (i->Xin.Sh3232.op == Xsh_SHL) { 2386 *p++ = 0xA5; 2387 } else { 2388 *p++ = 0xAD; 2389 } 2390 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); 2391 goto done; 2392 } 2393 break; 2394 2395 case Xin_Push: 2396 switch (i->Xin.Push.src->tag) { 2397 case Xrmi_Mem: 2398 *p++ = 0xFF; 2399 p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am); 2400 goto done; 2401 case Xrmi_Imm: 2402 *p++ = 0x68; 2403 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32); 2404 goto done; 2405 case Xrmi_Reg: 2406 *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg)); 2407 goto done; 2408 default: 2409 goto bad; 2410 } 2411 2412 case Xin_Call: 2413 if (i->Xin.Call.cond != Xcc_ALWAYS 2414 && 
i->Xin.Call.rloc.pri != RLPri_None) { 2415 /* The call might not happen (it isn't unconditional) and it 2416 returns a result. In this case we will need to generate a 2417 control flow diamond to put 0x555..555 in the return 2418 register(s) in the case where the call doesn't happen. If 2419 this ever becomes necessary, maybe copy code from the ARM 2420 equivalent. Until that day, just give up. */ 2421 goto bad; 2422 } 2423 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above 2424 for explanation of this. */ 2425 switch (i->Xin.Call.regparms) { 2426 case 0: irno = iregEnc(hregX86_EAX()); break; 2427 case 1: irno = iregEnc(hregX86_EDX()); break; 2428 case 2: irno = iregEnc(hregX86_ECX()); break; 2429 case 3: irno = iregEnc(hregX86_EDI()); break; 2430 default: vpanic(" emit_X86Instr:call:regparms"); 2431 } 2432 /* jump over the following two insns if the condition does not 2433 hold */ 2434 if (i->Xin.Call.cond != Xcc_ALWAYS) { 2435 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1))); 2436 *p++ = 0x07; /* 7 bytes in the next two insns */ 2437 } 2438 /* movl $target, %tmp */ 2439 *p++ = toUChar(0xB8 + irno); 2440 p = emit32(p, i->Xin.Call.target); 2441 /* call *%tmp */ 2442 *p++ = 0xFF; 2443 *p++ = toUChar(0xD0 + irno); 2444 goto done; 2445 2446 case Xin_XDirect: { 2447 /* NB: what goes on here has to be very closely coordinated with the 2448 chainXDirect_X86 and unchainXDirect_X86 below. */ 2449 /* We're generating chain-me requests here, so we need to be 2450 sure this is actually allowed -- no-redir translations can't 2451 use chain-me's. Hence: */ 2452 vassert(disp_cp_chain_me_to_slowEP != NULL); 2453 vassert(disp_cp_chain_me_to_fastEP != NULL); 2454 2455 /* Use ptmp for backpatching conditional jumps. */ 2456 ptmp = NULL; 2457 2458 /* First off, if this is conditional, create a conditional 2459 jump over the rest of it. 
*/ 2460 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2461 /* jmp fwds if !condition */ 2462 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1))); 2463 ptmp = p; /* fill in this bit later */ 2464 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2465 } 2466 2467 /* Update the guest EIP. */ 2468 /* movl $dstGA, amEIP */ 2469 *p++ = 0xC7; 2470 p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP); 2471 p = emit32(p, i->Xin.XDirect.dstGA); 2472 2473 /* --- FIRST PATCHABLE BYTE follows --- */ 2474 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling 2475 to) backs up the return address, so as to find the address of 2476 the first patchable byte. So: don't change the length of the 2477 two instructions below. */ 2478 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */ 2479 *p++ = 0xBA; 2480 const void* disp_cp_chain_me 2481 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP 2482 : disp_cp_chain_me_to_slowEP; 2483 p = emit32(p, (UInt)(Addr)disp_cp_chain_me); 2484 /* call *%edx */ 2485 *p++ = 0xFF; 2486 *p++ = 0xD2; 2487 /* --- END of PATCHABLE BYTES --- */ 2488 2489 /* Fix up the conditional jump, if there was one. */ 2490 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2491 Int delta = p - ptmp; 2492 vassert(delta > 0 && delta < 40); 2493 *ptmp = toUChar(delta-1); 2494 } 2495 goto done; 2496 } 2497 2498 case Xin_XIndir: { 2499 /* We're generating transfers that could lead indirectly to a 2500 chain-me, so we need to be sure this is actually allowed -- 2501 no-redir translations are not allowed to reach normal 2502 translations without going through the scheduler. That means 2503 no XDirects or XIndirs out from no-redir translations. 2504 Hence: */ 2505 vassert(disp_cp_xindir != NULL); 2506 2507 /* Use ptmp for backpatching conditional jumps. */ 2508 ptmp = NULL; 2509 2510 /* First off, if this is conditional, create a conditional 2511 jump over the rest of it. 
*/ 2512 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2513 /* jmp fwds if !condition */ 2514 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1))); 2515 ptmp = p; /* fill in this bit later */ 2516 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2517 } 2518 2519 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2520 *p++ = 0x89; 2521 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2522 2523 /* movl $disp_indir, %edx */ 2524 *p++ = 0xBA; 2525 p = emit32(p, (UInt)(Addr)disp_cp_xindir); 2526 /* jmp *%edx */ 2527 *p++ = 0xFF; 2528 *p++ = 0xE2; 2529 2530 /* Fix up the conditional jump, if there was one. */ 2531 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2532 Int delta = p - ptmp; 2533 vassert(delta > 0 && delta < 40); 2534 *ptmp = toUChar(delta-1); 2535 } 2536 goto done; 2537 } 2538 2539 case Xin_XAssisted: { 2540 /* Use ptmp for backpatching conditional jumps. */ 2541 ptmp = NULL; 2542 2543 /* First off, if this is conditional, create a conditional 2544 jump over the rest of it. */ 2545 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2546 /* jmp fwds if !condition */ 2547 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1))); 2548 ptmp = p; /* fill in this bit later */ 2549 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2550 } 2551 2552 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2553 *p++ = 0x89; 2554 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2555 /* movl $magic_number, %ebp. 
*/ 2556 UInt trcval = 0; 2557 switch (i->Xin.XAssisted.jk) { 2558 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; 2559 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; 2560 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; 2561 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break; 2562 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break; 2563 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break; 2564 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; 2565 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; 2566 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; 2567 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; 2568 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; 2569 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; 2570 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; 2571 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; 2572 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; 2573 /* We don't expect to see the following being assisted. */ 2574 case Ijk_Ret: 2575 case Ijk_Call: 2576 /* fallthrough */ 2577 default: 2578 ppIRJumpKind(i->Xin.XAssisted.jk); 2579 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind"); 2580 } 2581 vassert(trcval != 0); 2582 *p++ = 0xBD; 2583 p = emit32(p, trcval); 2584 2585 /* movl $disp_indir, %edx */ 2586 *p++ = 0xBA; 2587 p = emit32(p, (UInt)(Addr)disp_cp_xassisted); 2588 /* jmp *%edx */ 2589 *p++ = 0xFF; 2590 *p++ = 0xE2; 2591 2592 /* Fix up the conditional jump, if there was one. */ 2593 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2594 Int delta = p - ptmp; 2595 vassert(delta > 0 && delta < 40); 2596 *ptmp = toUChar(delta-1); 2597 } 2598 goto done; 2599 } 2600 2601 case Xin_CMov32: 2602 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS); 2603 2604 /* This generates cmov, which is illegal on P54/P55. 
*/ 2605 /* 2606 *p++ = 0x0F; 2607 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond)); 2608 if (i->Xin.CMov32.src->tag == Xrm_Reg) { 2609 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg); 2610 goto done; 2611 } 2612 if (i->Xin.CMov32.src->tag == Xrm_Mem) { 2613 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am); 2614 goto done; 2615 } 2616 */ 2617 2618 /* Alternative version which works on any x86 variant. */ 2619 /* jmp fwds if !condition */ 2620 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1)); 2621 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2622 ptmp = p; 2623 2624 switch (i->Xin.CMov32.src->tag) { 2625 case Xrm_Reg: 2626 /* Big sigh. This is movl E -> G ... */ 2627 *p++ = 0x89; 2628 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg, 2629 i->Xin.CMov32.dst); 2630 2631 break; 2632 case Xrm_Mem: 2633 /* ... whereas this is movl G -> E. That's why the args 2634 to doAMode_R appear to be the wrong way round in the 2635 Xrm_Reg case. */ 2636 *p++ = 0x8B; 2637 p = doAMode_M(p, i->Xin.CMov32.dst, 2638 i->Xin.CMov32.src->Xrm.Mem.am); 2639 break; 2640 default: 2641 goto bad; 2642 } 2643 /* Fill in the jump offset. 
*/ 2644 *(ptmp-1) = toUChar(p - ptmp); 2645 goto done; 2646 2647 break; 2648 2649 case Xin_LoadEX: 2650 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) { 2651 /* movzbl */ 2652 *p++ = 0x0F; 2653 *p++ = 0xB6; 2654 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2655 goto done; 2656 } 2657 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) { 2658 /* movzwl */ 2659 *p++ = 0x0F; 2660 *p++ = 0xB7; 2661 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2662 goto done; 2663 } 2664 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) { 2665 /* movsbl */ 2666 *p++ = 0x0F; 2667 *p++ = 0xBE; 2668 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2669 goto done; 2670 } 2671 break; 2672 2673 case Xin_Set32: 2674 /* Make the destination register be 1 or 0, depending on whether 2675 the relevant condition holds. We have to dodge and weave 2676 when the destination is %esi or %edi as we cannot directly 2677 emit the native 'setb %reg' for those. Further complication: 2678 the top 24 bits of the destination should be forced to zero, 2679 but doing 'xor %r,%r' kills the flag(s) we are about to read. 2680 Sigh. So start off my moving $0 into the dest. */ 2681 2682 /* Do we need to swap in %eax? 
*/ 2683 if (iregEnc(i->Xin.Set32.dst) >= 4) { 2684 /* xchg %eax, %dst */ 2685 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst)); 2686 /* movl $0, %eax */ 2687 *p++ =toUChar(0xB8 + iregEnc(hregX86_EAX())); 2688 p = emit32(p, 0); 2689 /* setb lo8(%eax) */ 2690 *p++ = 0x0F; 2691 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); 2692 p = doAMode_R_enc_reg(p, 0, hregX86_EAX()); 2693 /* xchg %eax, %dst */ 2694 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst)); 2695 } else { 2696 /* movl $0, %dst */ 2697 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst)); 2698 p = emit32(p, 0); 2699 /* setb lo8(%dst) */ 2700 *p++ = 0x0F; 2701 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); 2702 p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst); 2703 } 2704 goto done; 2705 2706 case Xin_Bsfr32: 2707 *p++ = 0x0F; 2708 if (i->Xin.Bsfr32.isFwds) { 2709 *p++ = 0xBC; 2710 } else { 2711 *p++ = 0xBD; 2712 } 2713 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src); 2714 goto done; 2715 2716 case Xin_MFence: 2717 /* see comment in hdefs.h re this insn */ 2718 if (0) vex_printf("EMIT FENCE\n"); 2719 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3 2720 |VEX_HWCAPS_X86_SSE2)) { 2721 /* mfence */ 2722 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; 2723 goto done; 2724 } 2725 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) { 2726 /* sfence */ 2727 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; 2728 /* lock addl $0,0(%esp) */ 2729 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; 2730 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; 2731 goto done; 2732 } 2733 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) { 2734 /* lock addl $0,0(%esp) */ 2735 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; 2736 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; 2737 goto done; 2738 } 2739 vpanic("emit_X86Instr:mfence:hwcaps"); 2740 /*NOTREACHED*/ 2741 break; 2742 2743 case Xin_ACAS: 2744 /* lock */ 2745 *p++ = 0xF0; 2746 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value 2747 in %ebx. 
The new-value register is hardwired to be %ebx 2748 since letting it be any integer register gives the problem 2749 that %sil and %dil are unaddressible on x86 and hence we 2750 would have to resort to the same kind of trickery as with 2751 byte-sized Xin.Store, just below. Given that this isn't 2752 performance critical, it is simpler just to force the 2753 register operand to %ebx (could equally be %ecx or %edx). 2754 (Although %ebx is more consistent with cmpxchg8b.) */ 2755 if (i->Xin.ACAS.sz == 2) *p++ = 0x66; 2756 *p++ = 0x0F; 2757 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 2758 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr); 2759 goto done; 2760 2761 case Xin_DACAS: 2762 /* lock */ 2763 *p++ = 0xF0; 2764 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value 2765 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so 2766 aren't encoded in the insn. */ 2767 *p++ = 0x0F; 2768 *p++ = 0xC7; 2769 p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr); 2770 goto done; 2771 2772 case Xin_Store: 2773 if (i->Xin.Store.sz == 2) { 2774 /* This case, at least, is simple, given that we can 2775 reference the low 16 bits of any integer register. */ 2776 *p++ = 0x66; 2777 *p++ = 0x89; 2778 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2779 goto done; 2780 } 2781 2782 if (i->Xin.Store.sz == 1) { 2783 /* We have to do complex dodging and weaving if src is not 2784 the low 8 bits of %eax/%ebx/%ecx/%edx. */ 2785 if (iregEnc(i->Xin.Store.src) < 4) { 2786 /* we're OK, can do it directly */ 2787 *p++ = 0x88; 2788 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2789 goto done; 2790 } else { 2791 /* Bleh. This means the source is %edi or %esi. Since 2792 the address mode can only mention three registers, at 2793 least one of %eax/%ebx/%ecx/%edx must be available to 2794 temporarily swap the source into, so the store can 2795 happen. So we have to look at the regs mentioned 2796 in the amode. 
*/ 2797 HReg swap = INVALID_HREG; 2798 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(), 2799 ecx = hregX86_ECX(), edx = hregX86_EDX(); 2800 HRegUsage u; 2801 initHRegUsage(&u); 2802 addRegUsage_X86AMode(&u, i->Xin.Store.dst); 2803 /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; } 2804 else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; } 2805 else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; } 2806 else if (! HRegUsage__contains(&u, edx)) { swap = edx; } 2807 vassert(! hregIsInvalid(swap)); 2808 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2809 *p++ = 0x87; 2810 p = doAMode_R(p, i->Xin.Store.src, swap); 2811 /* movb lo8{%swap}, (dst) */ 2812 *p++ = 0x88; 2813 p = doAMode_M(p, swap, i->Xin.Store.dst); 2814 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2815 *p++ = 0x87; 2816 p = doAMode_R(p, i->Xin.Store.src, swap); 2817 goto done; 2818 } 2819 } /* if (i->Xin.Store.sz == 1) */ 2820 break; 2821 2822 case Xin_FpUnary: 2823 /* gop %src, %dst 2824 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) 2825 */ 2826 p = do_ffree_st7(p); 2827 p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src)); 2828 p = do_fop1_st(p, i->Xin.FpUnary.op); 2829 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst)); 2830 goto done; 2831 2832 case Xin_FpBinary: 2833 if (i->Xin.FpBinary.op == Xfp_YL2X 2834 || i->Xin.FpBinary.op == Xfp_YL2XP1) { 2835 /* Have to do this specially. */ 2836 /* ffree %st7 ; fld %st(srcL) ; 2837 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ 2838 p = do_ffree_st7(p); 2839 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2840 p = do_ffree_st7(p); 2841 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR)); 2842 *p++ = 0xD9; 2843 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9); 2844 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2845 goto done; 2846 } 2847 if (i->Xin.FpBinary.op == Xfp_ATAN) { 2848 /* Have to do this specially. 
*/ 2849 /* ffree %st7 ; fld %st(srcL) ; 2850 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ 2851 p = do_ffree_st7(p); 2852 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2853 p = do_ffree_st7(p); 2854 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR)); 2855 *p++ = 0xD9; *p++ = 0xF3; 2856 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2857 goto done; 2858 } 2859 if (i->Xin.FpBinary.op == Xfp_PREM 2860 || i->Xin.FpBinary.op == Xfp_PREM1 2861 || i->Xin.FpBinary.op == Xfp_SCALE) { 2862 /* Have to do this specially. */ 2863 /* ffree %st7 ; fld %st(srcR) ; 2864 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; 2865 fincstp ; ffree %st7 */ 2866 p = do_ffree_st7(p); 2867 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR)); 2868 p = do_ffree_st7(p); 2869 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL)); 2870 *p++ = 0xD9; 2871 switch (i->Xin.FpBinary.op) { 2872 case Xfp_PREM: *p++ = 0xF8; break; 2873 case Xfp_PREM1: *p++ = 0xF5; break; 2874 case Xfp_SCALE: *p++ = 0xFD; break; 2875 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)"); 2876 } 2877 p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst)); 2878 *p++ = 0xD9; *p++ = 0xF7; 2879 p = do_ffree_st7(p); 2880 goto done; 2881 } 2882 /* General case */ 2883 /* gop %srcL, %srcR, %dst 2884 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) 2885 */ 2886 p = do_ffree_st7(p); 2887 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL)); 2888 p = do_fop2_st(p, i->Xin.FpBinary.op, 2889 1+fregEnc(i->Xin.FpBinary.srcR)); 2890 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst)); 2891 goto done; 2892 2893 case Xin_FpLdSt: 2894 if (i->Xin.FpLdSt.isLoad) { 2895 /* Load from memory into %fakeN. 
2896 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) 2897 */ 2898 p = do_ffree_st7(p); 2899 switch (i->Xin.FpLdSt.sz) { 2900 case 4: 2901 *p++ = 0xD9; 2902 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr); 2903 break; 2904 case 8: 2905 *p++ = 0xDD; 2906 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr); 2907 break; 2908 case 10: 2909 *p++ = 0xDB; 2910 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr); 2911 break; 2912 default: 2913 vpanic("emitX86Instr(FpLdSt,load)"); 2914 } 2915 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg)); 2916 goto done; 2917 } else { 2918 /* Store from %fakeN into memory. 2919 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode 2920 */ 2921 p = do_ffree_st7(p); 2922 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg)); 2923 switch (i->Xin.FpLdSt.sz) { 2924 case 4: 2925 *p++ = 0xD9; 2926 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr); 2927 break; 2928 case 8: 2929 *p++ = 0xDD; 2930 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr); 2931 break; 2932 case 10: 2933 *p++ = 0xDB; 2934 p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr); 2935 break; 2936 default: 2937 vpanic("emitX86Instr(FpLdSt,store)"); 2938 } 2939 goto done; 2940 } 2941 break; 2942 2943 case Xin_FpLdStI: 2944 if (i->Xin.FpLdStI.isLoad) { 2945 /* Load from memory into %fakeN, converting from an int. 2946 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) 2947 */ 2948 switch (i->Xin.FpLdStI.sz) { 2949 case 8: opc = 0xDF; subopc_imm = 5; break; 2950 case 4: opc = 0xDB; subopc_imm = 0; break; 2951 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; 2952 default: vpanic("emitX86Instr(Xin_FpLdStI-load)"); 2953 } 2954 p = do_ffree_st7(p); 2955 *p++ = toUChar(opc); 2956 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr); 2957 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg)); 2958 goto done; 2959 } else { 2960 /* Store from %fakeN into memory, converting to an int. 
2961 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode 2962 */ 2963 switch (i->Xin.FpLdStI.sz) { 2964 case 8: opc = 0xDF; subopc_imm = 7; break; 2965 case 4: opc = 0xDB; subopc_imm = 3; break; 2966 case 2: opc = 0xDF; subopc_imm = 3; break; 2967 default: vpanic("emitX86Instr(Xin_FpLdStI-store)"); 2968 } 2969 p = do_ffree_st7(p); 2970 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg)); 2971 *p++ = toUChar(opc); 2972 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr); 2973 goto done; 2974 } 2975 break; 2976 2977 case Xin_Fp64to32: 2978 /* ffree %st7 ; fld %st(src) */ 2979 p = do_ffree_st7(p); 2980 p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src)); 2981 /* subl $4, %esp */ 2982 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; 2983 /* fstps (%esp) */ 2984 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; 2985 /* flds (%esp) */ 2986 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; 2987 /* addl $4, %esp */ 2988 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; 2989 /* fstp %st(1+dst) */ 2990 p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst)); 2991 goto done; 2992 2993 case Xin_FpCMov: 2994 /* jmp fwds if !condition */ 2995 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1)); 2996 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2997 ptmp = p; 2998 2999 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ 3000 p = do_ffree_st7(p); 3001 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src)); 3002 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst)); 3003 3004 /* Fill in the jump offset. 
*/ 3005 *(ptmp-1) = toUChar(p - ptmp); 3006 goto done; 3007 3008 case Xin_FpLdCW: 3009 *p++ = 0xD9; 3010 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr); 3011 goto done; 3012 3013 case Xin_FpStSW_AX: 3014 /* note, this emits fnstsw %ax, not fstsw %ax */ 3015 *p++ = 0xDF; 3016 *p++ = 0xE0; 3017 goto done; 3018 3019 case Xin_FpCmp: 3020 /* gcmp %fL, %fR, %dst 3021 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; 3022 fnstsw %ax ; movl %eax, %dst 3023 */ 3024 /* ffree %st7 */ 3025 p = do_ffree_st7(p); 3026 /* fpush %fL */ 3027 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL)); 3028 /* fucomp %(fR+1) */ 3029 *p++ = 0xDD; 3030 *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR)))); 3031 /* fnstsw %ax */ 3032 *p++ = 0xDF; 3033 *p++ = 0xE0; 3034 /* movl %eax, %dst */ 3035 *p++ = 0x89; 3036 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst); 3037 goto done; 3038 3039 case Xin_SseConst: { 3040 UShort con = i->Xin.SseConst.con; 3041 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF)); 3042 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF)); 3043 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF)); 3044 p = push_word_from_tags(p, toUShort(con & 0xF)); 3045 /* movl (%esp), %xmm-dst */ 3046 *p++ = 0x0F; 3047 *p++ = 0x10; 3048 *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst))); 3049 *p++ = 0x24; 3050 /* addl $16, %esp */ 3051 *p++ = 0x83; 3052 *p++ = 0xC4; 3053 *p++ = 0x10; 3054 goto done; 3055 } 3056 3057 case Xin_SseLdSt: 3058 *p++ = 0x0F; 3059 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11); 3060 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr); 3061 goto done; 3062 3063 case Xin_SseLdzLO: 3064 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8); 3065 /* movs[sd] amode, %xmm-dst */ 3066 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 
0xF3 : 0xF2); 3067 *p++ = 0x0F; 3068 *p++ = 0x10; 3069 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr); 3070 goto done; 3071 3072 case Xin_Sse32Fx4: 3073 xtra = 0; 3074 *p++ = 0x0F; 3075 switch (i->Xin.Sse32Fx4.op) { 3076 case Xsse_ADDF: *p++ = 0x58; break; 3077 case Xsse_DIVF: *p++ = 0x5E; break; 3078 case Xsse_MAXF: *p++ = 0x5F; break; 3079 case Xsse_MINF: *p++ = 0x5D; break; 3080 case Xsse_MULF: *p++ = 0x59; break; 3081 case Xsse_RCPF: *p++ = 0x53; break; 3082 case Xsse_RSQRTF: *p++ = 0x52; break; 3083 case Xsse_SQRTF: *p++ = 0x51; break; 3084 case Xsse_SUBF: *p++ = 0x5C; break; 3085 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3086 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3087 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3088 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3089 default: goto bad; 3090 } 3091 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst), 3092 vregEnc(i->Xin.Sse32Fx4.src) ); 3093 if (xtra & 0x100) 3094 *p++ = toUChar(xtra & 0xFF); 3095 goto done; 3096 3097 case Xin_Sse64Fx2: 3098 xtra = 0; 3099 *p++ = 0x66; 3100 *p++ = 0x0F; 3101 switch (i->Xin.Sse64Fx2.op) { 3102 case Xsse_ADDF: *p++ = 0x58; break; 3103 case Xsse_DIVF: *p++ = 0x5E; break; 3104 case Xsse_MAXF: *p++ = 0x5F; break; 3105 case Xsse_MINF: *p++ = 0x5D; break; 3106 case Xsse_MULF: *p++ = 0x59; break; 3107 case Xsse_RCPF: *p++ = 0x53; break; 3108 case Xsse_RSQRTF: *p++ = 0x52; break; 3109 case Xsse_SQRTF: *p++ = 0x51; break; 3110 case Xsse_SUBF: *p++ = 0x5C; break; 3111 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3112 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3113 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3114 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3115 default: goto bad; 3116 } 3117 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst), 3118 vregEnc(i->Xin.Sse64Fx2.src) ); 3119 if (xtra & 0x100) 3120 *p++ = toUChar(xtra & 0xFF); 3121 goto done; 3122 3123 case Xin_Sse32FLo: 3124 xtra = 
0; 3125 *p++ = 0xF3; 3126 *p++ = 0x0F; 3127 switch (i->Xin.Sse32FLo.op) { 3128 case Xsse_ADDF: *p++ = 0x58; break; 3129 case Xsse_DIVF: *p++ = 0x5E; break; 3130 case Xsse_MAXF: *p++ = 0x5F; break; 3131 case Xsse_MINF: *p++ = 0x5D; break; 3132 case Xsse_MULF: *p++ = 0x59; break; 3133 case Xsse_RCPF: *p++ = 0x53; break; 3134 case Xsse_RSQRTF: *p++ = 0x52; break; 3135 case Xsse_SQRTF: *p++ = 0x51; break; 3136 case Xsse_SUBF: *p++ = 0x5C; break; 3137 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3138 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3139 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3140 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3141 default: goto bad; 3142 } 3143 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst), 3144 vregEnc(i->Xin.Sse32FLo.src) ); 3145 if (xtra & 0x100) 3146 *p++ = toUChar(xtra & 0xFF); 3147 goto done; 3148 3149 case Xin_Sse64FLo: 3150 xtra = 0; 3151 *p++ = 0xF2; 3152 *p++ = 0x0F; 3153 switch (i->Xin.Sse64FLo.op) { 3154 case Xsse_ADDF: *p++ = 0x58; break; 3155 case Xsse_DIVF: *p++ = 0x5E; break; 3156 case Xsse_MAXF: *p++ = 0x5F; break; 3157 case Xsse_MINF: *p++ = 0x5D; break; 3158 case Xsse_MULF: *p++ = 0x59; break; 3159 case Xsse_RCPF: *p++ = 0x53; break; 3160 case Xsse_RSQRTF: *p++ = 0x52; break; 3161 case Xsse_SQRTF: *p++ = 0x51; break; 3162 case Xsse_SUBF: *p++ = 0x5C; break; 3163 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3164 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3165 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3166 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3167 default: goto bad; 3168 } 3169 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst), 3170 vregEnc(i->Xin.Sse64FLo.src) ); 3171 if (xtra & 0x100) 3172 *p++ = toUChar(xtra & 0xFF); 3173 goto done; 3174 3175 case Xin_SseReRg: 3176 # define XX(_n) *p++ = (_n) 3177 switch (i->Xin.SseReRg.op) { 3178 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break; 3179 case Xsse_OR: XX(0x0F); XX(0x56); break; 3180 case 
Xsse_XOR: XX(0x0F); XX(0x57); break; 3181 case Xsse_AND: XX(0x0F); XX(0x54); break; 3182 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break; 3183 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break; 3184 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break; 3185 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break; 3186 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break; 3187 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break; 3188 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break; 3189 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break; 3190 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break; 3191 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break; 3192 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break; 3193 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break; 3194 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break; 3195 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break; 3196 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break; 3197 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break; 3198 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break; 3199 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break; 3200 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break; 3201 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break; 3202 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break; 3203 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break; 3204 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break; 3205 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break; 3206 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break; 3207 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break; 3208 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break; 3209 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break; 3210 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break; 3211 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break; 3212 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break; 3213 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); 
break; 3214 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break; 3215 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break; 3216 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break; 3217 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break; 3218 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break; 3219 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break; 3220 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break; 3221 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break; 3222 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break; 3223 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break; 3224 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break; 3225 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break; 3226 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break; 3227 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break; 3228 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break; 3229 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break; 3230 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break; 3231 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break; 3232 default: goto bad; 3233 } 3234 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst), 3235 vregEnc(i->Xin.SseReRg.src) ); 3236 # undef XX 3237 goto done; 3238 3239 case Xin_SseCMov: 3240 /* jmp fwds if !condition */ 3241 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1)); 3242 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3243 ptmp = p; 3244 3245 /* movaps %src, %dst */ 3246 *p++ = 0x0F; 3247 *p++ = 0x28; 3248 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst), 3249 vregEnc(i->Xin.SseCMov.src) ); 3250 3251 /* Fill in the jump offset. 
*/ 3252 *(ptmp-1) = toUChar(p - ptmp); 3253 goto done; 3254 3255 case Xin_SseShuf: 3256 *p++ = 0x66; 3257 *p++ = 0x0F; 3258 *p++ = 0x70; 3259 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst), 3260 vregEnc(i->Xin.SseShuf.src) ); 3261 *p++ = (UChar)(i->Xin.SseShuf.order); 3262 goto done; 3263 3264 case Xin_EvCheck: { 3265 /* We generate: 3266 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER) 3267 (2 bytes) jns nofail expected taken 3268 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR) 3269 nofail: 3270 */ 3271 /* This is heavily asserted re instruction lengths. It needs to 3272 be. If we get given unexpected forms of .amCounter or 3273 .amFailAddr -- basically, anything that's not of the form 3274 uimm7(%ebp) -- they are likely to fail. */ 3275 /* Note also that after the decl we must be very careful not to 3276 read the carry flag, else we get a partial flags stall. 3277 js/jns avoids that, though. */ 3278 UChar* p0 = p; 3279 /* --- decl 8(%ebp) --- */ 3280 /* "1" because + there's no register in this encoding; 3281 instead the register + field is used as a sub opcode. The 3282 encoding for "decl r/m32" + is FF /1, hence the "1". */ 3283 *p++ = 0xFF; 3284 p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter); 3285 vassert(p - p0 == 3); 3286 /* --- jns nofail --- */ 3287 *p++ = 0x79; 3288 *p++ = 0x03; /* need to check this 0x03 after the next insn */ 3289 vassert(p - p0 == 5); 3290 /* --- jmp* 0(%ebp) --- */ 3291 /* The encoding is FF /4. */ 3292 *p++ = 0xFF; 3293 p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr); 3294 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ 3295 /* And crosscheck .. */ 3296 vassert(evCheckSzB_X86() == 8); 3297 goto done; 3298 } 3299 3300 case Xin_ProfInc: { 3301 /* We generate addl $1,NotKnownYet 3302 adcl $0,NotKnownYet+4 3303 in the expectation that a later call to LibVEX_patchProfCtr 3304 will be used to fill in the immediate fields once the right 3305 value is known. 
3306 83 05 00 00 00 00 01 3307 83 15 00 00 00 00 00 3308 */ 3309 *p++ = 0x83; *p++ = 0x05; 3310 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3311 *p++ = 0x01; 3312 *p++ = 0x83; *p++ = 0x15; 3313 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3314 *p++ = 0x00; 3315 /* Tell the caller .. */ 3316 vassert(!(*is_profInc)); 3317 *is_profInc = True; 3318 goto done; 3319 } 3320 3321 default: 3322 goto bad; 3323 } 3324 3325 bad: 3326 ppX86Instr(i, mode64); 3327 vpanic("emit_X86Instr"); 3328 /*NOTREACHED*/ 3329 3330 done: 3331 vassert(p - &buf[0] <= 32); 3332 return p - &buf[0]; 3333 } 3334 3335 3336 /* How big is an event check? See case for Xin_EvCheck in 3337 emit_X86Instr just above. That crosschecks what this returns, so 3338 we can tell if we're inconsistent. */ 3339 Int evCheckSzB_X86 (void) 3340 { 3341 return 8; 3342 } 3343 3344 3345 /* NB: what goes on here has to be very closely coordinated with the 3346 emitInstr case for XDirect, above. */ 3347 VexInvalRange chainXDirect_X86 ( VexEndness endness_host, 3348 void* place_to_chain, 3349 const void* disp_cp_chain_me_EXPECTED, 3350 const void* place_to_jump_to ) 3351 { 3352 vassert(endness_host == VexEndnessLE); 3353 3354 /* What we're expecting to see is: 3355 movl $disp_cp_chain_me_EXPECTED, %edx 3356 call *%edx 3357 viz 3358 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3359 FF D2 3360 */ 3361 UChar* p = (UChar*)place_to_chain; 3362 vassert(p[0] == 0xBA); 3363 vassert(*(UInt*)(&p[1]) == (UInt)(Addr)disp_cp_chain_me_EXPECTED); 3364 vassert(p[5] == 0xFF); 3365 vassert(p[6] == 0xD2); 3366 /* And what we want to change it to is: 3367 jmp disp32 where disp32 is relative to the next insn 3368 ud2; 3369 viz 3370 E9 <4 bytes == disp32> 3371 0F 0B 3372 The replacement has the same length as the original. 3373 */ 3374 /* This is the delta we need to put into a JMP d32 insn. It's 3375 relative to the start of the next insn, hence the -5. 
*/ 3376 Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5; 3377 3378 /* And make the modifications. */ 3379 p[0] = 0xE9; 3380 p[1] = (delta >> 0) & 0xFF; 3381 p[2] = (delta >> 8) & 0xFF; 3382 p[3] = (delta >> 16) & 0xFF; 3383 p[4] = (delta >> 24) & 0xFF; 3384 p[5] = 0x0F; p[6] = 0x0B; 3385 /* sanity check on the delta -- top 32 are all 0 or all 1 */ 3386 delta >>= 32; 3387 vassert(delta == 0LL || delta == -1LL); 3388 VexInvalRange vir = { (HWord)place_to_chain, 7 }; 3389 return vir; 3390 } 3391 3392 3393 /* NB: what goes on here has to be very closely coordinated with the 3394 emitInstr case for XDirect, above. */ 3395 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host, 3396 void* place_to_unchain, 3397 const void* place_to_jump_to_EXPECTED, 3398 const void* disp_cp_chain_me ) 3399 { 3400 vassert(endness_host == VexEndnessLE); 3401 3402 /* What we're expecting to see is: 3403 jmp d32 3404 ud2; 3405 viz 3406 E9 <4 bytes == disp32> 3407 0F 0B 3408 */ 3409 UChar* p = (UChar*)place_to_unchain; 3410 Bool valid = False; 3411 if (p[0] == 0xE9 3412 && p[5] == 0x0F && p[6] == 0x0B) { 3413 /* Check the offset is right. */ 3414 Int s32 = *(Int*)(&p[1]); 3415 if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) { 3416 valid = True; 3417 if (0) 3418 vex_printf("QQQ unchainXDirect_X86: found valid\n"); 3419 } 3420 } 3421 vassert(valid); 3422 /* And what we want to change it to is: 3423 movl $disp_cp_chain_me, %edx 3424 call *%edx 3425 viz 3426 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3427 FF D2 3428 So it's the same length (convenient, huh). 3429 */ 3430 p[0] = 0xBA; 3431 *(UInt*)(&p[1]) = (UInt)(Addr)disp_cp_chain_me; 3432 p[5] = 0xFF; 3433 p[6] = 0xD2; 3434 VexInvalRange vir = { (HWord)place_to_unchain, 7 }; 3435 return vir; 3436 } 3437 3438 3439 /* Patch the counter address into a profile inc point, as previously 3440 created by the Xin_ProfInc case for emit_X86Instr. 
*/ 3441 VexInvalRange patchProfInc_X86 ( VexEndness endness_host, 3442 void* place_to_patch, 3443 const ULong* location_of_counter ) 3444 { 3445 vassert(endness_host == VexEndnessLE); 3446 vassert(sizeof(ULong*) == 4); 3447 UChar* p = (UChar*)place_to_patch; 3448 vassert(p[0] == 0x83); 3449 vassert(p[1] == 0x05); 3450 vassert(p[2] == 0x00); 3451 vassert(p[3] == 0x00); 3452 vassert(p[4] == 0x00); 3453 vassert(p[5] == 0x00); 3454 vassert(p[6] == 0x01); 3455 vassert(p[7] == 0x83); 3456 vassert(p[8] == 0x15); 3457 vassert(p[9] == 0x00); 3458 vassert(p[10] == 0x00); 3459 vassert(p[11] == 0x00); 3460 vassert(p[12] == 0x00); 3461 vassert(p[13] == 0x00); 3462 UInt imm32 = (UInt)(Addr)location_of_counter; 3463 p[2] = imm32 & 0xFF; imm32 >>= 8; 3464 p[3] = imm32 & 0xFF; imm32 >>= 8; 3465 p[4] = imm32 & 0xFF; imm32 >>= 8; 3466 p[5] = imm32 & 0xFF; imm32 >>= 8; 3467 imm32 = 4 + (UInt)(Addr)location_of_counter; 3468 p[9] = imm32 & 0xFF; imm32 >>= 8; 3469 p[10] = imm32 & 0xFF; imm32 >>= 8; 3470 p[11] = imm32 & 0xFF; imm32 >>= 8; 3471 p[12] = imm32 & 0xFF; imm32 >>= 8; 3472 VexInvalRange vir = { (HWord)place_to_patch, 14 }; 3473 return vir; 3474 } 3475 3476 3477 /*---------------------------------------------------------------*/ 3478 /*--- end host_x86_defs.c ---*/ 3479 /*---------------------------------------------------------------*/ 3480