1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_x86_defs.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 #include "libvex_trc_values.h" 39 40 #include "main_util.h" 41 #include "host_generic_regs.h" 42 #include "host_x86_defs.h" 43 44 45 /* --------- Registers. --------- */ 46 47 void ppHRegX86 ( HReg reg ) 48 { 49 Int r; 50 static const HChar* ireg32_names[8] 51 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; 52 /* Be generic for all virtual regs. */ 53 if (hregIsVirtual(reg)) { 54 ppHReg(reg); 55 return; 56 } 57 /* But specific for real regs. 
*/ 58 switch (hregClass(reg)) { 59 case HRcInt32: 60 r = hregNumber(reg); 61 vassert(r >= 0 && r < 8); 62 vex_printf("%s", ireg32_names[r]); 63 return; 64 case HRcFlt64: 65 r = hregNumber(reg); 66 vassert(r >= 0 && r < 6); 67 vex_printf("%%fake%d", r); 68 return; 69 case HRcVec128: 70 r = hregNumber(reg); 71 vassert(r >= 0 && r < 8); 72 vex_printf("%%xmm%d", r); 73 return; 74 default: 75 vpanic("ppHRegX86"); 76 } 77 } 78 79 HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); } 80 HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); } 81 HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); } 82 HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); } 83 HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); } 84 HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); } 85 HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); } 86 HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); } 87 88 HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } 89 HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } 90 HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } 91 HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } 92 HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } 93 HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } 94 95 HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); } 96 HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); } 97 HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); } 98 HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); } 99 HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); } 100 HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); } 101 HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); } 102 HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); } 103 104 105 void getAllocableRegs_X86 ( Int* nregs, HReg** arr ) 106 { 107 *nregs = 20; 108 *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); 109 (*arr)[0] = hregX86_EAX(); 110 (*arr)[1] = hregX86_EBX(); 111 (*arr)[2] = hregX86_ECX(); 112 (*arr)[3] = hregX86_EDX(); 113 (*arr)[4] = hregX86_ESI(); 114 (*arr)[5] = hregX86_EDI(); 115 (*arr)[6] = hregX86_FAKE0(); 116 (*arr)[7] = hregX86_FAKE1(); 117 (*arr)[8] = hregX86_FAKE2(); 118 (*arr)[9] = hregX86_FAKE3(); 119 (*arr)[10] = hregX86_FAKE4(); 120 (*arr)[11] = hregX86_FAKE5(); 121 (*arr)[12] = hregX86_XMM0(); 122 (*arr)[13] = hregX86_XMM1(); 123 (*arr)[14] = hregX86_XMM2(); 124 (*arr)[15] = hregX86_XMM3(); 125 (*arr)[16] = hregX86_XMM4(); 126 (*arr)[17] = hregX86_XMM5(); 127 (*arr)[18] = hregX86_XMM6(); 128 (*arr)[19] = hregX86_XMM7(); 129 } 130 131 132 /* --------- Condition codes, Intel encoding. --------- */ 133 134 const HChar* showX86CondCode ( X86CondCode cond ) 135 { 136 switch (cond) { 137 case Xcc_O: return "o"; 138 case Xcc_NO: return "no"; 139 case Xcc_B: return "b"; 140 case Xcc_NB: return "nb"; 141 case Xcc_Z: return "z"; 142 case Xcc_NZ: return "nz"; 143 case Xcc_BE: return "be"; 144 case Xcc_NBE: return "nbe"; 145 case Xcc_S: return "s"; 146 case Xcc_NS: return "ns"; 147 case Xcc_P: return "p"; 148 case Xcc_NP: return "np"; 149 case Xcc_L: return "l"; 150 case Xcc_NL: return "nl"; 151 case Xcc_LE: return "le"; 152 case Xcc_NLE: return "nle"; 153 case Xcc_ALWAYS: return "ALWAYS"; 154 default: vpanic("ppX86CondCode"); 155 } 156 } 157 158 159 /* --------- X86AMode: memory address expressions. 
--------- */ 160 161 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) { 162 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode)); 163 am->tag = Xam_IR; 164 am->Xam.IR.imm = imm32; 165 am->Xam.IR.reg = reg; 166 return am; 167 } 168 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { 169 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode)); 170 am->tag = Xam_IRRS; 171 am->Xam.IRRS.imm = imm32; 172 am->Xam.IRRS.base = base; 173 am->Xam.IRRS.index = indEx; 174 am->Xam.IRRS.shift = shift; 175 vassert(shift >= 0 && shift <= 3); 176 return am; 177 } 178 179 X86AMode* dopyX86AMode ( X86AMode* am ) { 180 switch (am->tag) { 181 case Xam_IR: 182 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); 183 case Xam_IRRS: 184 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, 185 am->Xam.IRRS.index, am->Xam.IRRS.shift ); 186 default: 187 vpanic("dopyX86AMode"); 188 } 189 } 190 191 void ppX86AMode ( X86AMode* am ) { 192 switch (am->tag) { 193 case Xam_IR: 194 if (am->Xam.IR.imm == 0) 195 vex_printf("("); 196 else 197 vex_printf("0x%x(", am->Xam.IR.imm); 198 ppHRegX86(am->Xam.IR.reg); 199 vex_printf(")"); 200 return; 201 case Xam_IRRS: 202 vex_printf("0x%x(", am->Xam.IRRS.imm); 203 ppHRegX86(am->Xam.IRRS.base); 204 vex_printf(","); 205 ppHRegX86(am->Xam.IRRS.index); 206 vex_printf(",%d)", 1 << am->Xam.IRRS.shift); 207 return; 208 default: 209 vpanic("ppX86AMode"); 210 } 211 } 212 213 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) { 214 switch (am->tag) { 215 case Xam_IR: 216 addHRegUse(u, HRmRead, am->Xam.IR.reg); 217 return; 218 case Xam_IRRS: 219 addHRegUse(u, HRmRead, am->Xam.IRRS.base); 220 addHRegUse(u, HRmRead, am->Xam.IRRS.index); 221 return; 222 default: 223 vpanic("addRegUsage_X86AMode"); 224 } 225 } 226 227 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) { 228 switch (am->tag) { 229 case Xam_IR: 230 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg); 231 return; 232 case Xam_IRRS: 233 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base); 234 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index); 235 return; 236 default: 237 vpanic("mapRegs_X86AMode"); 238 } 239 } 240 241 /* --------- Operand, which can be reg, immediate or memory. --------- */ 242 243 X86RMI* X86RMI_Imm ( UInt imm32 ) { 244 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); 245 op->tag = Xrmi_Imm; 246 op->Xrmi.Imm.imm32 = imm32; 247 return op; 248 } 249 X86RMI* X86RMI_Reg ( HReg reg ) { 250 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); 251 op->tag = Xrmi_Reg; 252 op->Xrmi.Reg.reg = reg; 253 return op; 254 } 255 X86RMI* X86RMI_Mem ( X86AMode* am ) { 256 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); 257 op->tag = Xrmi_Mem; 258 op->Xrmi.Mem.am = am; 259 return op; 260 } 261 262 void ppX86RMI ( X86RMI* op ) { 263 switch (op->tag) { 264 case Xrmi_Imm: 265 vex_printf("$0x%x", op->Xrmi.Imm.imm32); 266 return; 267 case Xrmi_Reg: 268 ppHRegX86(op->Xrmi.Reg.reg); 269 return; 270 case Xrmi_Mem: 271 ppX86AMode(op->Xrmi.Mem.am); 272 return; 273 default: 274 vpanic("ppX86RMI"); 275 } 276 } 277 278 /* An X86RMI can only be used in a "read" context (what would it mean 279 to write or modify a literal?) and so we enumerate its registers 280 accordingly. 
*/ 281 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) { 282 switch (op->tag) { 283 case Xrmi_Imm: 284 return; 285 case Xrmi_Reg: 286 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg); 287 return; 288 case Xrmi_Mem: 289 addRegUsage_X86AMode(u, op->Xrmi.Mem.am); 290 return; 291 default: 292 vpanic("addRegUsage_X86RMI"); 293 } 294 } 295 296 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) { 297 switch (op->tag) { 298 case Xrmi_Imm: 299 return; 300 case Xrmi_Reg: 301 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg); 302 return; 303 case Xrmi_Mem: 304 mapRegs_X86AMode(m, op->Xrmi.Mem.am); 305 return; 306 default: 307 vpanic("mapRegs_X86RMI"); 308 } 309 } 310 311 312 /* --------- Operand, which can be reg or immediate only. --------- */ 313 314 X86RI* X86RI_Imm ( UInt imm32 ) { 315 X86RI* op = LibVEX_Alloc(sizeof(X86RI)); 316 op->tag = Xri_Imm; 317 op->Xri.Imm.imm32 = imm32; 318 return op; 319 } 320 X86RI* X86RI_Reg ( HReg reg ) { 321 X86RI* op = LibVEX_Alloc(sizeof(X86RI)); 322 op->tag = Xri_Reg; 323 op->Xri.Reg.reg = reg; 324 return op; 325 } 326 327 void ppX86RI ( X86RI* op ) { 328 switch (op->tag) { 329 case Xri_Imm: 330 vex_printf("$0x%x", op->Xri.Imm.imm32); 331 return; 332 case Xri_Reg: 333 ppHRegX86(op->Xri.Reg.reg); 334 return; 335 default: 336 vpanic("ppX86RI"); 337 } 338 } 339 340 /* An X86RI can only be used in a "read" context (what would it mean 341 to write or modify a literal?) and so we enumerate its registers 342 accordingly. */ 343 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) { 344 switch (op->tag) { 345 case Xri_Imm: 346 return; 347 case Xri_Reg: 348 addHRegUse(u, HRmRead, op->Xri.Reg.reg); 349 return; 350 default: 351 vpanic("addRegUsage_X86RI"); 352 } 353 } 354 355 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) { 356 switch (op->tag) { 357 case Xri_Imm: 358 return; 359 case Xri_Reg: 360 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg); 361 return; 362 default: 363 vpanic("mapRegs_X86RI"); 364 } 365 } 366 367 368 /* --------- Operand, which can be reg or memory only. --------- */ 369 370 X86RM* X86RM_Reg ( HReg reg ) { 371 X86RM* op = LibVEX_Alloc(sizeof(X86RM)); 372 op->tag = Xrm_Reg; 373 op->Xrm.Reg.reg = reg; 374 return op; 375 } 376 X86RM* X86RM_Mem ( X86AMode* am ) { 377 X86RM* op = LibVEX_Alloc(sizeof(X86RM)); 378 op->tag = Xrm_Mem; 379 op->Xrm.Mem.am = am; 380 return op; 381 } 382 383 void ppX86RM ( X86RM* op ) { 384 switch (op->tag) { 385 case Xrm_Mem: 386 ppX86AMode(op->Xrm.Mem.am); 387 return; 388 case Xrm_Reg: 389 ppHRegX86(op->Xrm.Reg.reg); 390 return; 391 default: 392 vpanic("ppX86RM"); 393 } 394 } 395 396 /* Because an X86RM can be both a source or destination operand, we 397 have to supply a mode -- pertaining to the operand as a whole -- 398 indicating how it's being used. */ 399 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) { 400 switch (op->tag) { 401 case Xrm_Mem: 402 /* Memory is read, written or modified. So we just want to 403 know the regs read by the amode. */ 404 addRegUsage_X86AMode(u, op->Xrm.Mem.am); 405 return; 406 case Xrm_Reg: 407 /* reg is read, written or modified. Add it in the 408 appropriate way. 
*/ 409 addHRegUse(u, mode, op->Xrm.Reg.reg); 410 return; 411 default: 412 vpanic("addRegUsage_X86RM"); 413 } 414 } 415 416 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op ) 417 { 418 switch (op->tag) { 419 case Xrm_Mem: 420 mapRegs_X86AMode(m, op->Xrm.Mem.am); 421 return; 422 case Xrm_Reg: 423 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg); 424 return; 425 default: 426 vpanic("mapRegs_X86RM"); 427 } 428 } 429 430 431 /* --------- Instructions. --------- */ 432 433 const HChar* showX86UnaryOp ( X86UnaryOp op ) { 434 switch (op) { 435 case Xun_NOT: return "not"; 436 case Xun_NEG: return "neg"; 437 default: vpanic("showX86UnaryOp"); 438 } 439 } 440 441 const HChar* showX86AluOp ( X86AluOp op ) { 442 switch (op) { 443 case Xalu_MOV: return "mov"; 444 case Xalu_CMP: return "cmp"; 445 case Xalu_ADD: return "add"; 446 case Xalu_SUB: return "sub"; 447 case Xalu_ADC: return "adc"; 448 case Xalu_SBB: return "sbb"; 449 case Xalu_AND: return "and"; 450 case Xalu_OR: return "or"; 451 case Xalu_XOR: return "xor"; 452 case Xalu_MUL: return "mul"; 453 default: vpanic("showX86AluOp"); 454 } 455 } 456 457 const HChar* showX86ShiftOp ( X86ShiftOp op ) { 458 switch (op) { 459 case Xsh_SHL: return "shl"; 460 case Xsh_SHR: return "shr"; 461 case Xsh_SAR: return "sar"; 462 default: vpanic("showX86ShiftOp"); 463 } 464 } 465 466 const HChar* showX86FpOp ( X86FpOp op ) { 467 switch (op) { 468 case Xfp_ADD: return "add"; 469 case Xfp_SUB: return "sub"; 470 case Xfp_MUL: return "mul"; 471 case Xfp_DIV: return "div"; 472 case Xfp_SCALE: return "scale"; 473 case Xfp_ATAN: return "atan"; 474 case Xfp_YL2X: return "yl2x"; 475 case Xfp_YL2XP1: return "yl2xp1"; 476 case Xfp_PREM: return "prem"; 477 case Xfp_PREM1: return "prem1"; 478 case Xfp_SQRT: return "sqrt"; 479 case Xfp_ABS: return "abs"; 480 case Xfp_NEG: return "chs"; 481 case Xfp_MOV: return "mov"; 482 case Xfp_SIN: return "sin"; 483 case Xfp_COS: return "cos"; 484 case Xfp_TAN: return "tan"; 485 case Xfp_ROUND: return "round"; 486 case Xfp_2XM1: return "2xm1"; 487 default: vpanic("showX86FpOp"); 488 } 489 } 490 491 const HChar* showX86SseOp ( X86SseOp op ) { 492 switch (op) { 493 case Xsse_MOV: return "mov(?!)"; 494 case Xsse_ADDF: return "add"; 495 case Xsse_SUBF: return "sub"; 496 case Xsse_MULF: return "mul"; 497 case Xsse_DIVF: return "div"; 498 case Xsse_MAXF: return "max"; 499 case Xsse_MINF: return "min"; 500 case Xsse_CMPEQF: return "cmpFeq"; 501 case Xsse_CMPLTF: return "cmpFlt"; 502 case Xsse_CMPLEF: return "cmpFle"; 503 case Xsse_CMPUNF: return "cmpFun"; 504 case Xsse_RCPF: return "rcp"; 505 case Xsse_RSQRTF: return "rsqrt"; 506 case Xsse_SQRTF: return "sqrt"; 507 case Xsse_AND: return "and"; 508 case Xsse_OR: return "or"; 509 case Xsse_XOR: return "xor"; 510 case Xsse_ANDN: return "andn"; 511 case Xsse_ADD8: return "paddb"; 512 case Xsse_ADD16: return "paddw"; 513 case Xsse_ADD32: return "paddd"; 514 case Xsse_ADD64: return "paddq"; 515 case Xsse_QADD8U: return "paddusb"; 516 case Xsse_QADD16U: return "paddusw"; 517 case Xsse_QADD8S: return "paddsb"; 518 case Xsse_QADD16S: return "paddsw"; 519 case Xsse_SUB8: return "psubb"; 520 case Xsse_SUB16: return "psubw"; 521 case Xsse_SUB32: return "psubd"; 522 case Xsse_SUB64: return "psubq"; 523 case Xsse_QSUB8U: return "psubusb"; 524 case Xsse_QSUB16U: return "psubusw"; 525 case Xsse_QSUB8S: return "psubsb"; 526 case Xsse_QSUB16S: return "psubsw"; 527 case Xsse_MUL16: return "pmullw"; 528 case Xsse_MULHI16U: return "pmulhuw"; 529 case Xsse_MULHI16S: return "pmulhw"; 530 case Xsse_AVG8U: return 
"pavgb"; 531 case Xsse_AVG16U: return "pavgw"; 532 case Xsse_MAX16S: return "pmaxw"; 533 case Xsse_MAX8U: return "pmaxub"; 534 case Xsse_MIN16S: return "pminw"; 535 case Xsse_MIN8U: return "pminub"; 536 case Xsse_CMPEQ8: return "pcmpeqb"; 537 case Xsse_CMPEQ16: return "pcmpeqw"; 538 case Xsse_CMPEQ32: return "pcmpeqd"; 539 case Xsse_CMPGT8S: return "pcmpgtb"; 540 case Xsse_CMPGT16S: return "pcmpgtw"; 541 case Xsse_CMPGT32S: return "pcmpgtd"; 542 case Xsse_SHL16: return "psllw"; 543 case Xsse_SHL32: return "pslld"; 544 case Xsse_SHL64: return "psllq"; 545 case Xsse_SHR16: return "psrlw"; 546 case Xsse_SHR32: return "psrld"; 547 case Xsse_SHR64: return "psrlq"; 548 case Xsse_SAR16: return "psraw"; 549 case Xsse_SAR32: return "psrad"; 550 case Xsse_PACKSSD: return "packssdw"; 551 case Xsse_PACKSSW: return "packsswb"; 552 case Xsse_PACKUSW: return "packuswb"; 553 case Xsse_UNPCKHB: return "punpckhb"; 554 case Xsse_UNPCKHW: return "punpckhw"; 555 case Xsse_UNPCKHD: return "punpckhd"; 556 case Xsse_UNPCKHQ: return "punpckhq"; 557 case Xsse_UNPCKLB: return "punpcklb"; 558 case Xsse_UNPCKLW: return "punpcklw"; 559 case Xsse_UNPCKLD: return "punpckld"; 560 case Xsse_UNPCKLQ: return "punpcklq"; 561 default: vpanic("showX86SseOp"); 562 } 563 } 564 565 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { 566 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 567 i->tag = Xin_Alu32R; 568 i->Xin.Alu32R.op = op; 569 i->Xin.Alu32R.src = src; 570 i->Xin.Alu32R.dst = dst; 571 return i; 572 } 573 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) { 574 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 575 i->tag = Xin_Alu32M; 576 i->Xin.Alu32M.op = op; 577 i->Xin.Alu32M.src = src; 578 i->Xin.Alu32M.dst = dst; 579 vassert(op != Xalu_MUL); 580 return i; 581 } 582 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) { 583 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 584 i->tag = Xin_Sh32; 585 i->Xin.Sh32.op = op; 586 i->Xin.Sh32.src = src; 587 i->Xin.Sh32.dst = dst; 588 return i; 589 } 590 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) { 591 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 592 i->tag = Xin_Test32; 593 i->Xin.Test32.imm32 = imm32; 594 i->Xin.Test32.dst = dst; 595 return i; 596 } 597 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { 598 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 599 i->tag = Xin_Unary32; 600 i->Xin.Unary32.op = op; 601 i->Xin.Unary32.dst = dst; 602 return i; 603 } 604 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { 605 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 606 i->tag = Xin_Lea32; 607 i->Xin.Lea32.am = am; 608 i->Xin.Lea32.dst = dst; 609 return i; 610 } 611 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { 612 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 613 i->tag = Xin_MulL; 614 i->Xin.MulL.syned = syned; 615 i->Xin.MulL.src = src; 616 return i; 617 } 618 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) { 619 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 620 i->tag = Xin_Div; 621 i->Xin.Div.syned = syned; 622 i->Xin.Div.src = src; 623 return i; 624 } 625 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) { 626 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 627 i->tag = Xin_Sh3232; 628 i->Xin.Sh3232.op = op; 629 i->Xin.Sh3232.amt = amt; 630 i->Xin.Sh3232.src = src; 631 i->Xin.Sh3232.dst = dst; 632 vassert(op == Xsh_SHL || op == Xsh_SHR); 633 return i; 634 } 635 X86Instr* X86Instr_Push( X86RMI* src ) { 636 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 637 i->tag = Xin_Push; 638 i->Xin.Push.src = 
src; 639 return i; 640 } 641 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms, 642 RetLoc rloc ) { 643 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 644 i->tag = Xin_Call; 645 i->Xin.Call.cond = cond; 646 i->Xin.Call.target = target; 647 i->Xin.Call.regparms = regparms; 648 i->Xin.Call.rloc = rloc; 649 vassert(regparms >= 0 && regparms <= 3); 650 vassert(is_sane_RetLoc(rloc)); 651 return i; 652 } 653 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, 654 X86CondCode cond, Bool toFastEP ) { 655 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 656 i->tag = Xin_XDirect; 657 i->Xin.XDirect.dstGA = dstGA; 658 i->Xin.XDirect.amEIP = amEIP; 659 i->Xin.XDirect.cond = cond; 660 i->Xin.XDirect.toFastEP = toFastEP; 661 return i; 662 } 663 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, 664 X86CondCode cond ) { 665 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 666 i->tag = Xin_XIndir; 667 i->Xin.XIndir.dstGA = dstGA; 668 i->Xin.XIndir.amEIP = amEIP; 669 i->Xin.XIndir.cond = cond; 670 return i; 671 } 672 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, 673 X86CondCode cond, IRJumpKind jk ) { 674 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 675 i->tag = Xin_XAssisted; 676 i->Xin.XAssisted.dstGA = dstGA; 677 i->Xin.XAssisted.amEIP = amEIP; 678 i->Xin.XAssisted.cond = cond; 679 i->Xin.XAssisted.jk = jk; 680 return i; 681 } 682 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) { 683 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 684 i->tag = Xin_CMov32; 685 i->Xin.CMov32.cond = cond; 686 i->Xin.CMov32.src = src; 687 i->Xin.CMov32.dst = dst; 688 vassert(cond != Xcc_ALWAYS); 689 return i; 690 } 691 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, 692 X86AMode* src, HReg dst ) { 693 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 694 i->tag = Xin_LoadEX; 695 i->Xin.LoadEX.szSmall = szSmall; 696 i->Xin.LoadEX.syned = syned; 697 i->Xin.LoadEX.src = src; 698 i->Xin.LoadEX.dst = dst; 699 vassert(szSmall == 1 || szSmall == 2); 700 return i; 701 } 702 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { 703 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 704 i->tag = Xin_Store; 705 i->Xin.Store.sz = sz; 706 i->Xin.Store.src = src; 707 i->Xin.Store.dst = dst; 708 vassert(sz == 1 || sz == 2); 709 return i; 710 } 711 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) { 712 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 713 i->tag = Xin_Set32; 714 i->Xin.Set32.cond = cond; 715 i->Xin.Set32.dst = dst; 716 return i; 717 } 718 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) { 719 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 720 i->tag = Xin_Bsfr32; 721 i->Xin.Bsfr32.isFwds = isFwds; 722 i->Xin.Bsfr32.src = src; 723 i->Xin.Bsfr32.dst = dst; 724 return i; 725 } 726 X86Instr* X86Instr_MFence ( UInt hwcaps ) { 727 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 728 i->tag = Xin_MFence; 729 i->Xin.MFence.hwcaps = hwcaps; 730 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT 731 |VEX_HWCAPS_X86_SSE1 732 |VEX_HWCAPS_X86_SSE2 733 |VEX_HWCAPS_X86_SSE3 734 |VEX_HWCAPS_X86_LZCNT))); 735 return i; 736 } 737 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) { 738 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 739 i->tag = Xin_ACAS; 740 i->Xin.ACAS.addr = addr; 741 i->Xin.ACAS.sz = sz; 742 vassert(sz == 4 || sz == 2 || sz == 1); 743 return i; 744 } 745 X86Instr* X86Instr_DACAS ( X86AMode* addr ) { 746 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 747 i->tag = Xin_DACAS; 748 i->Xin.DACAS.addr = addr; 749 return i; 750 } 751 752 X86Instr* 
X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) { 753 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 754 i->tag = Xin_FpUnary; 755 i->Xin.FpUnary.op = op; 756 i->Xin.FpUnary.src = src; 757 i->Xin.FpUnary.dst = dst; 758 return i; 759 } 760 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) { 761 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 762 i->tag = Xin_FpBinary; 763 i->Xin.FpBinary.op = op; 764 i->Xin.FpBinary.srcL = srcL; 765 i->Xin.FpBinary.srcR = srcR; 766 i->Xin.FpBinary.dst = dst; 767 return i; 768 } 769 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) { 770 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 771 i->tag = Xin_FpLdSt; 772 i->Xin.FpLdSt.isLoad = isLoad; 773 i->Xin.FpLdSt.sz = sz; 774 i->Xin.FpLdSt.reg = reg; 775 i->Xin.FpLdSt.addr = addr; 776 vassert(sz == 4 || sz == 8 || sz == 10); 777 return i; 778 } 779 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, 780 HReg reg, X86AMode* addr ) { 781 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 782 i->tag = Xin_FpLdStI; 783 i->Xin.FpLdStI.isLoad = isLoad; 784 i->Xin.FpLdStI.sz = sz; 785 i->Xin.FpLdStI.reg = reg; 786 i->Xin.FpLdStI.addr = addr; 787 vassert(sz == 2 || sz == 4 || sz == 8); 788 return i; 789 } 790 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) { 791 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 792 i->tag = Xin_Fp64to32; 793 i->Xin.Fp64to32.src = src; 794 i->Xin.Fp64to32.dst = dst; 795 return i; 796 } 797 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) { 798 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 799 i->tag = Xin_FpCMov; 800 i->Xin.FpCMov.cond = cond; 801 i->Xin.FpCMov.src = src; 802 i->Xin.FpCMov.dst = dst; 803 vassert(cond != Xcc_ALWAYS); 804 return i; 805 } 806 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) { 807 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 808 i->tag = Xin_FpLdCW; 809 i->Xin.FpLdCW.addr = addr; 810 return i; 811 } 812 X86Instr* X86Instr_FpStSW_AX ( void ) { 813 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 814 i->tag = Xin_FpStSW_AX; 815 return i; 816 } 817 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) { 818 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 819 i->tag = Xin_FpCmp; 820 i->Xin.FpCmp.srcL = srcL; 821 i->Xin.FpCmp.srcR = srcR; 822 i->Xin.FpCmp.dst = dst; 823 return i; 824 } 825 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) { 826 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 827 i->tag = Xin_SseConst; 828 i->Xin.SseConst.con = con; 829 i->Xin.SseConst.dst = dst; 830 vassert(hregClass(dst) == HRcVec128); 831 return i; 832 } 833 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) { 834 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 835 i->tag = Xin_SseLdSt; 836 i->Xin.SseLdSt.isLoad = isLoad; 837 i->Xin.SseLdSt.reg = reg; 838 i->Xin.SseLdSt.addr = addr; 839 return i; 840 } 841 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr ) 842 { 843 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 844 i->tag = Xin_SseLdzLO; 845 i->Xin.SseLdzLO.sz = toUChar(sz); 846 i->Xin.SseLdzLO.reg = reg; 847 i->Xin.SseLdzLO.addr = addr; 848 vassert(sz == 4 || sz == 8); 849 return i; 850 } 851 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) { 852 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 853 i->tag = Xin_Sse32Fx4; 854 i->Xin.Sse32Fx4.op = op; 855 i->Xin.Sse32Fx4.src = src; 856 i->Xin.Sse32Fx4.dst = dst; 857 vassert(op != Xsse_MOV); 858 return i; 859 } 860 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) { 861 X86Instr* i = 
LibVEX_Alloc(sizeof(X86Instr)); 862 i->tag = Xin_Sse32FLo; 863 i->Xin.Sse32FLo.op = op; 864 i->Xin.Sse32FLo.src = src; 865 i->Xin.Sse32FLo.dst = dst; 866 vassert(op != Xsse_MOV); 867 return i; 868 } 869 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) { 870 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 871 i->tag = Xin_Sse64Fx2; 872 i->Xin.Sse64Fx2.op = op; 873 i->Xin.Sse64Fx2.src = src; 874 i->Xin.Sse64Fx2.dst = dst; 875 vassert(op != Xsse_MOV); 876 return i; 877 } 878 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) { 879 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 880 i->tag = Xin_Sse64FLo; 881 i->Xin.Sse64FLo.op = op; 882 i->Xin.Sse64FLo.src = src; 883 i->Xin.Sse64FLo.dst = dst; 884 vassert(op != Xsse_MOV); 885 return i; 886 } 887 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) { 888 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 889 i->tag = Xin_SseReRg; 890 i->Xin.SseReRg.op = op; 891 i->Xin.SseReRg.src = re; 892 i->Xin.SseReRg.dst = rg; 893 return i; 894 } 895 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) { 896 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 897 i->tag = Xin_SseCMov; 898 i->Xin.SseCMov.cond = cond; 899 i->Xin.SseCMov.src = src; 900 i->Xin.SseCMov.dst = dst; 901 vassert(cond != Xcc_ALWAYS); 902 return i; 903 } 904 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) { 905 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 906 i->tag = Xin_SseShuf; 907 i->Xin.SseShuf.order = order; 908 i->Xin.SseShuf.src = src; 909 i->Xin.SseShuf.dst = dst; 910 vassert(order >= 0 && order <= 0xFF); 911 return i; 912 } 913 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, 914 X86AMode* amFailAddr ) { 915 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 916 i->tag = Xin_EvCheck; 917 i->Xin.EvCheck.amCounter = amCounter; 918 i->Xin.EvCheck.amFailAddr = amFailAddr; 919 return i; 920 } 921 X86Instr* X86Instr_ProfInc ( void ) { 922 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 923 i->tag = Xin_ProfInc; 924 return i; 925 } 926 927 void ppX86Instr ( X86Instr* i, Bool mode64 ) { 928 vassert(mode64 == False); 929 switch (i->tag) { 930 case Xin_Alu32R: 931 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op)); 932 ppX86RMI(i->Xin.Alu32R.src); 933 vex_printf(","); 934 ppHRegX86(i->Xin.Alu32R.dst); 935 return; 936 case Xin_Alu32M: 937 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op)); 938 ppX86RI(i->Xin.Alu32M.src); 939 vex_printf(","); 940 ppX86AMode(i->Xin.Alu32M.dst); 941 return; 942 case Xin_Sh32: 943 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op)); 944 if (i->Xin.Sh32.src == 0) 945 vex_printf("%%cl,"); 946 else 947 vex_printf("$%d,", (Int)i->Xin.Sh32.src); 948 ppHRegX86(i->Xin.Sh32.dst); 949 return; 950 case Xin_Test32: 951 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32); 952 ppX86RM(i->Xin.Test32.dst); 953 return; 954 case Xin_Unary32: 955 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op)); 956 ppHRegX86(i->Xin.Unary32.dst); 957 return; 958 case Xin_Lea32: 959 vex_printf("leal "); 960 ppX86AMode(i->Xin.Lea32.am); 961 vex_printf(","); 962 ppHRegX86(i->Xin.Lea32.dst); 963 return; 964 case Xin_MulL: 965 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u'); 966 ppX86RM(i->Xin.MulL.src); 967 return; 968 case Xin_Div: 969 vex_printf("%cdivl ", i->Xin.Div.syned ? 
's' : 'u'); 970 ppX86RM(i->Xin.Div.src); 971 return; 972 case Xin_Sh3232: 973 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op)); 974 if (i->Xin.Sh3232.amt == 0) 975 vex_printf(" %%cl,"); 976 else 977 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt); 978 ppHRegX86(i->Xin.Sh3232.src); 979 vex_printf(","); 980 ppHRegX86(i->Xin.Sh3232.dst); 981 return; 982 case Xin_Push: 983 vex_printf("pushl "); 984 ppX86RMI(i->Xin.Push.src); 985 return; 986 case Xin_Call: 987 vex_printf("call%s[%d,", 988 i->Xin.Call.cond==Xcc_ALWAYS 989 ? "" : showX86CondCode(i->Xin.Call.cond), 990 i->Xin.Call.regparms); 991 ppRetLoc(i->Xin.Call.rloc); 992 vex_printf("] 0x%x", i->Xin.Call.target); 993 break; 994 case Xin_XDirect: 995 vex_printf("(xDirect) "); 996 vex_printf("if (%%eflags.%s) { ", 997 showX86CondCode(i->Xin.XDirect.cond)); 998 vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA); 999 ppX86AMode(i->Xin.XDirect.amEIP); 1000 vex_printf("; "); 1001 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }", 1002 i->Xin.XDirect.toFastEP ? "fast" : "slow"); 1003 return; 1004 case Xin_XIndir: 1005 vex_printf("(xIndir) "); 1006 vex_printf("if (%%eflags.%s) { movl ", 1007 showX86CondCode(i->Xin.XIndir.cond)); 1008 ppHRegX86(i->Xin.XIndir.dstGA); 1009 vex_printf(","); 1010 ppX86AMode(i->Xin.XIndir.amEIP); 1011 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }"); 1012 return; 1013 case Xin_XAssisted: 1014 vex_printf("(xAssisted) "); 1015 vex_printf("if (%%eflags.%s) { ", 1016 showX86CondCode(i->Xin.XAssisted.cond)); 1017 vex_printf("movl "); 1018 ppHRegX86(i->Xin.XAssisted.dstGA); 1019 vex_printf(","); 1020 ppX86AMode(i->Xin.XAssisted.amEIP); 1021 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp", 1022 (Int)i->Xin.XAssisted.jk); 1023 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }"); 1024 return; 1025 case Xin_CMov32: 1026 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond)); 1027 ppX86RM(i->Xin.CMov32.src); 1028 vex_printf(","); 1029 ppHRegX86(i->Xin.CMov32.dst); 1030 return; 1031 case Xin_LoadEX: 1032 vex_printf("mov%c%cl ", 1033 i->Xin.LoadEX.syned ? 's' : 'z', 1034 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w'); 1035 ppX86AMode(i->Xin.LoadEX.src); 1036 vex_printf(","); 1037 ppHRegX86(i->Xin.LoadEX.dst); 1038 return; 1039 case Xin_Store: 1040 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w'); 1041 ppHRegX86(i->Xin.Store.src); 1042 vex_printf(","); 1043 ppX86AMode(i->Xin.Store.dst); 1044 return; 1045 case Xin_Set32: 1046 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond)); 1047 ppHRegX86(i->Xin.Set32.dst); 1048 return; 1049 case Xin_Bsfr32: 1050 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r'); 1051 ppHRegX86(i->Xin.Bsfr32.src); 1052 vex_printf(","); 1053 ppHRegX86(i->Xin.Bsfr32.dst); 1054 return; 1055 case Xin_MFence: 1056 vex_printf("mfence(%s)", 1057 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps)); 1058 return; 1059 case Xin_ACAS: 1060 vex_printf("lock cmpxchg%c ", 1061 i->Xin.ACAS.sz==1 ? 'b' 1062 : i->Xin.ACAS.sz==2 ? 
'w' : 'l'); 1063 vex_printf("{%%eax->%%ebx},"); 1064 ppX86AMode(i->Xin.ACAS.addr); 1065 return; 1066 case Xin_DACAS: 1067 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},"); 1068 ppX86AMode(i->Xin.DACAS.addr); 1069 return; 1070 case Xin_FpUnary: 1071 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op)); 1072 ppHRegX86(i->Xin.FpUnary.src); 1073 vex_printf(","); 1074 ppHRegX86(i->Xin.FpUnary.dst); 1075 break; 1076 case Xin_FpBinary: 1077 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op)); 1078 ppHRegX86(i->Xin.FpBinary.srcL); 1079 vex_printf(","); 1080 ppHRegX86(i->Xin.FpBinary.srcR); 1081 vex_printf(","); 1082 ppHRegX86(i->Xin.FpBinary.dst); 1083 break; 1084 case Xin_FpLdSt: 1085 if (i->Xin.FpLdSt.isLoad) { 1086 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T' 1087 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); 1088 ppX86AMode(i->Xin.FpLdSt.addr); 1089 vex_printf(", "); 1090 ppHRegX86(i->Xin.FpLdSt.reg); 1091 } else { 1092 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T' 1093 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); 1094 ppHRegX86(i->Xin.FpLdSt.reg); 1095 vex_printf(", "); 1096 ppX86AMode(i->Xin.FpLdSt.addr); 1097 } 1098 return; 1099 case Xin_FpLdStI: 1100 if (i->Xin.FpLdStI.isLoad) { 1101 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1102 i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1103 ppX86AMode(i->Xin.FpLdStI.addr); 1104 vex_printf(", "); 1105 ppHRegX86(i->Xin.FpLdStI.reg); 1106 } else { 1107 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1108 i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1109 ppHRegX86(i->Xin.FpLdStI.reg); 1110 vex_printf(", "); 1111 ppX86AMode(i->Xin.FpLdStI.addr); 1112 } 1113 return; 1114 case Xin_Fp64to32: 1115 vex_printf("gdtof "); 1116 ppHRegX86(i->Xin.Fp64to32.src); 1117 vex_printf(","); 1118 ppHRegX86(i->Xin.Fp64to32.dst); 1119 return; 1120 case Xin_FpCMov: 1121 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond)); 1122 ppHRegX86(i->Xin.FpCMov.src); 1123 vex_printf(","); 1124 ppHRegX86(i->Xin.FpCMov.dst); 1125 return; 1126 case Xin_FpLdCW: 1127 vex_printf("fldcw "); 1128 ppX86AMode(i->Xin.FpLdCW.addr); 1129 return; 1130 case Xin_FpStSW_AX: 1131 vex_printf("fstsw %%ax"); 1132 return; 1133 case Xin_FpCmp: 1134 vex_printf("gcmp "); 1135 ppHRegX86(i->Xin.FpCmp.srcL); 1136 vex_printf(","); 1137 ppHRegX86(i->Xin.FpCmp.srcR); 1138 vex_printf(","); 1139 ppHRegX86(i->Xin.FpCmp.dst); 1140 break; 1141 case Xin_SseConst: 1142 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); 1143 ppHRegX86(i->Xin.SseConst.dst); 1144 break; 1145 case Xin_SseLdSt: 1146 vex_printf("movups "); 1147 if (i->Xin.SseLdSt.isLoad) { 1148 ppX86AMode(i->Xin.SseLdSt.addr); 1149 vex_printf(","); 1150 ppHRegX86(i->Xin.SseLdSt.reg); 1151 } else { 1152 ppHRegX86(i->Xin.SseLdSt.reg); 1153 vex_printf(","); 1154 ppX86AMode(i->Xin.SseLdSt.addr); 1155 } 1156 return; 1157 case Xin_SseLdzLO: 1158 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? 
"s" : "d"); 1159 ppX86AMode(i->Xin.SseLdzLO.addr); 1160 vex_printf(","); 1161 ppHRegX86(i->Xin.SseLdzLO.reg); 1162 return; 1163 case Xin_Sse32Fx4: 1164 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op)); 1165 ppHRegX86(i->Xin.Sse32Fx4.src); 1166 vex_printf(","); 1167 ppHRegX86(i->Xin.Sse32Fx4.dst); 1168 return; 1169 case Xin_Sse32FLo: 1170 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op)); 1171 ppHRegX86(i->Xin.Sse32FLo.src); 1172 vex_printf(","); 1173 ppHRegX86(i->Xin.Sse32FLo.dst); 1174 return; 1175 case Xin_Sse64Fx2: 1176 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op)); 1177 ppHRegX86(i->Xin.Sse64Fx2.src); 1178 vex_printf(","); 1179 ppHRegX86(i->Xin.Sse64Fx2.dst); 1180 return; 1181 case Xin_Sse64FLo: 1182 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op)); 1183 ppHRegX86(i->Xin.Sse64FLo.src); 1184 vex_printf(","); 1185 ppHRegX86(i->Xin.Sse64FLo.dst); 1186 return; 1187 case Xin_SseReRg: 1188 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op)); 1189 ppHRegX86(i->Xin.SseReRg.src); 1190 vex_printf(","); 1191 ppHRegX86(i->Xin.SseReRg.dst); 1192 return; 1193 case Xin_SseCMov: 1194 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond)); 1195 ppHRegX86(i->Xin.SseCMov.src); 1196 vex_printf(","); 1197 ppHRegX86(i->Xin.SseCMov.dst); 1198 return; 1199 case Xin_SseShuf: 1200 vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order); 1201 ppHRegX86(i->Xin.SseShuf.src); 1202 vex_printf(","); 1203 ppHRegX86(i->Xin.SseShuf.dst); 1204 return; 1205 case Xin_EvCheck: 1206 vex_printf("(evCheck) decl "); 1207 ppX86AMode(i->Xin.EvCheck.amCounter); 1208 vex_printf("; jns nofail; jmp *"); 1209 ppX86AMode(i->Xin.EvCheck.amFailAddr); 1210 vex_printf("; nofail:"); 1211 return; 1212 case Xin_ProfInc: 1213 vex_printf("(profInc) addl $1,NotKnownYet; " 1214 "adcl $0,NotKnownYet+4"); 1215 return; 1216 default: 1217 vpanic("ppX86Instr"); 1218 } 1219 } 1220 1221 /* --------- Helpers for register allocation. 
--------- */ 1222 1223 void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64) 1224 { 1225 Bool unary; 1226 vassert(mode64 == False); 1227 initHRegUsage(u); 1228 switch (i->tag) { 1229 case Xin_Alu32R: 1230 addRegUsage_X86RMI(u, i->Xin.Alu32R.src); 1231 if (i->Xin.Alu32R.op == Xalu_MOV) { 1232 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst); 1233 return; 1234 } 1235 if (i->Xin.Alu32R.op == Xalu_CMP) { 1236 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst); 1237 return; 1238 } 1239 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst); 1240 return; 1241 case Xin_Alu32M: 1242 addRegUsage_X86RI(u, i->Xin.Alu32M.src); 1243 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst); 1244 return; 1245 case Xin_Sh32: 1246 addHRegUse(u, HRmModify, i->Xin.Sh32.dst); 1247 if (i->Xin.Sh32.src == 0) 1248 addHRegUse(u, HRmRead, hregX86_ECX()); 1249 return; 1250 case Xin_Test32: 1251 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead); 1252 return; 1253 case Xin_Unary32: 1254 addHRegUse(u, HRmModify, i->Xin.Unary32.dst); 1255 return; 1256 case Xin_Lea32: 1257 addRegUsage_X86AMode(u, i->Xin.Lea32.am); 1258 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst); 1259 return; 1260 case Xin_MulL: 1261 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead); 1262 addHRegUse(u, HRmModify, hregX86_EAX()); 1263 addHRegUse(u, HRmWrite, hregX86_EDX()); 1264 return; 1265 case Xin_Div: 1266 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead); 1267 addHRegUse(u, HRmModify, hregX86_EAX()); 1268 addHRegUse(u, HRmModify, hregX86_EDX()); 1269 return; 1270 case Xin_Sh3232: 1271 addHRegUse(u, HRmRead, i->Xin.Sh3232.src); 1272 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); 1273 if (i->Xin.Sh3232.amt == 0) 1274 addHRegUse(u, HRmRead, hregX86_ECX()); 1275 return; 1276 case Xin_Push: 1277 addRegUsage_X86RMI(u, i->Xin.Push.src); 1278 addHRegUse(u, HRmModify, hregX86_ESP()); 1279 return; 1280 case Xin_Call: 1281 /* This is a bit subtle. */ 1282 /* First off, claim it trashes all the caller-saved regs 1283 which fall within the register allocator's jurisdiction. 1284 These I believe to be %eax %ecx %edx and all the xmm 1285 registers. */ 1286 addHRegUse(u, HRmWrite, hregX86_EAX()); 1287 addHRegUse(u, HRmWrite, hregX86_ECX()); 1288 addHRegUse(u, HRmWrite, hregX86_EDX()); 1289 addHRegUse(u, HRmWrite, hregX86_XMM0()); 1290 addHRegUse(u, HRmWrite, hregX86_XMM1()); 1291 addHRegUse(u, HRmWrite, hregX86_XMM2()); 1292 addHRegUse(u, HRmWrite, hregX86_XMM3()); 1293 addHRegUse(u, HRmWrite, hregX86_XMM4()); 1294 addHRegUse(u, HRmWrite, hregX86_XMM5()); 1295 addHRegUse(u, HRmWrite, hregX86_XMM6()); 1296 addHRegUse(u, HRmWrite, hregX86_XMM7()); 1297 /* Now we have to state any parameter-carrying registers 1298 which might be read. This depends on the regparmness. */ 1299 switch (i->Xin.Call.regparms) { 1300 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/ 1301 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/ 1302 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break; 1303 case 0: break; 1304 default: vpanic("getRegUsage_X86Instr:Call:regparms"); 1305 } 1306 /* Finally, there is the issue that the insn trashes a 1307 register because the literal target address has to be 1308 loaded into a register. Fortunately, for the 0/1/2 1309 regparm case, we can use EAX, EDX and ECX respectively, so 1310 this does not cause any further damage. For the 3-regparm 1311 case, we'll have to choose another register arbitrarily -- 1312 since A, D and C are used for parameters -- and so we might 1313 as well choose EDI. 
*/ 1314 if (i->Xin.Call.regparms == 3) 1315 addHRegUse(u, HRmWrite, hregX86_EDI()); 1316 /* Upshot of this is that the assembler really must observe 1317 the here-stated convention of which register to use as an 1318 address temporary, depending on the regparmness: 0==EAX, 1319 1==EDX, 2==ECX, 3==EDI. */ 1320 return; 1321 /* XDirect/XIndir/XAssisted are also a bit subtle. They 1322 conditionally exit the block. Hence we only need to list (1) 1323 the registers that they read, and (2) the registers that they 1324 write in the case where the block is not exited. (2) is 1325 empty, hence only (1) is relevant here. */ 1326 case Xin_XDirect: 1327 addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP); 1328 return; 1329 case Xin_XIndir: 1330 addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA); 1331 addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP); 1332 return; 1333 case Xin_XAssisted: 1334 addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA); 1335 addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP); 1336 return; 1337 case Xin_CMov32: 1338 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead); 1339 addHRegUse(u, HRmModify, i->Xin.CMov32.dst); 1340 return; 1341 case Xin_LoadEX: 1342 addRegUsage_X86AMode(u, i->Xin.LoadEX.src); 1343 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst); 1344 return; 1345 case Xin_Store: 1346 addHRegUse(u, HRmRead, i->Xin.Store.src); 1347 addRegUsage_X86AMode(u, i->Xin.Store.dst); 1348 return; 1349 case Xin_Set32: 1350 addHRegUse(u, HRmWrite, i->Xin.Set32.dst); 1351 return; 1352 case Xin_Bsfr32: 1353 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src); 1354 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst); 1355 return; 1356 case Xin_MFence: 1357 return; 1358 case Xin_ACAS: 1359 addRegUsage_X86AMode(u, i->Xin.ACAS.addr); 1360 addHRegUse(u, HRmRead, hregX86_EBX()); 1361 addHRegUse(u, HRmModify, hregX86_EAX()); 1362 return; 1363 case Xin_DACAS: 1364 addRegUsage_X86AMode(u, i->Xin.DACAS.addr); 1365 addHRegUse(u, HRmRead, hregX86_ECX()); 1366 addHRegUse(u, HRmRead, hregX86_EBX()); 1367 addHRegUse(u, HRmModify, hregX86_EDX()); 1368 addHRegUse(u, HRmModify, hregX86_EAX()); 1369 return; 1370 case Xin_FpUnary: 1371 addHRegUse(u, HRmRead, i->Xin.FpUnary.src); 1372 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); 1373 return; 1374 case Xin_FpBinary: 1375 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); 1376 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); 1377 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); 1378 return; 1379 case Xin_FpLdSt: 1380 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr); 1381 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, 1382 i->Xin.FpLdSt.reg); 1383 return; 1384 case Xin_FpLdStI: 1385 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr); 1386 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, 1387 i->Xin.FpLdStI.reg); 1388 return; 1389 case Xin_Fp64to32: 1390 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); 1391 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); 1392 return; 1393 case Xin_FpCMov: 1394 addHRegUse(u, HRmRead, i->Xin.FpCMov.src); 1395 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); 1396 return; 1397 case Xin_FpLdCW: 1398 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr); 1399 return; 1400 case Xin_FpStSW_AX: 1401 addHRegUse(u, HRmWrite, hregX86_EAX()); 1402 return; 1403 case Xin_FpCmp: 1404 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL); 1405 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR); 1406 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst); 1407 addHRegUse(u, HRmWrite, hregX86_EAX()); 1408 return; 1409 case Xin_SseLdSt: 1410 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr); 1411 addHRegUse(u, i->Xin.SseLdSt.isLoad ? 
HRmWrite : HRmRead, 1412 i->Xin.SseLdSt.reg); 1413 return; 1414 case Xin_SseLdzLO: 1415 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr); 1416 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg); 1417 return; 1418 case Xin_SseConst: 1419 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); 1420 return; 1421 case Xin_Sse32Fx4: 1422 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV); 1423 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF 1424 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF 1425 || i->Xin.Sse32Fx4.op == Xsse_SQRTF ); 1426 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src); 1427 addHRegUse(u, unary ? HRmWrite : HRmModify, 1428 i->Xin.Sse32Fx4.dst); 1429 return; 1430 case Xin_Sse32FLo: 1431 vassert(i->Xin.Sse32FLo.op != Xsse_MOV); 1432 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF 1433 || i->Xin.Sse32FLo.op == Xsse_RSQRTF 1434 || i->Xin.Sse32FLo.op == Xsse_SQRTF ); 1435 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src); 1436 addHRegUse(u, unary ? HRmWrite : HRmModify, 1437 i->Xin.Sse32FLo.dst); 1438 return; 1439 case Xin_Sse64Fx2: 1440 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV); 1441 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF 1442 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF 1443 || i->Xin.Sse64Fx2.op == Xsse_SQRTF ); 1444 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src); 1445 addHRegUse(u, unary ? HRmWrite : HRmModify, 1446 i->Xin.Sse64Fx2.dst); 1447 return; 1448 case Xin_Sse64FLo: 1449 vassert(i->Xin.Sse64FLo.op != Xsse_MOV); 1450 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF 1451 || i->Xin.Sse64FLo.op == Xsse_RSQRTF 1452 || i->Xin.Sse64FLo.op == Xsse_SQRTF ); 1453 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src); 1454 addHRegUse(u, unary ? HRmWrite : HRmModify, 1455 i->Xin.Sse64FLo.dst); 1456 return; 1457 case Xin_SseReRg: 1458 if (i->Xin.SseReRg.op == Xsse_XOR 1459 && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) { 1460 /* reg-alloc needs to understand 'xor r,r' as a write of r */ 1461 /* (as opposed to a rite of passage :-) */ 1462 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst); 1463 } else { 1464 addHRegUse(u, HRmRead, i->Xin.SseReRg.src); 1465 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV 1466 ? HRmWrite : HRmModify, 1467 i->Xin.SseReRg.dst); 1468 } 1469 return; 1470 case Xin_SseCMov: 1471 addHRegUse(u, HRmRead, i->Xin.SseCMov.src); 1472 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst); 1473 return; 1474 case Xin_SseShuf: 1475 addHRegUse(u, HRmRead, i->Xin.SseShuf.src); 1476 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst); 1477 return; 1478 case Xin_EvCheck: 1479 /* We expect both amodes only to mention %ebp, so this is in 1480 fact pointless, since %ebp isn't allocatable, but anyway.. */ 1481 addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter); 1482 addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr); 1483 return; 1484 case Xin_ProfInc: 1485 /* does not use any registers. 
*/ 1486 return; 1487 default: 1488 ppX86Instr(i, False); 1489 vpanic("getRegUsage_X86Instr"); 1490 } 1491 } 1492 1493 /* local helper */ 1494 static void mapReg( HRegRemap* m, HReg* r ) 1495 { 1496 *r = lookupHRegRemap(m, *r); 1497 } 1498 1499 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) 1500 { 1501 vassert(mode64 == False); 1502 switch (i->tag) { 1503 case Xin_Alu32R: 1504 mapRegs_X86RMI(m, i->Xin.Alu32R.src); 1505 mapReg(m, &i->Xin.Alu32R.dst); 1506 return; 1507 case Xin_Alu32M: 1508 mapRegs_X86RI(m, i->Xin.Alu32M.src); 1509 mapRegs_X86AMode(m, i->Xin.Alu32M.dst); 1510 return; 1511 case Xin_Sh32: 1512 mapReg(m, &i->Xin.Sh32.dst); 1513 return; 1514 case Xin_Test32: 1515 mapRegs_X86RM(m, i->Xin.Test32.dst); 1516 return; 1517 case Xin_Unary32: 1518 mapReg(m, &i->Xin.Unary32.dst); 1519 return; 1520 case Xin_Lea32: 1521 mapRegs_X86AMode(m, i->Xin.Lea32.am); 1522 mapReg(m, &i->Xin.Lea32.dst); 1523 return; 1524 case Xin_MulL: 1525 mapRegs_X86RM(m, i->Xin.MulL.src); 1526 return; 1527 case Xin_Div: 1528 mapRegs_X86RM(m, i->Xin.Div.src); 1529 return; 1530 case Xin_Sh3232: 1531 mapReg(m, &i->Xin.Sh3232.src); 1532 mapReg(m, &i->Xin.Sh3232.dst); 1533 return; 1534 case Xin_Push: 1535 mapRegs_X86RMI(m, i->Xin.Push.src); 1536 return; 1537 case Xin_Call: 1538 return; 1539 case Xin_XDirect: 1540 mapRegs_X86AMode(m, i->Xin.XDirect.amEIP); 1541 return; 1542 case Xin_XIndir: 1543 mapReg(m, &i->Xin.XIndir.dstGA); 1544 mapRegs_X86AMode(m, i->Xin.XIndir.amEIP); 1545 return; 1546 case Xin_XAssisted: 1547 mapReg(m, &i->Xin.XAssisted.dstGA); 1548 mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP); 1549 return; 1550 case Xin_CMov32: 1551 mapRegs_X86RM(m, i->Xin.CMov32.src); 1552 mapReg(m, &i->Xin.CMov32.dst); 1553 return; 1554 case Xin_LoadEX: 1555 mapRegs_X86AMode(m, i->Xin.LoadEX.src); 1556 mapReg(m, &i->Xin.LoadEX.dst); 1557 return; 1558 case Xin_Store: 1559 mapReg(m, &i->Xin.Store.src); 1560 mapRegs_X86AMode(m, i->Xin.Store.dst); 1561 return; 1562 case Xin_Set32: 1563 mapReg(m, &i->Xin.Set32.dst); 1564 return; 1565 case Xin_Bsfr32: 1566 mapReg(m, &i->Xin.Bsfr32.src); 1567 mapReg(m, &i->Xin.Bsfr32.dst); 1568 return; 1569 case Xin_MFence: 1570 return; 1571 case Xin_ACAS: 1572 mapRegs_X86AMode(m, i->Xin.ACAS.addr); 1573 return; 1574 case Xin_DACAS: 1575 mapRegs_X86AMode(m, i->Xin.DACAS.addr); 1576 return; 1577 case Xin_FpUnary: 1578 mapReg(m, &i->Xin.FpUnary.src); 1579 mapReg(m, &i->Xin.FpUnary.dst); 1580 return; 1581 case Xin_FpBinary: 1582 mapReg(m, &i->Xin.FpBinary.srcL); 1583 mapReg(m, &i->Xin.FpBinary.srcR); 1584 mapReg(m, &i->Xin.FpBinary.dst); 1585 return; 1586 case Xin_FpLdSt: 1587 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr); 1588 mapReg(m, &i->Xin.FpLdSt.reg); 1589 return; 1590 case Xin_FpLdStI: 1591 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr); 1592 mapReg(m, &i->Xin.FpLdStI.reg); 1593 return; 1594 case Xin_Fp64to32: 1595 mapReg(m, &i->Xin.Fp64to32.src); 1596 mapReg(m, &i->Xin.Fp64to32.dst); 1597 return; 1598 case Xin_FpCMov: 1599 mapReg(m, &i->Xin.FpCMov.src); 1600 mapReg(m, &i->Xin.FpCMov.dst); 1601 return; 1602 case Xin_FpLdCW: 1603 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr); 1604 return; 1605 case Xin_FpStSW_AX: 1606 return; 1607 case Xin_FpCmp: 1608 mapReg(m, &i->Xin.FpCmp.srcL); 1609 mapReg(m, &i->Xin.FpCmp.srcR); 1610 mapReg(m, &i->Xin.FpCmp.dst); 1611 return; 1612 case Xin_SseConst: 1613 mapReg(m, &i->Xin.SseConst.dst); 1614 return; 1615 case Xin_SseLdSt: 1616 mapReg(m, &i->Xin.SseLdSt.reg); 1617 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr); 1618 break; 1619 case Xin_SseLdzLO: 1620 mapReg(m, 
&i->Xin.SseLdzLO.reg); 1621 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr); 1622 break; 1623 case Xin_Sse32Fx4: 1624 mapReg(m, &i->Xin.Sse32Fx4.src); 1625 mapReg(m, &i->Xin.Sse32Fx4.dst); 1626 return; 1627 case Xin_Sse32FLo: 1628 mapReg(m, &i->Xin.Sse32FLo.src); 1629 mapReg(m, &i->Xin.Sse32FLo.dst); 1630 return; 1631 case Xin_Sse64Fx2: 1632 mapReg(m, &i->Xin.Sse64Fx2.src); 1633 mapReg(m, &i->Xin.Sse64Fx2.dst); 1634 return; 1635 case Xin_Sse64FLo: 1636 mapReg(m, &i->Xin.Sse64FLo.src); 1637 mapReg(m, &i->Xin.Sse64FLo.dst); 1638 return; 1639 case Xin_SseReRg: 1640 mapReg(m, &i->Xin.SseReRg.src); 1641 mapReg(m, &i->Xin.SseReRg.dst); 1642 return; 1643 case Xin_SseCMov: 1644 mapReg(m, &i->Xin.SseCMov.src); 1645 mapReg(m, &i->Xin.SseCMov.dst); 1646 return; 1647 case Xin_SseShuf: 1648 mapReg(m, &i->Xin.SseShuf.src); 1649 mapReg(m, &i->Xin.SseShuf.dst); 1650 return; 1651 case Xin_EvCheck: 1652 /* We expect both amodes only to mention %ebp, so this is in 1653 fact pointless, since %ebp isn't allocatable, but anyway.. */ 1654 mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter); 1655 mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr); 1656 return; 1657 case Xin_ProfInc: 1658 /* does not use any registers. */ 1659 return; 1660 1661 default: 1662 ppX86Instr(i, mode64); 1663 vpanic("mapRegs_X86Instr"); 1664 } 1665 } 1666 1667 /* Figure out if i represents a reg-reg move, and if so assign the 1668 source and destination to *src and *dst. If in doubt say No. Used 1669 by the register allocator to do move coalescing. 1670 */ 1671 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst ) 1672 { 1673 /* Moves between integer regs */ 1674 if (i->tag == Xin_Alu32R) { 1675 if (i->Xin.Alu32R.op != Xalu_MOV) 1676 return False; 1677 if (i->Xin.Alu32R.src->tag != Xrmi_Reg) 1678 return False; 1679 *src = i->Xin.Alu32R.src->Xrmi.Reg.reg; 1680 *dst = i->Xin.Alu32R.dst; 1681 return True; 1682 } 1683 /* Moves between FP regs */ 1684 if (i->tag == Xin_FpUnary) { 1685 if (i->Xin.FpUnary.op != Xfp_MOV) 1686 return False; 1687 *src = i->Xin.FpUnary.src; 1688 *dst = i->Xin.FpUnary.dst; 1689 return True; 1690 } 1691 if (i->tag == Xin_SseReRg) { 1692 if (i->Xin.SseReRg.op != Xsse_MOV) 1693 return False; 1694 *src = i->Xin.SseReRg.src; 1695 *dst = i->Xin.SseReRg.dst; 1696 return True; 1697 } 1698 return False; 1699 } 1700 1701 1702 /* Generate x86 spill/reload instructions under the direction of the 1703 register allocator. Note it's critical these don't write the 1704 condition codes. 
*/ 1705 1706 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1707 HReg rreg, Int offsetB, Bool mode64 ) 1708 { 1709 X86AMode* am; 1710 vassert(offsetB >= 0); 1711 vassert(!hregIsVirtual(rreg)); 1712 vassert(mode64 == False); 1713 *i1 = *i2 = NULL; 1714 am = X86AMode_IR(offsetB, hregX86_EBP()); 1715 switch (hregClass(rreg)) { 1716 case HRcInt32: 1717 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am ); 1718 return; 1719 case HRcFlt64: 1720 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am ); 1721 return; 1722 case HRcVec128: 1723 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am ); 1724 return; 1725 default: 1726 ppHRegClass(hregClass(rreg)); 1727 vpanic("genSpill_X86: unimplemented regclass"); 1728 } 1729 } 1730 1731 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1732 HReg rreg, Int offsetB, Bool mode64 ) 1733 { 1734 X86AMode* am; 1735 vassert(offsetB >= 0); 1736 vassert(!hregIsVirtual(rreg)); 1737 vassert(mode64 == False); 1738 *i1 = *i2 = NULL; 1739 am = X86AMode_IR(offsetB, hregX86_EBP()); 1740 switch (hregClass(rreg)) { 1741 case HRcInt32: 1742 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg ); 1743 return; 1744 case HRcFlt64: 1745 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am ); 1746 return; 1747 case HRcVec128: 1748 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am ); 1749 return; 1750 default: 1751 ppHRegClass(hregClass(rreg)); 1752 vpanic("genReload_X86: unimplemented regclass"); 1753 } 1754 } 1755 1756 /* The given instruction reads the specified vreg exactly once, and 1757 that vreg is currently located at the given spill offset. If 1758 possible, return a variant of the instruction to one which instead 1759 references the spill slot directly. */ 1760 1761 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off ) 1762 { 1763 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ 1764 1765 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg 1766 Convert to: src=RMI_Mem, dst=Reg 1767 */ 1768 if (i->tag == Xin_Alu32R 1769 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR 1770 || i->Xin.Alu32R.op == Xalu_XOR) 1771 && i->Xin.Alu32R.src->tag == Xrmi_Reg 1772 && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) { 1773 vassert(! sameHReg(i->Xin.Alu32R.dst, vreg)); 1774 return X86Instr_Alu32R( 1775 i->Xin.Alu32R.op, 1776 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())), 1777 i->Xin.Alu32R.dst 1778 ); 1779 } 1780 1781 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg 1782 Convert to: src=RI_Imm, dst=Mem 1783 */ 1784 if (i->tag == Xin_Alu32R 1785 && (i->Xin.Alu32R.op == Xalu_CMP) 1786 && i->Xin.Alu32R.src->tag == Xrmi_Imm 1787 && sameHReg(i->Xin.Alu32R.dst, vreg)) { 1788 return X86Instr_Alu32M( 1789 i->Xin.Alu32R.op, 1790 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ), 1791 X86AMode_IR( spill_off, hregX86_EBP()) 1792 ); 1793 } 1794 1795 /* Deal with form: Push(RMI_Reg) 1796 Convert to: Push(RMI_Mem) 1797 */ 1798 if (i->tag == Xin_Push 1799 && i->Xin.Push.src->tag == Xrmi_Reg 1800 && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) { 1801 return X86Instr_Push( 1802 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())) 1803 ); 1804 } 1805 1806 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src 1807 Convert to CMov32(RM_Mem, dst) */ 1808 if (i->tag == Xin_CMov32 1809 && i->Xin.CMov32.src->tag == Xrm_Reg 1810 && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) { 1811 vassert(! 
sameHReg(i->Xin.CMov32.dst, vreg)); 1812 return X86Instr_CMov32( 1813 i->Xin.CMov32.cond, 1814 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )), 1815 i->Xin.CMov32.dst 1816 ); 1817 } 1818 1819 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */ 1820 if (i->tag == Xin_Test32 1821 && i->Xin.Test32.dst->tag == Xrm_Reg 1822 && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) { 1823 return X86Instr_Test32( 1824 i->Xin.Test32.imm32, 1825 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) ) 1826 ); 1827 } 1828 1829 return NULL; 1830 } 1831 1832 1833 /* --------- The x86 assembler (bleh.) --------- */ 1834 1835 static UChar iregNo ( HReg r ) 1836 { 1837 UInt n; 1838 vassert(hregClass(r) == HRcInt32); 1839 vassert(!hregIsVirtual(r)); 1840 n = hregNumber(r); 1841 vassert(n <= 7); 1842 return toUChar(n); 1843 } 1844 1845 static UInt fregNo ( HReg r ) 1846 { 1847 UInt n; 1848 vassert(hregClass(r) == HRcFlt64); 1849 vassert(!hregIsVirtual(r)); 1850 n = hregNumber(r); 1851 vassert(n <= 5); 1852 return n; 1853 } 1854 1855 static UInt vregNo ( HReg r ) 1856 { 1857 UInt n; 1858 vassert(hregClass(r) == HRcVec128); 1859 vassert(!hregIsVirtual(r)); 1860 n = hregNumber(r); 1861 vassert(n <= 7); 1862 return n; 1863 } 1864 1865 static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem ) 1866 { 1867 vassert(mod < 4); 1868 vassert((reg|regmem) < 8); 1869 return toUChar( ((mod & 3) << 6) 1870 | ((reg & 7) << 3) 1871 | (regmem & 7) ); 1872 } 1873 1874 static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase ) 1875 { 1876 vassert(shift < 4); 1877 vassert((regindex|regbase) < 8); 1878 return toUChar( ((shift & 3) << 6) 1879 | ((regindex & 7) << 3) 1880 | (regbase & 7) ); 1881 } 1882 1883 static UChar* emit32 ( UChar* p, UInt w32 ) 1884 { 1885 *p++ = toUChar( w32 & 0x000000FF); 1886 *p++ = toUChar((w32 >> 8) & 0x000000FF); 1887 *p++ = toUChar((w32 >> 16) & 0x000000FF); 1888 *p++ = toUChar((w32 >> 24) & 0x000000FF); 1889 return p; 1890 } 1891 1892 /* Does a sign-extend of the lowest 8 bits give 1893 the original number? */ 1894 static Bool fits8bits ( UInt w32 ) 1895 { 1896 Int i32 = (Int)w32; 1897 return toBool(i32 == ((i32 << 24) >> 24)); 1898 } 1899 1900 1901 /* Forming mod-reg-rm bytes and scale-index-base bytes. 1902 1903 greg, 0(ereg) | ereg != ESP && ereg != EBP 1904 = 00 greg ereg 1905 1906 greg, d8(ereg) | ereg != ESP 1907 = 01 greg ereg, d8 1908 1909 greg, d32(ereg) | ereg != ESP 1910 = 10 greg ereg, d32 1911 1912 greg, d8(%esp) = 01 greg 100, 0x24, d8 1913 1914 ----------------------------------------------- 1915 1916 greg, d8(base,index,scale) 1917 | index != ESP 1918 = 01 greg 100, scale index base, d8 1919 1920 greg, d32(base,index,scale) 1921 | index != ESP 1922 = 10 greg 100, scale index base, d32 1923 */ 1924 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am ) 1925 { 1926 if (am->tag == Xam_IR) { 1927 if (am->Xam.IR.imm == 0 1928 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP()) 1929 && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) { 1930 *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg)); 1931 return p; 1932 } 1933 if (fits8bits(am->Xam.IR.imm) 1934 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) { 1935 *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg)); 1936 *p++ = toUChar(am->Xam.IR.imm & 0xFF); 1937 return p; 1938 } 1939 if (! 
sameHReg(am->Xam.IR.reg, hregX86_ESP())) { 1940 *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg)); 1941 p = emit32(p, am->Xam.IR.imm); 1942 return p; 1943 } 1944 if (sameHReg(am->Xam.IR.reg, hregX86_ESP()) 1945 && fits8bits(am->Xam.IR.imm)) { 1946 *p++ = mkModRegRM(1, iregNo(greg), 4); 1947 *p++ = 0x24; 1948 *p++ = toUChar(am->Xam.IR.imm & 0xFF); 1949 return p; 1950 } 1951 ppX86AMode(am); 1952 vpanic("doAMode_M: can't emit amode IR"); 1953 /*NOTREACHED*/ 1954 } 1955 if (am->tag == Xam_IRRS) { 1956 if (fits8bits(am->Xam.IRRS.imm) 1957 && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { 1958 *p++ = mkModRegRM(1, iregNo(greg), 4); 1959 *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index), 1960 iregNo(am->Xam.IRRS.base)); 1961 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF); 1962 return p; 1963 } 1964 if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { 1965 *p++ = mkModRegRM(2, iregNo(greg), 4); 1966 *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index), 1967 iregNo(am->Xam.IRRS.base)); 1968 p = emit32(p, am->Xam.IRRS.imm); 1969 return p; 1970 } 1971 ppX86AMode(am); 1972 vpanic("doAMode_M: can't emit amode IRRS"); 1973 /*NOTREACHED*/ 1974 } 1975 vpanic("doAMode_M: unknown amode"); 1976 /*NOTREACHED*/ 1977 } 1978 1979 1980 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ 1981 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) 1982 { 1983 *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg)); 1984 return p; 1985 } 1986 1987 1988 /* Emit ffree %st(7) */ 1989 static UChar* do_ffree_st7 ( UChar* p ) 1990 { 1991 *p++ = 0xDD; 1992 *p++ = 0xC7; 1993 return p; 1994 } 1995 1996 /* Emit fstp %st(i), 1 <= i <= 7 */ 1997 static UChar* do_fstp_st ( UChar* p, Int i ) 1998 { 1999 vassert(1 <= i && i <= 7); 2000 *p++ = 0xDD; 2001 *p++ = toUChar(0xD8+i); 2002 return p; 2003 } 2004 2005 /* Emit fld %st(i), 0 <= i <= 6 */ 2006 static UChar* do_fld_st ( UChar* p, Int i ) 2007 { 2008 vassert(0 <= i && i <= 6); 2009 *p++ = 0xD9; 2010 *p++ = toUChar(0xC0+i); 2011 return p; 2012 } 2013 2014 /* Emit f<op> %st(0) */ 2015 static UChar* do_fop1_st ( UChar* p, X86FpOp op ) 2016 { 2017 switch (op) { 2018 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; 2019 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; 2020 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 2021 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 2022 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 2023 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 2024 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 2025 case Xfp_MOV: break; 2026 case Xfp_TAN: 2027 /* fptan pushes 1.0 on the FP stack, except when the argument 2028 is out of range. Hence we have to do the instruction, 2029 then inspect C2 to see if there is an out of range 2030 condition. If there is, we skip the fincstp that is used 2031 by the in-range case to get rid of this extra 1.0 2032 value. 
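C2 is bit 10 of the FPU status word, hence the testw $0x0400,%ax in the sequence below; %eax is saved and restored around it because fnstsw %ax clobbers it.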
*/ 2033 p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */ 2034 *p++ = 0xD9; *p++ = 0xF2; // fptan 2035 *p++ = 0x50; // pushl %eax 2036 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax 2037 *p++ = 0x66; *p++ = 0xA9; 2038 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax 2039 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp 2040 *p++ = 0xD9; *p++ = 0xF7; // fincstp 2041 *p++ = 0x58; // after_fincstp: popl %eax 2042 break; 2043 default: 2044 vpanic("do_fop1_st: unknown op"); 2045 } 2046 return p; 2047 } 2048 2049 /* Emit f<op> %st(i), 1 <= i <= 5 */ 2050 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i ) 2051 { 2052 # define fake(_n) mkHReg((_n), HRcInt32, False) 2053 Int subopc; 2054 switch (op) { 2055 case Xfp_ADD: subopc = 0; break; 2056 case Xfp_SUB: subopc = 4; break; 2057 case Xfp_MUL: subopc = 1; break; 2058 case Xfp_DIV: subopc = 6; break; 2059 default: vpanic("do_fop2_st: unknown op"); 2060 } 2061 *p++ = 0xD8; 2062 p = doAMode_R(p, fake(subopc), fake(i)); 2063 return p; 2064 # undef fake 2065 } 2066 2067 /* Push a 32-bit word on the stack. The word depends on tags[3:0]; 2068 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[]. 2069 */ 2070 static UChar* push_word_from_tags ( UChar* p, UShort tags ) 2071 { 2072 UInt w; 2073 vassert(0 == (tags & ~0xF)); 2074 if (tags == 0) { 2075 /* pushl $0x00000000 */ 2076 *p++ = 0x6A; 2077 *p++ = 0x00; 2078 } 2079 else 2080 /* pushl $0xFFFFFFFF */ 2081 if (tags == 0xF) { 2082 *p++ = 0x6A; 2083 *p++ = 0xFF; 2084 } else { 2085 vassert(0); /* awaiting test case */ 2086 w = 0; 2087 if (tags & 1) w |= 0x000000FF; 2088 if (tags & 2) w |= 0x0000FF00; 2089 if (tags & 4) w |= 0x00FF0000; 2090 if (tags & 8) w |= 0xFF000000; 2091 *p++ = 0x68; 2092 p = emit32(p, w); 2093 } 2094 return p; 2095 } 2096 2097 /* Emit an instruction into buf and return the number of bytes used. 2098 Note that buf is not the insn's final place, and therefore it is 2099 imperative to emit position-independent code. If the emitted 2100 instruction was a profiler inc, set *is_profInc to True, else 2101 leave it unchanged. */ 2102 2103 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, 2104 UChar* buf, Int nbuf, X86Instr* i, 2105 Bool mode64, 2106 void* disp_cp_chain_me_to_slowEP, 2107 void* disp_cp_chain_me_to_fastEP, 2108 void* disp_cp_xindir, 2109 void* disp_cp_xassisted ) 2110 { 2111 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2112 2113 UInt xtra; 2114 UChar* p = &buf[0]; 2115 UChar* ptmp; 2116 vassert(nbuf >= 32); 2117 vassert(mode64 == False); 2118 2119 /* Wrap an integer as a int register, for use assembling 2120 GrpN insns, in which the greg field is used as a sub-opcode 2121 and does not really contain a register. 
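For example, "decl r/m32" is encoded as FF /1, so the Xin_EvCheck case below passes fake(1) as the greg argument to doAMode_M.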
*/ 2122 # define fake(_n) mkHReg((_n), HRcInt32, False) 2123 2124 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */ 2125 2126 switch (i->tag) { 2127 2128 case Xin_Alu32R: 2129 /* Deal specially with MOV */ 2130 if (i->Xin.Alu32R.op == Xalu_MOV) { 2131 switch (i->Xin.Alu32R.src->tag) { 2132 case Xrmi_Imm: 2133 *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst)); 2134 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2135 goto done; 2136 case Xrmi_Reg: 2137 *p++ = 0x89; 2138 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2139 i->Xin.Alu32R.dst); 2140 goto done; 2141 case Xrmi_Mem: 2142 *p++ = 0x8B; 2143 p = doAMode_M(p, i->Xin.Alu32R.dst, 2144 i->Xin.Alu32R.src->Xrmi.Mem.am); 2145 goto done; 2146 default: 2147 goto bad; 2148 } 2149 } 2150 /* MUL */ 2151 if (i->Xin.Alu32R.op == Xalu_MUL) { 2152 switch (i->Xin.Alu32R.src->tag) { 2153 case Xrmi_Reg: 2154 *p++ = 0x0F; 2155 *p++ = 0xAF; 2156 p = doAMode_R(p, i->Xin.Alu32R.dst, 2157 i->Xin.Alu32R.src->Xrmi.Reg.reg); 2158 goto done; 2159 case Xrmi_Mem: 2160 *p++ = 0x0F; 2161 *p++ = 0xAF; 2162 p = doAMode_M(p, i->Xin.Alu32R.dst, 2163 i->Xin.Alu32R.src->Xrmi.Mem.am); 2164 goto done; 2165 case Xrmi_Imm: 2166 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2167 *p++ = 0x6B; 2168 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2169 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2170 } else { 2171 *p++ = 0x69; 2172 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); 2173 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2174 } 2175 goto done; 2176 default: 2177 goto bad; 2178 } 2179 } 2180 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2181 opc = opc_rr = subopc_imm = opc_imma = 0; 2182 switch (i->Xin.Alu32R.op) { 2183 case Xalu_ADC: opc = 0x13; opc_rr = 0x11; 2184 subopc_imm = 2; opc_imma = 0x15; break; 2185 case Xalu_ADD: opc = 0x03; opc_rr = 0x01; 2186 subopc_imm = 0; opc_imma = 0x05; break; 2187 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29; 2188 subopc_imm = 5; opc_imma = 0x2D; break; 2189 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19; 2190 subopc_imm = 3; opc_imma = 0x1D; break; 2191 case Xalu_AND: opc = 0x23; opc_rr = 0x21; 2192 subopc_imm = 4; opc_imma = 0x25; break; 2193 case Xalu_XOR: opc = 0x33; opc_rr = 0x31; 2194 subopc_imm = 6; opc_imma = 0x35; break; 2195 case Xalu_OR: opc = 0x0B; opc_rr = 0x09; 2196 subopc_imm = 1; opc_imma = 0x0D; break; 2197 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39; 2198 subopc_imm = 7; opc_imma = 0x3D; break; 2199 default: goto bad; 2200 } 2201 switch (i->Xin.Alu32R.src->tag) { 2202 case Xrmi_Imm: 2203 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX()) 2204 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2205 *p++ = toUChar(opc_imma); 2206 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2207 } else 2208 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { 2209 *p++ = 0x83; 2210 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst); 2211 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2212 } else { 2213 *p++ = 0x81; 2214 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst); 2215 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); 2216 } 2217 goto done; 2218 case Xrmi_Reg: 2219 *p++ = toUChar(opc_rr); 2220 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, 2221 i->Xin.Alu32R.dst); 2222 goto done; 2223 case Xrmi_Mem: 2224 *p++ = toUChar(opc); 2225 p = doAMode_M(p, i->Xin.Alu32R.dst, 2226 i->Xin.Alu32R.src->Xrmi.Mem.am); 2227 goto done; 2228 default: 2229 goto bad; 2230 } 2231 break; 2232 2233 case Xin_Alu32M: 2234 /* Deal specially with MOV */ 2235 if (i->Xin.Alu32M.op == Xalu_MOV) { 2236 
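/* movl %reg, amode is 89 /r and movl $imm32, amode is C7 /0 imm32;
   for example, movl %eax, 4(%ebp) assembles to 89 45 04. */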
switch (i->Xin.Alu32M.src->tag) { 2237 case Xri_Reg: 2238 *p++ = 0x89; 2239 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2240 i->Xin.Alu32M.dst); 2241 goto done; 2242 case Xri_Imm: 2243 *p++ = 0xC7; 2244 p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst); 2245 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2246 goto done; 2247 default: 2248 goto bad; 2249 } 2250 } 2251 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not 2252 allowed here. */ 2253 opc = subopc_imm = opc_imma = 0; 2254 switch (i->Xin.Alu32M.op) { 2255 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; 2256 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; 2257 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break; 2258 default: goto bad; 2259 } 2260 switch (i->Xin.Alu32M.src->tag) { 2261 case Xri_Reg: 2262 *p++ = toUChar(opc); 2263 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2264 i->Xin.Alu32M.dst); 2265 goto done; 2266 case Xri_Imm: 2267 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { 2268 *p++ = 0x83; 2269 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2270 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32); 2271 goto done; 2272 } else { 2273 *p++ = 0x81; 2274 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2275 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2276 goto done; 2277 } 2278 default: 2279 goto bad; 2280 } 2281 break; 2282 2283 case Xin_Sh32: 2284 opc_cl = opc_imm = subopc = 0; 2285 switch (i->Xin.Sh32.op) { 2286 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2287 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2288 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2289 default: goto bad; 2290 } 2291 if (i->Xin.Sh32.src == 0) { 2292 *p++ = toUChar(opc_cl); 2293 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst); 2294 } else { 2295 *p++ = toUChar(opc_imm); 2296 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst); 2297 *p++ = (UChar)(i->Xin.Sh32.src); 2298 } 2299 goto done; 2300 2301 case Xin_Test32: 2302 if (i->Xin.Test32.dst->tag == Xrm_Reg) { 2303 /* testl $imm32, %reg */ 2304 *p++ = 0xF7; 2305 p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg); 2306 p = emit32(p, i->Xin.Test32.imm32); 2307 goto done; 2308 } else { 2309 /* testl $imm32, amode */ 2310 *p++ = 0xF7; 2311 p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am); 2312 p = emit32(p, i->Xin.Test32.imm32); 2313 goto done; 2314 } 2315 2316 case Xin_Unary32: 2317 if (i->Xin.Unary32.op == Xun_NOT) { 2318 *p++ = 0xF7; 2319 p = doAMode_R(p, fake(2), i->Xin.Unary32.dst); 2320 goto done; 2321 } 2322 if (i->Xin.Unary32.op == Xun_NEG) { 2323 *p++ = 0xF7; 2324 p = doAMode_R(p, fake(3), i->Xin.Unary32.dst); 2325 goto done; 2326 } 2327 break; 2328 2329 case Xin_Lea32: 2330 *p++ = 0x8D; 2331 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am); 2332 goto done; 2333 2334 case Xin_MulL: 2335 subopc = i->Xin.MulL.syned ? 5 : 4; 2336 *p++ = 0xF7; 2337 switch (i->Xin.MulL.src->tag) { 2338 case Xrm_Mem: 2339 p = doAMode_M(p, fake(subopc), 2340 i->Xin.MulL.src->Xrm.Mem.am); 2341 goto done; 2342 case Xrm_Reg: 2343 p = doAMode_R(p, fake(subopc), 2344 i->Xin.MulL.src->Xrm.Reg.reg); 2345 goto done; 2346 default: 2347 goto bad; 2348 } 2349 break; 2350 2351 case Xin_Div: 2352 subopc = i->Xin.Div.syned ? 
7 : 6; 2353 *p++ = 0xF7; 2354 switch (i->Xin.Div.src->tag) { 2355 case Xrm_Mem: 2356 p = doAMode_M(p, fake(subopc), 2357 i->Xin.Div.src->Xrm.Mem.am); 2358 goto done; 2359 case Xrm_Reg: 2360 p = doAMode_R(p, fake(subopc), 2361 i->Xin.Div.src->Xrm.Reg.reg); 2362 goto done; 2363 default: 2364 goto bad; 2365 } 2366 break; 2367 2368 case Xin_Sh3232: 2369 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); 2370 if (i->Xin.Sh3232.amt == 0) { 2371 /* shldl/shrdl by %cl */ 2372 *p++ = 0x0F; 2373 if (i->Xin.Sh3232.op == Xsh_SHL) { 2374 *p++ = 0xA5; 2375 } else { 2376 *p++ = 0xAD; 2377 } 2378 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); 2379 goto done; 2380 } 2381 break; 2382 2383 case Xin_Push: 2384 switch (i->Xin.Push.src->tag) { 2385 case Xrmi_Mem: 2386 *p++ = 0xFF; 2387 p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am); 2388 goto done; 2389 case Xrmi_Imm: 2390 *p++ = 0x68; 2391 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32); 2392 goto done; 2393 case Xrmi_Reg: 2394 *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg)); 2395 goto done; 2396 default: 2397 goto bad; 2398 } 2399 2400 case Xin_Call: 2401 if (i->Xin.Call.cond != Xcc_ALWAYS 2402 && i->Xin.Call.rloc.pri != RLPri_None) { 2403 /* The call might not happen (it isn't unconditional) and it 2404 returns a result. In this case we will need to generate a 2405 control flow diamond to put 0x555..555 in the return 2406 register(s) in the case where the call doesn't happen. If 2407 this ever becomes necessary, maybe copy code from the ARM 2408 equivalent. Until that day, just give up. */ 2409 goto bad; 2410 } 2411 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above 2412 for explanation of this. */ 2413 switch (i->Xin.Call.regparms) { 2414 case 0: irno = iregNo(hregX86_EAX()); break; 2415 case 1: irno = iregNo(hregX86_EDX()); break; 2416 case 2: irno = iregNo(hregX86_ECX()); break; 2417 case 3: irno = iregNo(hregX86_EDI()); break; 2418 default: vpanic(" emit_X86Instr:call:regparms"); 2419 } 2420 /* jump over the following two insns if the condition does not 2421 hold */ 2422 if (i->Xin.Call.cond != Xcc_ALWAYS) { 2423 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1))); 2424 *p++ = 0x07; /* 7 bytes in the next two insns */ 2425 } 2426 /* movl $target, %tmp */ 2427 *p++ = toUChar(0xB8 + irno); 2428 p = emit32(p, i->Xin.Call.target); 2429 /* call *%tmp */ 2430 *p++ = 0xFF; 2431 *p++ = toUChar(0xD0 + irno); 2432 goto done; 2433 2434 case Xin_XDirect: { 2435 /* NB: what goes on here has to be very closely coordinated with the 2436 chainXDirect_X86 and unchainXDirect_X86 below. */ 2437 /* We're generating chain-me requests here, so we need to be 2438 sure this is actually allowed -- no-redir translations can't 2439 use chain-me's. Hence: */ 2440 vassert(disp_cp_chain_me_to_slowEP != NULL); 2441 vassert(disp_cp_chain_me_to_fastEP != NULL); 2442 2443 /* Use ptmp for backpatching conditional jumps. */ 2444 ptmp = NULL; 2445 2446 /* First off, if this is conditional, create a conditional 2447 jump over the rest of it. */ 2448 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2449 /* jmp fwds if !condition */ 2450 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1))); 2451 ptmp = p; /* fill in this bit later */ 2452 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2453 } 2454 2455 /* Update the guest EIP. 
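That is, write the known destination dstGA into the guest state's EIP field (via amEIP), so the dispatcher can look up, and possibly chain directly to, the destination translation.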
*/ 2456 /* movl $dstGA, amEIP */ 2457 *p++ = 0xC7; 2458 p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP); 2459 p = emit32(p, i->Xin.XDirect.dstGA); 2460 2461 /* --- FIRST PATCHABLE BYTE follows --- */ 2462 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling 2463 to) backs up the return address, so as to find the address of 2464 the first patchable byte. So: don't change the length of the 2465 two instructions below. */ 2466 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */ 2467 *p++ = 0xBA; 2468 void* disp_cp_chain_me 2469 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP 2470 : disp_cp_chain_me_to_slowEP; 2471 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me)); 2472 /* call *%edx */ 2473 *p++ = 0xFF; 2474 *p++ = 0xD2; 2475 /* --- END of PATCHABLE BYTES --- */ 2476 2477 /* Fix up the conditional jump, if there was one. */ 2478 if (i->Xin.XDirect.cond != Xcc_ALWAYS) { 2479 Int delta = p - ptmp; 2480 vassert(delta > 0 && delta < 40); 2481 *ptmp = toUChar(delta-1); 2482 } 2483 goto done; 2484 } 2485 2486 case Xin_XIndir: { 2487 /* We're generating transfers that could lead indirectly to a 2488 chain-me, so we need to be sure this is actually allowed -- 2489 no-redir translations are not allowed to reach normal 2490 translations without going through the scheduler. That means 2491 no XDirects or XIndirs out from no-redir translations. 2492 Hence: */ 2493 vassert(disp_cp_xindir != NULL); 2494 2495 /* Use ptmp for backpatching conditional jumps. */ 2496 ptmp = NULL; 2497 2498 /* First off, if this is conditional, create a conditional 2499 jump over the rest of it. */ 2500 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2501 /* jmp fwds if !condition */ 2502 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1))); 2503 ptmp = p; /* fill in this bit later */ 2504 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2505 } 2506 2507 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2508 *p++ = 0x89; 2509 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2510 2511 /* movl $disp_indir, %edx */ 2512 *p++ = 0xBA; 2513 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir)); 2514 /* jmp *%edx */ 2515 *p++ = 0xFF; 2516 *p++ = 0xE2; 2517 2518 /* Fix up the conditional jump, if there was one. */ 2519 if (i->Xin.XIndir.cond != Xcc_ALWAYS) { 2520 Int delta = p - ptmp; 2521 vassert(delta > 0 && delta < 40); 2522 *ptmp = toUChar(delta-1); 2523 } 2524 goto done; 2525 } 2526 2527 case Xin_XAssisted: { 2528 /* Use ptmp for backpatching conditional jumps. */ 2529 ptmp = NULL; 2530 2531 /* First off, if this is conditional, create a conditional 2532 jump over the rest of it. */ 2533 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2534 /* jmp fwds if !condition */ 2535 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1))); 2536 ptmp = p; /* fill in this bit later */ 2537 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2538 } 2539 2540 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ 2541 *p++ = 0x89; 2542 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); 2543 /* movl $magic_number, %ebp. 
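The magic number is the VEX_TRC_JMP_* value selected below for this jump kind; the dispatcher inspects %ebp to see why the assisted transfer happened.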
*/ 2544 UInt trcval = 0; 2545 switch (i->Xin.XAssisted.jk) { 2546 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; 2547 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; 2548 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; 2549 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break; 2550 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break; 2551 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break; 2552 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; 2553 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; 2554 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; 2555 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; 2556 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; 2557 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; 2558 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; 2559 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; 2560 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; 2561 /* We don't expect to see the following being assisted. */ 2562 case Ijk_Ret: 2563 case Ijk_Call: 2564 /* fallthrough */ 2565 default: 2566 ppIRJumpKind(i->Xin.XAssisted.jk); 2567 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind"); 2568 } 2569 vassert(trcval != 0); 2570 *p++ = 0xBD; 2571 p = emit32(p, trcval); 2572 2573 /* movl $disp_indir, %edx */ 2574 *p++ = 0xBA; 2575 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted)); 2576 /* jmp *%edx */ 2577 *p++ = 0xFF; 2578 *p++ = 0xE2; 2579 2580 /* Fix up the conditional jump, if there was one. */ 2581 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { 2582 Int delta = p - ptmp; 2583 vassert(delta > 0 && delta < 40); 2584 *ptmp = toUChar(delta-1); 2585 } 2586 goto done; 2587 } 2588 2589 case Xin_CMov32: 2590 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS); 2591 2592 /* This generates cmov, which is illegal on P54/P55. */ 2593 /* 2594 *p++ = 0x0F; 2595 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond)); 2596 if (i->Xin.CMov32.src->tag == Xrm_Reg) { 2597 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg); 2598 goto done; 2599 } 2600 if (i->Xin.CMov32.src->tag == Xrm_Mem) { 2601 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am); 2602 goto done; 2603 } 2604 */ 2605 2606 /* Alternative version which works on any x86 variant. */ 2607 /* jmp fwds if !condition */ 2608 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1)); 2609 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2610 ptmp = p; 2611 2612 switch (i->Xin.CMov32.src->tag) { 2613 case Xrm_Reg: 2614 /* Big sigh. This is movl E -> G ... */ 2615 *p++ = 0x89; 2616 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg, 2617 i->Xin.CMov32.dst); 2618 2619 break; 2620 case Xrm_Mem: 2621 /* ... whereas this is movl G -> E. That's why the args 2622 to doAMode_R appear to be the wrong way round in the 2623 Xrm_Reg case. */ 2624 *p++ = 0x8B; 2625 p = doAMode_M(p, i->Xin.CMov32.dst, 2626 i->Xin.CMov32.src->Xrm.Mem.am); 2627 break; 2628 default: 2629 goto bad; 2630 } 2631 /* Fill in the jump offset. 
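ptmp points just past the Jcc's 8-bit displacement byte, so the number of bytes to skip over is simply p - ptmp.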
*/ 2632 *(ptmp-1) = toUChar(p - ptmp); 2633 goto done; 2634 2635 break; 2636 2637 case Xin_LoadEX: 2638 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) { 2639 /* movzbl */ 2640 *p++ = 0x0F; 2641 *p++ = 0xB6; 2642 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2643 goto done; 2644 } 2645 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) { 2646 /* movzwl */ 2647 *p++ = 0x0F; 2648 *p++ = 0xB7; 2649 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2650 goto done; 2651 } 2652 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) { 2653 /* movsbl */ 2654 *p++ = 0x0F; 2655 *p++ = 0xBE; 2656 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); 2657 goto done; 2658 } 2659 break; 2660 2661 case Xin_Set32: 2662 /* Make the destination register be 1 or 0, depending on whether 2663 the relevant condition holds. We have to dodge and weave 2664 when the destination is %esi or %edi as we cannot directly 2665 emit the native 'setb %reg' for those. Further complication: 2666 the top 24 bits of the destination should be forced to zero, 2667 but doing 'xor %r,%r' kills the flag(s) we are about to read. 2668 Sigh. So start off my moving $0 into the dest. */ 2669 2670 /* Do we need to swap in %eax? */ 2671 if (iregNo(i->Xin.Set32.dst) >= 4) { 2672 /* xchg %eax, %dst */ 2673 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst)); 2674 /* movl $0, %eax */ 2675 *p++ =toUChar(0xB8 + iregNo(hregX86_EAX())); 2676 p = emit32(p, 0); 2677 /* setb lo8(%eax) */ 2678 *p++ = 0x0F; 2679 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); 2680 p = doAMode_R(p, fake(0), hregX86_EAX()); 2681 /* xchg %eax, %dst */ 2682 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst)); 2683 } else { 2684 /* movl $0, %dst */ 2685 *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst)); 2686 p = emit32(p, 0); 2687 /* setb lo8(%dst) */ 2688 *p++ = 0x0F; 2689 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); 2690 p = doAMode_R(p, fake(0), i->Xin.Set32.dst); 2691 } 2692 goto done; 2693 2694 case Xin_Bsfr32: 2695 *p++ = 0x0F; 2696 if (i->Xin.Bsfr32.isFwds) { 2697 *p++ = 0xBC; 2698 } else { 2699 *p++ = 0xBD; 2700 } 2701 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src); 2702 goto done; 2703 2704 case Xin_MFence: 2705 /* see comment in hdefs.h re this insn */ 2706 if (0) vex_printf("EMIT FENCE\n"); 2707 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3 2708 |VEX_HWCAPS_X86_SSE2)) { 2709 /* mfence */ 2710 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; 2711 goto done; 2712 } 2713 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) { 2714 /* sfence */ 2715 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; 2716 /* lock addl $0,0(%esp) */ 2717 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; 2718 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; 2719 goto done; 2720 } 2721 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) { 2722 /* lock addl $0,0(%esp) */ 2723 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; 2724 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; 2725 goto done; 2726 } 2727 vpanic("emit_X86Instr:mfence:hwcaps"); 2728 /*NOTREACHED*/ 2729 break; 2730 2731 case Xin_ACAS: 2732 /* lock */ 2733 *p++ = 0xF0; 2734 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value 2735 in %ebx. The new-value register is hardwired to be %ebx 2736 since letting it be any integer register gives the problem 2737 that %sil and %dil are unaddressible on x86 and hence we 2738 would have to resort to the same kind of trickery as with 2739 byte-sized Xin.Store, just below. 
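(That is, only %al/%bl/%cl/%dl can be named as byte registers without the REX prefixes that only amd64 has.)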
Given that this isn't 2740 performance critical, it is simpler just to force the 2741 register operand to %ebx (could equally be %ecx or %edx). 2742 (Although %ebx is more consistent with cmpxchg8b.) */ 2743 if (i->Xin.ACAS.sz == 2) *p++ = 0x66; 2744 *p++ = 0x0F; 2745 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 2746 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr); 2747 goto done; 2748 2749 case Xin_DACAS: 2750 /* lock */ 2751 *p++ = 0xF0; 2752 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value 2753 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so 2754 aren't encoded in the insn. */ 2755 *p++ = 0x0F; 2756 *p++ = 0xC7; 2757 p = doAMode_M(p, fake(1), i->Xin.DACAS.addr); 2758 goto done; 2759 2760 case Xin_Store: 2761 if (i->Xin.Store.sz == 2) { 2762 /* This case, at least, is simple, given that we can 2763 reference the low 16 bits of any integer register. */ 2764 *p++ = 0x66; 2765 *p++ = 0x89; 2766 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2767 goto done; 2768 } 2769 2770 if (i->Xin.Store.sz == 1) { 2771 /* We have to do complex dodging and weaving if src is not 2772 the low 8 bits of %eax/%ebx/%ecx/%edx. */ 2773 if (iregNo(i->Xin.Store.src) < 4) { 2774 /* we're OK, can do it directly */ 2775 *p++ = 0x88; 2776 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); 2777 goto done; 2778 } else { 2779 /* Bleh. This means the source is %edi or %esi. Since 2780 the address mode can only mention three registers, at 2781 least one of %eax/%ebx/%ecx/%edx must be available to 2782 temporarily swap the source into, so the store can 2783 happen. So we have to look at the regs mentioned 2784 in the amode. */ 2785 HReg swap = INVALID_HREG; 2786 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(), 2787 ecx = hregX86_ECX(), edx = hregX86_EDX(); 2788 Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True; 2789 HRegUsage u; 2790 Int j; 2791 initHRegUsage(&u); 2792 addRegUsage_X86AMode(&u, i->Xin.Store.dst); 2793 for (j = 0; j < u.n_used; j++) { 2794 HReg r = u.hreg[j]; 2795 if (sameHReg(r, eax)) a_ok = False; 2796 if (sameHReg(r, ebx)) b_ok = False; 2797 if (sameHReg(r, ecx)) c_ok = False; 2798 if (sameHReg(r, edx)) d_ok = False; 2799 } 2800 if (a_ok) swap = eax; 2801 if (b_ok) swap = ebx; 2802 if (c_ok) swap = ecx; 2803 if (d_ok) swap = edx; 2804 vassert(! hregIsInvalid(swap)); 2805 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2806 *p++ = 0x87; 2807 p = doAMode_R(p, i->Xin.Store.src, swap); 2808 /* movb lo8{%swap}, (dst) */ 2809 *p++ = 0x88; 2810 p = doAMode_M(p, swap, i->Xin.Store.dst); 2811 /* xchgl %source, %swap. Could do better if swap is %eax. */ 2812 *p++ = 0x87; 2813 p = doAMode_R(p, i->Xin.Store.src, swap); 2814 goto done; 2815 } 2816 } /* if (i->Xin.Store.sz == 1) */ 2817 break; 2818 2819 case Xin_FpUnary: 2820 /* gop %src, %dst 2821 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) 2822 */ 2823 p = do_ffree_st7(p); 2824 p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src)); 2825 p = do_fop1_st(p, i->Xin.FpUnary.op); 2826 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst)); 2827 goto done; 2828 2829 case Xin_FpBinary: 2830 if (i->Xin.FpBinary.op == Xfp_YL2X 2831 || i->Xin.FpBinary.op == Xfp_YL2XP1) { 2832 /* Have to do this specially. 
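fyl2x and fyl2xp1 operate on %st(1) and %st(0) and pop the stack, so they cannot go through do_fop2_st; both operands have to be loaded explicitly first.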
*/ 2833 /* ffree %st7 ; fld %st(srcL) ; 2834 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ 2835 p = do_ffree_st7(p); 2836 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 2837 p = do_ffree_st7(p); 2838 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 2839 *p++ = 0xD9; 2840 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9); 2841 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 2842 goto done; 2843 } 2844 if (i->Xin.FpBinary.op == Xfp_ATAN) { 2845 /* Have to do this specially. */ 2846 /* ffree %st7 ; fld %st(srcL) ; 2847 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ 2848 p = do_ffree_st7(p); 2849 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 2850 p = do_ffree_st7(p); 2851 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 2852 *p++ = 0xD9; *p++ = 0xF3; 2853 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 2854 goto done; 2855 } 2856 if (i->Xin.FpBinary.op == Xfp_PREM 2857 || i->Xin.FpBinary.op == Xfp_PREM1 2858 || i->Xin.FpBinary.op == Xfp_SCALE) { 2859 /* Have to do this specially. */ 2860 /* ffree %st7 ; fld %st(srcR) ; 2861 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; 2862 fincstp ; ffree %st7 */ 2863 p = do_ffree_st7(p); 2864 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR)); 2865 p = do_ffree_st7(p); 2866 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL)); 2867 *p++ = 0xD9; 2868 switch (i->Xin.FpBinary.op) { 2869 case Xfp_PREM: *p++ = 0xF8; break; 2870 case Xfp_PREM1: *p++ = 0xF5; break; 2871 case Xfp_SCALE: *p++ = 0xFD; break; 2872 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)"); 2873 } 2874 p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst)); 2875 *p++ = 0xD9; *p++ = 0xF7; 2876 p = do_ffree_st7(p); 2877 goto done; 2878 } 2879 /* General case */ 2880 /* gop %srcL, %srcR, %dst 2881 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) 2882 */ 2883 p = do_ffree_st7(p); 2884 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 2885 p = do_fop2_st(p, i->Xin.FpBinary.op, 2886 1+hregNumber(i->Xin.FpBinary.srcR)); 2887 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 2888 goto done; 2889 2890 case Xin_FpLdSt: 2891 if (i->Xin.FpLdSt.isLoad) { 2892 /* Load from memory into %fakeN. 2893 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) 2894 */ 2895 p = do_ffree_st7(p); 2896 switch (i->Xin.FpLdSt.sz) { 2897 case 4: 2898 *p++ = 0xD9; 2899 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); 2900 break; 2901 case 8: 2902 *p++ = 0xDD; 2903 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); 2904 break; 2905 case 10: 2906 *p++ = 0xDB; 2907 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr); 2908 break; 2909 default: 2910 vpanic("emitX86Instr(FpLdSt,load)"); 2911 } 2912 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg)); 2913 goto done; 2914 } else { 2915 /* Store from %fakeN into memory. 
2916 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode 2917 */ 2918 p = do_ffree_st7(p); 2919 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg)); 2920 switch (i->Xin.FpLdSt.sz) { 2921 case 4: 2922 *p++ = 0xD9; 2923 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); 2924 break; 2925 case 8: 2926 *p++ = 0xDD; 2927 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); 2928 break; 2929 case 10: 2930 *p++ = 0xDB; 2931 p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr); 2932 break; 2933 default: 2934 vpanic("emitX86Instr(FpLdSt,store)"); 2935 } 2936 goto done; 2937 } 2938 break; 2939 2940 case Xin_FpLdStI: 2941 if (i->Xin.FpLdStI.isLoad) { 2942 /* Load from memory into %fakeN, converting from an int. 2943 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) 2944 */ 2945 switch (i->Xin.FpLdStI.sz) { 2946 case 8: opc = 0xDF; subopc_imm = 5; break; 2947 case 4: opc = 0xDB; subopc_imm = 0; break; 2948 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; 2949 default: vpanic("emitX86Instr(Xin_FpLdStI-load)"); 2950 } 2951 p = do_ffree_st7(p); 2952 *p++ = toUChar(opc); 2953 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 2954 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg)); 2955 goto done; 2956 } else { 2957 /* Store from %fakeN into memory, converting to an int. 2958 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode 2959 */ 2960 switch (i->Xin.FpLdStI.sz) { 2961 case 8: opc = 0xDF; subopc_imm = 7; break; 2962 case 4: opc = 0xDB; subopc_imm = 3; break; 2963 case 2: opc = 0xDF; subopc_imm = 3; break; 2964 default: vpanic("emitX86Instr(Xin_FpLdStI-store)"); 2965 } 2966 p = do_ffree_st7(p); 2967 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg)); 2968 *p++ = toUChar(opc); 2969 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 2970 goto done; 2971 } 2972 break; 2973 2974 case Xin_Fp64to32: 2975 /* ffree %st7 ; fld %st(src) */ 2976 p = do_ffree_st7(p); 2977 p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src)); 2978 /* subl $4, %esp */ 2979 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; 2980 /* fstps (%esp) */ 2981 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; 2982 /* flds (%esp) */ 2983 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; 2984 /* addl $4, %esp */ 2985 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; 2986 /* fstp %st(1+dst) */ 2987 p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst)); 2988 goto done; 2989 2990 case Xin_FpCMov: 2991 /* jmp fwds if !condition */ 2992 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1)); 2993 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 2994 ptmp = p; 2995 2996 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ 2997 p = do_ffree_st7(p); 2998 p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src)); 2999 p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst)); 3000 3001 /* Fill in the jump offset. 
*/ 3002 *(ptmp-1) = toUChar(p - ptmp); 3003 goto done; 3004 3005 case Xin_FpLdCW: 3006 *p++ = 0xD9; 3007 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr); 3008 goto done; 3009 3010 case Xin_FpStSW_AX: 3011 /* note, this emits fnstsw %ax, not fstsw %ax */ 3012 *p++ = 0xDF; 3013 *p++ = 0xE0; 3014 goto done; 3015 3016 case Xin_FpCmp: 3017 /* gcmp %fL, %fR, %dst 3018 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; 3019 fnstsw %ax ; movl %eax, %dst 3020 */ 3021 /* ffree %st7 */ 3022 p = do_ffree_st7(p); 3023 /* fpush %fL */ 3024 p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL)); 3025 /* fucomp %(fR+1) */ 3026 *p++ = 0xDD; 3027 *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)))); 3028 /* fnstsw %ax */ 3029 *p++ = 0xDF; 3030 *p++ = 0xE0; 3031 /* movl %eax, %dst */ 3032 *p++ = 0x89; 3033 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst); 3034 goto done; 3035 3036 case Xin_SseConst: { 3037 UShort con = i->Xin.SseConst.con; 3038 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF)); 3039 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF)); 3040 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF)); 3041 p = push_word_from_tags(p, toUShort(con & 0xF)); 3042 /* movl (%esp), %xmm-dst */ 3043 *p++ = 0x0F; 3044 *p++ = 0x10; 3045 *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst))); 3046 *p++ = 0x24; 3047 /* addl $16, %esp */ 3048 *p++ = 0x83; 3049 *p++ = 0xC4; 3050 *p++ = 0x10; 3051 goto done; 3052 } 3053 3054 case Xin_SseLdSt: 3055 *p++ = 0x0F; 3056 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11); 3057 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr); 3058 goto done; 3059 3060 case Xin_SseLdzLO: 3061 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8); 3062 /* movs[sd] amode, %xmm-dst */ 3063 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 
0xF3 : 0xF2); 3064 *p++ = 0x0F; 3065 *p++ = 0x10; 3066 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)), 3067 i->Xin.SseLdzLO.addr); 3068 goto done; 3069 3070 case Xin_Sse32Fx4: 3071 xtra = 0; 3072 *p++ = 0x0F; 3073 switch (i->Xin.Sse32Fx4.op) { 3074 case Xsse_ADDF: *p++ = 0x58; break; 3075 case Xsse_DIVF: *p++ = 0x5E; break; 3076 case Xsse_MAXF: *p++ = 0x5F; break; 3077 case Xsse_MINF: *p++ = 0x5D; break; 3078 case Xsse_MULF: *p++ = 0x59; break; 3079 case Xsse_RCPF: *p++ = 0x53; break; 3080 case Xsse_RSQRTF: *p++ = 0x52; break; 3081 case Xsse_SQRTF: *p++ = 0x51; break; 3082 case Xsse_SUBF: *p++ = 0x5C; break; 3083 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3084 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3085 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3086 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3087 default: goto bad; 3088 } 3089 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)), 3090 fake(vregNo(i->Xin.Sse32Fx4.src)) ); 3091 if (xtra & 0x100) 3092 *p++ = toUChar(xtra & 0xFF); 3093 goto done; 3094 3095 case Xin_Sse64Fx2: 3096 xtra = 0; 3097 *p++ = 0x66; 3098 *p++ = 0x0F; 3099 switch (i->Xin.Sse64Fx2.op) { 3100 case Xsse_ADDF: *p++ = 0x58; break; 3101 case Xsse_DIVF: *p++ = 0x5E; break; 3102 case Xsse_MAXF: *p++ = 0x5F; break; 3103 case Xsse_MINF: *p++ = 0x5D; break; 3104 case Xsse_MULF: *p++ = 0x59; break; 3105 case Xsse_RCPF: *p++ = 0x53; break; 3106 case Xsse_RSQRTF: *p++ = 0x52; break; 3107 case Xsse_SQRTF: *p++ = 0x51; break; 3108 case Xsse_SUBF: *p++ = 0x5C; break; 3109 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3110 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3111 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3112 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3113 default: goto bad; 3114 } 3115 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)), 3116 fake(vregNo(i->Xin.Sse64Fx2.src)) ); 3117 if (xtra & 0x100) 3118 *p++ = toUChar(xtra & 0xFF); 3119 goto done; 3120 3121 case Xin_Sse32FLo: 3122 xtra = 0; 3123 *p++ = 0xF3; 3124 *p++ = 0x0F; 3125 switch (i->Xin.Sse32FLo.op) { 3126 case Xsse_ADDF: *p++ = 0x58; break; 3127 case Xsse_DIVF: *p++ = 0x5E; break; 3128 case Xsse_MAXF: *p++ = 0x5F; break; 3129 case Xsse_MINF: *p++ = 0x5D; break; 3130 case Xsse_MULF: *p++ = 0x59; break; 3131 case Xsse_RCPF: *p++ = 0x53; break; 3132 case Xsse_RSQRTF: *p++ = 0x52; break; 3133 case Xsse_SQRTF: *p++ = 0x51; break; 3134 case Xsse_SUBF: *p++ = 0x5C; break; 3135 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3136 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3137 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3138 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3139 default: goto bad; 3140 } 3141 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)), 3142 fake(vregNo(i->Xin.Sse32FLo.src)) ); 3143 if (xtra & 0x100) 3144 *p++ = toUChar(xtra & 0xFF); 3145 goto done; 3146 3147 case Xin_Sse64FLo: 3148 xtra = 0; 3149 *p++ = 0xF2; 3150 *p++ = 0x0F; 3151 switch (i->Xin.Sse64FLo.op) { 3152 case Xsse_ADDF: *p++ = 0x58; break; 3153 case Xsse_DIVF: *p++ = 0x5E; break; 3154 case Xsse_MAXF: *p++ = 0x5F; break; 3155 case Xsse_MINF: *p++ = 0x5D; break; 3156 case Xsse_MULF: *p++ = 0x59; break; 3157 case Xsse_RCPF: *p++ = 0x53; break; 3158 case Xsse_RSQRTF: *p++ = 0x52; break; 3159 case Xsse_SQRTF: *p++ = 0x51; break; 3160 case Xsse_SUBF: *p++ = 0x5C; break; 3161 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3162 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3163 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3164 case 
Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3165 default: goto bad; 3166 } 3167 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)), 3168 fake(vregNo(i->Xin.Sse64FLo.src)) ); 3169 if (xtra & 0x100) 3170 *p++ = toUChar(xtra & 0xFF); 3171 goto done; 3172 3173 case Xin_SseReRg: 3174 # define XX(_n) *p++ = (_n) 3175 switch (i->Xin.SseReRg.op) { 3176 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break; 3177 case Xsse_OR: XX(0x0F); XX(0x56); break; 3178 case Xsse_XOR: XX(0x0F); XX(0x57); break; 3179 case Xsse_AND: XX(0x0F); XX(0x54); break; 3180 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break; 3181 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break; 3182 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break; 3183 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break; 3184 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break; 3185 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break; 3186 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break; 3187 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break; 3188 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break; 3189 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break; 3190 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break; 3191 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break; 3192 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break; 3193 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break; 3194 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break; 3195 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break; 3196 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break; 3197 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break; 3198 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break; 3199 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break; 3200 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break; 3201 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break; 3202 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break; 3203 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break; 3204 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break; 3205 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break; 3206 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break; 3207 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break; 3208 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break; 3209 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break; 3210 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break; 3211 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break; 3212 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break; 3213 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break; 3214 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break; 3215 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break; 3216 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break; 3217 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break; 3218 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break; 3219 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break; 3220 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break; 3221 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break; 3222 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break; 3223 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break; 3224 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break; 3225 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break; 3226 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break; 3227 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break; 3228 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break; 3229 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break; 3230 
default: goto bad; 3231 } 3232 p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)), 3233 fake(vregNo(i->Xin.SseReRg.src)) ); 3234 # undef XX 3235 goto done; 3236 3237 case Xin_SseCMov: 3238 /* jmp fwds if !condition */ 3239 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1)); 3240 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3241 ptmp = p; 3242 3243 /* movaps %src, %dst */ 3244 *p++ = 0x0F; 3245 *p++ = 0x28; 3246 p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)), 3247 fake(vregNo(i->Xin.SseCMov.src)) ); 3248 3249 /* Fill in the jump offset. */ 3250 *(ptmp-1) = toUChar(p - ptmp); 3251 goto done; 3252 3253 case Xin_SseShuf: 3254 *p++ = 0x66; 3255 *p++ = 0x0F; 3256 *p++ = 0x70; 3257 p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)), 3258 fake(vregNo(i->Xin.SseShuf.src)) ); 3259 *p++ = (UChar)(i->Xin.SseShuf.order); 3260 goto done; 3261 3262 case Xin_EvCheck: { 3263 /* We generate: 3264 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER) 3265 (2 bytes) jns nofail expected taken 3266 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR) 3267 nofail: 3268 */ 3269 /* This is heavily asserted re instruction lengths. It needs to 3270 be. If we get given unexpected forms of .amCounter or 3271 .amFailAddr -- basically, anything that's not of the form 3272 uimm7(%ebp) -- they are likely to fail. */ 3273 /* Note also that after the decl we must be very careful not to 3274 read the carry flag, else we get a partial flags stall. 3275 js/jns avoids that, though. */ 3276 UChar* p0 = p; 3277 /* --- decl 8(%ebp) --- */ 3278 /* "fake(1)" because + there's no register in this encoding; 3279 instead the register + field is used as a sub opcode. The 3280 encoding for "decl r/m32" + is FF /1, hence the fake(1). */ 3281 *p++ = 0xFF; 3282 p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter); 3283 vassert(p - p0 == 3); 3284 /* --- jns nofail --- */ 3285 *p++ = 0x79; 3286 *p++ = 0x03; /* need to check this 0x03 after the next insn */ 3287 vassert(p - p0 == 5); 3288 /* --- jmp* 0(%ebp) --- */ 3289 /* The encoding is FF /4. */ 3290 *p++ = 0xFF; 3291 p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr); 3292 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ 3293 /* And crosscheck .. */ 3294 vassert(evCheckSzB_X86() == 8); 3295 goto done; 3296 } 3297 3298 case Xin_ProfInc: { 3299 /* We generate addl $1,NotKnownYet 3300 adcl $0,NotKnownYet+4 3301 in the expectation that a later call to LibVEX_patchProfCtr 3302 will be used to fill in the immediate fields once the right 3303 value is known. 3304 83 05 00 00 00 00 01 3305 83 15 00 00 00 00 00 3306 */ 3307 *p++ = 0x83; *p++ = 0x05; 3308 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3309 *p++ = 0x01; 3310 *p++ = 0x83; *p++ = 0x15; 3311 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3312 *p++ = 0x00; 3313 /* Tell the caller .. */ 3314 vassert(!(*is_profInc)); 3315 *is_profInc = True; 3316 goto done; 3317 } 3318 3319 default: 3320 goto bad; 3321 } 3322 3323 bad: 3324 ppX86Instr(i, mode64); 3325 vpanic("emit_X86Instr"); 3326 /*NOTREACHED*/ 3327 3328 done: 3329 vassert(p - &buf[0] <= 32); 3330 return p - &buf[0]; 3331 3332 # undef fake 3333 } 3334 3335 3336 /* How big is an event check? See case for Xin_EvCheck in 3337 emit_X86Instr just above. That crosschecks what this returns, so 3338 we can tell if we're inconsistent. */ 3339 Int evCheckSzB_X86 ( void ) 3340 { 3341 return 8; 3342 } 3343 3344 3345 /* NB: what goes on here has to be very closely coordinated with the 3346 emitInstr case for XDirect, above. 
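In particular, the two patchable instructions emitted there, movl $disp_cp_chain_me_to_{slow,fast}EP, %edx followed by call *%edx (7 bytes in total), are exactly what gets rewritten below.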
*/ 3347 VexInvalRange chainXDirect_X86 ( void* place_to_chain, 3348 void* disp_cp_chain_me_EXPECTED, 3349 void* place_to_jump_to ) 3350 { 3351 /* What we're expecting to see is: 3352 movl $disp_cp_chain_me_EXPECTED, %edx 3353 call *%edx 3354 viz 3355 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3356 FF D2 3357 */ 3358 UChar* p = (UChar*)place_to_chain; 3359 vassert(p[0] == 0xBA); 3360 vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); 3361 vassert(p[5] == 0xFF); 3362 vassert(p[6] == 0xD2); 3363 /* And what we want to change it to is: 3364 jmp disp32 where disp32 is relative to the next insn 3365 ud2; 3366 viz 3367 E9 <4 bytes == disp32> 3368 0F 0B 3369 The replacement has the same length as the original. 3370 */ 3371 /* This is the delta we need to put into a JMP d32 insn. It's 3372 relative to the start of the next insn, hence the -5. */ 3373 Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5; 3374 3375 /* And make the modifications. */ 3376 p[0] = 0xE9; 3377 p[1] = (delta >> 0) & 0xFF; 3378 p[2] = (delta >> 8) & 0xFF; 3379 p[3] = (delta >> 16) & 0xFF; 3380 p[4] = (delta >> 24) & 0xFF; 3381 p[5] = 0x0F; p[6] = 0x0B; 3382 /* sanity check on the delta -- top 32 are all 0 or all 1 */ 3383 delta >>= 32; 3384 vassert(delta == 0LL || delta == -1LL); 3385 VexInvalRange vir = { (HWord)place_to_chain, 7 }; 3386 return vir; 3387 } 3388 3389 3390 /* NB: what goes on here has to be very closely coordinated with the 3391 emitInstr case for XDirect, above. */ 3392 VexInvalRange unchainXDirect_X86 ( void* place_to_unchain, 3393 void* place_to_jump_to_EXPECTED, 3394 void* disp_cp_chain_me ) 3395 { 3396 /* What we're expecting to see is: 3397 jmp d32 3398 ud2; 3399 viz 3400 E9 <4 bytes == disp32> 3401 0F 0B 3402 */ 3403 UChar* p = (UChar*)place_to_unchain; 3404 Bool valid = False; 3405 if (p[0] == 0xE9 3406 && p[5] == 0x0F && p[6] == 0x0B) { 3407 /* Check the offset is right. */ 3408 Int s32 = *(Int*)(&p[1]); 3409 if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) { 3410 valid = True; 3411 if (0) 3412 vex_printf("QQQ unchainXDirect_X86: found valid\n"); 3413 } 3414 } 3415 vassert(valid); 3416 /* And what we want to change it to is: 3417 movl $disp_cp_chain_me, %edx 3418 call *%edx 3419 viz 3420 BA <4 bytes value == disp_cp_chain_me_EXPECTED> 3421 FF D2 3422 So it's the same length (convenient, huh). 3423 */ 3424 p[0] = 0xBA; 3425 *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me); 3426 p[5] = 0xFF; 3427 p[6] = 0xD2; 3428 VexInvalRange vir = { (HWord)place_to_unchain, 7 }; 3429 return vir; 3430 } 3431 3432 3433 /* Patch the counter address into a profile inc point, as previously 3434 created by the Xin_ProfInc case for emit_X86Instr. 
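The addl $1 and adcl $0 emitted there each take a 32-bit absolute address; this routine writes the counter's address into the addl and the counter's address plus 4 into the adcl.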
*/ 3435 VexInvalRange patchProfInc_X86 ( void* place_to_patch, 3436 ULong* location_of_counter ) 3437 { 3438 vassert(sizeof(ULong*) == 4); 3439 UChar* p = (UChar*)place_to_patch; 3440 vassert(p[0] == 0x83); 3441 vassert(p[1] == 0x05); 3442 vassert(p[2] == 0x00); 3443 vassert(p[3] == 0x00); 3444 vassert(p[4] == 0x00); 3445 vassert(p[5] == 0x00); 3446 vassert(p[6] == 0x01); 3447 vassert(p[7] == 0x83); 3448 vassert(p[8] == 0x15); 3449 vassert(p[9] == 0x00); 3450 vassert(p[10] == 0x00); 3451 vassert(p[11] == 0x00); 3452 vassert(p[12] == 0x00); 3453 vassert(p[13] == 0x00); 3454 UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter); 3455 p[2] = imm32 & 0xFF; imm32 >>= 8; 3456 p[3] = imm32 & 0xFF; imm32 >>= 8; 3457 p[4] = imm32 & 0xFF; imm32 >>= 8; 3458 p[5] = imm32 & 0xFF; imm32 >>= 8; 3459 imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter); 3460 p[9] = imm32 & 0xFF; imm32 >>= 8; 3461 p[10] = imm32 & 0xFF; imm32 >>= 8; 3462 p[11] = imm32 & 0xFF; imm32 >>= 8; 3463 p[12] = imm32 & 0xFF; imm32 >>= 8; 3464 VexInvalRange vir = { (HWord)place_to_patch, 14 }; 3465 return vir; 3466 } 3467 3468 3469 /*---------------------------------------------------------------*/ 3470 /*--- end host_x86_defs.c ---*/ 3471 /*---------------------------------------------------------------*/ 3472