/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[0]  = hregX86_EAX();
   (*arr)[1]  = hregX86_EBX();
   (*arr)[2]  = hregX86_ECX();
   (*arr)[3]  = hregX86_EDX();
   (*arr)[4]  = hregX86_ESI();
   (*arr)[5]  = hregX86_EDI();
   (*arr)[6]  = hregX86_FAKE0();
   (*arr)[7]  = hregX86_FAKE1();
   (*arr)[8]  = hregX86_FAKE2();
   (*arr)[9]  = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}


/* --------- Condition codes, Intel encoding. --------- */

HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}
/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm   = imm32;
   am->Xam.IRRS.base  = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}
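
/* Illustrative sketch only (not part of the original interface):
   how the two amode forms are built and what ppX86AMode prints for
   them.  The particular registers and offsets are assumptions made
   for the example. */
#if 0
static void example_amodes ( void )
{
   /* 0x10(%esi) -- immediate + register */
   X86AMode* am1 = X86AMode_IR( 0x10, hregX86_ESI() );
   /* 0x4(%eax,%ebx,4) -- immediate + base + index << shift */
   X86AMode* am2 = X86AMode_IRRS( 0x4, hregX86_EAX(), hregX86_EBX(), 2 );
   ppX86AMode(am1); vex_printf("\n");   /* prints: 0x10(%esi) */
   ppX86AMode(am2); vex_printf("\n");   /* prints: 0x4(%eax,%ebx,4) */
}
#endif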
/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op       = LibVEX_Alloc(sizeof(X86RMI));
   op->tag          = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op     = LibVEX_Alloc(sizeof(X86RMI));
   op->tag        = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op    = LibVEX_Alloc(sizeof(X86RMI));
   op->tag       = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op     = LibVEX_Alloc(sizeof(X86RI));
   op->tag       = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op     = LibVEX_Alloc(sizeof(X86RM));
   op->tag       = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op    = LibVEX_Alloc(sizeof(X86RM));
   op->tag      = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}
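
/* Illustrative sketch only: the mode parameter matters for the
   Xrm_Reg form but not for Xrm_Mem.  E.g. for "notl %ebx" the
   register operand itself is modified, whereas for "notl (%eax)"
   only memory is modified and %eax is merely read to form the
   address.  The registers chosen are assumptions for the example. */
#if 0
static void example_rm_usage ( HRegUsage* u )
{
   /* notl %ebx: %ebx is recorded as HRmModify */
   addRegUsage_X86RM(u, X86RM_Reg(hregX86_EBX()), HRmModify);
   /* notl (%eax): %eax is recorded as HRmRead, whatever the mode */
   addRegUsage_X86RM(u, X86RM_Mem(X86AMode_IR(0, hregX86_EAX())),
                        HRmModify);
}
#endif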
/* --------- Instructions. --------- */

HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV: return "mov";
      case Xalu_CMP: return "cmp";
      case Xalu_ADD: return "add";
      case Xalu_SUB: return "sub";
      case Xalu_ADC: return "adc";
      case Xalu_SBB: return "sbb";
      case Xalu_AND: return "and";
      case Xalu_OR:  return "or";
      case Xalu_XOR: return "xor";
      case Xalu_MUL: return "mul";
      default: vpanic("showX86AluOp");
   }
}

HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}
X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i   = LibVEX_Alloc(sizeof(X86Instr));
   i->tag        = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i    = LibVEX_Alloc(sizeof(X86Instr));
   i->tag         = Xin_Lea32;
   i->Xin.Lea32.am  = am;
   i->Xin.Lea32.dst = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_MulL;
   i->Xin.MulL.syned = syned;
   i->Xin.MulL.src   = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i   = LibVEX_Alloc(sizeof(X86Instr));
   i->tag        = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 3);
   return i;
}
X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
   X86Instr* i   = LibVEX_Alloc(sizeof(X86Instr));
   i->tag        = Xin_Goto;
   i->Xin.Goto.cond = cond;
   i->Xin.Goto.dst  = dst;
   i->Xin.Goto.jk   = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i    = LibVEX_Alloc(sizeof(X86Instr));
   i->tag         = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_FpLdCW;
   i->Xin.FpLdCW.addr = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}

X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_SseConst;
   i->Xin.SseConst.con = con;
   i->Xin.SseConst.dst = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz   = toUChar(sz);
   i->Xin.SseLdzLO.reg  = reg;
   i->Xin.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
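
/* Illustrative sketch only: the constructors above compose in the
   obvious way.  This builds "addl $42,%esi" followed by
   "shll $2,%esi"; the registers and constants are assumptions made
   for the example. */
#if 0
static void example_build_insns ( void )
{
   X86Instr* i1 = X86Instr_Alu32R( Xalu_ADD, X86RMI_Imm(42),
                                   hregX86_ESI() );
   X86Instr* i2 = X86Instr_Sh32( Xsh_SHL, 2, hregX86_ESI() );
   ppX86Instr(i1, False); vex_printf("\n");   /* addl $0x2a,%esi */
   ppX86Instr(i2, False); vex_printf("\n");   /* shll $2,%esi */
}
#endif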
void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d] ",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         vex_printf("0x%x", i->Xin.Call.target);
         break;
      case Xin_Goto:
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf("if (%%eflags.%s) { ",
                       showX86CondCode(i->Xin.Goto.cond));
         }
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret) {
            vex_printf("movl $");
            ppIRJumpKind(i->Xin.Goto.jk);
            vex_printf(",%%ebp ; ");
         }
         vex_printf("movl ");
         ppX86RI(i->Xin.Goto.dst);
         vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf(" }");
         }
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                       : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                     : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                     : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                     i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                     i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;

      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      case Xin_Goto:
         addRegUsage_X86RI(u, i->Xin.Goto.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
         addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since ebp is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregX86_EBP());
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                             ? HRmWrite : HRmModify,
                          i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}
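
/* Illustrative sketch only: what the Xin_Call case above records,
   and why the address-temporary convention it states matters.  The
   target address and regparm count are assumptions for the example. */
#if 0
static void example_call_usage ( HRegUsage* u )
{
   /* call[2] 0x12345678: two args assumed already in %eax/%edx; per
      the convention above the emitter will use %ecx as the address
      temporary (0==EAX, 1==EDX, 2==ECX, 3==EDI), which is harmless
      since %ecx is caller-saved and carries no argument here. */
   X86Instr* c = X86Instr_Call( Xcc_ALWAYS, 0x12345678, 2 );
   getRegUsage_X86Instr(u, c, False);
   /* u now records %eax/%edx as read (params) and %eax/%ecx/%edx
      plus all eight xmm registers as written (caller-saved trash). */
}
#endif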
/* local helper */
static void mapReg( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         mapReg(m, &i->Xin.Sh3232.src);
         mapReg(m, &i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         mapRegs_X86RMI(m, i->Xin.Push.src);
         return;
      case Xin_Call:
         return;
      case Xin_Goto:
         mapRegs_X86RI(m, i->Xin.Goto.dst);
         return;
      case Xin_CMov32:
         mapRegs_X86RM(m, i->Xin.CMov32.src);
         mapReg(m, &i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         mapRegs_X86AMode(m, i->Xin.LoadEX.src);
         mapReg(m, &i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         mapReg(m, &i->Xin.Store.src);
         mapRegs_X86AMode(m, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         mapReg(m, &i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         mapReg(m, &i->Xin.Bsfr32.src);
         mapReg(m, &i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         mapRegs_X86AMode(m, i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         mapRegs_X86AMode(m, i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         mapReg(m, &i->Xin.FpUnary.src);
         mapReg(m, &i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         mapReg(m, &i->Xin.FpBinary.srcL);
         mapReg(m, &i->Xin.FpBinary.srcR);
         mapReg(m, &i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
         mapReg(m, &i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
         mapReg(m, &i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         mapReg(m, &i->Xin.Fp64to32.src);
         mapReg(m, &i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         mapReg(m, &i->Xin.FpCMov.src);
         mapReg(m, &i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         return;
      case Xin_FpCmp:
         mapReg(m, &i->Xin.FpCmp.srcL);
         mapReg(m, &i->Xin.FpCmp.srcR);
         mapReg(m, &i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         mapReg(m, &i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         mapReg(m, &i->Xin.SseLdSt.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         break;
      case Xin_SseLdzLO:
         mapReg(m, &i->Xin.SseLdzLO.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         break;
      case Xin_Sse32Fx4:
         mapReg(m, &i->Xin.Sse32Fx4.src);
         mapReg(m, &i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         mapReg(m, &i->Xin.Sse32FLo.src);
         mapReg(m, &i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         mapReg(m, &i->Xin.Sse64Fx2.src);
         mapReg(m, &i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         mapReg(m, &i->Xin.Sse64FLo.src);
         mapReg(m, &i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         mapReg(m, &i->Xin.SseReRg.src);
         mapReg(m, &i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         mapReg(m, &i->Xin.SseCMov.src);
         mapReg(m, &i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         mapReg(m, &i->Xin.SseShuf.src);
         mapReg(m, &i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, mode64);
         vpanic("mapRegs_X86Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Xin_Alu32R) {
      if (i->Xin.Alu32R.op != Xalu_MOV)
         return False;
      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
         return False;
      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
      *dst = i->Xin.Alu32R.dst;
      return True;
   }
   /* Moves between FP regs */
   if (i->tag == Xin_FpUnary) {
      if (i->Xin.FpUnary.op != Xfp_MOV)
         return False;
      *src = i->Xin.FpUnary.src;
      *dst = i->Xin.FpUnary.dst;
      return True;
   }
   if (i->tag == Xin_SseReRg) {
      if (i->Xin.SseReRg.op != Xsse_MOV)
         return False;
      *src = i->Xin.SseReRg.src;
      *dst = i->Xin.SseReRg.dst;
      return True;
   }
   return False;
}
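
/* Illustrative sketch only: what does and does not count as a
   coalescable move.  The virtual register number is an assumption
   made for the example. */
#if 0
static void example_is_move ( void )
{
   HReg src, dst;
   HReg vr = mkHReg(5, HRcInt32, True/*virtual*/);
   /* movl %vr5,%esi -- a coalescable reg-reg move */
   X86Instr* mv = X86Instr_Alu32R( Xalu_MOV, X86RMI_Reg(vr),
                                   hregX86_ESI() );
   vassert( isMove_X86Instr(mv, &src, &dst) );  /* src=vr5, dst=%esi */
   /* movl $0,%esi -- a MOV, but not reg-reg, so not coalescable */
   X86Instr* ld = X86Instr_Alu32R( Xalu_MOV, X86RMI_Imm(0),
                                   hregX86_ESI() );
   vassert( !isMove_X86Instr(ld, &src, &dst) );
}
#endif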
/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}
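
/* Illustrative sketch only: spilling an integer register needs a
   single insn, so i2 is left NULL.  The register and spill offset
   are assumptions made for the example. */
#if 0
static void example_spill ( void )
{
   HInstr *i1, *i2;
   /* Spill %esi to the slot at 0x18(%ebp). */
   genSpill_X86(&i1, &i2, hregX86_ESI(), 0x18, False);
   ppX86Instr((X86Instr*)i1, False);   /* movl %esi,0x18(%ebp) */
   vassert(i2 == NULL);
   /* For HRcFlt64 a 10-byte gstT, and for HRcVec128 a movups, would
      be generated instead. */
}
#endif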
/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */

X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
      vassert(i->Xin.Alu32R.dst != vreg);
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && i->Xin.Alu32R.dst == vreg) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
      vassert(i->Xin.CMov32.dst != vreg);
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}


/* --------- The x86 assembler (bleh.) --------- */

static UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return toUChar(n);
}

static UInt fregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 5);
   return n;
}

static UInt vregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return n;
}

static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}
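
/* Illustrative sketch only: fits8bits accepts exactly the values
   representable as a sign-extended byte, which is what the imm8
   forms of the instructions below can encode. */
#if 0
static void example_fits8bits ( void )
{
   vassert(  fits8bits(0x7F)       );   /*  127: max positive imm8 */
   vassert(  fits8bits(0xFFFFFF80) );   /* -128: min negative imm8 */
   vassert( !fits8bits(0x80)       );   /*  128: needs a 32-bit imm */
}
#endif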
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
               =  10 greg 100, scale index base, d32
*/
static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      if (am->Xam.IR.imm == 0
          && am->Xam.IR.reg != hregX86_ESP()
          && am->Xam.IR.reg != hregX86_EBP() ) {
         *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
         return p;
      }
      if (fits8bits(am->Xam.IR.imm)
          && am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      if (am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      if (am->Xam.IR.reg == hregX86_ESP()
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      if (fits8bits(am->Xam.IRRS.imm)
          && am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
                                          iregNo(am->Xam.IRRS.base));
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
                                          iregNo(am->Xam.IRRS.base));
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
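
/* Illustrative worked example only (registers and offset are
   assumptions): for greg = %eax and am = 4(%esp), doAMode_M takes
   the %esp special case and emits three bytes. */
#if 0
static void example_doAMode_M ( void )
{
   UChar  buf[8];
   UChar* p = doAMode_M( buf, hregX86_EAX(),
                         X86AMode_IR(4, hregX86_ESP()) );
   /* buf now holds 0x44 0x24 0x04: mod=01 reg=000(%eax) rm=100,
      the 0x24 SIB escape (none,%esp), then disp8 0x04.  Preceded
      by opcode 0x8B this is exactly "movl 4(%esp),%eax". */
   vassert(p - buf == 3);
}
#endif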
/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   return p;
}


/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:    break;
      case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
                       *p++ = 0xD9; *p++ = 0xF2; /* fptan */
                       *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
                       break;
      default: vpanic("do_fop1_st: unknown op");
   }
   return p;
}

/* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
#  define fake(_n) mkHReg((_n), HRcInt32, False)
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p    = doAMode_R(p, fake(subopc), fake(i));
   return p;
#  undef fake
}

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each byte is either 0x00 or 0xFF depending on the corresponding
   bit in tags[]. */
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}
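/* Note (illustrative): the tags==0 and tags==0xF cases use opcode
   0x6A, pushl imm8, whose immediate is sign-extended to 32 bits --
   so "6A 00" pushes 0x00000000 and "6A FF" pushes 0xFFFFFFFF in two
   bytes each.  The general case needs the five-byte "68 imm32"
   form instead. */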
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
                    Bool mode64, void* dispatch )
{
   UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;

   UInt   xtra;
   UChar* p = &buf[0];
   UChar* ptmp;
   vassert(nbuf >= 32);
   vassert(mode64 == False);

   /* Wrap an integer as an int register, for use assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
#  define fake(_n) mkHReg((_n), HRcInt32, False)

   /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */

   switch (i->tag) {

   case Xin_Alu32R:
      /* Deal specially with MOV */
      if (i->Xin.Alu32R.op == Xalu_MOV) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Imm:
               *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               goto done;
            case Xrmi_Reg:
               *p++ = 0x89;
               p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                                i->Xin.Alu32R.dst);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x8B;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Reg:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            case Xrmi_Imm:
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               } else {
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32R.src->tag) {
         case Xrmi_Imm:
            if (i->Xin.Alu32R.dst == hregX86_EAX()
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else {
               *p++ = 0x81;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            }
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                             i->Xin.Alu32R.dst);
            goto done;
         case Xrmi_Mem:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
            goto done;
         default:
            goto bad;
      }
      break;
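   /* A worked example (illustrative only): "addl $1, %ecx" arrives
      here as Xalu_ADD with an Xrmi_Imm source.  The immediate fits
      in 8 bits, so it takes the 0x83 (Grp1 Ev,Ib) path:
         0x83
         doAMode_R(fake(0), %ecx) = 0xC1  (mod=11, /0 = ADD, rm=ecx)
         0x01                             (imm8)
      i.e. 83 C1 01.  fake(0) simply plants the sub-opcode 0 in the
      ModRM reg field. */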
   case Xin_Alu32M:
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
            case Xri_Reg:
               *p++ = 0x89;
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                                i->Xin.Alu32M.dst);
               goto done;
            case Xri_Imm:
               *p++ = 0xC7;
               p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32M.src->tag) {
         case Xri_Reg:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                             i->Xin.Alu32M.dst);
            goto done;
         case Xri_Imm:
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            } else {
               *p++ = 0x81;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            }
         default:
            goto bad;
      }
      break;

   case Xin_Sh32:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
      } else {
         *p++ = toUChar(opc_imm);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);
      }
      goto done;
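   /* Illustrative encodings for the above: a zero src field means
      "shift by %cl", so "shll %cl, %edx" is D3 E2 (D3 /4, rm=edx),
      while "shll $2, %eax" takes the immediate form C1 E0 02
      (C1 /4, rm=eax, imm8=2). */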
   case Xin_Test32:
      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         *p++ = 0xF7;
         p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      } else {
         /* testl $imm32, amode */
         *p++ = 0xF7;
         p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      }

   case Xin_Unary32:
      if (i->Xin.Unary32.op == Xun_NOT) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
         goto done;
      }
      if (i->Xin.Unary32.op == Xun_NEG) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
         goto done;
      }
      break;

   case Xin_Lea32:
      *p++ = 0x8D;
      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
      goto done;

   case Xin_MulL:
      subopc = i->Xin.MulL.syned ? 5 : 4;
      *p++ = 0xF7;
      switch (i->Xin.MulL.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Div:
      subopc = i->Xin.Div.syned ? 7 : 6;
      *p++ = 0xF7;
      switch (i->Xin.Div.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Sh3232:
      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         *p++ = 0x0F;
         if (i->Xin.Sh3232.op == Xsh_SHL) {
            *p++ = 0xA5;
         } else {
            *p++ = 0xAD;
         }
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
         goto done;
      }
      break;

   case Xin_Push:
      switch (i->Xin.Push.src->tag) {
         case Xrmi_Mem:
            *p++ = 0xFF;
            p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
            goto done;
         case Xrmi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
            goto done;
         default:
            goto bad;
      }

   case Xin_Call:
      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregNo(hregX86_EAX()); break;
         case 1: irno = iregNo(hregX86_EDX()); break;
         case 2: irno = iregNo(hregX86_ECX()); break;
         case 3: irno = iregNo(hregX86_EDI()); break;
         default: vpanic("emit_X86Instr:call:regparms");
      }
      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */
      }
      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      /* call *%tmp */
      *p++ = 0xFF;
      *p++ = toUChar(0xD0 + irno);
      goto done;
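   /* Note (illustrative): the "cond ^ 1" trick works because the
      Intel condition codes come in complementary even/odd pairs, so
      flipping the bottom bit inverts the condition.  E.g. for
      cond == Xcc_Z (4) this emits 75 07 -- "jnz .+7" -- skipping the
      five-byte movl and the two-byte indirect call when the
      condition fails. */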
   case Xin_Goto:
      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* If this is a non-boring jump kind, set %ebp (the guest state
         pointer) appropriately. */
      /* movl $magic_number, %ebp */
      switch (i->Xin.Goto.jk) {
         case Ijk_ClientReq:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
         case Ijk_Sys_int128:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
         case Ijk_Sys_int129:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
         case Ijk_Sys_int130:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
         case Ijk_Yield:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD); break;
         case Ijk_YieldNoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break;
         case Ijk_EmWarn:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_EMWARN); break;
         case Ijk_MapFail:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
         case Ijk_NoDecode:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NODECODE); break;
         case Ijk_TInval:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_TINVAL); break;
         case Ijk_NoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
         case Ijk_Sys_sysenter:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
         case Ijk_SigTRAP:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
         case Ijk_SigSEGV:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
         case Ijk_Ret:
         case Ijk_Call:
         case Ijk_Boring:
            break;
         default:
            ppIRJumpKind(i->Xin.Goto.jk);
            vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
      }

      /* Get the destination address into %eax */
      if (i->Xin.Goto.dst->tag == Xri_Imm) {
         /* movl $immediate, %eax */
         *p++ = 0xB8;
         p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
      } else {
         vassert(i->Xin.Goto.dst->tag == Xri_Reg);
         /* movl %reg, %eax */
         if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
         }
      }

      /* Get the dispatcher address into %edx.  This has to happen
         after the load of %eax since %edx might be carrying the value
         destined for %eax immediately prior to this Xin_Goto. */
      vassert(sizeof(UInt) == sizeof(void*));
      vassert(dispatch != NULL);
      /* movl $imm32, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)Ptr_to_ULong(dispatch));

      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 20);
         *ptmp = toUChar(delta-1);
      }
      goto done;
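   /* Backpatch arithmetic (illustrative): ptmp points at the Jcc's
      displacement byte.  At fixup time, delta = p - ptmp counts that
      byte plus everything emitted after it, but the jump displacement
      is relative to the end of the Jcc (ptmp+1), hence the stored
      value is delta-1. */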
   case Xin_CMov32:
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      /*
      *p++ = 0x0F;
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
         goto done;
      }
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
         goto done;
      }
      */

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      switch (i->Xin.CMov32.src->tag) {
         case Xrm_Reg:
            /* Big sigh.  This is the 0x89 form, movl G -> E ... */
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
                             i->Xin.CMov32.dst);
            break;
         case Xrm_Mem:
            /* ... whereas this is the 0x8B form, movl E -> G.  That's
               why the args to doAMode_R appear to be the wrong way
               round in the Xrm_Reg case: there the source, not the
               destination, goes in the greg slot. */
            *p++ = 0x8B;
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
            break;
         default:
            goto bad;
      }
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

      break;

   case Xin_LoadEX:
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      break;

   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregNo(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
      }
      goto done;

   case Xin_Bsfr32:
      *p++ = 0x0F;
      if (i->Xin.Bsfr32.isFwds) {
         *p++ = 0xBC;
      } else {
         *p++ = 0xBD;
      }
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
      goto done;

   case Xin_MFence:
      /* see comment in host_x86_defs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;
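   /* Note (illustrative): on pre-SSE2 hardware there is no mfence,
      so a serialising locked read-modify-write of the stack top --
      "lock addl $0, 0(%esp)", bytes F0 83 44 24 00 00 -- is used to
      get the same ordering effect; with SSE1 an sfence is emitted
      as well to cover store ordering. */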
   case Xin_ACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressable on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
      *p++ = 0x0F;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
      goto done;

   case Xin_DACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
      goto done;

   case Xin_Store:
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register. */
         *p++ = 0x66;
         *p++ = 0x89;
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
         goto done;
      }

      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregNo(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly */
            *p++ = 0x88;
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            goto done;
         } else {
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned
               in the amode. */
            HReg swap = INVALID_HREG;
            HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
                  ecx = hregX86_ECX(), edx = hregX86_EDX();
            Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
            HRegUsage u;
            Int j;
            initHRegUsage(&u);
            addRegUsage_X86AMode(&u, i->Xin.Store.dst);
            for (j = 0; j < u.n_used; j++) {
               HReg r = u.hreg[j];
               if (r == eax) a_ok = False;
               if (r == ebx) b_ok = False;
               if (r == ecx) c_ok = False;
               if (r == edx) d_ok = False;
            }
            if (a_ok) swap = eax;
            if (b_ok) swap = ebx;
            if (c_ok) swap = ecx;
            if (d_ok) swap = edx;
            vassert(swap != INVALID_HREG);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8(%swap), (dst) */
            *p++ = 0x88;
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            goto done;
         }
      } /* if (i->Xin.Store.sz == 1) */
      break;
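   /* A worked example (illustrative only): storing the low byte of
      %edi to 0(%eax).  The amode uses %eax, so the scan above leaves
      swap = %edx (the last still-free candidate assigned).  We emit
         87 FA     xchgl %edi,%edx
         88 10     movb  %dl,(%eax)
         87 FA     xchgl %edi,%edx
      restoring both registers afterwards. */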
   case Xin_FpUnary:
      /* gop %src, %dst
         --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
      goto done;

   case Xin_FpBinary:
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9;
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7 */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
         *p++ = 0xD9;
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM:  *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         }
         p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);
         goto done;
      }
      /* General case */
      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                        1+hregNumber(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
      goto done;
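   /* How the %fakeN registers map onto the FP stack (illustrative):
      the allocator works with flat registers %fake0..%fake5, living
      in %st(0)..%st(5).  Pushing a value shifts them all down by
      one, which is why a source %fakeN is loaded as %st(N) before
      the push but stored back as %st(1+N) after it.  E.g. FpUnary
      Xfp_NEG with src=%fake1, dst=%fake2 comes out as
         DD C7   ffree %st(7)
         D9 C1   fld   %st(1)
         D9 E0   fchs
         DD DB   fstp  %st(3)    (i.e. 1+2, allowing for the push) */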
   case Xin_FpLdSt:
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
         */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,load)");
         }
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
         */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,store)");
         }
         goto done;
      }
      break;

   case Xin_FpLdStI:
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 5; break;
            case 4:  opc = 0xDB; subopc_imm = 0; break;
            case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         }
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 7; break;
            case 4:  opc = 0xDB; subopc_imm = 3; break;
            case 2:  opc = 0xDF; subopc_imm = 3; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         }
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         goto done;
      }
      break;

   case Xin_Fp64to32:
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
      /* subl $4, %esp */
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
      /* fstps (%esp) */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
      /* flds (%esp) */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
      /* addl $4, %esp */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
      goto done;
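   /* Why the memory round-trip above (illustrative): the x87 stack
      computes at full 80-bit precision, so the only simple way to
      force a value down to F32 precision is to store it to memory
      as a 32-bit float (fstps) and load it straight back (flds);
      the four stack-pointer bytes are scratch space for exactly
      that. */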
   case Xin_FpCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
      p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_FpLdCW:
      *p++ = 0xD9;
      p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
      goto done;

   case Xin_FpStSW_AX:
      /* note, this emits fnstsw %ax, not fstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      goto done;

   case Xin_FpCmp:
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst
      */
      /* ffree %st7 */
      p = do_ffree_st7(p);
      /* fpush %fL */
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) */
      *p++ = 0xDD;
      *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
      /* fnstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      /* movl %eax, %dst */
      *p++ = 0x89;
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
      goto done;

   case Xin_SseConst: {
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movups (%esp), %xmm-dst */
      *p++ = 0x0F;
      *p++ = 0x10;
      *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
      *p++ = 0x24;
      /* addl $16, %esp */
      *p++ = 0x83;
      *p++ = 0xC4;
      *p++ = 0x10;
      goto done;
   }
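   /* Layout note (illustrative): the four pushes build the 16-byte
      constant on the stack from the top nibble of 'con' downwards.
      Since pushes move %esp down, the last word pushed (from
      con[3:0]) ends up at (%esp), i.e. in the lowest lane of the
      xmm register after the movups.  So e.g. con = 0x000F yields an
      xmm value whose bytes 0..3 are 0xFF and bytes 4..15 are 0x00. */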
   case Xin_SseLdSt:
      *p++ = 0x0F;
      *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
      goto done;

   case Xin_SseLdzLO:
      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
                       i->Xin.SseLdzLO.addr);
      goto done;

   case Xin_Sse32Fx4:
      xtra = 0;
      *p++ = 0x0F;
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
                       fake(vregNo(i->Xin.Sse32Fx4.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64Fx2:
      xtra = 0;
      *p++ = 0x66;
      *p++ = 0x0F;
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
                       fake(vregNo(i->Xin.Sse64Fx2.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse32FLo:
      xtra = 0;
      *p++ = 0xF3;
      *p++ = 0x0F;
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
                       fake(vregNo(i->Xin.Sse32FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
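   /* Encoding note (illustrative): all four packed/scalar FP cases
      share one opcode map and differ only in the mandatory prefix --
      none = 32Fx4 (e.g. addps), 66 = 64Fx2 (addpd), F3 = 32FLo
      (addss), F2 = 64FLo (addsd).  So "addss %xmm1, %xmm2" comes
      out as F3 0F 58 D1, where D1 = ModRM(11, dst=2, src=1).  The
      CMP variants carry their comparison predicate in a trailing
      imm8, which is what the 0x100 tag in 'xtra' arranges. */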
   case Xin_Sse64FLo:
      xtra = 0;
      *p++ = 0xF2;
      *p++ = 0x0F;
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
                       fake(vregNo(i->Xin.Sse64FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_SseReRg:
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                 XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                XX(0x0F); XX(0x57); break;
         case Xsse_AND:                XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
                       fake(vregNo(i->Xin.SseReRg.src)) );
#     undef XX
      goto done;
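   /* For example (illustrative): Xsse_ADD16 with dst=%xmm2, src=%xmm1
      emits 66 0F FD D1, i.e. "paddw %xmm1, %xmm2" -- the fake()
      wrapper just lets the integer-register ModRM machinery place
      xmm register numbers in the reg and rm fields. */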
   case Xin_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movaps %src, %dst */
      *p++ = 0x0F;
      *p++ = 0x28;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
                       fake(vregNo(i->Xin.SseCMov.src)) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_SseShuf:
      *p++ = 0x66;
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
                       fake(vregNo(i->Xin.SseShuf.src)) );
      *p++ = (UChar)(i->Xin.SseShuf.order);
      goto done;

   default:
      goto bad;
   }

  bad:
   ppX86Instr(i, mode64);
   vpanic("emit_X86Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];

#  undef fake
}

/*---------------------------------------------------------------*/
/*--- end                                     host_x86_defs.c ---*/
/*---------------------------------------------------------------*/