/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[8]
      = { "%eax", "%ecx", "%edx", "%ebx",
          "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[0]  = hregX86_EAX();
   (*arr)[1]  = hregX86_EBX();
   (*arr)[2]  = hregX86_ECX();
   (*arr)[3]  = hregX86_EDX();
   (*arr)[4]  = hregX86_ESI();
   (*arr)[5]  = hregX86_EDI();
   (*arr)[6]  = hregX86_FAKE0();
   (*arr)[7]  = hregX86_FAKE1();
   (*arr)[8]  = hregX86_FAKE2();
   (*arr)[9]  = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}


/* --------- Condition codes, Intel encoding. --------- */

HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("ppX86CondCode");
   }
}
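
/* A quick gloss (illustrative, standard IA-32 semantics): "b"/"nb"
   are the unsigned below/not-below tests on CF, while "l"/"nl" are
   the signed less/not-less tests on SF^OF.  Xcc_ALWAYS has no Intel
   encoding; judging from the vasserts in the constructors below, it
   appears meaningful only to the unconditional Call/Goto forms. */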

/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}


/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}
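
/* Worked example (illustrative): X86RMI_Mem(X86AMode_IRRS(4, b, x, 2)),
   with b bound to %ebx and x to %ecx, pretty-prints as
   "0x4(%ebx,%ecx,4)" -- ppX86AMode shows the shift as the scale
   1 << shift, matching AT&T syntax. */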

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op = LibVEX_Alloc(sizeof(X86RI));
   op->tag = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op = LibVEX_Alloc(sizeof(X86RI));
   op->tag = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op = LibVEX_Alloc(sizeof(X86RM));
   op->tag = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op = LibVEX_Alloc(sizeof(X86RM));
   op->tag = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be either a source or a destination operand,
   we have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}
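
/* For instance (illustrative): the CMov32 handling further down
   passes HRmRead for its source X86RM but marks the destination
   register HRmModify, since a conditional move may leave the old
   destination value in place. */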

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV: return "mov";
      case Xalu_CMP: return "cmp";
      case Xalu_ADD: return "add";
      case Xalu_SUB: return "sub";
      case Xalu_ADC: return "adc";
      case Xalu_SBB: return "sbb";
      case Xalu_AND: return "and";
      case Xalu_OR:  return "or";
      case Xalu_XOR: return "xor";
      case Xalu_MUL: return "mul";
      default: vpanic("showX86AluOp");
   }
}

HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
return "pavgw"; 532 case Xsse_MAX16S: return "pmaxw"; 533 case Xsse_MAX8U: return "pmaxub"; 534 case Xsse_MIN16S: return "pminw"; 535 case Xsse_MIN8U: return "pminub"; 536 case Xsse_CMPEQ8: return "pcmpeqb"; 537 case Xsse_CMPEQ16: return "pcmpeqw"; 538 case Xsse_CMPEQ32: return "pcmpeqd"; 539 case Xsse_CMPGT8S: return "pcmpgtb"; 540 case Xsse_CMPGT16S: return "pcmpgtw"; 541 case Xsse_CMPGT32S: return "pcmpgtd"; 542 case Xsse_SHL16: return "psllw"; 543 case Xsse_SHL32: return "pslld"; 544 case Xsse_SHL64: return "psllq"; 545 case Xsse_SHR16: return "psrlw"; 546 case Xsse_SHR32: return "psrld"; 547 case Xsse_SHR64: return "psrlq"; 548 case Xsse_SAR16: return "psraw"; 549 case Xsse_SAR32: return "psrad"; 550 case Xsse_PACKSSD: return "packssdw"; 551 case Xsse_PACKSSW: return "packsswb"; 552 case Xsse_PACKUSW: return "packuswb"; 553 case Xsse_UNPCKHB: return "punpckhb"; 554 case Xsse_UNPCKHW: return "punpckhw"; 555 case Xsse_UNPCKHD: return "punpckhd"; 556 case Xsse_UNPCKHQ: return "punpckhq"; 557 case Xsse_UNPCKLB: return "punpcklb"; 558 case Xsse_UNPCKLW: return "punpcklw"; 559 case Xsse_UNPCKLD: return "punpckld"; 560 case Xsse_UNPCKLQ: return "punpcklq"; 561 default: vpanic("showX86SseOp"); 562 } 563 } 564 565 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { 566 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 567 i->tag = Xin_Alu32R; 568 i->Xin.Alu32R.op = op; 569 i->Xin.Alu32R.src = src; 570 i->Xin.Alu32R.dst = dst; 571 return i; 572 } 573 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) { 574 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 575 i->tag = Xin_Alu32M; 576 i->Xin.Alu32M.op = op; 577 i->Xin.Alu32M.src = src; 578 i->Xin.Alu32M.dst = dst; 579 vassert(op != Xalu_MUL); 580 return i; 581 } 582 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) { 583 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 584 i->tag = Xin_Sh32; 585 i->Xin.Sh32.op = op; 586 i->Xin.Sh32.src = src; 587 i->Xin.Sh32.dst = dst; 588 return i; 589 } 590 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) { 591 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 592 i->tag = Xin_Test32; 593 i->Xin.Test32.imm32 = imm32; 594 i->Xin.Test32.dst = dst; 595 return i; 596 } 597 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { 598 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 599 i->tag = Xin_Unary32; 600 i->Xin.Unary32.op = op; 601 i->Xin.Unary32.dst = dst; 602 return i; 603 } 604 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { 605 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 606 i->tag = Xin_Lea32; 607 i->Xin.Lea32.am = am; 608 i->Xin.Lea32.dst = dst; 609 return i; 610 } 611 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { 612 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 613 i->tag = Xin_MulL; 614 i->Xin.MulL.syned = syned; 615 i->Xin.MulL.src = src; 616 return i; 617 } 618 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) { 619 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 620 i->tag = Xin_Div; 621 i->Xin.Div.syned = syned; 622 i->Xin.Div.src = src; 623 return i; 624 } 625 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) { 626 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 627 i->tag = Xin_Sh3232; 628 i->Xin.Sh3232.op = op; 629 i->Xin.Sh3232.amt = amt; 630 i->Xin.Sh3232.src = src; 631 i->Xin.Sh3232.dst = dst; 632 vassert(op == Xsh_SHL || op == Xsh_SHR); 633 return i; 634 } 635 X86Instr* X86Instr_Push( X86RMI* src ) { 636 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); 637 i->tag = Xin_Push; 638 i->Xin.Push.src = src; 639 return i; 640 } 641 
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Call;
   i->Xin.Call.cond = cond;
   i->Xin.Call.target = target;
   i->Xin.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 3);
   return i;
}
X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Goto;
   i->Xin.Goto.cond = cond;
   i->Xin.Goto.dst = dst;
   i->Xin.Goto.jk = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src = src;
   i->Xin.CMov32.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned = syned;
   i->Xin.LoadEX.src = src;
   i->Xin.LoadEX.dst = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Store;
   i->Xin.Store.sz = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src = src;
   i->Xin.Bsfr32.dst = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpUnary;
   i->Xin.FpUnary.op = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpBinary;
   i->Xin.FpBinary.op = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz = sz;
   i->Xin.FpLdSt.reg = reg;
   i->Xin.FpLdSt.addr = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz = sz;
   i->Xin.FpLdStI.reg = reg;
   i->Xin.FpLdStI.addr = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src = src;
   i->Xin.FpCMov.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdCW;
   i->Xin.FpLdCW.addr = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst = dst;
   return i;
}

X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseConst;
   i->Xin.SseConst.con = con;
   i->Xin.SseConst.dst = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg = reg;
   i->Xin.SseLdSt.addr = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz = toUChar(sz);
   i->Xin.SseLdzLO.reg = reg;
   i->Xin.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseReRg;
   i->Xin.SseReRg.op = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src = src;
   i->Xin.SseCMov.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src = src;
   i->Xin.SseShuf.dst = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
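
/* Note (illustrative): SseShuf's "order" byte is the pshufd
   immediate; e.g. order 0x1B (binary 00 01 10 11) selects source
   lanes 3,2,1,0 and so reverses the four 32-bit lanes. */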

void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d] ",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         vex_printf("0x%x", i->Xin.Call.target);
         break;
      case Xin_Goto:
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf("if (%%eflags.%s) { ",
                       showX86CondCode(i->Xin.Goto.cond));
         }
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret) {
            vex_printf("movl $");
            ppIRJumpKind(i->Xin.Goto.jk);
            vex_printf(",%%ebp ; ");
         }
         vex_printf("movl ");
         ppX86RI(i->Xin.Goto.dst);
         vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
            vex_printf(" }");
         }
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                    : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
"l" : "w"); 1059 ppHRegX86(i->Xin.FpLdStI.reg); 1060 vex_printf(", "); 1061 ppX86AMode(i->Xin.FpLdStI.addr); 1062 } 1063 return; 1064 case Xin_Fp64to32: 1065 vex_printf("gdtof "); 1066 ppHRegX86(i->Xin.Fp64to32.src); 1067 vex_printf(","); 1068 ppHRegX86(i->Xin.Fp64to32.dst); 1069 return; 1070 case Xin_FpCMov: 1071 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond)); 1072 ppHRegX86(i->Xin.FpCMov.src); 1073 vex_printf(","); 1074 ppHRegX86(i->Xin.FpCMov.dst); 1075 return; 1076 case Xin_FpLdCW: 1077 vex_printf("fldcw "); 1078 ppX86AMode(i->Xin.FpLdCW.addr); 1079 return; 1080 case Xin_FpStSW_AX: 1081 vex_printf("fstsw %%ax"); 1082 return; 1083 case Xin_FpCmp: 1084 vex_printf("gcmp "); 1085 ppHRegX86(i->Xin.FpCmp.srcL); 1086 vex_printf(","); 1087 ppHRegX86(i->Xin.FpCmp.srcR); 1088 vex_printf(","); 1089 ppHRegX86(i->Xin.FpCmp.dst); 1090 break; 1091 case Xin_SseConst: 1092 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); 1093 ppHRegX86(i->Xin.SseConst.dst); 1094 break; 1095 case Xin_SseLdSt: 1096 vex_printf("movups "); 1097 if (i->Xin.SseLdSt.isLoad) { 1098 ppX86AMode(i->Xin.SseLdSt.addr); 1099 vex_printf(","); 1100 ppHRegX86(i->Xin.SseLdSt.reg); 1101 } else { 1102 ppHRegX86(i->Xin.SseLdSt.reg); 1103 vex_printf(","); 1104 ppX86AMode(i->Xin.SseLdSt.addr); 1105 } 1106 return; 1107 case Xin_SseLdzLO: 1108 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d"); 1109 ppX86AMode(i->Xin.SseLdzLO.addr); 1110 vex_printf(","); 1111 ppHRegX86(i->Xin.SseLdzLO.reg); 1112 return; 1113 case Xin_Sse32Fx4: 1114 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op)); 1115 ppHRegX86(i->Xin.Sse32Fx4.src); 1116 vex_printf(","); 1117 ppHRegX86(i->Xin.Sse32Fx4.dst); 1118 return; 1119 case Xin_Sse32FLo: 1120 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op)); 1121 ppHRegX86(i->Xin.Sse32FLo.src); 1122 vex_printf(","); 1123 ppHRegX86(i->Xin.Sse32FLo.dst); 1124 return; 1125 case Xin_Sse64Fx2: 1126 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op)); 1127 ppHRegX86(i->Xin.Sse64Fx2.src); 1128 vex_printf(","); 1129 ppHRegX86(i->Xin.Sse64Fx2.dst); 1130 return; 1131 case Xin_Sse64FLo: 1132 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op)); 1133 ppHRegX86(i->Xin.Sse64FLo.src); 1134 vex_printf(","); 1135 ppHRegX86(i->Xin.Sse64FLo.dst); 1136 return; 1137 case Xin_SseReRg: 1138 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op)); 1139 ppHRegX86(i->Xin.SseReRg.src); 1140 vex_printf(","); 1141 ppHRegX86(i->Xin.SseReRg.dst); 1142 return; 1143 case Xin_SseCMov: 1144 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond)); 1145 ppHRegX86(i->Xin.SseCMov.src); 1146 vex_printf(","); 1147 ppHRegX86(i->Xin.SseCMov.dst); 1148 return; 1149 case Xin_SseShuf: 1150 vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order); 1151 ppHRegX86(i->Xin.SseShuf.src); 1152 vex_printf(","); 1153 ppHRegX86(i->Xin.SseShuf.dst); 1154 return; 1155 1156 default: 1157 vpanic("ppX86Instr"); 1158 } 1159 } 1160 1161 /* --------- Helpers for register allocation. 

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      case Xin_Goto:
         addRegUsage_X86RI(u, i->Xin.Goto.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
         addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
         if (i->Xin.Goto.jk != Ijk_Boring
             && i->Xin.Goto.jk != Ijk_Call
             && i->Xin.Goto.jk != Ijk_Ret)
            /* note, this is irrelevant since ebp is not actually
               available to the allocator.  But still .. */
            addHRegUse(u, HRmWrite, hregX86_EBP());
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                    i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                    i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                    i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                          ? HRmWrite : HRmModify,
                       i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}

/* local helper */
static void mapReg( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         mapReg(m, &i->Xin.Sh3232.src);
         mapReg(m, &i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         mapRegs_X86RMI(m, i->Xin.Push.src);
         return;
      case Xin_Call:
         return;
      case Xin_Goto:
         mapRegs_X86RI(m, i->Xin.Goto.dst);
         return;
      case Xin_CMov32:
         mapRegs_X86RM(m, i->Xin.CMov32.src);
         mapReg(m, &i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         mapRegs_X86AMode(m, i->Xin.LoadEX.src);
         mapReg(m, &i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         mapReg(m, &i->Xin.Store.src);
         mapRegs_X86AMode(m, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         mapReg(m, &i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         mapReg(m, &i->Xin.Bsfr32.src);
         mapReg(m, &i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         mapRegs_X86AMode(m, i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         mapRegs_X86AMode(m, i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         mapReg(m, &i->Xin.FpUnary.src);
         mapReg(m, &i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         mapReg(m, &i->Xin.FpBinary.srcL);
         mapReg(m, &i->Xin.FpBinary.srcR);
         mapReg(m, &i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
         mapReg(m, &i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
         mapReg(m, &i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         mapReg(m, &i->Xin.Fp64to32.src);
         mapReg(m, &i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         mapReg(m, &i->Xin.FpCMov.src);
         mapReg(m, &i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         return;
      case Xin_FpCmp:
         mapReg(m, &i->Xin.FpCmp.srcL);
         mapReg(m, &i->Xin.FpCmp.srcR);
         mapReg(m, &i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         mapReg(m, &i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         mapReg(m, &i->Xin.SseLdSt.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         break;
      case Xin_SseLdzLO:
         mapReg(m, &i->Xin.SseLdzLO.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         break;
      case Xin_Sse32Fx4:
         mapReg(m, &i->Xin.Sse32Fx4.src);
         mapReg(m, &i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         mapReg(m, &i->Xin.Sse32FLo.src);
         mapReg(m, &i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         mapReg(m, &i->Xin.Sse64Fx2.src);
         mapReg(m, &i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         mapReg(m, &i->Xin.Sse64FLo.src);
         mapReg(m, &i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         mapReg(m, &i->Xin.SseReRg.src);
         mapReg(m, &i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         mapReg(m, &i->Xin.SseCMov.src);
         mapReg(m, &i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         mapReg(m, &i->Xin.SseShuf.src);
         mapReg(m, &i->Xin.SseShuf.dst);
         return;
      default:
         ppX86Instr(i, mode64);
         vpanic("mapRegs_X86Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Xin_Alu32R) {
      if (i->Xin.Alu32R.op != Xalu_MOV)
         return False;
      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
         return False;
      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
      *dst = i->Xin.Alu32R.dst;
      return True;
   }
   /* Moves between FP regs */
   if (i->tag == Xin_FpUnary) {
      if (i->Xin.FpUnary.op != Xfp_MOV)
         return False;
      *src = i->Xin.FpUnary.src;
      *dst = i->Xin.FpUnary.dst;
      return True;
   }
   if (i->tag == Xin_SseReRg) {
      if (i->Xin.SseReRg.op != Xsse_MOV)
         return False;
      *src = i->Xin.SseReRg.src;
      *dst = i->Xin.SseReRg.dst;
      return True;
   }
   return False;
}


/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */
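
/* For example (illustrative): spilling an HRcInt32 rreg at offset
   0x40 yields "movl %reg,0x40(%ebp)" and the matching reload is
   "movl 0x40(%ebp),%reg"; plain moves are used precisely because
   they leave %eflags untouched. */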

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */

X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
      vassert(i->Xin.Alu32R.dst != vreg);
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && i->Xin.Alu32R.dst == vreg) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
      vassert(i->Xin.CMov32.dst != vreg);
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}


/* --------- The x86 assembler (bleh.) --------- */

static UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return toUChar(n);
}

static UInt fregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 5);
   return n;
}

static UInt vregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return n;
}

static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}
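
/* E.g. (illustrative): fits8bits(0x7F) and fits8bits(0xFFFFFF80)
   hold, since 0x7F and -128 survive the sign-extension round trip,
   but fits8bits(0x80) does not -- 128 is not representable as a
   signed byte. */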

/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
                       =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
                       =  10 greg 100, scale index base, d32
*/
static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      if (am->Xam.IR.imm == 0
          && am->Xam.IR.reg != hregX86_ESP()
          && am->Xam.IR.reg != hregX86_EBP() ) {
         *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
         return p;
      }
      if (fits8bits(am->Xam.IR.imm)
          && am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      if (am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      if (am->Xam.IR.reg == hregX86_ESP()
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      if (fits8bits(am->Xam.IRRS.imm)
          && am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                      am->Xam.IRRS.base);
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                      am->Xam.IRRS.base);
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
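
/* Worked example (illustrative): with greg = %ecx and
   am = X86AMode_IR(4, %ebx), doAMode_M takes the d8(ereg) row above
   and emits mod=01 reg=001 rm=011 then disp8, i.e. bytes 4B 04;
   preceded by opcode 0x89 that is "movl %ecx,0x4(%ebx)". */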


/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   return p;
}


/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:    break;
      case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
                       *p++ = 0xD9; *p++ = 0xF2; /* fptan */
                       *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
                       break;
      default: vpanic("do_fop1_st: unknown op");
   }
   return p;
}

/* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
#  define fake(_n) mkHReg((_n), HRcInt32, False)
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p    = doAMode_R(p, fake(subopc), fake(i));
   return p;
#  undef fake
}

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each pushed byte is either 0x00 or 0xFF depending on the
   corresponding bit in tags[].
*/
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}

/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
                    Bool mode64,
                    void* dispatch_unassisted,
                    void* dispatch_assisted )
{
   UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   UInt xtra;
   UChar* p = &buf[0];
   UChar* ptmp;
   vassert(nbuf >= 32);
   vassert(mode64 == False);

   /* Wrap an integer as an int register, for use in assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
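   /* Worked example (illustrative only): the Grp1 immediate form
      "81 /5 id" is subl $imm32; doAMode_R(p, fake(5), dst) plants the
      sub-opcode 5 in the reg field of the mod-reg-rm byte, so for
      dst = %ecx the bytes are 81 E9 followed by the immediate,
      i.e. "subl $imm32,%ecx". */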
#  define fake(_n) mkHReg((_n), HRcInt32, False)

   /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */

   switch (i->tag) {

   case Xin_Alu32R:
      /* Deal specially with MOV */
      if (i->Xin.Alu32R.op == Xalu_MOV) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Imm:
               *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               goto done;
            case Xrmi_Reg:
               *p++ = 0x89;
               p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                                i->Xin.Alu32R.dst);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x8B;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Reg:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            case Xrmi_Imm:
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               } else {
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32R.src->tag) {
         case Xrmi_Imm:
            if (i->Xin.Alu32R.dst == hregX86_EAX()
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else {
               *p++ = 0x81;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            }
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                             i->Xin.Alu32R.dst);
            goto done;
         case Xrmi_Mem:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
            goto done;
         default:
            goto bad;
      }
      break;
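
   /* Worked examples (illustrative only): "addl %ebx,%ecx" takes the
      opc_rr route, giving 01 D9; "addl $4,%ecx" fits in 8 bits and so
      uses the short Grp1 form 83 C1 04; "addl $0x12345678,%eax" uses
      the one-byte %eax-only form 05 78 56 34 12. */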

   case Xin_Alu32M:
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
            case Xri_Reg:
               *p++ = 0x89;
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                                i->Xin.Alu32M.dst);
               goto done;
            case Xri_Imm:
               *p++ = 0xC7;
               p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32M.src->tag) {
         case Xri_Reg:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                             i->Xin.Alu32M.dst);
            goto done;
         case Xri_Imm:
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            } else {
               *p++ = 0x81;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            }
         default:
            goto bad;
      }
      break;

   case Xin_Sh32:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
      } else {
         *p++ = toUChar(opc_imm);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);
      }
      goto done;

   case Xin_Test32:
      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         *p++ = 0xF7;
         p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      } else {
         /* testl $imm32, amode */
         *p++ = 0xF7;
         p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      }

   case Xin_Unary32:
      if (i->Xin.Unary32.op == Xun_NOT) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
         goto done;
      }
      if (i->Xin.Unary32.op == Xun_NEG) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
         goto done;
      }
      break;

   case Xin_Lea32:
      *p++ = 0x8D;
      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
      goto done;

   case Xin_MulL:
      subopc = i->Xin.MulL.syned ? 5 : 4;
      *p++ = 0xF7;
      switch (i->Xin.MulL.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;
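
   /* Worked example (illustrative only): the widening multiplies are
      Grp3 forms, F7 /4 (mull) and F7 /5 (imull), with EDX:EAX as the
      implicit 64-bit destination; "mull %ebx" is F7 E3.  The Div case
      below uses the same Grp3 byte with /6 (divl) and /7 (idivl). */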

   case Xin_Div:
      subopc = i->Xin.Div.syned ? 7 : 6;
      *p++ = 0xF7;
      switch (i->Xin.Div.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Sh3232:
      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         *p++ = 0x0F;
         if (i->Xin.Sh3232.op == Xsh_SHL) {
            *p++ = 0xA5;
         } else {
            *p++ = 0xAD;
         }
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
         goto done;
      }
      break;

   case Xin_Push:
      switch (i->Xin.Push.src->tag) {
         case Xrmi_Mem:
            *p++ = 0xFF;
            p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
            goto done;
         case Xrmi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
            goto done;
         default:
            goto bad;
      }

   case Xin_Call:
      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregNo(hregX86_EAX()); break;
         case 1: irno = iregNo(hregX86_EDX()); break;
         case 2: irno = iregNo(hregX86_ECX()); break;
         case 3: irno = iregNo(hregX86_EDI()); break;
         default: vpanic("emit_X86Instr:call:regparms");
      }
      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */
      }
      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      /* call *%tmp */
      *p++ = 0xFF;
      *p++ = toUChar(0xD0 + irno);
      goto done;
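
   /* Worked example (illustrative only): a call conditional on Xcc_Z
      starts with the inverted-condition short jump "jnz .+7" (75 07),
      which skips exactly the movl $target,%tmp (B8+r plus 4 immediate
      bytes) and the call *%tmp (FF D0+r): 5 + 2 = 7 bytes. */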

   case Xin_Goto: {
      void* dispatch_to_use = NULL;
      vassert(dispatch_unassisted != NULL);
      vassert(dispatch_assisted != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* If this is a non-boring jump, set %ebp (the guest state
         pointer) appropriately.  Also, decide which dispatcher we
         need to use. */
      dispatch_to_use = dispatch_assisted;

      /* movl $magic_number, %ebp */
      switch (i->Xin.Goto.jk) {
         case Ijk_ClientReq:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
         case Ijk_Sys_int128:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
         case Ijk_Sys_int129:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
         case Ijk_Sys_int130:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
         case Ijk_Yield:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD); break;
         case Ijk_YieldNoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break;
         case Ijk_EmWarn:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_EMWARN); break;
         case Ijk_MapFail:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
         case Ijk_NoDecode:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NODECODE); break;
         case Ijk_TInval:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_TINVAL); break;
         case Ijk_NoRedir:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
         case Ijk_Sys_sysenter:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
         case Ijk_SigTRAP:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
         case Ijk_SigSEGV:
            *p++ = 0xBD;
            p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
         case Ijk_Ret:
         case Ijk_Call:
         case Ijk_Boring:
            dispatch_to_use = dispatch_unassisted;
            break;
         default:
            ppIRJumpKind(i->Xin.Goto.jk);
            vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
      }

      /* Get the destination address into %eax */
      if (i->Xin.Goto.dst->tag == Xri_Imm) {
         /* movl $immediate, %eax */
         *p++ = 0xB8;
         p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
      } else {
         vassert(i->Xin.Goto.dst->tag == Xri_Reg);
         /* movl %reg, %eax */
         if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
         }
      }

      /* Get the dispatcher address into %edx.  This has to happen
         after the load of %eax since %edx might be carrying the value
         destined for %eax immediately prior to this Xin_Goto. */
      vassert(sizeof(UInt) == sizeof(void*));
      vassert(dispatch_to_use != NULL);
      /* movl $imm32, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));

      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 20);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }
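
   /* Short-jump backpatching, spelled out (illustrative only): a jcc
      disp8 is relative to the end of the two-byte jcc insn.  In
      Xin_Goto, ptmp points at the displacement byte itself, so the
      fixup is p - (ptmp+1) = delta-1; the CMov-style cases below set
      ptmp just past it and hence write p - ptmp instead.  Same
      displacement, different bookkeeping. */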

   case Xin_CMov32:
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      /*
      *p++ = 0x0F;
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
         goto done;
      }
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
         goto done;
      }
      */

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      switch (i->Xin.CMov32.src->tag) {
         case Xrm_Reg:
            /* Big sigh.  This is movl E -> G ... */
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
                             i->Xin.CMov32.dst);
            break;
         case Xrm_Mem:
            /* ... whereas this is movl G -> E.  That's why the args
               to doAMode_R appear to be the wrong way round in the
               Xrm_Reg case. */
            *p++ = 0x8B;
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
            break;
         default:
            goto bad;
      }
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_LoadEX:
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      break;

   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregNo(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
      }
      goto done;

   case Xin_Bsfr32:
      *p++ = 0x0F;
      if (i->Xin.Bsfr32.isFwds) {
         *p++ = 0xBC;
      } else {
         *p++ = 0xBD;
      }
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
      goto done;

   case Xin_MFence:
      /* see comment in hdefs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;

   case Xin_ACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressable on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
      *p++ = 0x0F;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
      goto done;

   case Xin_DACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
      goto done;
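
   /* Background for the byte-register dodges in Xin_Set32 above and
      Xin_Store below (illustrative only): in a byte-sized modrm,
      register numbers 0..3 select %al/%cl/%dl/%bl, but 4..7 select
      %ah/%ch/%dh/%bh rather than the low bytes of %esp/%ebp/%esi/%edi.
      So "setb %cl" encodes directly (0F 92 C1), while there is simply
      no encoding that names the low 8 bits of %esi. */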

   case Xin_Store:
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register. */
         *p++ = 0x66;
         *p++ = 0x89;
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
         goto done;
      }

      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregNo(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly */
            *p++ = 0x88;
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            goto done;
         } else {
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned
               in the amode. */
            HReg swap = INVALID_HREG;
            HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
                  ecx = hregX86_ECX(), edx = hregX86_EDX();
            Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
            HRegUsage u;
            Int j;
            initHRegUsage(&u);
            addRegUsage_X86AMode(&u, i->Xin.Store.dst);
            for (j = 0; j < u.n_used; j++) {
               HReg r = u.hreg[j];
               if (r == eax) a_ok = False;
               if (r == ebx) b_ok = False;
               if (r == ecx) c_ok = False;
               if (r == edx) d_ok = False;
            }
            if (a_ok) swap = eax;
            if (b_ok) swap = ebx;
            if (c_ok) swap = ecx;
            if (d_ok) swap = edx;
            vassert(swap != INVALID_HREG);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8{%swap}, (dst) */
            *p++ = 0x88;
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            goto done;
         }
      } /* if (i->Xin.Store.sz == 1) */
      break;

   case Xin_FpUnary:
      /* gop %src, %dst
         --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
      goto done;
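
   /* Worked example of the simulated FP register file (illustrative
      only): FpUnary with op=Xfp_NEG, src=%fake2, dst=%fake0 becomes
      ffree %st(7) (DD C7); fld %st(2) (D9 C2); fchs (D9 E0);
      fstp %st(1) (DD D9).  The fake registers live in %st(0..5), so
      each operation pushes a copy, works on %st(0), and pops the
      result back into place. */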

   case Xin_FpBinary:
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9;
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7 */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
         *p++ = 0xD9;
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM:  *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         }
         p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);
         goto done;
      }
      /* General case */
      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                        1+hregNumber(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
      goto done;

   case Xin_FpLdSt:
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
         */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,load)");
         }
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
         */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,store)");
         }
         goto done;
      }
      break;

   case Xin_FpLdStI:
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 5; break;
            case 4:  opc = 0xDB; subopc_imm = 0; break;
            case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         }
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 7; break;
            case 4:  opc = 0xDB; subopc_imm = 3; break;
            case 2:  opc = 0xDF; subopc_imm = 3; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         }
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         goto done;
      }
      break;

   case Xin_Fp64to32:
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
      /* subl $4, %esp */
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
      /* fstps (%esp) */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
      /* flds (%esp) */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
      /* addl $4, %esp */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
      goto done;

   case Xin_FpCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
      p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_FpLdCW:
      *p++ = 0xD9;
      p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
      goto done;

   case Xin_FpStSW_AX:
      /* note, this emits fnstsw %ax, not fstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      goto done;

   case Xin_FpCmp:
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst
      */
      /* ffree %st7 */
      p = do_ffree_st7(p);
      /* fpush %fL */
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) */
      *p++ = 0xDD;
      *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
      /* fnstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      /* movl %eax, %dst */
      *p++ = 0x89;
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
      goto done;

   case Xin_SseConst: {
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movups (%esp), %xmm-dst */
      *p++ = 0x0F;
      *p++ = 0x10;
      *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
      *p++ = 0x24;
      /* addl $16, %esp */
      *p++ = 0x83;
      *p++ = 0xC4;
      *p++ = 0x10;
      goto done;
   }
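
   /* Worked example (illustrative only): con = 0xF0F0 pushes words
      for the nibbles F,0,F,0 in that order (6A FF, 6A 00, 6A FF,
      6A 00).  The first push lands at the highest address, so the
      top nibble ends up in the highest 32-bit lane once the vector
      is reloaded from (%esp). */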

   case Xin_SseLdSt:
      *p++ = 0x0F;
      *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
      goto done;

   case Xin_SseLdzLO:
      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
                       i->Xin.SseLdzLO.addr);
      goto done;

   case Xin_Sse32Fx4:
      xtra = 0;
      *p++ = 0x0F;
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
                       fake(vregNo(i->Xin.Sse32Fx4.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64Fx2:
      xtra = 0;
      *p++ = 0x66;
      *p++ = 0x0F;
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
                       fake(vregNo(i->Xin.Sse64Fx2.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse32FLo:
      xtra = 0;
      *p++ = 0xF3;
      *p++ = 0x0F;
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
                       fake(vregNo(i->Xin.Sse32FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
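
   /* The Sse32Fx4/Sse64Fx2/Sse32FLo/Sse64FLo cases differ only in the
      mandatory prefix (illustrative only): none = packed single
      (addps = 0F 58), 66 = packed double (addpd), F3 = scalar single
      (addss), F2 = scalar double (addsd).  The CMP forms are all
      0F C2, with the comparison chosen by the trailing imm8 that the
      xtra mechanism appends: 0=eq, 1=lt, 2=le, 3=unord. */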

   case Xin_Sse64FLo:
      xtra = 0;
      *p++ = 0xF2;
      *p++ = 0x0F;
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
                       fake(vregNo(i->Xin.Sse64FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_SseReRg:
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:      /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                  XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                 XX(0x0F); XX(0x57); break;
         case Xsse_AND:                 XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
                       fake(vregNo(i->Xin.SseReRg.src)) );
#     undef XX
      goto done;

   case Xin_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movaps %src, %dst */
      *p++ = 0x0F;
      *p++ = 0x28;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
                       fake(vregNo(i->Xin.SseCMov.src)) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_SseShuf:
      *p++ = 0x66;
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
                       fake(vregNo(i->Xin.SseShuf.src)) );
      *p++ = (UChar)(i->Xin.SseShuf.order);
      goto done;

   default:
      goto bad;
   }

  bad:
   ppX86Instr(i, mode64);
   vpanic("emit_X86Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];

#  undef fake
}

/*---------------------------------------------------------------*/
/*--- end                                     host_x86_defs.c ---*/
/*---------------------------------------------------------------*/