1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_amd64_defs.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2010 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 #include "libvex_trc_values.h" 39 40 #include "main_util.h" 41 #include "host_generic_regs.h" 42 #include "host_amd64_defs.h" 43 44 45 /* --------- Registers. --------- */ 46 47 void ppHRegAMD64 ( HReg reg ) 48 { 49 Int r; 50 static HChar* ireg64_names[16] 51 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 52 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 53 /* Be generic for all virtual regs. */ 54 if (hregIsVirtual(reg)) { 55 ppHReg(reg); 56 return; 57 } 58 /* But specific for real regs. */ 59 switch (hregClass(reg)) { 60 case HRcInt64: 61 r = hregNumber(reg); 62 vassert(r >= 0 && r < 16); 63 vex_printf("%s", ireg64_names[r]); 64 return; 65 case HRcFlt64: 66 r = hregNumber(reg); 67 vassert(r >= 0 && r < 6); 68 vex_printf("%%fake%d", r); 69 return; 70 case HRcVec128: 71 r = hregNumber(reg); 72 vassert(r >= 0 && r < 16); 73 vex_printf("%%xmm%d", r); 74 return; 75 default: 76 vpanic("ppHRegAMD64"); 77 } 78 } 79 80 static void ppHRegAMD64_lo32 ( HReg reg ) 81 { 82 Int r; 83 static HChar* ireg32_names[16] 84 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 85 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; 86 /* Be generic for all virtual regs. */ 87 if (hregIsVirtual(reg)) { 88 ppHReg(reg); 89 vex_printf("d"); 90 return; 91 } 92 /* But specific for real regs. 
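      (For example, real integer register 0 comes out as "%eax" from the
      table above, whereas a virtual register is printed by the generic
      ppHReg with a "d" suffix appended.)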
*/ 93 switch (hregClass(reg)) { 94 case HRcInt64: 95 r = hregNumber(reg); 96 vassert(r >= 0 && r < 16); 97 vex_printf("%s", ireg32_names[r]); 98 return; 99 default: 100 vpanic("ppHRegAMD64_lo32: invalid regclass"); 101 } 102 } 103 104 HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); } 105 HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); } 106 HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); } 107 HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); } 108 HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); } 109 HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); } 110 HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); } 111 HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); } 112 HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); } 113 HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); } 114 HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); } 115 HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); } 116 HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); } 117 HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); } 118 HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); } 119 HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); } 120 121 //.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } 122 //.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } 123 //.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } 124 //.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } 125 //.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } 126 //.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } 127 //.. 
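/* Illustrative note (not part of the original code): each wrapper in
   this block just bakes a fixed hardware encoding into an HReg via
   mkHReg(encoding, class, isVirtual).  Assuming the third mkHReg
   argument really is the "virtual" flag, a sketch of how the printers
   see the real/virtual distinction:

      ppHRegAMD64(hregAMD64_RAX());            // real reg 0  -> "%rax"
      ppHRegAMD64(hregAMD64_XMM5());           // real reg 5  -> "%xmm5"
      ppHRegAMD64(mkHReg(3, HRcInt64, True));  // virtual     -> generic ppHReg
*/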
128 HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); } 129 HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); } 130 HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); } 131 HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); } 132 HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); } 133 HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); } 134 HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); } 135 HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); } 136 HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); } 137 HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); } 138 HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); } 139 HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); } 140 HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); } 141 HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); } 142 HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); } 143 HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); } 144 145 146 void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr ) 147 { 148 #if 0 149 *nregs = 6; 150 *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); 151 (*arr)[ 0] = hregAMD64_RSI(); 152 (*arr)[ 1] = hregAMD64_RDI(); 153 (*arr)[ 2] = hregAMD64_RBX(); 154 155 (*arr)[ 3] = hregAMD64_XMM7(); 156 (*arr)[ 4] = hregAMD64_XMM8(); 157 (*arr)[ 5] = hregAMD64_XMM9(); 158 #endif 159 #if 1 160 *nregs = 20; 161 *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); 162 (*arr)[ 0] = hregAMD64_RSI(); 163 (*arr)[ 1] = hregAMD64_RDI(); 164 (*arr)[ 2] = hregAMD64_R8(); 165 (*arr)[ 3] = hregAMD64_R9(); 166 (*arr)[ 4] = hregAMD64_R12(); 167 (*arr)[ 5] = hregAMD64_R13(); 168 (*arr)[ 6] = hregAMD64_R14(); 169 (*arr)[ 7] = hregAMD64_R15(); 170 (*arr)[ 8] = hregAMD64_RBX(); 171 172 (*arr)[ 9] = hregAMD64_XMM3(); 173 (*arr)[10] = hregAMD64_XMM4(); 174 (*arr)[11] = hregAMD64_XMM5(); 175 (*arr)[12] = hregAMD64_XMM6(); 176 (*arr)[13] = hregAMD64_XMM7(); 177 (*arr)[14] = hregAMD64_XMM8(); 178 (*arr)[15] = hregAMD64_XMM9(); 179 (*arr)[16] = hregAMD64_XMM10(); 180 (*arr)[17] = hregAMD64_XMM11(); 181 (*arr)[18] = hregAMD64_XMM12(); 182 (*arr)[19] = hregAMD64_R10(); 183 #endif 184 } 185 186 187 /* --------- Condition codes, Intel encoding. --------- */ 188 189 HChar* showAMD64CondCode ( AMD64CondCode cond ) 190 { 191 switch (cond) { 192 case Acc_O: return "o"; 193 case Acc_NO: return "no"; 194 case Acc_B: return "b"; 195 case Acc_NB: return "nb"; 196 case Acc_Z: return "z"; 197 case Acc_NZ: return "nz"; 198 case Acc_BE: return "be"; 199 case Acc_NBE: return "nbe"; 200 case Acc_S: return "s"; 201 case Acc_NS: return "ns"; 202 case Acc_P: return "p"; 203 case Acc_NP: return "np"; 204 case Acc_L: return "l"; 205 case Acc_NL: return "nl"; 206 case Acc_LE: return "le"; 207 case Acc_NLE: return "nle"; 208 case Acc_ALWAYS: return "ALWAYS"; 209 default: vpanic("ppAMD64CondCode"); 210 } 211 } 212 213 214 /* --------- AMD64AMode: memory address expressions. 
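      (Two forms: Aam_IR denotes imm32(reg), and Aam_IRRS denotes
      imm32(base,index,1<<shift) with shift restricted to 0 .. 3.)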
--------- */ 215 216 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) { 217 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); 218 am->tag = Aam_IR; 219 am->Aam.IR.imm = imm32; 220 am->Aam.IR.reg = reg; 221 return am; 222 } 223 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { 224 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); 225 am->tag = Aam_IRRS; 226 am->Aam.IRRS.imm = imm32; 227 am->Aam.IRRS.base = base; 228 am->Aam.IRRS.index = indEx; 229 am->Aam.IRRS.shift = shift; 230 vassert(shift >= 0 && shift <= 3); 231 return am; 232 } 233 234 //.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) { 235 //.. switch (am->tag) { 236 //.. case Xam_IR: 237 //.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); 238 //.. case Xam_IRRS: 239 //.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, 240 //.. am->Xam.IRRS.index, am->Xam.IRRS.shift ); 241 //.. default: 242 //.. vpanic("dopyAMD64AMode"); 243 //.. } 244 //.. } 245 246 void ppAMD64AMode ( AMD64AMode* am ) { 247 switch (am->tag) { 248 case Aam_IR: 249 if (am->Aam.IR.imm == 0) 250 vex_printf("("); 251 else 252 vex_printf("0x%x(", am->Aam.IR.imm); 253 ppHRegAMD64(am->Aam.IR.reg); 254 vex_printf(")"); 255 return; 256 case Aam_IRRS: 257 vex_printf("0x%x(", am->Aam.IRRS.imm); 258 ppHRegAMD64(am->Aam.IRRS.base); 259 vex_printf(","); 260 ppHRegAMD64(am->Aam.IRRS.index); 261 vex_printf(",%d)", 1 << am->Aam.IRRS.shift); 262 return; 263 default: 264 vpanic("ppAMD64AMode"); 265 } 266 } 267 268 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) { 269 switch (am->tag) { 270 case Aam_IR: 271 addHRegUse(u, HRmRead, am->Aam.IR.reg); 272 return; 273 case Aam_IRRS: 274 addHRegUse(u, HRmRead, am->Aam.IRRS.base); 275 addHRegUse(u, HRmRead, am->Aam.IRRS.index); 276 return; 277 default: 278 vpanic("addRegUsage_AMD64AMode"); 279 } 280 } 281 282 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) { 283 switch (am->tag) { 284 case Aam_IR: 285 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg); 286 return; 287 case Aam_IRRS: 288 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base); 289 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index); 290 return; 291 default: 292 vpanic("mapRegs_AMD64AMode"); 293 } 294 } 295 296 /* --------- Operand, which can be reg, immediate or memory. --------- */ 297 298 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) { 299 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 300 op->tag = Armi_Imm; 301 op->Armi.Imm.imm32 = imm32; 302 return op; 303 } 304 AMD64RMI* AMD64RMI_Reg ( HReg reg ) { 305 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 306 op->tag = Armi_Reg; 307 op->Armi.Reg.reg = reg; 308 return op; 309 } 310 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) { 311 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 312 op->tag = Armi_Mem; 313 op->Armi.Mem.am = am; 314 return op; 315 } 316 317 void ppAMD64RMI ( AMD64RMI* op ) { 318 switch (op->tag) { 319 case Armi_Imm: 320 vex_printf("$0x%x", op->Armi.Imm.imm32); 321 return; 322 case Armi_Reg: 323 ppHRegAMD64(op->Armi.Reg.reg); 324 return; 325 case Armi_Mem: 326 ppAMD64AMode(op->Armi.Mem.am); 327 return; 328 default: 329 vpanic("ppAMD64RMI"); 330 } 331 } 332 333 /* An AMD64RMI can only be used in a "read" context (what would it mean 334 to write or modify a literal?) and so we enumerate its registers 335 accordingly. 
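   In practice: an Armi_Imm operand contributes nothing, an Armi_Reg
   contributes a single HRmRead of its register, and an Armi_Mem
   contributes HRmRead uses of whatever registers its amode mentions.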
*/ 336 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) { 337 switch (op->tag) { 338 case Armi_Imm: 339 return; 340 case Armi_Reg: 341 addHRegUse(u, HRmRead, op->Armi.Reg.reg); 342 return; 343 case Armi_Mem: 344 addRegUsage_AMD64AMode(u, op->Armi.Mem.am); 345 return; 346 default: 347 vpanic("addRegUsage_AMD64RMI"); 348 } 349 } 350 351 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) { 352 switch (op->tag) { 353 case Armi_Imm: 354 return; 355 case Armi_Reg: 356 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg); 357 return; 358 case Armi_Mem: 359 mapRegs_AMD64AMode(m, op->Armi.Mem.am); 360 return; 361 default: 362 vpanic("mapRegs_AMD64RMI"); 363 } 364 } 365 366 367 /* --------- Operand, which can be reg or immediate only. --------- */ 368 369 AMD64RI* AMD64RI_Imm ( UInt imm32 ) { 370 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); 371 op->tag = Ari_Imm; 372 op->Ari.Imm.imm32 = imm32; 373 return op; 374 } 375 AMD64RI* AMD64RI_Reg ( HReg reg ) { 376 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); 377 op->tag = Ari_Reg; 378 op->Ari.Reg.reg = reg; 379 return op; 380 } 381 382 void ppAMD64RI ( AMD64RI* op ) { 383 switch (op->tag) { 384 case Ari_Imm: 385 vex_printf("$0x%x", op->Ari.Imm.imm32); 386 return; 387 case Ari_Reg: 388 ppHRegAMD64(op->Ari.Reg.reg); 389 return; 390 default: 391 vpanic("ppAMD64RI"); 392 } 393 } 394 395 /* An AMD64RI can only be used in a "read" context (what would it mean 396 to write or modify a literal?) and so we enumerate its registers 397 accordingly. */ 398 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) { 399 switch (op->tag) { 400 case Ari_Imm: 401 return; 402 case Ari_Reg: 403 addHRegUse(u, HRmRead, op->Ari.Reg.reg); 404 return; 405 default: 406 vpanic("addRegUsage_AMD64RI"); 407 } 408 } 409 410 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) { 411 switch (op->tag) { 412 case Ari_Imm: 413 return; 414 case Ari_Reg: 415 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg); 416 return; 417 default: 418 vpanic("mapRegs_AMD64RI"); 419 } 420 } 421 422 423 /* --------- Operand, which can be reg or memory only. --------- */ 424 425 AMD64RM* AMD64RM_Reg ( HReg reg ) { 426 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); 427 op->tag = Arm_Reg; 428 op->Arm.Reg.reg = reg; 429 return op; 430 } 431 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) { 432 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); 433 op->tag = Arm_Mem; 434 op->Arm.Mem.am = am; 435 return op; 436 } 437 438 void ppAMD64RM ( AMD64RM* op ) { 439 switch (op->tag) { 440 case Arm_Mem: 441 ppAMD64AMode(op->Arm.Mem.am); 442 return; 443 case Arm_Reg: 444 ppHRegAMD64(op->Arm.Reg.reg); 445 return; 446 default: 447 vpanic("ppAMD64RM"); 448 } 449 } 450 451 /* Because an AMD64RM can be both a source or destination operand, we 452 have to supply a mode -- pertaining to the operand as a whole -- 453 indicating how it's being used. */ 454 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) { 455 switch (op->tag) { 456 case Arm_Mem: 457 /* Memory is read, written or modified. So we just want to 458 know the regs read by the amode. */ 459 addRegUsage_AMD64AMode(u, op->Arm.Mem.am); 460 return; 461 case Arm_Reg: 462 /* reg is read, written or modified. Add it in the 463 appropriate way. 
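         (The callers here -- MulL, Div and CMov64's source -- all pass
         HRmRead; an operand used as a destination would arrive with
         HRmWrite or HRmModify instead.)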
*/ 464 addHRegUse(u, mode, op->Arm.Reg.reg); 465 return; 466 default: 467 vpanic("addRegUsage_AMD64RM"); 468 } 469 } 470 471 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op ) 472 { 473 switch (op->tag) { 474 case Arm_Mem: 475 mapRegs_AMD64AMode(m, op->Arm.Mem.am); 476 return; 477 case Arm_Reg: 478 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg); 479 return; 480 default: 481 vpanic("mapRegs_AMD64RM"); 482 } 483 } 484 485 486 /* --------- Instructions. --------- */ 487 488 static HChar* showAMD64ScalarSz ( Int sz ) { 489 switch (sz) { 490 case 2: return "w"; 491 case 4: return "l"; 492 case 8: return "q"; 493 default: vpanic("showAMD64ScalarSz"); 494 } 495 } 496 497 HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) { 498 switch (op) { 499 case Aun_NOT: return "not"; 500 case Aun_NEG: return "neg"; 501 default: vpanic("showAMD64UnaryOp"); 502 } 503 } 504 505 HChar* showAMD64AluOp ( AMD64AluOp op ) { 506 switch (op) { 507 case Aalu_MOV: return "mov"; 508 case Aalu_CMP: return "cmp"; 509 case Aalu_ADD: return "add"; 510 case Aalu_SUB: return "sub"; 511 case Aalu_ADC: return "adc"; 512 case Aalu_SBB: return "sbb"; 513 case Aalu_AND: return "and"; 514 case Aalu_OR: return "or"; 515 case Aalu_XOR: return "xor"; 516 case Aalu_MUL: return "imul"; 517 default: vpanic("showAMD64AluOp"); 518 } 519 } 520 521 HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) { 522 switch (op) { 523 case Ash_SHL: return "shl"; 524 case Ash_SHR: return "shr"; 525 case Ash_SAR: return "sar"; 526 default: vpanic("showAMD64ShiftOp"); 527 } 528 } 529 530 HChar* showA87FpOp ( A87FpOp op ) { 531 switch (op) { 532 //.. case Xfp_ADD: return "add"; 533 //.. case Xfp_SUB: return "sub"; 534 //.. case Xfp_MUL: return "mul"; 535 //.. case Xfp_DIV: return "div"; 536 case Afp_SCALE: return "scale"; 537 case Afp_ATAN: return "atan"; 538 case Afp_YL2X: return "yl2x"; 539 case Afp_YL2XP1: return "yl2xp1"; 540 case Afp_PREM: return "prem"; 541 case Afp_PREM1: return "prem1"; 542 case Afp_SQRT: return "sqrt"; 543 //.. case Xfp_ABS: return "abs"; 544 //.. case Xfp_NEG: return "chs"; 545 //.. 
case Xfp_MOV: return "mov"; 546 case Afp_SIN: return "sin"; 547 case Afp_COS: return "cos"; 548 case Afp_TAN: return "tan"; 549 case Afp_ROUND: return "round"; 550 case Afp_2XM1: return "2xm1"; 551 default: vpanic("showA87FpOp"); 552 } 553 } 554 555 HChar* showAMD64SseOp ( AMD64SseOp op ) { 556 switch (op) { 557 case Asse_MOV: return "movups"; 558 case Asse_ADDF: return "add"; 559 case Asse_SUBF: return "sub"; 560 case Asse_MULF: return "mul"; 561 case Asse_DIVF: return "div"; 562 case Asse_MAXF: return "max"; 563 case Asse_MINF: return "min"; 564 case Asse_CMPEQF: return "cmpFeq"; 565 case Asse_CMPLTF: return "cmpFlt"; 566 case Asse_CMPLEF: return "cmpFle"; 567 case Asse_CMPUNF: return "cmpFun"; 568 case Asse_RCPF: return "rcp"; 569 case Asse_RSQRTF: return "rsqrt"; 570 case Asse_SQRTF: return "sqrt"; 571 case Asse_AND: return "and"; 572 case Asse_OR: return "or"; 573 case Asse_XOR: return "xor"; 574 case Asse_ANDN: return "andn"; 575 case Asse_ADD8: return "paddb"; 576 case Asse_ADD16: return "paddw"; 577 case Asse_ADD32: return "paddd"; 578 case Asse_ADD64: return "paddq"; 579 case Asse_QADD8U: return "paddusb"; 580 case Asse_QADD16U: return "paddusw"; 581 case Asse_QADD8S: return "paddsb"; 582 case Asse_QADD16S: return "paddsw"; 583 case Asse_SUB8: return "psubb"; 584 case Asse_SUB16: return "psubw"; 585 case Asse_SUB32: return "psubd"; 586 case Asse_SUB64: return "psubq"; 587 case Asse_QSUB8U: return "psubusb"; 588 case Asse_QSUB16U: return "psubusw"; 589 case Asse_QSUB8S: return "psubsb"; 590 case Asse_QSUB16S: return "psubsw"; 591 case Asse_MUL16: return "pmullw"; 592 case Asse_MULHI16U: return "pmulhuw"; 593 case Asse_MULHI16S: return "pmulhw"; 594 case Asse_AVG8U: return "pavgb"; 595 case Asse_AVG16U: return "pavgw"; 596 case Asse_MAX16S: return "pmaxw"; 597 case Asse_MAX8U: return "pmaxub"; 598 case Asse_MIN16S: return "pminw"; 599 case Asse_MIN8U: return "pminub"; 600 case Asse_CMPEQ8: return "pcmpeqb"; 601 case Asse_CMPEQ16: return "pcmpeqw"; 602 case Asse_CMPEQ32: return "pcmpeqd"; 603 case Asse_CMPGT8S: return "pcmpgtb"; 604 case Asse_CMPGT16S: return "pcmpgtw"; 605 case Asse_CMPGT32S: return "pcmpgtd"; 606 case Asse_SHL16: return "psllw"; 607 case Asse_SHL32: return "pslld"; 608 case Asse_SHL64: return "psllq"; 609 case Asse_SHR16: return "psrlw"; 610 case Asse_SHR32: return "psrld"; 611 case Asse_SHR64: return "psrlq"; 612 case Asse_SAR16: return "psraw"; 613 case Asse_SAR32: return "psrad"; 614 case Asse_PACKSSD: return "packssdw"; 615 case Asse_PACKSSW: return "packsswb"; 616 case Asse_PACKUSW: return "packuswb"; 617 case Asse_UNPCKHB: return "punpckhb"; 618 case Asse_UNPCKHW: return "punpckhw"; 619 case Asse_UNPCKHD: return "punpckhd"; 620 case Asse_UNPCKHQ: return "punpckhq"; 621 case Asse_UNPCKLB: return "punpcklb"; 622 case Asse_UNPCKLW: return "punpcklw"; 623 case Asse_UNPCKLD: return "punpckld"; 624 case Asse_UNPCKLQ: return "punpcklq"; 625 default: vpanic("showAMD64SseOp"); 626 } 627 } 628 629 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) { 630 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 631 i->tag = Ain_Imm64; 632 i->Ain.Imm64.imm64 = imm64; 633 i->Ain.Imm64.dst = dst; 634 return i; 635 } 636 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { 637 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 638 i->tag = Ain_Alu64R; 639 i->Ain.Alu64R.op = op; 640 i->Ain.Alu64R.src = src; 641 i->Ain.Alu64R.dst = dst; 642 return i; 643 } 644 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) { 645 AMD64Instr* i = 
LibVEX_Alloc(sizeof(AMD64Instr)); 646 i->tag = Ain_Alu64M; 647 i->Ain.Alu64M.op = op; 648 i->Ain.Alu64M.src = src; 649 i->Ain.Alu64M.dst = dst; 650 vassert(op != Aalu_MUL); 651 return i; 652 } 653 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) { 654 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 655 i->tag = Ain_Sh64; 656 i->Ain.Sh64.op = op; 657 i->Ain.Sh64.src = src; 658 i->Ain.Sh64.dst = dst; 659 return i; 660 } 661 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) { 662 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 663 i->tag = Ain_Test64; 664 i->Ain.Test64.imm32 = imm32; 665 i->Ain.Test64.dst = dst; 666 return i; 667 } 668 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) { 669 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 670 i->tag = Ain_Unary64; 671 i->Ain.Unary64.op = op; 672 i->Ain.Unary64.dst = dst; 673 return i; 674 } 675 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) { 676 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 677 i->tag = Ain_Lea64; 678 i->Ain.Lea64.am = am; 679 i->Ain.Lea64.dst = dst; 680 return i; 681 } 682 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) { 683 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 684 i->tag = Ain_MulL; 685 i->Ain.MulL.syned = syned; 686 i->Ain.MulL.src = src; 687 return i; 688 } 689 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) { 690 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 691 i->tag = Ain_Div; 692 i->Ain.Div.syned = syned; 693 i->Ain.Div.sz = sz; 694 i->Ain.Div.src = src; 695 vassert(sz == 4 || sz == 8); 696 return i; 697 } 698 //.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) { 699 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 700 //.. i->tag = Xin_Sh3232; 701 //.. i->Xin.Sh3232.op = op; 702 //.. i->Xin.Sh3232.amt = amt; 703 //.. i->Xin.Sh3232.src = src; 704 //.. i->Xin.Sh3232.dst = dst; 705 //.. vassert(op == Xsh_SHL || op == Xsh_SHR); 706 //.. return i; 707 //.. 
} 708 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) { 709 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 710 i->tag = Ain_Push; 711 i->Ain.Push.src = src; 712 return i; 713 } 714 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) { 715 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 716 i->tag = Ain_Call; 717 i->Ain.Call.cond = cond; 718 i->Ain.Call.target = target; 719 i->Ain.Call.regparms = regparms; 720 vassert(regparms >= 0 && regparms <= 6); 721 return i; 722 } 723 AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) { 724 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 725 i->tag = Ain_Goto; 726 i->Ain.Goto.cond = cond; 727 i->Ain.Goto.dst = dst; 728 i->Ain.Goto.jk = jk; 729 return i; 730 } 731 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) { 732 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 733 i->tag = Ain_CMov64; 734 i->Ain.CMov64.cond = cond; 735 i->Ain.CMov64.src = src; 736 i->Ain.CMov64.dst = dst; 737 vassert(cond != Acc_ALWAYS); 738 return i; 739 } 740 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) { 741 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 742 i->tag = Ain_MovxLQ; 743 i->Ain.MovxLQ.syned = syned; 744 i->Ain.MovxLQ.src = src; 745 i->Ain.MovxLQ.dst = dst; 746 return i; 747 } 748 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, 749 AMD64AMode* src, HReg dst ) { 750 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 751 i->tag = Ain_LoadEX; 752 i->Ain.LoadEX.szSmall = szSmall; 753 i->Ain.LoadEX.syned = syned; 754 i->Ain.LoadEX.src = src; 755 i->Ain.LoadEX.dst = dst; 756 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4); 757 return i; 758 } 759 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) { 760 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 761 i->tag = Ain_Store; 762 i->Ain.Store.sz = sz; 763 i->Ain.Store.src = src; 764 i->Ain.Store.dst = dst; 765 vassert(sz == 1 || sz == 2 || sz == 4); 766 return i; 767 } 768 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) { 769 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 770 i->tag = Ain_Set64; 771 i->Ain.Set64.cond = cond; 772 i->Ain.Set64.dst = dst; 773 return i; 774 } 775 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) { 776 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 777 i->tag = Ain_Bsfr64; 778 i->Ain.Bsfr64.isFwds = isFwds; 779 i->Ain.Bsfr64.src = src; 780 i->Ain.Bsfr64.dst = dst; 781 return i; 782 } 783 AMD64Instr* AMD64Instr_MFence ( void ) { 784 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 785 i->tag = Ain_MFence; 786 return i; 787 } 788 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) { 789 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 790 i->tag = Ain_ACAS; 791 i->Ain.ACAS.addr = addr; 792 i->Ain.ACAS.sz = sz; 793 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 794 return i; 795 } 796 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) { 797 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 798 i->tag = Ain_DACAS; 799 i->Ain.DACAS.addr = addr; 800 i->Ain.DACAS.sz = sz; 801 vassert(sz == 8 || sz == 4); 802 return i; 803 } 804 805 AMD64Instr* AMD64Instr_A87Free ( Int nregs ) 806 { 807 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 808 i->tag = Ain_A87Free; 809 i->Ain.A87Free.nregs = nregs; 810 vassert(nregs >= 1 && nregs <= 7); 811 return i; 812 } 813 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB ) 814 { 815 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 816 
i->tag = Ain_A87PushPop; 817 i->Ain.A87PushPop.addr = addr; 818 i->Ain.A87PushPop.isPush = isPush; 819 i->Ain.A87PushPop.szB = szB; 820 vassert(szB == 8 || szB == 4); 821 return i; 822 } 823 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ) 824 { 825 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 826 i->tag = Ain_A87FpOp; 827 i->Ain.A87FpOp.op = op; 828 return i; 829 } 830 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ) 831 { 832 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 833 i->tag = Ain_A87LdCW; 834 i->Ain.A87LdCW.addr = addr; 835 return i; 836 } 837 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ) 838 { 839 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 840 i->tag = Ain_A87StSW; 841 i->Ain.A87StSW.addr = addr; 842 return i; 843 } 844 845 //.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) { 846 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 847 //.. i->tag = Xin_FpUnary; 848 //.. i->Xin.FpUnary.op = op; 849 //.. i->Xin.FpUnary.src = src; 850 //.. i->Xin.FpUnary.dst = dst; 851 //.. return i; 852 //.. } 853 //.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) { 854 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 855 //.. i->tag = Xin_FpBinary; 856 //.. i->Xin.FpBinary.op = op; 857 //.. i->Xin.FpBinary.srcL = srcL; 858 //.. i->Xin.FpBinary.srcR = srcR; 859 //.. i->Xin.FpBinary.dst = dst; 860 //.. return i; 861 //.. } 862 //.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) { 863 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 864 //.. i->tag = Xin_FpLdSt; 865 //.. i->Xin.FpLdSt.isLoad = isLoad; 866 //.. i->Xin.FpLdSt.sz = sz; 867 //.. i->Xin.FpLdSt.reg = reg; 868 //.. i->Xin.FpLdSt.addr = addr; 869 //.. vassert(sz == 4 || sz == 8); 870 //.. return i; 871 //.. } 872 //.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, 873 //.. HReg reg, AMD64AMode* addr ) { 874 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 875 //.. i->tag = Xin_FpLdStI; 876 //.. i->Xin.FpLdStI.isLoad = isLoad; 877 //.. i->Xin.FpLdStI.sz = sz; 878 //.. i->Xin.FpLdStI.reg = reg; 879 //.. i->Xin.FpLdStI.addr = addr; 880 //.. vassert(sz == 2 || sz == 4 || sz == 8); 881 //.. return i; 882 //.. } 883 //.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) { 884 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 885 //.. i->tag = Xin_Fp64to32; 886 //.. i->Xin.Fp64to32.src = src; 887 //.. i->Xin.Fp64to32.dst = dst; 888 //.. return i; 889 //.. } 890 //.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) { 891 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 892 //.. i->tag = Xin_FpCMov; 893 //.. i->Xin.FpCMov.cond = cond; 894 //.. i->Xin.FpCMov.src = src; 895 //.. i->Xin.FpCMov.dst = dst; 896 //.. vassert(cond != Xcc_ALWAYS); 897 //.. return i; 898 //.. } 899 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) { 900 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 901 i->tag = Ain_LdMXCSR; 902 i->Ain.LdMXCSR.addr = addr; 903 return i; 904 } 905 //.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) { 906 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 907 //.. i->tag = Xin_FpStSW_AX; 908 //.. return i; 909 //.. 
} 910 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) { 911 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 912 i->tag = Ain_SseUComIS; 913 i->Ain.SseUComIS.sz = toUChar(sz); 914 i->Ain.SseUComIS.srcL = srcL; 915 i->Ain.SseUComIS.srcR = srcR; 916 i->Ain.SseUComIS.dst = dst; 917 vassert(sz == 4 || sz == 8); 918 return i; 919 } 920 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) { 921 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 922 i->tag = Ain_SseSI2SF; 923 i->Ain.SseSI2SF.szS = toUChar(szS); 924 i->Ain.SseSI2SF.szD = toUChar(szD); 925 i->Ain.SseSI2SF.src = src; 926 i->Ain.SseSI2SF.dst = dst; 927 vassert(szS == 4 || szS == 8); 928 vassert(szD == 4 || szD == 8); 929 return i; 930 } 931 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) { 932 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 933 i->tag = Ain_SseSF2SI; 934 i->Ain.SseSF2SI.szS = toUChar(szS); 935 i->Ain.SseSF2SI.szD = toUChar(szD); 936 i->Ain.SseSF2SI.src = src; 937 i->Ain.SseSF2SI.dst = dst; 938 vassert(szS == 4 || szS == 8); 939 vassert(szD == 4 || szD == 8); 940 return i; 941 } 942 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ) 943 { 944 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 945 i->tag = Ain_SseSDSS; 946 i->Ain.SseSDSS.from64 = from64; 947 i->Ain.SseSDSS.src = src; 948 i->Ain.SseSDSS.dst = dst; 949 return i; 950 } 951 952 //.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) { 953 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 954 //.. i->tag = Xin_SseConst; 955 //.. i->Xin.SseConst.con = con; 956 //.. i->Xin.SseConst.dst = dst; 957 //.. vassert(hregClass(dst) == HRcVec128); 958 //.. return i; 959 //.. } 960 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, 961 HReg reg, AMD64AMode* addr ) { 962 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 963 i->tag = Ain_SseLdSt; 964 i->Ain.SseLdSt.isLoad = isLoad; 965 i->Ain.SseLdSt.sz = toUChar(sz); 966 i->Ain.SseLdSt.reg = reg; 967 i->Ain.SseLdSt.addr = addr; 968 vassert(sz == 4 || sz == 8 || sz == 16); 969 return i; 970 } 971 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr ) 972 { 973 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 974 i->tag = Ain_SseLdzLO; 975 i->Ain.SseLdzLO.sz = sz; 976 i->Ain.SseLdzLO.reg = reg; 977 i->Ain.SseLdzLO.addr = addr; 978 vassert(sz == 4 || sz == 8); 979 return i; 980 } 981 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) { 982 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 983 i->tag = Ain_Sse32Fx4; 984 i->Ain.Sse32Fx4.op = op; 985 i->Ain.Sse32Fx4.src = src; 986 i->Ain.Sse32Fx4.dst = dst; 987 vassert(op != Asse_MOV); 988 return i; 989 } 990 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) { 991 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 992 i->tag = Ain_Sse32FLo; 993 i->Ain.Sse32FLo.op = op; 994 i->Ain.Sse32FLo.src = src; 995 i->Ain.Sse32FLo.dst = dst; 996 vassert(op != Asse_MOV); 997 return i; 998 } 999 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) { 1000 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1001 i->tag = Ain_Sse64Fx2; 1002 i->Ain.Sse64Fx2.op = op; 1003 i->Ain.Sse64Fx2.src = src; 1004 i->Ain.Sse64Fx2.dst = dst; 1005 vassert(op != Asse_MOV); 1006 return i; 1007 } 1008 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) { 1009 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1010 i->tag = Ain_Sse64FLo; 1011 i->Ain.Sse64FLo.op = op; 1012 i->Ain.Sse64FLo.src = src; 1013 i->Ain.Sse64FLo.dst 
= dst; 1014 vassert(op != Asse_MOV); 1015 return i; 1016 } 1017 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) { 1018 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1019 i->tag = Ain_SseReRg; 1020 i->Ain.SseReRg.op = op; 1021 i->Ain.SseReRg.src = re; 1022 i->Ain.SseReRg.dst = rg; 1023 return i; 1024 } 1025 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) { 1026 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1027 i->tag = Ain_SseCMov; 1028 i->Ain.SseCMov.cond = cond; 1029 i->Ain.SseCMov.src = src; 1030 i->Ain.SseCMov.dst = dst; 1031 vassert(cond != Acc_ALWAYS); 1032 return i; 1033 } 1034 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) { 1035 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1036 i->tag = Ain_SseShuf; 1037 i->Ain.SseShuf.order = order; 1038 i->Ain.SseShuf.src = src; 1039 i->Ain.SseShuf.dst = dst; 1040 vassert(order >= 0 && order <= 0xFF); 1041 return i; 1042 } 1043 1044 void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) 1045 { 1046 vassert(mode64 == True); 1047 switch (i->tag) { 1048 case Ain_Imm64: 1049 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64); 1050 ppHRegAMD64(i->Ain.Imm64.dst); 1051 return; 1052 case Ain_Alu64R: 1053 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op)); 1054 ppAMD64RMI(i->Ain.Alu64R.src); 1055 vex_printf(","); 1056 ppHRegAMD64(i->Ain.Alu64R.dst); 1057 return; 1058 case Ain_Alu64M: 1059 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op)); 1060 ppAMD64RI(i->Ain.Alu64M.src); 1061 vex_printf(","); 1062 ppAMD64AMode(i->Ain.Alu64M.dst); 1063 return; 1064 case Ain_Sh64: 1065 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op)); 1066 if (i->Ain.Sh64.src == 0) 1067 vex_printf("%%cl,"); 1068 else 1069 vex_printf("$%d,", (Int)i->Ain.Sh64.src); 1070 ppHRegAMD64(i->Ain.Sh64.dst); 1071 return; 1072 case Ain_Test64: 1073 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32); 1074 ppHRegAMD64(i->Ain.Test64.dst); 1075 return; 1076 case Ain_Unary64: 1077 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op)); 1078 ppHRegAMD64(i->Ain.Unary64.dst); 1079 return; 1080 case Ain_Lea64: 1081 vex_printf("leaq "); 1082 ppAMD64AMode(i->Ain.Lea64.am); 1083 vex_printf(","); 1084 ppHRegAMD64(i->Ain.Lea64.dst); 1085 return; 1086 case Ain_MulL: 1087 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u'); 1088 ppAMD64RM(i->Ain.MulL.src); 1089 return; 1090 case Ain_Div: 1091 vex_printf("%cdiv%s ", 1092 i->Ain.Div.syned ? 's' : 'u', 1093 showAMD64ScalarSz(i->Ain.Div.sz)); 1094 ppAMD64RM(i->Ain.Div.src); 1095 return; 1096 //.. case Xin_Sh3232: 1097 //.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op)); 1098 //.. if (i->Xin.Sh3232.amt == 0) 1099 //.. vex_printf(" %%cl,"); 1100 //.. else 1101 //.. vex_printf(" $%d,", i->Xin.Sh3232.amt); 1102 //.. ppHRegAMD64(i->Xin.Sh3232.src); 1103 //.. vex_printf(","); 1104 //.. ppHRegAMD64(i->Xin.Sh3232.dst); 1105 //.. return; 1106 case Ain_Push: 1107 vex_printf("pushq "); 1108 ppAMD64RMI(i->Ain.Push.src); 1109 return; 1110 case Ain_Call: 1111 vex_printf("call%s[%d] ", 1112 i->Ain.Call.cond==Acc_ALWAYS 1113 ? 
"" : showAMD64CondCode(i->Ain.Call.cond), 1114 i->Ain.Call.regparms ); 1115 vex_printf("0x%llx", i->Ain.Call.target); 1116 break; 1117 case Ain_Goto: 1118 if (i->Ain.Goto.cond != Acc_ALWAYS) { 1119 vex_printf("if (%%rflags.%s) { ", 1120 showAMD64CondCode(i->Ain.Goto.cond)); 1121 } 1122 if (i->Ain.Goto.jk != Ijk_Boring 1123 && i->Ain.Goto.jk != Ijk_Call 1124 && i->Ain.Goto.jk != Ijk_Ret) { 1125 vex_printf("movl $"); 1126 ppIRJumpKind(i->Ain.Goto.jk); 1127 vex_printf(",%%ebp ; "); 1128 } 1129 vex_printf("movq "); 1130 ppAMD64RI(i->Ain.Goto.dst); 1131 vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx"); 1132 if (i->Ain.Goto.cond != Acc_ALWAYS) { 1133 vex_printf(" }"); 1134 } 1135 return; 1136 case Ain_CMov64: 1137 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond)); 1138 ppAMD64RM(i->Ain.CMov64.src); 1139 vex_printf(","); 1140 ppHRegAMD64(i->Ain.CMov64.dst); 1141 return; 1142 case Ain_MovxLQ: 1143 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z'); 1144 ppHRegAMD64_lo32(i->Ain.MovxLQ.src); 1145 vex_printf(","); 1146 ppHRegAMD64(i->Ain.MovxLQ.dst); 1147 return; 1148 case Ain_LoadEX: 1149 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) { 1150 vex_printf("movl "); 1151 ppAMD64AMode(i->Ain.LoadEX.src); 1152 vex_printf(","); 1153 ppHRegAMD64_lo32(i->Ain.LoadEX.dst); 1154 } else { 1155 vex_printf("mov%c%cq ", 1156 i->Ain.LoadEX.syned ? 's' : 'z', 1157 i->Ain.LoadEX.szSmall==1 1158 ? 'b' 1159 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l')); 1160 ppAMD64AMode(i->Ain.LoadEX.src); 1161 vex_printf(","); 1162 ppHRegAMD64(i->Ain.LoadEX.dst); 1163 } 1164 return; 1165 case Ain_Store: 1166 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b' 1167 : (i->Ain.Store.sz==2 ? 'w' : 'l')); 1168 ppHRegAMD64(i->Ain.Store.src); 1169 vex_printf(","); 1170 ppAMD64AMode(i->Ain.Store.dst); 1171 return; 1172 case Ain_Set64: 1173 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond)); 1174 ppHRegAMD64(i->Ain.Set64.dst); 1175 return; 1176 case Ain_Bsfr64: 1177 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r'); 1178 ppHRegAMD64(i->Ain.Bsfr64.src); 1179 vex_printf(","); 1180 ppHRegAMD64(i->Ain.Bsfr64.dst); 1181 return; 1182 case Ain_MFence: 1183 vex_printf("mfence" ); 1184 return; 1185 case Ain_ACAS: 1186 vex_printf("lock cmpxchg%c ", 1187 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w' 1188 : i->Ain.ACAS.sz==4 ? 'l' : 'q' ); 1189 vex_printf("{%%rax->%%rbx},"); 1190 ppAMD64AMode(i->Ain.ACAS.addr); 1191 return; 1192 case Ain_DACAS: 1193 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},", 1194 (Int)(2 * i->Ain.DACAS.sz)); 1195 ppAMD64AMode(i->Ain.DACAS.addr); 1196 return; 1197 case Ain_A87Free: 1198 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs ); 1199 break; 1200 case Ain_A87PushPop: 1201 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ", 1202 i->Ain.A87PushPop.szB == 4 ? 's' : 'l'); 1203 ppAMD64AMode(i->Ain.A87PushPop.addr); 1204 break; 1205 case Ain_A87FpOp: 1206 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op)); 1207 break; 1208 case Ain_A87LdCW: 1209 vex_printf("fldcw "); 1210 ppAMD64AMode(i->Ain.A87LdCW.addr); 1211 break; 1212 case Ain_A87StSW: 1213 vex_printf("fstsw "); 1214 ppAMD64AMode(i->Ain.A87StSW.addr); 1215 break; 1216 //.. case Xin_FpUnary: 1217 //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op)); 1218 //.. ppHRegAMD64(i->Xin.FpUnary.src); 1219 //.. vex_printf(","); 1220 //.. ppHRegAMD64(i->Xin.FpUnary.dst); 1221 //.. break; 1222 //.. case Xin_FpBinary: 1223 //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op)); 1224 //.. 
ppHRegAMD64(i->Xin.FpBinary.srcL); 1225 //.. vex_printf(","); 1226 //.. ppHRegAMD64(i->Xin.FpBinary.srcR); 1227 //.. vex_printf(","); 1228 //.. ppHRegAMD64(i->Xin.FpBinary.dst); 1229 //.. break; 1230 //.. case Xin_FpLdSt: 1231 //.. if (i->Xin.FpLdSt.isLoad) { 1232 //.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); 1233 //.. ppAMD64AMode(i->Xin.FpLdSt.addr); 1234 //.. vex_printf(", "); 1235 //.. ppHRegAMD64(i->Xin.FpLdSt.reg); 1236 //.. } else { 1237 //.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); 1238 //.. ppHRegAMD64(i->Xin.FpLdSt.reg); 1239 //.. vex_printf(", "); 1240 //.. ppAMD64AMode(i->Xin.FpLdSt.addr); 1241 //.. } 1242 //.. return; 1243 //.. case Xin_FpLdStI: 1244 //.. if (i->Xin.FpLdStI.isLoad) { 1245 //.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1246 //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1247 //.. ppAMD64AMode(i->Xin.FpLdStI.addr); 1248 //.. vex_printf(", "); 1249 //.. ppHRegAMD64(i->Xin.FpLdStI.reg); 1250 //.. } else { 1251 //.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1252 //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1253 //.. ppHRegAMD64(i->Xin.FpLdStI.reg); 1254 //.. vex_printf(", "); 1255 //.. ppAMD64AMode(i->Xin.FpLdStI.addr); 1256 //.. } 1257 //.. return; 1258 //.. case Xin_Fp64to32: 1259 //.. vex_printf("gdtof "); 1260 //.. ppHRegAMD64(i->Xin.Fp64to32.src); 1261 //.. vex_printf(","); 1262 //.. ppHRegAMD64(i->Xin.Fp64to32.dst); 1263 //.. return; 1264 //.. case Xin_FpCMov: 1265 //.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond)); 1266 //.. ppHRegAMD64(i->Xin.FpCMov.src); 1267 //.. vex_printf(","); 1268 //.. ppHRegAMD64(i->Xin.FpCMov.dst); 1269 //.. return; 1270 //.. case Xin_FpLdStCW: 1271 //.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw "); 1272 //.. ppAMD64AMode(i->Xin.FpLdStCW.addr); 1273 //.. return; 1274 //.. case Xin_FpStSW_AX: 1275 //.. vex_printf("fstsw %%ax"); 1276 //.. return; 1277 case Ain_LdMXCSR: 1278 vex_printf("ldmxcsr "); 1279 ppAMD64AMode(i->Ain.LdMXCSR.addr); 1280 break; 1281 case Ain_SseUComIS: 1282 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d"); 1283 ppHRegAMD64(i->Ain.SseUComIS.srcL); 1284 vex_printf(","); 1285 ppHRegAMD64(i->Ain.SseUComIS.srcR); 1286 vex_printf(" ; pushfq ; popq "); 1287 ppHRegAMD64(i->Ain.SseUComIS.dst); 1288 break; 1289 case Ain_SseSI2SF: 1290 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d"); 1291 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) 1292 (i->Ain.SseSI2SF.src); 1293 vex_printf(","); 1294 ppHRegAMD64(i->Ain.SseSI2SF.dst); 1295 break; 1296 case Ain_SseSF2SI: 1297 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d"); 1298 ppHRegAMD64(i->Ain.SseSF2SI.src); 1299 vex_printf(","); 1300 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) 1301 (i->Ain.SseSF2SI.dst); 1302 break; 1303 case Ain_SseSDSS: 1304 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd "); 1305 ppHRegAMD64(i->Ain.SseSDSS.src); 1306 vex_printf(","); 1307 ppHRegAMD64(i->Ain.SseSDSS.dst); 1308 break; 1309 //.. case Xin_SseConst: 1310 //.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); 1311 //.. ppHRegAMD64(i->Xin.SseConst.dst); 1312 //.. 
break; 1313 case Ain_SseLdSt: 1314 switch (i->Ain.SseLdSt.sz) { 1315 case 4: vex_printf("movss "); break; 1316 case 8: vex_printf("movsd "); break; 1317 case 16: vex_printf("movups "); break; 1318 default: vassert(0); 1319 } 1320 if (i->Ain.SseLdSt.isLoad) { 1321 ppAMD64AMode(i->Ain.SseLdSt.addr); 1322 vex_printf(","); 1323 ppHRegAMD64(i->Ain.SseLdSt.reg); 1324 } else { 1325 ppHRegAMD64(i->Ain.SseLdSt.reg); 1326 vex_printf(","); 1327 ppAMD64AMode(i->Ain.SseLdSt.addr); 1328 } 1329 return; 1330 case Ain_SseLdzLO: 1331 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d"); 1332 ppAMD64AMode(i->Ain.SseLdzLO.addr); 1333 vex_printf(","); 1334 ppHRegAMD64(i->Ain.SseLdzLO.reg); 1335 return; 1336 case Ain_Sse32Fx4: 1337 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op)); 1338 ppHRegAMD64(i->Ain.Sse32Fx4.src); 1339 vex_printf(","); 1340 ppHRegAMD64(i->Ain.Sse32Fx4.dst); 1341 return; 1342 case Ain_Sse32FLo: 1343 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op)); 1344 ppHRegAMD64(i->Ain.Sse32FLo.src); 1345 vex_printf(","); 1346 ppHRegAMD64(i->Ain.Sse32FLo.dst); 1347 return; 1348 case Ain_Sse64Fx2: 1349 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op)); 1350 ppHRegAMD64(i->Ain.Sse64Fx2.src); 1351 vex_printf(","); 1352 ppHRegAMD64(i->Ain.Sse64Fx2.dst); 1353 return; 1354 case Ain_Sse64FLo: 1355 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op)); 1356 ppHRegAMD64(i->Ain.Sse64FLo.src); 1357 vex_printf(","); 1358 ppHRegAMD64(i->Ain.Sse64FLo.dst); 1359 return; 1360 case Ain_SseReRg: 1361 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op)); 1362 ppHRegAMD64(i->Ain.SseReRg.src); 1363 vex_printf(","); 1364 ppHRegAMD64(i->Ain.SseReRg.dst); 1365 return; 1366 case Ain_SseCMov: 1367 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond)); 1368 ppHRegAMD64(i->Ain.SseCMov.src); 1369 vex_printf(","); 1370 ppHRegAMD64(i->Ain.SseCMov.dst); 1371 return; 1372 case Ain_SseShuf: 1373 vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order); 1374 ppHRegAMD64(i->Ain.SseShuf.src); 1375 vex_printf(","); 1376 ppHRegAMD64(i->Ain.SseShuf.dst); 1377 return; 1378 1379 default: 1380 vpanic("ppAMD64Instr"); 1381 } 1382 } 1383 1384 /* --------- Helpers for register allocation. 
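      getRegUsage / mapRegs / isMove / genSpill / genReload below are
      the per-architecture services the generic allocator relies on.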
--------- */ 1385 1386 void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) 1387 { 1388 Bool unary; 1389 vassert(mode64 == True); 1390 initHRegUsage(u); 1391 switch (i->tag) { 1392 case Ain_Imm64: 1393 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst); 1394 return; 1395 case Ain_Alu64R: 1396 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src); 1397 if (i->Ain.Alu64R.op == Aalu_MOV) { 1398 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst); 1399 return; 1400 } 1401 if (i->Ain.Alu64R.op == Aalu_CMP) { 1402 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst); 1403 return; 1404 } 1405 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst); 1406 return; 1407 case Ain_Alu64M: 1408 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src); 1409 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst); 1410 return; 1411 case Ain_Sh64: 1412 addHRegUse(u, HRmModify, i->Ain.Sh64.dst); 1413 if (i->Ain.Sh64.src == 0) 1414 addHRegUse(u, HRmRead, hregAMD64_RCX()); 1415 return; 1416 case Ain_Test64: 1417 addHRegUse(u, HRmRead, i->Ain.Test64.dst); 1418 return; 1419 case Ain_Unary64: 1420 addHRegUse(u, HRmModify, i->Ain.Unary64.dst); 1421 return; 1422 case Ain_Lea64: 1423 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am); 1424 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst); 1425 return; 1426 case Ain_MulL: 1427 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead); 1428 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1429 addHRegUse(u, HRmWrite, hregAMD64_RDX()); 1430 return; 1431 case Ain_Div: 1432 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead); 1433 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1434 addHRegUse(u, HRmModify, hregAMD64_RDX()); 1435 return; 1436 //.. case Xin_Sh3232: 1437 //.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src); 1438 //.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); 1439 //.. if (i->Xin.Sh3232.amt == 0) 1440 //.. addHRegUse(u, HRmRead, hregAMD64_ECX()); 1441 //.. return; 1442 case Ain_Push: 1443 addRegUsage_AMD64RMI(u, i->Ain.Push.src); 1444 addHRegUse(u, HRmModify, hregAMD64_RSP()); 1445 return; 1446 case Ain_Call: 1447 /* This is a bit subtle. */ 1448 /* First off, claim it trashes all the caller-saved regs 1449 which fall within the register allocator's jurisdiction. 1450 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11 1451 and all the xmm registers. 1452 */ 1453 addHRegUse(u, HRmWrite, hregAMD64_RAX()); 1454 addHRegUse(u, HRmWrite, hregAMD64_RCX()); 1455 addHRegUse(u, HRmWrite, hregAMD64_RDX()); 1456 addHRegUse(u, HRmWrite, hregAMD64_RSI()); 1457 addHRegUse(u, HRmWrite, hregAMD64_RDI()); 1458 addHRegUse(u, HRmWrite, hregAMD64_R8()); 1459 addHRegUse(u, HRmWrite, hregAMD64_R9()); 1460 addHRegUse(u, HRmWrite, hregAMD64_R10()); 1461 addHRegUse(u, HRmWrite, hregAMD64_R11()); 1462 addHRegUse(u, HRmWrite, hregAMD64_XMM0()); 1463 addHRegUse(u, HRmWrite, hregAMD64_XMM1()); 1464 addHRegUse(u, HRmWrite, hregAMD64_XMM2()); 1465 addHRegUse(u, HRmWrite, hregAMD64_XMM3()); 1466 addHRegUse(u, HRmWrite, hregAMD64_XMM4()); 1467 addHRegUse(u, HRmWrite, hregAMD64_XMM5()); 1468 addHRegUse(u, HRmWrite, hregAMD64_XMM6()); 1469 addHRegUse(u, HRmWrite, hregAMD64_XMM7()); 1470 addHRegUse(u, HRmWrite, hregAMD64_XMM8()); 1471 addHRegUse(u, HRmWrite, hregAMD64_XMM9()); 1472 addHRegUse(u, HRmWrite, hregAMD64_XMM10()); 1473 addHRegUse(u, HRmWrite, hregAMD64_XMM11()); 1474 addHRegUse(u, HRmWrite, hregAMD64_XMM12()); 1475 addHRegUse(u, HRmWrite, hregAMD64_XMM13()); 1476 addHRegUse(u, HRmWrite, hregAMD64_XMM14()); 1477 addHRegUse(u, HRmWrite, hregAMD64_XMM15()); 1478 1479 /* Now we have to state any parameter-carrying registers 1480 which might be read. 
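         (On this target the integer parameter registers are, in order,
         %rdi, %rsi, %rdx, %rcx, %r8 and %r9; the fall-through switch
         below marks the first 'regparms' of them as read.)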
This depends on the regparmness. */ 1481 switch (i->Ain.Call.regparms) { 1482 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/ 1483 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/ 1484 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/ 1485 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/ 1486 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/ 1487 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break; 1488 case 0: break; 1489 default: vpanic("getRegUsage_AMD64Instr:Call:regparms"); 1490 } 1491 /* Finally, there is the issue that the insn trashes a 1492 register because the literal target address has to be 1493 loaded into a register. Fortunately, r11 is stated in the 1494 ABI as a scratch register, and so seems a suitable victim. */ 1495 addHRegUse(u, HRmWrite, hregAMD64_R11()); 1496 /* Upshot of this is that the assembler really must use r11, 1497 and no other, as a destination temporary. */ 1498 return; 1499 case Ain_Goto: 1500 addRegUsage_AMD64RI(u, i->Ain.Goto.dst); 1501 addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */ 1502 addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */ 1503 if (i->Ain.Goto.jk != Ijk_Boring 1504 && i->Ain.Goto.jk != Ijk_Call 1505 && i->Ain.Goto.jk != Ijk_Ret) 1506 /* note, this is irrelevant since rbp is not actually 1507 available to the allocator. But still .. */ 1508 addHRegUse(u, HRmWrite, hregAMD64_RBP()); 1509 return; 1510 case Ain_CMov64: 1511 addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead); 1512 addHRegUse(u, HRmModify, i->Ain.CMov64.dst); 1513 return; 1514 case Ain_MovxLQ: 1515 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src); 1516 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst); 1517 return; 1518 case Ain_LoadEX: 1519 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src); 1520 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst); 1521 return; 1522 case Ain_Store: 1523 addHRegUse(u, HRmRead, i->Ain.Store.src); 1524 addRegUsage_AMD64AMode(u, i->Ain.Store.dst); 1525 return; 1526 case Ain_Set64: 1527 addHRegUse(u, HRmWrite, i->Ain.Set64.dst); 1528 return; 1529 case Ain_Bsfr64: 1530 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src); 1531 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst); 1532 return; 1533 case Ain_MFence: 1534 return; 1535 case Ain_ACAS: 1536 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr); 1537 addHRegUse(u, HRmRead, hregAMD64_RBX()); 1538 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1539 return; 1540 case Ain_DACAS: 1541 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr); 1542 addHRegUse(u, HRmRead, hregAMD64_RCX()); 1543 addHRegUse(u, HRmRead, hregAMD64_RBX()); 1544 addHRegUse(u, HRmModify, hregAMD64_RDX()); 1545 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1546 return; 1547 case Ain_A87Free: 1548 return; 1549 case Ain_A87PushPop: 1550 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr); 1551 return; 1552 case Ain_A87FpOp: 1553 return; 1554 case Ain_A87LdCW: 1555 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr); 1556 return; 1557 case Ain_A87StSW: 1558 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr); 1559 return; 1560 //.. case Xin_FpUnary: 1561 //.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src); 1562 //.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); 1563 //.. return; 1564 //.. case Xin_FpBinary: 1565 //.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); 1566 //.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); 1567 //.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); 1568 //.. return; 1569 //.. case Xin_FpLdSt: 1570 //.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr); 1571 //.. 
addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, 1572 //.. i->Xin.FpLdSt.reg); 1573 //.. return; 1574 //.. case Xin_FpLdStI: 1575 //.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr); 1576 //.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, 1577 //.. i->Xin.FpLdStI.reg); 1578 //.. return; 1579 //.. case Xin_Fp64to32: 1580 //.. addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); 1581 //.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); 1582 //.. return; 1583 //.. case Xin_FpCMov: 1584 //.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src); 1585 //.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); 1586 //.. return; 1587 case Ain_LdMXCSR: 1588 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr); 1589 return; 1590 //.. case Xin_FpStSW_AX: 1591 //.. addHRegUse(u, HRmWrite, hregAMD64_EAX()); 1592 //.. return; 1593 case Ain_SseUComIS: 1594 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL); 1595 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR); 1596 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst); 1597 return; 1598 case Ain_SseSI2SF: 1599 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src); 1600 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst); 1601 return; 1602 case Ain_SseSF2SI: 1603 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src); 1604 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst); 1605 return; 1606 case Ain_SseSDSS: 1607 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src); 1608 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst); 1609 return; 1610 case Ain_SseLdSt: 1611 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr); 1612 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead, 1613 i->Ain.SseLdSt.reg); 1614 return; 1615 case Ain_SseLdzLO: 1616 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr); 1617 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg); 1618 return; 1619 //.. case Xin_SseConst: 1620 //.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); 1621 //.. return; 1622 case Ain_Sse32Fx4: 1623 vassert(i->Ain.Sse32Fx4.op != Asse_MOV); 1624 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF 1625 || i->Ain.Sse32Fx4.op == Asse_RSQRTF 1626 || i->Ain.Sse32Fx4.op == Asse_SQRTF ); 1627 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src); 1628 addHRegUse(u, unary ? HRmWrite : HRmModify, 1629 i->Ain.Sse32Fx4.dst); 1630 return; 1631 case Ain_Sse32FLo: 1632 vassert(i->Ain.Sse32FLo.op != Asse_MOV); 1633 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF 1634 || i->Ain.Sse32FLo.op == Asse_RSQRTF 1635 || i->Ain.Sse32FLo.op == Asse_SQRTF ); 1636 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src); 1637 addHRegUse(u, unary ? HRmWrite : HRmModify, 1638 i->Ain.Sse32FLo.dst); 1639 return; 1640 case Ain_Sse64Fx2: 1641 vassert(i->Ain.Sse64Fx2.op != Asse_MOV); 1642 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF 1643 || i->Ain.Sse64Fx2.op == Asse_RSQRTF 1644 || i->Ain.Sse64Fx2.op == Asse_SQRTF ); 1645 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src); 1646 addHRegUse(u, unary ? HRmWrite : HRmModify, 1647 i->Ain.Sse64Fx2.dst); 1648 return; 1649 case Ain_Sse64FLo: 1650 vassert(i->Ain.Sse64FLo.op != Asse_MOV); 1651 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF 1652 || i->Ain.Sse64FLo.op == Asse_RSQRTF 1653 || i->Ain.Sse64FLo.op == Asse_SQRTF ); 1654 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src); 1655 addHRegUse(u, unary ? 
HRmWrite : HRmModify, 1656 i->Ain.Sse64FLo.dst); 1657 return; 1658 case Ain_SseReRg: 1659 if ( (i->Ain.SseReRg.op == Asse_XOR 1660 || i->Ain.SseReRg.op == Asse_CMPEQ32) 1661 && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) { 1662 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd 1663 r,r' as a write of a value to r, and independent of any 1664 previous value in r */ 1665 /* (as opposed to a rite of passage :-) */ 1666 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst); 1667 } else { 1668 addHRegUse(u, HRmRead, i->Ain.SseReRg.src); 1669 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV 1670 ? HRmWrite : HRmModify, 1671 i->Ain.SseReRg.dst); 1672 } 1673 return; 1674 case Ain_SseCMov: 1675 addHRegUse(u, HRmRead, i->Ain.SseCMov.src); 1676 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst); 1677 return; 1678 case Ain_SseShuf: 1679 addHRegUse(u, HRmRead, i->Ain.SseShuf.src); 1680 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst); 1681 return; 1682 default: 1683 ppAMD64Instr(i, mode64); 1684 vpanic("getRegUsage_AMD64Instr"); 1685 } 1686 } 1687 1688 /* local helper */ 1689 static inline void mapReg(HRegRemap* m, HReg* r) 1690 { 1691 *r = lookupHRegRemap(m, *r); 1692 } 1693 1694 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) 1695 { 1696 vassert(mode64 == True); 1697 switch (i->tag) { 1698 case Ain_Imm64: 1699 mapReg(m, &i->Ain.Imm64.dst); 1700 return; 1701 case Ain_Alu64R: 1702 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src); 1703 mapReg(m, &i->Ain.Alu64R.dst); 1704 return; 1705 case Ain_Alu64M: 1706 mapRegs_AMD64RI(m, i->Ain.Alu64M.src); 1707 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst); 1708 return; 1709 case Ain_Sh64: 1710 mapReg(m, &i->Ain.Sh64.dst); 1711 return; 1712 case Ain_Test64: 1713 mapReg(m, &i->Ain.Test64.dst); 1714 return; 1715 case Ain_Unary64: 1716 mapReg(m, &i->Ain.Unary64.dst); 1717 return; 1718 case Ain_Lea64: 1719 mapRegs_AMD64AMode(m, i->Ain.Lea64.am); 1720 mapReg(m, &i->Ain.Lea64.dst); 1721 return; 1722 case Ain_MulL: 1723 mapRegs_AMD64RM(m, i->Ain.MulL.src); 1724 return; 1725 case Ain_Div: 1726 mapRegs_AMD64RM(m, i->Ain.Div.src); 1727 return; 1728 //.. case Xin_Sh3232: 1729 //.. mapReg(m, &i->Xin.Sh3232.src); 1730 //.. mapReg(m, &i->Xin.Sh3232.dst); 1731 //.. 
return; 1732 case Ain_Push: 1733 mapRegs_AMD64RMI(m, i->Ain.Push.src); 1734 return; 1735 case Ain_Call: 1736 return; 1737 case Ain_Goto: 1738 mapRegs_AMD64RI(m, i->Ain.Goto.dst); 1739 return; 1740 case Ain_CMov64: 1741 mapRegs_AMD64RM(m, i->Ain.CMov64.src); 1742 mapReg(m, &i->Ain.CMov64.dst); 1743 return; 1744 case Ain_MovxLQ: 1745 mapReg(m, &i->Ain.MovxLQ.src); 1746 mapReg(m, &i->Ain.MovxLQ.dst); 1747 return; 1748 case Ain_LoadEX: 1749 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src); 1750 mapReg(m, &i->Ain.LoadEX.dst); 1751 return; 1752 case Ain_Store: 1753 mapReg(m, &i->Ain.Store.src); 1754 mapRegs_AMD64AMode(m, i->Ain.Store.dst); 1755 return; 1756 case Ain_Set64: 1757 mapReg(m, &i->Ain.Set64.dst); 1758 return; 1759 case Ain_Bsfr64: 1760 mapReg(m, &i->Ain.Bsfr64.src); 1761 mapReg(m, &i->Ain.Bsfr64.dst); 1762 return; 1763 case Ain_MFence: 1764 return; 1765 case Ain_ACAS: 1766 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr); 1767 return; 1768 case Ain_DACAS: 1769 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr); 1770 return; 1771 case Ain_A87Free: 1772 return; 1773 case Ain_A87PushPop: 1774 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr); 1775 return; 1776 case Ain_A87FpOp: 1777 return; 1778 case Ain_A87LdCW: 1779 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr); 1780 return; 1781 case Ain_A87StSW: 1782 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr); 1783 return; 1784 //.. case Xin_FpUnary: 1785 //.. mapReg(m, &i->Xin.FpUnary.src); 1786 //.. mapReg(m, &i->Xin.FpUnary.dst); 1787 //.. return; 1788 //.. case Xin_FpBinary: 1789 //.. mapReg(m, &i->Xin.FpBinary.srcL); 1790 //.. mapReg(m, &i->Xin.FpBinary.srcR); 1791 //.. mapReg(m, &i->Xin.FpBinary.dst); 1792 //.. return; 1793 //.. case Xin_FpLdSt: 1794 //.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr); 1795 //.. mapReg(m, &i->Xin.FpLdSt.reg); 1796 //.. return; 1797 //.. case Xin_FpLdStI: 1798 //.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr); 1799 //.. mapReg(m, &i->Xin.FpLdStI.reg); 1800 //.. return; 1801 //.. case Xin_Fp64to32: 1802 //.. mapReg(m, &i->Xin.Fp64to32.src); 1803 //.. mapReg(m, &i->Xin.Fp64to32.dst); 1804 //.. return; 1805 //.. case Xin_FpCMov: 1806 //.. mapReg(m, &i->Xin.FpCMov.src); 1807 //.. mapReg(m, &i->Xin.FpCMov.dst); 1808 //.. return; 1809 case Ain_LdMXCSR: 1810 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr); 1811 return; 1812 //.. case Xin_FpStSW_AX: 1813 //.. return; 1814 case Ain_SseUComIS: 1815 mapReg(m, &i->Ain.SseUComIS.srcL); 1816 mapReg(m, &i->Ain.SseUComIS.srcR); 1817 mapReg(m, &i->Ain.SseUComIS.dst); 1818 return; 1819 case Ain_SseSI2SF: 1820 mapReg(m, &i->Ain.SseSI2SF.src); 1821 mapReg(m, &i->Ain.SseSI2SF.dst); 1822 return; 1823 case Ain_SseSF2SI: 1824 mapReg(m, &i->Ain.SseSF2SI.src); 1825 mapReg(m, &i->Ain.SseSF2SI.dst); 1826 return; 1827 case Ain_SseSDSS: 1828 mapReg(m, &i->Ain.SseSDSS.src); 1829 mapReg(m, &i->Ain.SseSDSS.dst); 1830 return; 1831 //.. case Xin_SseConst: 1832 //.. mapReg(m, &i->Xin.SseConst.dst); 1833 //.. 
return; 1834 case Ain_SseLdSt: 1835 mapReg(m, &i->Ain.SseLdSt.reg); 1836 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr); 1837 break; 1838 case Ain_SseLdzLO: 1839 mapReg(m, &i->Ain.SseLdzLO.reg); 1840 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr); 1841 break; 1842 case Ain_Sse32Fx4: 1843 mapReg(m, &i->Ain.Sse32Fx4.src); 1844 mapReg(m, &i->Ain.Sse32Fx4.dst); 1845 return; 1846 case Ain_Sse32FLo: 1847 mapReg(m, &i->Ain.Sse32FLo.src); 1848 mapReg(m, &i->Ain.Sse32FLo.dst); 1849 return; 1850 case Ain_Sse64Fx2: 1851 mapReg(m, &i->Ain.Sse64Fx2.src); 1852 mapReg(m, &i->Ain.Sse64Fx2.dst); 1853 return; 1854 case Ain_Sse64FLo: 1855 mapReg(m, &i->Ain.Sse64FLo.src); 1856 mapReg(m, &i->Ain.Sse64FLo.dst); 1857 return; 1858 case Ain_SseReRg: 1859 mapReg(m, &i->Ain.SseReRg.src); 1860 mapReg(m, &i->Ain.SseReRg.dst); 1861 return; 1862 case Ain_SseCMov: 1863 mapReg(m, &i->Ain.SseCMov.src); 1864 mapReg(m, &i->Ain.SseCMov.dst); 1865 return; 1866 case Ain_SseShuf: 1867 mapReg(m, &i->Ain.SseShuf.src); 1868 mapReg(m, &i->Ain.SseShuf.dst); 1869 return; 1870 default: 1871 ppAMD64Instr(i, mode64); 1872 vpanic("mapRegs_AMD64Instr"); 1873 } 1874 } 1875 1876 /* Figure out if i represents a reg-reg move, and if so assign the 1877 source and destination to *src and *dst. If in doubt say No. Used 1878 by the register allocator to do move coalescing. 1879 */ 1880 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst ) 1881 { 1882 /* Moves between integer regs */ 1883 if (i->tag == Ain_Alu64R) { 1884 if (i->Ain.Alu64R.op != Aalu_MOV) 1885 return False; 1886 if (i->Ain.Alu64R.src->tag != Armi_Reg) 1887 return False; 1888 *src = i->Ain.Alu64R.src->Armi.Reg.reg; 1889 *dst = i->Ain.Alu64R.dst; 1890 return True; 1891 } 1892 /* Moves between vector regs */ 1893 if (i->tag == Ain_SseReRg) { 1894 if (i->Ain.SseReRg.op != Asse_MOV) 1895 return False; 1896 *src = i->Ain.SseReRg.src; 1897 *dst = i->Ain.SseReRg.dst; 1898 return True; 1899 } 1900 return False; 1901 } 1902 1903 1904 /* Generate amd64 spill/reload instructions under the direction of the 1905 register allocator. Note it's critical these don't write the 1906 condition codes. */ 1907 1908 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1909 HReg rreg, Int offsetB, Bool mode64 ) 1910 { 1911 AMD64AMode* am; 1912 vassert(offsetB >= 0); 1913 vassert(!hregIsVirtual(rreg)); 1914 vassert(mode64 == True); 1915 *i1 = *i2 = NULL; 1916 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1917 switch (hregClass(rreg)) { 1918 case HRcInt64: 1919 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am ); 1920 return; 1921 case HRcVec128: 1922 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am ); 1923 return; 1924 default: 1925 ppHRegClass(hregClass(rreg)); 1926 vpanic("genSpill_AMD64: unimplemented regclass"); 1927 } 1928 } 1929 1930 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1931 HReg rreg, Int offsetB, Bool mode64 ) 1932 { 1933 AMD64AMode* am; 1934 vassert(offsetB >= 0); 1935 vassert(!hregIsVirtual(rreg)); 1936 vassert(mode64 == True); 1937 *i1 = *i2 = NULL; 1938 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1939 switch (hregClass(rreg)) { 1940 case HRcInt64: 1941 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg ); 1942 return; 1943 case HRcVec128: 1944 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am ); 1945 return; 1946 default: 1947 ppHRegClass(hregClass(rreg)); 1948 vpanic("genReload_AMD64: unimplemented regclass"); 1949 } 1950 } 1951 1952 1953 /* --------- The amd64 assembler (bleh.) 
--------- */

/* Produce the low three bits of an integer register number. */
static UChar iregBits210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n & 7);
}

/* Produce bit 3 of an integer register number. */
static UChar iregBit3 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar((n >> 3) & 1);
}

/* Produce a complete 4-bit integer register number. */
static UChar iregBits3210 ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}

/* Given an xmm (128bit V-class) register, produce the equivalently
   numbered register in the 64-bit I-class.  This is a bit of fakery
   which allows functions that work on integer register numbers to
   also be used when assembling SSE instructions. */
static HReg vreg2ireg ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return mkHReg(n, HRcInt64, False);
}

static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

static UChar* emit64 ( UChar* p, ULong w64 )
{
   p = emit32(p, toUInt( w64        & 0xFFFFFFFF));
   p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}

/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}

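/* Worked example (purely illustrative): the helpers above are
   combined by the rexAMode_* and doAMode_* routines which follow.
   For the register-register form "addq %rsi, %rdi" the emitter
   produces a REX prefix of 0x48 (W=1, R=X=B=0), the 0x01 opcode, and
      mkModRegRM(3, iregBits210(%rsi)==6, iregBits210(%rdi)==7) == 0xF7,
   i.e. the byte sequence 48 01 F7.  An amode such as 8(%rbx,%rcx,4)
   contributes the scale-index-base byte
      mkSIB(2, 1, 3) == 0x8B
   followed by the 8-bit displacement. */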
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg is not any of: RSP RBP R12 R13
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg is neither of: RSP R12
                       =  01 greg ereg, d8

     greg, d32(ereg)   |  ereg is neither of: RSP R12
                       =  10 greg ereg, d32

     greg,  d8(ereg)   |  ereg is either: RSP R12
                       =  01 greg 100, 0x24, d8
                       (lowest bit of rex distinguishes R12/RSP)

     greg, d32(ereg)   |  ereg is either: RSP R12
                       =  10 greg 100, 0x24, d32
                       (lowest bit of rex distinguishes R12/RSP)

     -----------------------------------------------

     greg, d8(base,index,scale)
               |  index != RSP
               =  01 greg 100, scale index base, d8

     greg, d32(base,index,scale)
               |  index != RSP
               =  10 greg 100, scale index base, d32
*/
static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      if (am->Aam.IR.imm == 0
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_RBP()
          && am->Aam.IR.reg != hregAMD64_R12()
          && am->Aam.IR.reg != hregAMD64_R13()
         ) {
         *p++ = mkModRegRM(0, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         return p;
      }
      if (fits8bits(am->Aam.IR.imm)
          && am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(1, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (am->Aam.IR.reg != hregAMD64_RSP()
          && am->Aam.IR.reg != hregAMD64_R12()
         ) {
         *p++ = mkModRegRM(2, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      if ((am->Aam.IR.reg == hregAMD64_RSP()
           || am->Aam.IR.reg == hregAMD64_R12())
          && fits8bits(am->Aam.IR.imm)) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      if (/* (am->Aam.IR.reg == hregAMD64_RSP()
             || wait for test case for RSP case */
          am->Aam.IR.reg == hregAMD64_R12()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = 0x24;
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Aam_IRRS) {
      if (fits8bits(am->Aam.IRRS.imm)
          && am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                                          am->Aam.IRRS.base);
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Aam.IRRS.index != hregAMD64_RSP()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
                                          am->Aam.IRRS.base);
         p = emit32(p, am->Aam.IRRS.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}


/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
   return p;
}


/* Clear the W bit on a REX byte, thereby changing the operand size
   back to whatever that instruction's default operand size is.
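   For example, rexAMode_R below yields 0x48 for a pair of low-numbered
   registers; clearWBit(0x48) gives 0x40, a REX prefix with W clear, so
   the instruction reverts to its default 32-bit operand size while the
   R/X/B bits remain available for naming the extended registers.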
*/ 2167 static inline UChar clearWBit ( UChar rex ) 2168 { 2169 return toUChar(rex & ~(1<<3)); 2170 } 2171 2172 2173 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */ 2174 static UChar rexAMode_M ( HReg greg, AMD64AMode* am ) 2175 { 2176 if (am->tag == Aam_IR) { 2177 UChar W = 1; /* we want 64-bit mode */ 2178 UChar R = iregBit3(greg); 2179 UChar X = 0; /* not relevant */ 2180 UChar B = iregBit3(am->Aam.IR.reg); 2181 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2182 } 2183 if (am->tag == Aam_IRRS) { 2184 UChar W = 1; /* we want 64-bit mode */ 2185 UChar R = iregBit3(greg); 2186 UChar X = iregBit3(am->Aam.IRRS.index); 2187 UChar B = iregBit3(am->Aam.IRRS.base); 2188 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2189 } 2190 vassert(0); 2191 return 0; /*NOTREACHED*/ 2192 } 2193 2194 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */ 2195 static UChar rexAMode_R ( HReg greg, HReg ereg ) 2196 { 2197 UChar W = 1; /* we want 64-bit mode */ 2198 UChar R = iregBit3(greg); 2199 UChar X = 0; /* not relevant */ 2200 UChar B = iregBit3(ereg); 2201 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2202 } 2203 2204 2205 /* Emit ffree %st(N) */ 2206 static UChar* do_ffree_st ( UChar* p, Int n ) 2207 { 2208 vassert(n >= 0 && n <= 7); 2209 *p++ = 0xDD; 2210 *p++ = toUChar(0xC0 + n); 2211 return p; 2212 } 2213 2214 //.. /* Emit fstp %st(i), 1 <= i <= 7 */ 2215 //.. static UChar* do_fstp_st ( UChar* p, Int i ) 2216 //.. { 2217 //.. vassert(1 <= i && i <= 7); 2218 //.. *p++ = 0xDD; 2219 //.. *p++ = 0xD8+i; 2220 //.. return p; 2221 //.. } 2222 //.. 2223 //.. /* Emit fld %st(i), 0 <= i <= 6 */ 2224 //.. static UChar* do_fld_st ( UChar* p, Int i ) 2225 //.. { 2226 //.. vassert(0 <= i && i <= 6); 2227 //.. *p++ = 0xD9; 2228 //.. *p++ = 0xC0+i; 2229 //.. return p; 2230 //.. } 2231 //.. 2232 //.. /* Emit f<op> %st(0) */ 2233 //.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op ) 2234 //.. { 2235 //.. switch (op) { 2236 //.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; 2237 //.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; 2238 //.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 2239 //.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 2240 //.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 2241 //.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 2242 //.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 2243 //.. case Xfp_MOV: break; 2244 //.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ 2245 //.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */ 2246 //.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ 2247 //.. break; 2248 //.. default: vpanic("do_fop1_st: unknown op"); 2249 //.. } 2250 //.. return p; 2251 //.. } 2252 //.. 2253 //.. /* Emit f<op> %st(i), 1 <= i <= 5 */ 2254 //.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i ) 2255 //.. { 2256 //.. # define fake(_n) mkHReg((_n), HRcInt32, False) 2257 //.. Int subopc; 2258 //.. switch (op) { 2259 //.. case Xfp_ADD: subopc = 0; break; 2260 //.. case Xfp_SUB: subopc = 4; break; 2261 //.. case Xfp_MUL: subopc = 1; break; 2262 //.. case Xfp_DIV: subopc = 6; break; 2263 //.. default: vpanic("do_fop2_st: unknown op"); 2264 //.. } 2265 //.. *p++ = 0xD8; 2266 //.. p = doAMode_R(p, fake(subopc), fake(i)); 2267 //.. return p; 2268 //.. # undef fake 2269 //.. } 2270 //.. 2271 //.. /* Push a 32-bit word on the stack. The word depends on tags[3:0]; 2272 //.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[]. 2273 //.. */ 2274 //.. 
static UChar* push_word_from_tags ( UChar* p, UShort tags ) 2275 //.. { 2276 //.. UInt w; 2277 //.. vassert(0 == (tags & ~0xF)); 2278 //.. if (tags == 0) { 2279 //.. /* pushl $0x00000000 */ 2280 //.. *p++ = 0x6A; 2281 //.. *p++ = 0x00; 2282 //.. } 2283 //.. else 2284 //.. /* pushl $0xFFFFFFFF */ 2285 //.. if (tags == 0xF) { 2286 //.. *p++ = 0x6A; 2287 //.. *p++ = 0xFF; 2288 //.. } else { 2289 //.. vassert(0); /* awaiting test case */ 2290 //.. w = 0; 2291 //.. if (tags & 1) w |= 0x000000FF; 2292 //.. if (tags & 2) w |= 0x0000FF00; 2293 //.. if (tags & 4) w |= 0x00FF0000; 2294 //.. if (tags & 8) w |= 0xFF000000; 2295 //.. *p++ = 0x68; 2296 //.. p = emit32(p, w); 2297 //.. } 2298 //.. return p; 2299 //.. } 2300 2301 /* Emit an instruction into buf and return the number of bytes used. 2302 Note that buf is not the insn's final place, and therefore it is 2303 imperative to emit position-independent code. */ 2304 2305 Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, 2306 Bool mode64, void* dispatch ) 2307 { 2308 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2309 UInt xtra; 2310 UInt reg; 2311 UChar rex; 2312 UChar* p = &buf[0]; 2313 UChar* ptmp; 2314 Int j; 2315 vassert(nbuf >= 32); 2316 vassert(mode64 == True); 2317 2318 /* Wrap an integer as a int register, for use assembling 2319 GrpN insns, in which the greg field is used as a sub-opcode 2320 and does not really contain a register. */ 2321 # define fake(_n) mkHReg((_n), HRcInt64, False) 2322 2323 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */ 2324 2325 switch (i->tag) { 2326 2327 case Ain_Imm64: 2328 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst))); 2329 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst)); 2330 p = emit64(p, i->Ain.Imm64.imm64); 2331 goto done; 2332 2333 case Ain_Alu64R: 2334 /* Deal specially with MOV */ 2335 if (i->Ain.Alu64R.op == Aalu_MOV) { 2336 switch (i->Ain.Alu64R.src->tag) { 2337 case Armi_Imm: 2338 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFF)) { 2339 /* Actually we could use this form for constants in 2340 the range 0 through 0x7FFFFFFF inclusive, but 2341 limit it to a small range for verifiability 2342 purposes. */ 2343 /* Generate "movl $imm32, 32-bit-register" and let 2344 the default zero-extend rule cause the upper half 2345 of the dst to be zeroed out too. This saves 1 2346 and sometimes 2 bytes compared to the more 2347 obvious encoding in the 'else' branch. 
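               For example, "movl $imm32, %ecx" is B9 <imm32> (5
               bytes), whereas "movq $imm32, %rcx" is 48 C7 C1 <imm32>
               (7 bytes); when the destination is one of %r8 .. %r15 a
               REX byte is still required, so only one byte is saved.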
*/ 2348 if (1 & iregBit3(i->Ain.Alu64R.dst)) 2349 *p++ = 0x41; 2350 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst); 2351 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2352 } else { 2353 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst))); 2354 *p++ = 0xC7; 2355 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst)); 2356 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2357 } 2358 goto done; 2359 case Armi_Reg: 2360 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2361 i->Ain.Alu64R.dst ); 2362 *p++ = 0x89; 2363 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2364 i->Ain.Alu64R.dst); 2365 goto done; 2366 case Armi_Mem: 2367 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2368 i->Ain.Alu64R.src->Armi.Mem.am); 2369 *p++ = 0x8B; 2370 p = doAMode_M(p, i->Ain.Alu64R.dst, 2371 i->Ain.Alu64R.src->Armi.Mem.am); 2372 goto done; 2373 default: 2374 goto bad; 2375 } 2376 } 2377 /* MUL */ 2378 if (i->Ain.Alu64R.op == Aalu_MUL) { 2379 switch (i->Ain.Alu64R.src->tag) { 2380 case Armi_Reg: 2381 *p++ = rexAMode_R( i->Ain.Alu64R.dst, 2382 i->Ain.Alu64R.src->Armi.Reg.reg); 2383 *p++ = 0x0F; 2384 *p++ = 0xAF; 2385 p = doAMode_R(p, i->Ain.Alu64R.dst, 2386 i->Ain.Alu64R.src->Armi.Reg.reg); 2387 goto done; 2388 case Armi_Mem: 2389 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2390 i->Ain.Alu64R.src->Armi.Mem.am); 2391 *p++ = 0x0F; 2392 *p++ = 0xAF; 2393 p = doAMode_M(p, i->Ain.Alu64R.dst, 2394 i->Ain.Alu64R.src->Armi.Mem.am); 2395 goto done; 2396 case Armi_Imm: 2397 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2398 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2399 *p++ = 0x6B; 2400 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2401 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2402 } else { 2403 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2404 *p++ = 0x69; 2405 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2406 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2407 } 2408 goto done; 2409 default: 2410 goto bad; 2411 } 2412 } 2413 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2414 opc = opc_rr = subopc_imm = opc_imma = 0; 2415 switch (i->Ain.Alu64R.op) { 2416 case Aalu_ADC: opc = 0x13; opc_rr = 0x11; 2417 subopc_imm = 2; opc_imma = 0x15; break; 2418 case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 2419 subopc_imm = 0; opc_imma = 0x05; break; 2420 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 2421 subopc_imm = 5; opc_imma = 0x2D; break; 2422 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19; 2423 subopc_imm = 3; opc_imma = 0x1D; break; 2424 case Aalu_AND: opc = 0x23; opc_rr = 0x21; 2425 subopc_imm = 4; opc_imma = 0x25; break; 2426 case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 2427 subopc_imm = 6; opc_imma = 0x35; break; 2428 case Aalu_OR: opc = 0x0B; opc_rr = 0x09; 2429 subopc_imm = 1; opc_imma = 0x0D; break; 2430 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 2431 subopc_imm = 7; opc_imma = 0x3D; break; 2432 default: goto bad; 2433 } 2434 switch (i->Ain.Alu64R.src->tag) { 2435 case Armi_Imm: 2436 if (i->Ain.Alu64R.dst == hregAMD64_RAX() 2437 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2438 goto bad; /* FIXME: awaiting test case */ 2439 *p++ = toUChar(opc_imma); 2440 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2441 } else 2442 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2443 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst ); 2444 *p++ = 0x83; 2445 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); 2446 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2447 } else { 2448 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst); 2449 *p++ = 0x81; 2450 p = doAMode_R(p, 
fake(subopc_imm), i->Ain.Alu64R.dst); 2451 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2452 } 2453 goto done; 2454 case Armi_Reg: 2455 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2456 i->Ain.Alu64R.dst); 2457 *p++ = toUChar(opc_rr); 2458 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2459 i->Ain.Alu64R.dst); 2460 goto done; 2461 case Armi_Mem: 2462 *p++ = rexAMode_M( i->Ain.Alu64R.dst, 2463 i->Ain.Alu64R.src->Armi.Mem.am); 2464 *p++ = toUChar(opc); 2465 p = doAMode_M(p, i->Ain.Alu64R.dst, 2466 i->Ain.Alu64R.src->Armi.Mem.am); 2467 goto done; 2468 default: 2469 goto bad; 2470 } 2471 break; 2472 2473 case Ain_Alu64M: 2474 /* Deal specially with MOV */ 2475 if (i->Ain.Alu64M.op == Aalu_MOV) { 2476 switch (i->Ain.Alu64M.src->tag) { 2477 case Ari_Reg: 2478 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg, 2479 i->Ain.Alu64M.dst); 2480 *p++ = 0x89; 2481 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg, 2482 i->Ain.Alu64M.dst); 2483 goto done; 2484 case Ari_Imm: 2485 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst); 2486 *p++ = 0xC7; 2487 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst); 2488 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32); 2489 goto done; 2490 default: 2491 goto bad; 2492 } 2493 } 2494 //.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not 2495 //.. allowed here. */ 2496 //.. opc = subopc_imm = opc_imma = 0; 2497 //.. switch (i->Xin.Alu32M.op) { 2498 //.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; 2499 //.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; 2500 //.. default: goto bad; 2501 //.. } 2502 //.. switch (i->Xin.Alu32M.src->tag) { 2503 //.. case Xri_Reg: 2504 //.. *p++ = opc; 2505 //.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2506 //.. i->Xin.Alu32M.dst); 2507 //.. goto done; 2508 //.. case Xri_Imm: 2509 //.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { 2510 //.. *p++ = 0x83; 2511 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2512 //.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32; 2513 //.. goto done; 2514 //.. } else { 2515 //.. *p++ = 0x81; 2516 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2517 //.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2518 //.. goto done; 2519 //.. } 2520 //.. default: 2521 //.. goto bad; 2522 //.. 
} 2523 break; 2524 2525 case Ain_Sh64: 2526 opc_cl = opc_imm = subopc = 0; 2527 switch (i->Ain.Sh64.op) { 2528 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2529 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2530 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2531 default: goto bad; 2532 } 2533 if (i->Ain.Sh64.src == 0) { 2534 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2535 *p++ = toUChar(opc_cl); 2536 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2537 goto done; 2538 } else { 2539 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2540 *p++ = toUChar(opc_imm); 2541 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2542 *p++ = (UChar)(i->Ain.Sh64.src); 2543 goto done; 2544 } 2545 break; 2546 2547 case Ain_Test64: 2548 /* testq sign-extend($imm32), %reg */ 2549 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst); 2550 *p++ = 0xF7; 2551 p = doAMode_R(p, fake(0), i->Ain.Test64.dst); 2552 p = emit32(p, i->Ain.Test64.imm32); 2553 goto done; 2554 2555 case Ain_Unary64: 2556 if (i->Ain.Unary64.op == Aun_NOT) { 2557 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2558 *p++ = 0xF7; 2559 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst); 2560 goto done; 2561 } 2562 if (i->Ain.Unary64.op == Aun_NEG) { 2563 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2564 *p++ = 0xF7; 2565 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst); 2566 goto done; 2567 } 2568 break; 2569 2570 case Ain_Lea64: 2571 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am); 2572 *p++ = 0x8D; 2573 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am); 2574 goto done; 2575 2576 case Ain_MulL: 2577 subopc = i->Ain.MulL.syned ? 5 : 4; 2578 switch (i->Ain.MulL.src->tag) { 2579 case Arm_Mem: 2580 *p++ = rexAMode_M( fake(0), 2581 i->Ain.MulL.src->Arm.Mem.am); 2582 *p++ = 0xF7; 2583 p = doAMode_M(p, fake(subopc), 2584 i->Ain.MulL.src->Arm.Mem.am); 2585 goto done; 2586 case Arm_Reg: 2587 *p++ = rexAMode_R(fake(0), 2588 i->Ain.MulL.src->Arm.Reg.reg); 2589 *p++ = 0xF7; 2590 p = doAMode_R(p, fake(subopc), 2591 i->Ain.MulL.src->Arm.Reg.reg); 2592 goto done; 2593 default: 2594 goto bad; 2595 } 2596 break; 2597 2598 case Ain_Div: 2599 subopc = i->Ain.Div.syned ? 7 : 6; 2600 if (i->Ain.Div.sz == 4) { 2601 switch (i->Ain.Div.src->tag) { 2602 case Arm_Mem: 2603 goto bad; 2604 /*FIXME*/ 2605 *p++ = 0xF7; 2606 p = doAMode_M(p, fake(subopc), 2607 i->Ain.Div.src->Arm.Mem.am); 2608 goto done; 2609 case Arm_Reg: 2610 *p++ = clearWBit( 2611 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg)); 2612 *p++ = 0xF7; 2613 p = doAMode_R(p, fake(subopc), 2614 i->Ain.Div.src->Arm.Reg.reg); 2615 goto done; 2616 default: 2617 goto bad; 2618 } 2619 } 2620 if (i->Ain.Div.sz == 8) { 2621 switch (i->Ain.Div.src->tag) { 2622 case Arm_Mem: 2623 *p++ = rexAMode_M( fake(0), 2624 i->Ain.Div.src->Arm.Mem.am); 2625 *p++ = 0xF7; 2626 p = doAMode_M(p, fake(subopc), 2627 i->Ain.Div.src->Arm.Mem.am); 2628 goto done; 2629 case Arm_Reg: 2630 *p++ = rexAMode_R( fake(0), 2631 i->Ain.Div.src->Arm.Reg.reg); 2632 *p++ = 0xF7; 2633 p = doAMode_R(p, fake(subopc), 2634 i->Ain.Div.src->Arm.Reg.reg); 2635 goto done; 2636 default: 2637 goto bad; 2638 } 2639 } 2640 break; 2641 2642 //.. case Xin_Sh3232: 2643 //.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); 2644 //.. if (i->Xin.Sh3232.amt == 0) { 2645 //.. /* shldl/shrdl by %cl */ 2646 //.. *p++ = 0x0F; 2647 //.. if (i->Xin.Sh3232.op == Xsh_SHL) { 2648 //.. *p++ = 0xA5; 2649 //.. } else { 2650 //.. *p++ = 0xAD; 2651 //.. } 2652 //.. 
p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); 2653 //.. goto done; 2654 //.. } 2655 //.. break; 2656 2657 case Ain_Push: 2658 switch (i->Ain.Push.src->tag) { 2659 case Armi_Mem: 2660 *p++ = clearWBit( 2661 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am)); 2662 *p++ = 0xFF; 2663 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am); 2664 goto done; 2665 case Armi_Imm: 2666 *p++ = 0x68; 2667 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32); 2668 goto done; 2669 case Armi_Reg: 2670 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg))); 2671 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg)); 2672 goto done; 2673 default: 2674 goto bad; 2675 } 2676 2677 case Ain_Call: { 2678 /* As per detailed comment for Ain_Call in 2679 getRegUsage_AMD64Instr above, %r11 is used as an address 2680 temporary. */ 2681 /* jump over the following two insns if the condition does not 2682 hold */ 2683 Bool shortImm = fitsIn32Bits(i->Ain.Call.target); 2684 if (i->Ain.Call.cond != Acc_ALWAYS) { 2685 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1))); 2686 *p++ = shortImm ? 10 : 13; 2687 /* 10 or 13 bytes in the next two insns */ 2688 } 2689 if (shortImm) { 2690 /* 7 bytes: movl sign-extend(imm32), %r11 */ 2691 *p++ = 0x49; 2692 *p++ = 0xC7; 2693 *p++ = 0xC3; 2694 p = emit32(p, (UInt)i->Ain.Call.target); 2695 } else { 2696 /* 10 bytes: movabsq $target, %r11 */ 2697 *p++ = 0x49; 2698 *p++ = 0xBB; 2699 p = emit64(p, i->Ain.Call.target); 2700 } 2701 /* 3 bytes: call *%r11 */ 2702 *p++ = 0x41; 2703 *p++ = 0xFF; 2704 *p++ = 0xD3; 2705 goto done; 2706 } 2707 2708 case Ain_Goto: 2709 /* Use ptmp for backpatching conditional jumps. */ 2710 ptmp = NULL; 2711 2712 /* First off, if this is conditional, create a conditional 2713 jump over the rest of it. */ 2714 if (i->Ain.Goto.cond != Acc_ALWAYS) { 2715 /* jmp fwds if !condition */ 2716 *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1)); 2717 ptmp = p; /* fill in this bit later */ 2718 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2719 } 2720 2721 /* If a non-boring, set %rbp (the guest state pointer) 2722 appropriately. Since these numbers are all small positive 2723 integers, we can get away with "movl $N, %ebp" rather than 2724 the longer "movq $N, %rbp". 
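         (For example, "movl $N, %ebp" is just BD <imm32>, 5 bytes,
         whereas "movq $N, %rbp" would be 48 C7 C5 <imm32>, 7 bytes;
         the implicit zero-extension of the 32-bit write gives the
         same result for these small positive constants.)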
*/ 2725 /* movl $magic_number, %ebp */ 2726 switch (i->Ain.Goto.jk) { 2727 case Ijk_ClientReq: 2728 *p++ = 0xBD; 2729 p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break; 2730 case Ijk_Sys_syscall: 2731 *p++ = 0xBD; 2732 p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break; 2733 case Ijk_Sys_int32: 2734 *p++ = 0xBD; 2735 p = emit32(p, VEX_TRC_JMP_SYS_INT32); break; 2736 case Ijk_Yield: 2737 *p++ = 0xBD; 2738 p = emit32(p, VEX_TRC_JMP_YIELD); break; 2739 case Ijk_YieldNoRedir: 2740 *p++ = 0xBD; 2741 p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break; 2742 case Ijk_EmWarn: 2743 *p++ = 0xBD; 2744 p = emit32(p, VEX_TRC_JMP_EMWARN); break; 2745 case Ijk_MapFail: 2746 *p++ = 0xBD; 2747 p = emit32(p, VEX_TRC_JMP_MAPFAIL); break; 2748 case Ijk_NoDecode: 2749 *p++ = 0xBD; 2750 p = emit32(p, VEX_TRC_JMP_NODECODE); break; 2751 case Ijk_TInval: 2752 *p++ = 0xBD; 2753 p = emit32(p, VEX_TRC_JMP_TINVAL); break; 2754 case Ijk_NoRedir: 2755 *p++ = 0xBD; 2756 p = emit32(p, VEX_TRC_JMP_NOREDIR); break; 2757 case Ijk_SigTRAP: 2758 *p++ = 0xBD; 2759 p = emit32(p, VEX_TRC_JMP_SIGTRAP); break; 2760 case Ijk_SigSEGV: 2761 *p++ = 0xBD; 2762 p = emit32(p, VEX_TRC_JMP_SIGSEGV); break; 2763 case Ijk_Ret: 2764 case Ijk_Call: 2765 case Ijk_Boring: 2766 break; 2767 default: 2768 ppIRJumpKind(i->Ain.Goto.jk); 2769 vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind"); 2770 } 2771 2772 /* Get the destination address into %rax */ 2773 if (i->Ain.Goto.dst->tag == Ari_Imm) { 2774 /* movl sign-ext($immediate), %rax ; ret */ 2775 *p++ = 0x48; 2776 *p++ = 0xC7; 2777 *p++ = 0xC0; 2778 p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32); 2779 } else { 2780 vassert(i->Ain.Goto.dst->tag == Ari_Reg); 2781 /* movq %reg, %rax ; ret */ 2782 if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) { 2783 *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); 2784 *p++ = 0x89; 2785 p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); 2786 } 2787 } 2788 2789 /* Get the dispatcher address into %rdx. This has to happen 2790 after the load of %rax since %rdx might be carrying the value 2791 destined for %rax immediately prior to this Ain_Goto. */ 2792 vassert(sizeof(ULong) == sizeof(void*)); 2793 vassert(dispatch != NULL); 2794 2795 if (fitsIn32Bits(Ptr_to_ULong(dispatch))) { 2796 /* movl sign-extend(imm32), %rdx */ 2797 *p++ = 0x48; 2798 *p++ = 0xC7; 2799 *p++ = 0xC2; 2800 p = emit32(p, (UInt)Ptr_to_ULong(dispatch)); 2801 } else { 2802 /* movabsq $imm64, %rdx */ 2803 *p++ = 0x48; 2804 *p++ = 0xBA; 2805 p = emit64(p, Ptr_to_ULong(dispatch)); 2806 } 2807 /* jmp *%rdx */ 2808 *p++ = 0xFF; 2809 *p++ = 0xE2; 2810 2811 /* Fix up the conditional jump, if there was one. 
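         The byte at *ptmp is the rel8 field of the Jcc emitted above,
         and is relative to the first byte after that two-byte jump,
         hence the stored value is delta-1.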
*/ 2812 if (i->Ain.Goto.cond != Acc_ALWAYS) { 2813 Int delta = p - ptmp; 2814 vassert(delta > 0 && delta < 30); 2815 *ptmp = toUChar(delta-1); 2816 } 2817 goto done; 2818 2819 case Ain_CMov64: 2820 vassert(i->Ain.CMov64.cond != Acc_ALWAYS); 2821 if (i->Ain.CMov64.src->tag == Arm_Reg) { 2822 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2823 *p++ = 0x0F; 2824 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2825 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2826 goto done; 2827 } 2828 if (i->Ain.CMov64.src->tag == Arm_Mem) { 2829 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2830 *p++ = 0x0F; 2831 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2832 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2833 goto done; 2834 } 2835 break; 2836 2837 case Ain_MovxLQ: 2838 /* No, _don't_ ask me why the sense of the args has to be 2839 different in the S vs Z case. I don't know. */ 2840 if (i->Ain.MovxLQ.syned) { 2841 /* Need REX.W = 1 here, but rexAMode_R does that for us. */ 2842 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2843 *p++ = 0x63; 2844 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2845 } else { 2846 /* Produce a 32-bit reg-reg move, since the implicit 2847 zero-extend does what we want. */ 2848 *p++ = clearWBit ( 2849 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst)); 2850 *p++ = 0x89; 2851 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst); 2852 } 2853 goto done; 2854 2855 case Ain_LoadEX: 2856 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) { 2857 /* movzbq */ 2858 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2859 *p++ = 0x0F; 2860 *p++ = 0xB6; 2861 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2862 goto done; 2863 } 2864 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) { 2865 /* movzwq */ 2866 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2867 *p++ = 0x0F; 2868 *p++ = 0xB7; 2869 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2870 goto done; 2871 } 2872 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) { 2873 /* movzlq */ 2874 /* This isn't really an existing AMD64 instruction per se. 2875 Rather, we have to do a 32-bit load. Because a 32-bit 2876 write implicitly clears the upper 32 bits of the target 2877 register, we get what we want. */ 2878 *p++ = clearWBit( 2879 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src)); 2880 *p++ = 0x8B; 2881 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2882 goto done; 2883 } 2884 break; 2885 2886 case Ain_Set64: 2887 /* Make the destination register be 1 or 0, depending on whether 2888 the relevant condition holds. Complication: the top 56 bits 2889 of the destination should be forced to zero, but doing 'xorq 2890 %r,%r' kills the flag(s) we are about to read. Sigh. So 2891 start off my moving $0 into the dest. */ 2892 reg = iregBits3210(i->Ain.Set64.dst); 2893 vassert(reg < 16); 2894 2895 /* movq $0, %dst */ 2896 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48); 2897 *p++ = 0xC7; 2898 *p++ = toUChar(0xC0 + (reg & 7)); 2899 p = emit32(p, 0); 2900 2901 /* setb lo8(%dst) */ 2902 /* note, 8-bit register rex trickyness. Be careful here. */ 2903 *p++ = toUChar(reg >= 8 ? 
0x41 : 0x40); 2904 *p++ = 0x0F; 2905 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond)); 2906 *p++ = toUChar(0xC0 + (reg & 7)); 2907 goto done; 2908 2909 case Ain_Bsfr64: 2910 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 2911 *p++ = 0x0F; 2912 if (i->Ain.Bsfr64.isFwds) { 2913 *p++ = 0xBC; 2914 } else { 2915 *p++ = 0xBD; 2916 } 2917 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 2918 goto done; 2919 2920 case Ain_MFence: 2921 /* mfence */ 2922 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; 2923 goto done; 2924 2925 case Ain_ACAS: 2926 /* lock */ 2927 *p++ = 0xF0; 2928 if (i->Ain.ACAS.sz == 2) *p++ = 0x66; 2929 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value 2930 in %rbx. The new-value register is hardwired to be %rbx 2931 since dealing with byte integer registers is too much hassle, 2932 so we force the register operand to %rbx (could equally be 2933 %rcx or %rdx). */ 2934 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr ); 2935 if (i->Ain.ACAS.sz != 8) 2936 rex = clearWBit(rex); 2937 2938 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */ 2939 *p++ = 0x0F; 2940 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 2941 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr); 2942 goto done; 2943 2944 case Ain_DACAS: 2945 /* lock */ 2946 *p++ = 0xF0; 2947 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new 2948 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so 2949 aren't encoded in the insn. */ 2950 rex = rexAMode_M( fake(1), i->Ain.ACAS.addr ); 2951 if (i->Ain.ACAS.sz != 8) 2952 rex = clearWBit(rex); 2953 *p++ = rex; 2954 *p++ = 0x0F; 2955 *p++ = 0xC7; 2956 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr); 2957 goto done; 2958 2959 case Ain_A87Free: 2960 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7); 2961 for (j = 0; j < i->Ain.A87Free.nregs; j++) { 2962 p = do_ffree_st(p, 7-j); 2963 } 2964 goto done; 2965 2966 case Ain_A87PushPop: 2967 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4); 2968 if (i->Ain.A87PushPop.isPush) { 2969 /* Load from memory into %st(0): flds/fldl amode */ 2970 *p++ = clearWBit( 2971 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) ); 2972 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; 2973 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr); 2974 } else { 2975 /* Dump %st(0) to memory: fstps/fstpl amode */ 2976 *p++ = clearWBit( 2977 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) ); 2978 *p++ = i->Ain.A87PushPop.szB == 4 ? 
0xD9 : 0xDD; 2979 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr); 2980 goto done; 2981 } 2982 goto done; 2983 2984 case Ain_A87FpOp: 2985 switch (i->Ain.A87FpOp.op) { 2986 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 2987 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 2988 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 2989 case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break; 2990 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 2991 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 2992 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; 2993 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break; 2994 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break; 2995 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; 2996 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; 2997 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; 2998 default: goto bad; 2999 } 3000 goto done; 3001 3002 case Ain_A87LdCW: 3003 *p++ = clearWBit( 3004 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) ); 3005 *p++ = 0xD9; 3006 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr); 3007 goto done; 3008 3009 case Ain_A87StSW: 3010 *p++ = clearWBit( 3011 rexAMode_M(fake(7), i->Ain.A87StSW.addr) ); 3012 *p++ = 0xDD; 3013 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr); 3014 goto done; 3015 3016 case Ain_Store: 3017 if (i->Ain.Store.sz == 2) { 3018 /* This just goes to show the crazyness of the instruction 3019 set encoding. We have to insert two prefix bytes, but be 3020 careful to avoid a conflict in what the size should be, by 3021 ensuring that REX.W = 0. */ 3022 *p++ = 0x66; /* override to 16-bits */ 3023 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3024 *p++ = 0x89; 3025 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3026 goto done; 3027 } 3028 if (i->Ain.Store.sz == 4) { 3029 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3030 *p++ = 0x89; 3031 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3032 goto done; 3033 } 3034 if (i->Ain.Store.sz == 1) { 3035 /* This is one place where it would be wrong to skip emitting 3036 a rex byte of 0x40, since the mere presence of rex changes 3037 the meaning of the byte register access. Be careful. */ 3038 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3039 *p++ = 0x88; 3040 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3041 goto done; 3042 } 3043 break; 3044 3045 //.. case Xin_FpUnary: 3046 //.. /* gop %src, %dst 3047 //.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) 3048 //.. */ 3049 //.. p = do_ffree_st7(p); 3050 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src)); 3051 //.. p = do_fop1_st(p, i->Xin.FpUnary.op); 3052 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst)); 3053 //.. goto done; 3054 //.. 3055 //.. case Xin_FpBinary: 3056 //.. if (i->Xin.FpBinary.op == Xfp_YL2X 3057 //.. || i->Xin.FpBinary.op == Xfp_YL2XP1) { 3058 //.. /* Have to do this specially. */ 3059 //.. /* ffree %st7 ; fld %st(srcL) ; 3060 //.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ 3061 //.. p = do_ffree_st7(p); 3062 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3063 //.. p = do_ffree_st7(p); 3064 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 3065 //.. *p++ = 0xD9; 3066 //.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9; 3067 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3068 //.. goto done; 3069 //.. } 3070 //.. if (i->Xin.FpBinary.op == Xfp_ATAN) { 3071 //.. /* Have to do this specially. */ 3072 //.. 
/* ffree %st7 ; fld %st(srcL) ; 3073 //.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ 3074 //.. p = do_ffree_st7(p); 3075 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3076 //.. p = do_ffree_st7(p); 3077 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 3078 //.. *p++ = 0xD9; *p++ = 0xF3; 3079 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3080 //.. goto done; 3081 //.. } 3082 //.. if (i->Xin.FpBinary.op == Xfp_PREM 3083 //.. || i->Xin.FpBinary.op == Xfp_PREM1 3084 //.. || i->Xin.FpBinary.op == Xfp_SCALE) { 3085 //.. /* Have to do this specially. */ 3086 //.. /* ffree %st7 ; fld %st(srcR) ; 3087 //.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; 3088 //.. fincstp ; ffree %st7 */ 3089 //.. p = do_ffree_st7(p); 3090 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR)); 3091 //.. p = do_ffree_st7(p); 3092 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL)); 3093 //.. *p++ = 0xD9; 3094 //.. switch (i->Xin.FpBinary.op) { 3095 //.. case Xfp_PREM: *p++ = 0xF8; break; 3096 //.. case Xfp_PREM1: *p++ = 0xF5; break; 3097 //.. case Xfp_SCALE: *p++ = 0xFD; break; 3098 //.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)"); 3099 //.. } 3100 //.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst)); 3101 //.. *p++ = 0xD9; *p++ = 0xF7; 3102 //.. p = do_ffree_st7(p); 3103 //.. goto done; 3104 //.. } 3105 //.. /* General case */ 3106 //.. /* gop %srcL, %srcR, %dst 3107 //.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) 3108 //.. */ 3109 //.. p = do_ffree_st7(p); 3110 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3111 //.. p = do_fop2_st(p, i->Xin.FpBinary.op, 3112 //.. 1+hregNumber(i->Xin.FpBinary.srcR)); 3113 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3114 //.. goto done; 3115 //.. 3116 //.. case Xin_FpLdSt: 3117 //.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8); 3118 //.. if (i->Xin.FpLdSt.isLoad) { 3119 //.. /* Load from memory into %fakeN. 3120 //.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1) 3121 //.. */ 3122 //.. p = do_ffree_st7(p); 3123 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; 3124 //.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); 3125 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg)); 3126 //.. goto done; 3127 //.. } else { 3128 //.. /* Store from %fakeN into memory. 3129 //.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode 3130 //.. */ 3131 //.. p = do_ffree_st7(p); 3132 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg)); 3133 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; 3134 //.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); 3135 //.. goto done; 3136 //.. } 3137 //.. break; 3138 //.. 3139 //.. case Xin_FpLdStI: 3140 //.. if (i->Xin.FpLdStI.isLoad) { 3141 //.. /* Load from memory into %fakeN, converting from an int. 3142 //.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) 3143 //.. */ 3144 //.. switch (i->Xin.FpLdStI.sz) { 3145 //.. case 8: opc = 0xDF; subopc_imm = 5; break; 3146 //.. case 4: opc = 0xDB; subopc_imm = 0; break; 3147 //.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; 3148 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)"); 3149 //.. } 3150 //.. p = do_ffree_st7(p); 3151 //.. *p++ = opc; 3152 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 3153 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg)); 3154 //.. goto done; 3155 //.. } else { 3156 //.. /* Store from %fakeN into memory, converting to an int. 3157 //.. 
--> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode 3158 //.. */ 3159 //.. switch (i->Xin.FpLdStI.sz) { 3160 //.. case 8: opc = 0xDF; subopc_imm = 7; break; 3161 //.. case 4: opc = 0xDB; subopc_imm = 3; break; 3162 //.. case 2: opc = 0xDF; subopc_imm = 3; break; 3163 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)"); 3164 //.. } 3165 //.. p = do_ffree_st7(p); 3166 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg)); 3167 //.. *p++ = opc; 3168 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 3169 //.. goto done; 3170 //.. } 3171 //.. break; 3172 //.. 3173 //.. case Xin_Fp64to32: 3174 //.. /* ffree %st7 ; fld %st(src) */ 3175 //.. p = do_ffree_st7(p); 3176 //.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src)); 3177 //.. /* subl $4, %esp */ 3178 //.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; 3179 //.. /* fstps (%esp) */ 3180 //.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; 3181 //.. /* flds (%esp) */ 3182 //.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; 3183 //.. /* addl $4, %esp */ 3184 //.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; 3185 //.. /* fstp %st(1+dst) */ 3186 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst)); 3187 //.. goto done; 3188 //.. 3189 //.. case Xin_FpCMov: 3190 //.. /* jmp fwds if !condition */ 3191 //.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1); 3192 //.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3193 //.. ptmp = p; 3194 //.. 3195 //.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ 3196 //.. p = do_ffree_st7(p); 3197 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src)); 3198 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst)); 3199 //.. 3200 //.. /* Fill in the jump offset. */ 3201 //.. *(ptmp-1) = p - ptmp; 3202 //.. goto done; 3203 3204 case Ain_LdMXCSR: 3205 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr)); 3206 *p++ = 0x0F; 3207 *p++ = 0xAE; 3208 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr); 3209 goto done; 3210 3211 //.. case Xin_FpStSW_AX: 3212 //.. /* note, this emits fnstsw %ax, not fstsw %ax */ 3213 //.. *p++ = 0xDF; 3214 //.. *p++ = 0xE0; 3215 //.. goto done; 3216 3217 case Ain_SseUComIS: 3218 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */ 3219 /* ucomi[sd] %srcL, %srcR */ 3220 if (i->Ain.SseUComIS.sz == 8) { 3221 *p++ = 0x66; 3222 } else { 3223 goto bad; 3224 vassert(i->Ain.SseUComIS.sz == 4); 3225 } 3226 *p++ = clearWBit ( 3227 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL), 3228 vreg2ireg(i->Ain.SseUComIS.srcR) )); 3229 *p++ = 0x0F; 3230 *p++ = 0x2E; 3231 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL), 3232 vreg2ireg(i->Ain.SseUComIS.srcR) ); 3233 /* pushfq */ 3234 *p++ = 0x9C; 3235 /* popq %dst */ 3236 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst))); 3237 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst)); 3238 goto done; 3239 3240 case Ain_SseSI2SF: 3241 /* cvssi2s[sd] %src, %dst */ 3242 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst), 3243 i->Ain.SseSI2SF.src ); 3244 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2); 3245 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex); 3246 *p++ = 0x0F; 3247 *p++ = 0x2A; 3248 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst), 3249 i->Ain.SseSI2SF.src ); 3250 goto done; 3251 3252 case Ain_SseSF2SI: 3253 /* cvss[sd]2si %src, %dst */ 3254 rex = rexAMode_R( i->Ain.SseSF2SI.dst, 3255 vreg2ireg(i->Ain.SseSF2SI.src) ); 3256 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2); 3257 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? 
clearWBit(rex) : rex); 3258 *p++ = 0x0F; 3259 *p++ = 0x2D; 3260 p = doAMode_R( p, i->Ain.SseSF2SI.dst, 3261 vreg2ireg(i->Ain.SseSF2SI.src) ); 3262 goto done; 3263 3264 case Ain_SseSDSS: 3265 /* cvtsd2ss/cvtss2sd %src, %dst */ 3266 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3); 3267 *p++ = clearWBit( 3268 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst), 3269 vreg2ireg(i->Ain.SseSDSS.src) )); 3270 *p++ = 0x0F; 3271 *p++ = 0x5A; 3272 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst), 3273 vreg2ireg(i->Ain.SseSDSS.src) ); 3274 goto done; 3275 3276 //.. 3277 //.. case Xin_FpCmp: 3278 //.. /* gcmp %fL, %fR, %dst 3279 //.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; 3280 //.. fnstsw %ax ; movl %eax, %dst 3281 //.. */ 3282 //.. /* ffree %st7 */ 3283 //.. p = do_ffree_st7(p); 3284 //.. /* fpush %fL */ 3285 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL)); 3286 //.. /* fucomp %(fR+1) */ 3287 //.. *p++ = 0xDD; 3288 //.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))); 3289 //.. /* fnstsw %ax */ 3290 //.. *p++ = 0xDF; 3291 //.. *p++ = 0xE0; 3292 //.. /* movl %eax, %dst */ 3293 //.. *p++ = 0x89; 3294 //.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst); 3295 //.. goto done; 3296 //.. 3297 //.. case Xin_SseConst: { 3298 //.. UShort con = i->Xin.SseConst.con; 3299 //.. p = push_word_from_tags(p, (con >> 12) & 0xF); 3300 //.. p = push_word_from_tags(p, (con >> 8) & 0xF); 3301 //.. p = push_word_from_tags(p, (con >> 4) & 0xF); 3302 //.. p = push_word_from_tags(p, con & 0xF); 3303 //.. /* movl (%esp), %xmm-dst */ 3304 //.. *p++ = 0x0F; 3305 //.. *p++ = 0x10; 3306 //.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)); 3307 //.. *p++ = 0x24; 3308 //.. /* addl $16, %esp */ 3309 //.. *p++ = 0x83; 3310 //.. *p++ = 0xC4; 3311 //.. *p++ = 0x10; 3312 //.. goto done; 3313 //.. } 3314 3315 case Ain_SseLdSt: 3316 if (i->Ain.SseLdSt.sz == 8) { 3317 *p++ = 0xF2; 3318 } else 3319 if (i->Ain.SseLdSt.sz == 4) { 3320 *p++ = 0xF3; 3321 } else 3322 if (i->Ain.SseLdSt.sz != 16) { 3323 vassert(0); 3324 } 3325 *p++ = clearWBit( 3326 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr)); 3327 *p++ = 0x0F; 3328 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11); 3329 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr); 3330 goto done; 3331 3332 case Ain_SseLdzLO: 3333 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8); 3334 /* movs[sd] amode, %xmm-dst */ 3335 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 
0xF3 : 0xF2); 3336 *p++ = clearWBit( 3337 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg), 3338 i->Ain.SseLdzLO.addr)); 3339 *p++ = 0x0F; 3340 *p++ = 0x10; 3341 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg), 3342 i->Ain.SseLdzLO.addr); 3343 goto done; 3344 3345 case Ain_Sse32Fx4: 3346 xtra = 0; 3347 *p++ = clearWBit( 3348 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst), 3349 vreg2ireg(i->Ain.Sse32Fx4.src) )); 3350 *p++ = 0x0F; 3351 switch (i->Ain.Sse32Fx4.op) { 3352 case Asse_ADDF: *p++ = 0x58; break; 3353 case Asse_DIVF: *p++ = 0x5E; break; 3354 case Asse_MAXF: *p++ = 0x5F; break; 3355 case Asse_MINF: *p++ = 0x5D; break; 3356 case Asse_MULF: *p++ = 0x59; break; 3357 case Asse_RCPF: *p++ = 0x53; break; 3358 case Asse_RSQRTF: *p++ = 0x52; break; 3359 case Asse_SQRTF: *p++ = 0x51; break; 3360 case Asse_SUBF: *p++ = 0x5C; break; 3361 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3362 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3363 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3364 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3365 default: goto bad; 3366 } 3367 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst), 3368 vreg2ireg(i->Ain.Sse32Fx4.src) ); 3369 if (xtra & 0x100) 3370 *p++ = toUChar(xtra & 0xFF); 3371 goto done; 3372 3373 case Ain_Sse64Fx2: 3374 xtra = 0; 3375 *p++ = 0x66; 3376 *p++ = clearWBit( 3377 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst), 3378 vreg2ireg(i->Ain.Sse64Fx2.src) )); 3379 *p++ = 0x0F; 3380 switch (i->Ain.Sse64Fx2.op) { 3381 case Asse_ADDF: *p++ = 0x58; break; 3382 case Asse_DIVF: *p++ = 0x5E; break; 3383 case Asse_MAXF: *p++ = 0x5F; break; 3384 case Asse_MINF: *p++ = 0x5D; break; 3385 case Asse_MULF: *p++ = 0x59; break; 3386 //.. case Xsse_RCPF: *p++ = 0x53; break; 3387 //.. case Xsse_RSQRTF: *p++ = 0x52; break; 3388 case Asse_SQRTF: *p++ = 0x51; break; 3389 case Asse_SUBF: *p++ = 0x5C; break; 3390 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3391 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3392 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3393 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3394 default: goto bad; 3395 } 3396 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst), 3397 vreg2ireg(i->Ain.Sse64Fx2.src) ); 3398 if (xtra & 0x100) 3399 *p++ = toUChar(xtra & 0xFF); 3400 goto done; 3401 3402 case Ain_Sse32FLo: 3403 xtra = 0; 3404 *p++ = 0xF3; 3405 *p++ = clearWBit( 3406 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst), 3407 vreg2ireg(i->Ain.Sse32FLo.src) )); 3408 *p++ = 0x0F; 3409 switch (i->Ain.Sse32FLo.op) { 3410 case Asse_ADDF: *p++ = 0x58; break; 3411 case Asse_DIVF: *p++ = 0x5E; break; 3412 case Asse_MAXF: *p++ = 0x5F; break; 3413 case Asse_MINF: *p++ = 0x5D; break; 3414 case Asse_MULF: *p++ = 0x59; break; 3415 case Asse_RCPF: *p++ = 0x53; break; 3416 case Asse_RSQRTF: *p++ = 0x52; break; 3417 case Asse_SQRTF: *p++ = 0x51; break; 3418 case Asse_SUBF: *p++ = 0x5C; break; 3419 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3420 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3421 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3422 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3423 default: goto bad; 3424 } 3425 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst), 3426 vreg2ireg(i->Ain.Sse32FLo.src) ); 3427 if (xtra & 0x100) 3428 *p++ = toUChar(xtra & 0xFF); 3429 goto done; 3430 3431 case Ain_Sse64FLo: 3432 xtra = 0; 3433 *p++ = 0xF2; 3434 *p++ = clearWBit( 3435 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst), 3436 vreg2ireg(i->Ain.Sse64FLo.src) )); 3437 *p++ = 0x0F; 3438 switch (i->Ain.Sse64FLo.op) { 3439 case 
Asse_ADDF: *p++ = 0x58; break; 3440 case Asse_DIVF: *p++ = 0x5E; break; 3441 case Asse_MAXF: *p++ = 0x5F; break; 3442 case Asse_MINF: *p++ = 0x5D; break; 3443 case Asse_MULF: *p++ = 0x59; break; 3444 //.. case Xsse_RCPF: *p++ = 0x53; break; 3445 //.. case Xsse_RSQRTF: *p++ = 0x52; break; 3446 case Asse_SQRTF: *p++ = 0x51; break; 3447 case Asse_SUBF: *p++ = 0x5C; break; 3448 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3449 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3450 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3451 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3452 default: goto bad; 3453 } 3454 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst), 3455 vreg2ireg(i->Ain.Sse64FLo.src) ); 3456 if (xtra & 0x100) 3457 *p++ = toUChar(xtra & 0xFF); 3458 goto done; 3459 3460 case Ain_SseReRg: 3461 # define XX(_n) *p++ = (_n) 3462 3463 rex = clearWBit( 3464 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst), 3465 vreg2ireg(i->Ain.SseReRg.src) )); 3466 3467 switch (i->Ain.SseReRg.op) { 3468 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break; 3469 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break; 3470 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break; 3471 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break; 3472 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break; 3473 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break; 3474 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break; 3475 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break; 3476 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break; 3477 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break; 3478 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break; 3479 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break; 3480 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break; 3481 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break; 3482 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break; 3483 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break; 3484 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break; 3485 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break; 3486 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break; 3487 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break; 3488 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break; 3489 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break; 3490 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break; 3491 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break; 3492 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break; 3493 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break; 3494 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break; 3495 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break; 3496 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break; 3497 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break; 3498 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break; 3499 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break; 3500 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break; 3501 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break; 3502 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break; 3503 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break; 3504 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break; 3505 case Asse_SHR32: XX(0x66); XX(rex); 
XX(0x0F); XX(0xD2); break; 3506 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break; 3507 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break; 3508 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break; 3509 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break; 3510 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break; 3511 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break; 3512 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break; 3513 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break; 3514 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break; 3515 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break; 3516 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break; 3517 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break; 3518 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break; 3519 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break; 3520 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break; 3521 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break; 3522 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break; 3523 default: goto bad; 3524 } 3525 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst), 3526 vreg2ireg(i->Ain.SseReRg.src) ); 3527 # undef XX 3528 goto done; 3529 3530 case Ain_SseCMov: 3531 /* jmp fwds if !condition */ 3532 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1)); 3533 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3534 ptmp = p; 3535 3536 /* movaps %src, %dst */ 3537 *p++ = clearWBit( 3538 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst), 3539 vreg2ireg(i->Ain.SseCMov.src) )); 3540 *p++ = 0x0F; 3541 *p++ = 0x28; 3542 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst), 3543 vreg2ireg(i->Ain.SseCMov.src) ); 3544 3545 /* Fill in the jump offset. */ 3546 *(ptmp-1) = toUChar(p - ptmp); 3547 goto done; 3548 3549 case Ain_SseShuf: 3550 *p++ = 0x66; 3551 *p++ = clearWBit( 3552 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst), 3553 vreg2ireg(i->Ain.SseShuf.src) )); 3554 *p++ = 0x0F; 3555 *p++ = 0x70; 3556 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst), 3557 vreg2ireg(i->Ain.SseShuf.src) ); 3558 *p++ = (UChar)(i->Ain.SseShuf.order); 3559 goto done; 3560 3561 default: 3562 goto bad; 3563 } 3564 3565 bad: 3566 ppAMD64Instr(i, mode64); 3567 vpanic("emit_AMD64Instr"); 3568 /*NOTREACHED*/ 3569 3570 done: 3571 vassert(p - &buf[0] <= 32); 3572 return p - &buf[0]; 3573 3574 # undef fake 3575 } 3576 3577 /*---------------------------------------------------------------*/ 3578 /*--- end host_amd64_defs.c ---*/ 3579 /*---------------------------------------------------------------*/ 3580