1 2 /*---------------------------------------------------------------*/ 3 /*--- begin host_amd64_defs.c ---*/ 4 /*---------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2011 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 #include "libvex_basictypes.h" 37 #include "libvex.h" 38 #include "libvex_trc_values.h" 39 40 #include "main_util.h" 41 #include "host_generic_regs.h" 42 #include "host_amd64_defs.h" 43 44 45 /* --------- Registers. --------- */ 46 47 void ppHRegAMD64 ( HReg reg ) 48 { 49 Int r; 50 static HChar* ireg64_names[16] 51 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 52 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 53 /* Be generic for all virtual regs. */ 54 if (hregIsVirtual(reg)) { 55 ppHReg(reg); 56 return; 57 } 58 /* But specific for real regs. */ 59 switch (hregClass(reg)) { 60 case HRcInt64: 61 r = hregNumber(reg); 62 vassert(r >= 0 && r < 16); 63 vex_printf("%s", ireg64_names[r]); 64 return; 65 case HRcFlt64: 66 r = hregNumber(reg); 67 vassert(r >= 0 && r < 6); 68 vex_printf("%%fake%d", r); 69 return; 70 case HRcVec128: 71 r = hregNumber(reg); 72 vassert(r >= 0 && r < 16); 73 vex_printf("%%xmm%d", r); 74 return; 75 default: 76 vpanic("ppHRegAMD64"); 77 } 78 } 79 80 static void ppHRegAMD64_lo32 ( HReg reg ) 81 { 82 Int r; 83 static HChar* ireg32_names[16] 84 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 85 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; 86 /* Be generic for all virtual regs. */ 87 if (hregIsVirtual(reg)) { 88 ppHReg(reg); 89 vex_printf("d"); 90 return; 91 } 92 /* But specific for real regs. 
*/ 93 switch (hregClass(reg)) { 94 case HRcInt64: 95 r = hregNumber(reg); 96 vassert(r >= 0 && r < 16); 97 vex_printf("%s", ireg32_names[r]); 98 return; 99 default: 100 vpanic("ppHRegAMD64_lo32: invalid regclass"); 101 } 102 } 103 104 HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); } 105 HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); } 106 HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); } 107 HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); } 108 HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); } 109 HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); } 110 HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); } 111 HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); } 112 HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); } 113 HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); } 114 HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); } 115 HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); } 116 HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); } 117 HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); } 118 HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); } 119 HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); } 120 121 //.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } 122 //.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } 123 //.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } 124 //.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } 125 //.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } 126 //.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } 127 //.. 
128 HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); } 129 HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); } 130 HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); } 131 HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); } 132 HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); } 133 HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); } 134 HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); } 135 HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); } 136 HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); } 137 HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); } 138 HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); } 139 HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); } 140 HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); } 141 HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); } 142 HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); } 143 HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); } 144 145 146 void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr ) 147 { 148 #if 0 149 *nregs = 6; 150 *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); 151 (*arr)[ 0] = hregAMD64_RSI(); 152 (*arr)[ 1] = hregAMD64_RDI(); 153 (*arr)[ 2] = hregAMD64_RBX(); 154 155 (*arr)[ 3] = hregAMD64_XMM7(); 156 (*arr)[ 4] = hregAMD64_XMM8(); 157 (*arr)[ 5] = hregAMD64_XMM9(); 158 #endif 159 #if 1 160 *nregs = 20; 161 *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); 162 (*arr)[ 0] = hregAMD64_RSI(); 163 (*arr)[ 1] = hregAMD64_RDI(); 164 (*arr)[ 2] = hregAMD64_R8(); 165 (*arr)[ 3] = hregAMD64_R9(); 166 (*arr)[ 4] = hregAMD64_R12(); 167 (*arr)[ 5] = hregAMD64_R13(); 168 (*arr)[ 6] = hregAMD64_R14(); 169 (*arr)[ 7] = hregAMD64_R15(); 170 (*arr)[ 8] = hregAMD64_RBX(); 171 172 (*arr)[ 9] = hregAMD64_XMM3(); 173 (*arr)[10] = hregAMD64_XMM4(); 174 (*arr)[11] = hregAMD64_XMM5(); 175 (*arr)[12] = hregAMD64_XMM6(); 176 (*arr)[13] = hregAMD64_XMM7(); 177 (*arr)[14] = hregAMD64_XMM8(); 178 (*arr)[15] = hregAMD64_XMM9(); 179 (*arr)[16] = hregAMD64_XMM10(); 180 (*arr)[17] = hregAMD64_XMM11(); 181 (*arr)[18] = hregAMD64_XMM12(); 182 (*arr)[19] = hregAMD64_R10(); 183 #endif 184 } 185 186 187 /* --------- Condition codes, Intel encoding. --------- */ 188 189 HChar* showAMD64CondCode ( AMD64CondCode cond ) 190 { 191 switch (cond) { 192 case Acc_O: return "o"; 193 case Acc_NO: return "no"; 194 case Acc_B: return "b"; 195 case Acc_NB: return "nb"; 196 case Acc_Z: return "z"; 197 case Acc_NZ: return "nz"; 198 case Acc_BE: return "be"; 199 case Acc_NBE: return "nbe"; 200 case Acc_S: return "s"; 201 case Acc_NS: return "ns"; 202 case Acc_P: return "p"; 203 case Acc_NP: return "np"; 204 case Acc_L: return "l"; 205 case Acc_NL: return "nl"; 206 case Acc_LE: return "le"; 207 case Acc_NLE: return "nle"; 208 case Acc_ALWAYS: return "ALWAYS"; 209 default: vpanic("ppAMD64CondCode"); 210 } 211 } 212 213 214 /* --------- AMD64AMode: memory address expressions. 
--------- */ 215 216 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) { 217 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); 218 am->tag = Aam_IR; 219 am->Aam.IR.imm = imm32; 220 am->Aam.IR.reg = reg; 221 return am; 222 } 223 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { 224 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); 225 am->tag = Aam_IRRS; 226 am->Aam.IRRS.imm = imm32; 227 am->Aam.IRRS.base = base; 228 am->Aam.IRRS.index = indEx; 229 am->Aam.IRRS.shift = shift; 230 vassert(shift >= 0 && shift <= 3); 231 return am; 232 } 233 234 //.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) { 235 //.. switch (am->tag) { 236 //.. case Xam_IR: 237 //.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); 238 //.. case Xam_IRRS: 239 //.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, 240 //.. am->Xam.IRRS.index, am->Xam.IRRS.shift ); 241 //.. default: 242 //.. vpanic("dopyAMD64AMode"); 243 //.. } 244 //.. } 245 246 void ppAMD64AMode ( AMD64AMode* am ) { 247 switch (am->tag) { 248 case Aam_IR: 249 if (am->Aam.IR.imm == 0) 250 vex_printf("("); 251 else 252 vex_printf("0x%x(", am->Aam.IR.imm); 253 ppHRegAMD64(am->Aam.IR.reg); 254 vex_printf(")"); 255 return; 256 case Aam_IRRS: 257 vex_printf("0x%x(", am->Aam.IRRS.imm); 258 ppHRegAMD64(am->Aam.IRRS.base); 259 vex_printf(","); 260 ppHRegAMD64(am->Aam.IRRS.index); 261 vex_printf(",%d)", 1 << am->Aam.IRRS.shift); 262 return; 263 default: 264 vpanic("ppAMD64AMode"); 265 } 266 } 267 268 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) { 269 switch (am->tag) { 270 case Aam_IR: 271 addHRegUse(u, HRmRead, am->Aam.IR.reg); 272 return; 273 case Aam_IRRS: 274 addHRegUse(u, HRmRead, am->Aam.IRRS.base); 275 addHRegUse(u, HRmRead, am->Aam.IRRS.index); 276 return; 277 default: 278 vpanic("addRegUsage_AMD64AMode"); 279 } 280 } 281 282 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) { 283 switch (am->tag) { 284 case Aam_IR: 285 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg); 286 return; 287 case Aam_IRRS: 288 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base); 289 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index); 290 return; 291 default: 292 vpanic("mapRegs_AMD64AMode"); 293 } 294 } 295 296 /* --------- Operand, which can be reg, immediate or memory. --------- */ 297 298 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) { 299 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 300 op->tag = Armi_Imm; 301 op->Armi.Imm.imm32 = imm32; 302 return op; 303 } 304 AMD64RMI* AMD64RMI_Reg ( HReg reg ) { 305 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 306 op->tag = Armi_Reg; 307 op->Armi.Reg.reg = reg; 308 return op; 309 } 310 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) { 311 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); 312 op->tag = Armi_Mem; 313 op->Armi.Mem.am = am; 314 return op; 315 } 316 317 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) { 318 switch (op->tag) { 319 case Armi_Imm: 320 vex_printf("$0x%x", op->Armi.Imm.imm32); 321 return; 322 case Armi_Reg: 323 if (lo32) 324 ppHRegAMD64_lo32(op->Armi.Reg.reg); 325 else 326 ppHRegAMD64(op->Armi.Reg.reg); 327 return; 328 case Armi_Mem: 329 ppAMD64AMode(op->Armi.Mem.am); 330 return; 331 default: 332 vpanic("ppAMD64RMI"); 333 } 334 } 335 void ppAMD64RMI ( AMD64RMI* op ) { 336 ppAMD64RMI_wrk(op, False/*!lo32*/); 337 } 338 void ppAMD64RMI_lo32 ( AMD64RMI* op ) { 339 ppAMD64RMI_wrk(op, True/*lo32*/); 340 } 341 342 /* An AMD64RMI can only be used in a "read" context (what would it mean 343 to write or modify a literal?) 
and so we enumerate its registers 344 accordingly. */ 345 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) { 346 switch (op->tag) { 347 case Armi_Imm: 348 return; 349 case Armi_Reg: 350 addHRegUse(u, HRmRead, op->Armi.Reg.reg); 351 return; 352 case Armi_Mem: 353 addRegUsage_AMD64AMode(u, op->Armi.Mem.am); 354 return; 355 default: 356 vpanic("addRegUsage_AMD64RMI"); 357 } 358 } 359 360 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) { 361 switch (op->tag) { 362 case Armi_Imm: 363 return; 364 case Armi_Reg: 365 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg); 366 return; 367 case Armi_Mem: 368 mapRegs_AMD64AMode(m, op->Armi.Mem.am); 369 return; 370 default: 371 vpanic("mapRegs_AMD64RMI"); 372 } 373 } 374 375 376 /* --------- Operand, which can be reg or immediate only. --------- */ 377 378 AMD64RI* AMD64RI_Imm ( UInt imm32 ) { 379 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); 380 op->tag = Ari_Imm; 381 op->Ari.Imm.imm32 = imm32; 382 return op; 383 } 384 AMD64RI* AMD64RI_Reg ( HReg reg ) { 385 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); 386 op->tag = Ari_Reg; 387 op->Ari.Reg.reg = reg; 388 return op; 389 } 390 391 void ppAMD64RI ( AMD64RI* op ) { 392 switch (op->tag) { 393 case Ari_Imm: 394 vex_printf("$0x%x", op->Ari.Imm.imm32); 395 return; 396 case Ari_Reg: 397 ppHRegAMD64(op->Ari.Reg.reg); 398 return; 399 default: 400 vpanic("ppAMD64RI"); 401 } 402 } 403 404 /* An AMD64RI can only be used in a "read" context (what would it mean 405 to write or modify a literal?) and so we enumerate its registers 406 accordingly. */ 407 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) { 408 switch (op->tag) { 409 case Ari_Imm: 410 return; 411 case Ari_Reg: 412 addHRegUse(u, HRmRead, op->Ari.Reg.reg); 413 return; 414 default: 415 vpanic("addRegUsage_AMD64RI"); 416 } 417 } 418 419 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) { 420 switch (op->tag) { 421 case Ari_Imm: 422 return; 423 case Ari_Reg: 424 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg); 425 return; 426 default: 427 vpanic("mapRegs_AMD64RI"); 428 } 429 } 430 431 432 /* --------- Operand, which can be reg or memory only. --------- */ 433 434 AMD64RM* AMD64RM_Reg ( HReg reg ) { 435 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); 436 op->tag = Arm_Reg; 437 op->Arm.Reg.reg = reg; 438 return op; 439 } 440 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) { 441 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); 442 op->tag = Arm_Mem; 443 op->Arm.Mem.am = am; 444 return op; 445 } 446 447 void ppAMD64RM ( AMD64RM* op ) { 448 switch (op->tag) { 449 case Arm_Mem: 450 ppAMD64AMode(op->Arm.Mem.am); 451 return; 452 case Arm_Reg: 453 ppHRegAMD64(op->Arm.Reg.reg); 454 return; 455 default: 456 vpanic("ppAMD64RM"); 457 } 458 } 459 460 /* Because an AMD64RM can be both a source or destination operand, we 461 have to supply a mode -- pertaining to the operand as a whole -- 462 indicating how it's being used. */ 463 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) { 464 switch (op->tag) { 465 case Arm_Mem: 466 /* Memory is read, written or modified. So we just want to 467 know the regs read by the amode. */ 468 addRegUsage_AMD64AMode(u, op->Arm.Mem.am); 469 return; 470 case Arm_Reg: 471 /* reg is read, written or modified. Add it in the 472 appropriate way. 
*/ 473 addHRegUse(u, mode, op->Arm.Reg.reg); 474 return; 475 default: 476 vpanic("addRegUsage_AMD64RM"); 477 } 478 } 479 480 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op ) 481 { 482 switch (op->tag) { 483 case Arm_Mem: 484 mapRegs_AMD64AMode(m, op->Arm.Mem.am); 485 return; 486 case Arm_Reg: 487 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg); 488 return; 489 default: 490 vpanic("mapRegs_AMD64RM"); 491 } 492 } 493 494 495 /* --------- Instructions. --------- */ 496 497 static HChar* showAMD64ScalarSz ( Int sz ) { 498 switch (sz) { 499 case 2: return "w"; 500 case 4: return "l"; 501 case 8: return "q"; 502 default: vpanic("showAMD64ScalarSz"); 503 } 504 } 505 506 HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) { 507 switch (op) { 508 case Aun_NOT: return "not"; 509 case Aun_NEG: return "neg"; 510 default: vpanic("showAMD64UnaryOp"); 511 } 512 } 513 514 HChar* showAMD64AluOp ( AMD64AluOp op ) { 515 switch (op) { 516 case Aalu_MOV: return "mov"; 517 case Aalu_CMP: return "cmp"; 518 case Aalu_ADD: return "add"; 519 case Aalu_SUB: return "sub"; 520 case Aalu_ADC: return "adc"; 521 case Aalu_SBB: return "sbb"; 522 case Aalu_AND: return "and"; 523 case Aalu_OR: return "or"; 524 case Aalu_XOR: return "xor"; 525 case Aalu_MUL: return "imul"; 526 default: vpanic("showAMD64AluOp"); 527 } 528 } 529 530 HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) { 531 switch (op) { 532 case Ash_SHL: return "shl"; 533 case Ash_SHR: return "shr"; 534 case Ash_SAR: return "sar"; 535 default: vpanic("showAMD64ShiftOp"); 536 } 537 } 538 539 HChar* showA87FpOp ( A87FpOp op ) { 540 switch (op) { 541 //.. case Xfp_ADD: return "add"; 542 //.. case Xfp_SUB: return "sub"; 543 //.. case Xfp_MUL: return "mul"; 544 //.. case Xfp_DIV: return "div"; 545 case Afp_SCALE: return "scale"; 546 case Afp_ATAN: return "atan"; 547 case Afp_YL2X: return "yl2x"; 548 case Afp_YL2XP1: return "yl2xp1"; 549 case Afp_PREM: return "prem"; 550 case Afp_PREM1: return "prem1"; 551 case Afp_SQRT: return "sqrt"; 552 //.. case Xfp_ABS: return "abs"; 553 //.. case Xfp_NEG: return "chs"; 554 //.. 
case Xfp_MOV: return "mov"; 555 case Afp_SIN: return "sin"; 556 case Afp_COS: return "cos"; 557 case Afp_TAN: return "tan"; 558 case Afp_ROUND: return "round"; 559 case Afp_2XM1: return "2xm1"; 560 default: vpanic("showA87FpOp"); 561 } 562 } 563 564 HChar* showAMD64SseOp ( AMD64SseOp op ) { 565 switch (op) { 566 case Asse_MOV: return "movups"; 567 case Asse_ADDF: return "add"; 568 case Asse_SUBF: return "sub"; 569 case Asse_MULF: return "mul"; 570 case Asse_DIVF: return "div"; 571 case Asse_MAXF: return "max"; 572 case Asse_MINF: return "min"; 573 case Asse_CMPEQF: return "cmpFeq"; 574 case Asse_CMPLTF: return "cmpFlt"; 575 case Asse_CMPLEF: return "cmpFle"; 576 case Asse_CMPUNF: return "cmpFun"; 577 case Asse_RCPF: return "rcp"; 578 case Asse_RSQRTF: return "rsqrt"; 579 case Asse_SQRTF: return "sqrt"; 580 case Asse_AND: return "and"; 581 case Asse_OR: return "or"; 582 case Asse_XOR: return "xor"; 583 case Asse_ANDN: return "andn"; 584 case Asse_ADD8: return "paddb"; 585 case Asse_ADD16: return "paddw"; 586 case Asse_ADD32: return "paddd"; 587 case Asse_ADD64: return "paddq"; 588 case Asse_QADD8U: return "paddusb"; 589 case Asse_QADD16U: return "paddusw"; 590 case Asse_QADD8S: return "paddsb"; 591 case Asse_QADD16S: return "paddsw"; 592 case Asse_SUB8: return "psubb"; 593 case Asse_SUB16: return "psubw"; 594 case Asse_SUB32: return "psubd"; 595 case Asse_SUB64: return "psubq"; 596 case Asse_QSUB8U: return "psubusb"; 597 case Asse_QSUB16U: return "psubusw"; 598 case Asse_QSUB8S: return "psubsb"; 599 case Asse_QSUB16S: return "psubsw"; 600 case Asse_MUL16: return "pmullw"; 601 case Asse_MULHI16U: return "pmulhuw"; 602 case Asse_MULHI16S: return "pmulhw"; 603 case Asse_AVG8U: return "pavgb"; 604 case Asse_AVG16U: return "pavgw"; 605 case Asse_MAX16S: return "pmaxw"; 606 case Asse_MAX8U: return "pmaxub"; 607 case Asse_MIN16S: return "pminw"; 608 case Asse_MIN8U: return "pminub"; 609 case Asse_CMPEQ8: return "pcmpeqb"; 610 case Asse_CMPEQ16: return "pcmpeqw"; 611 case Asse_CMPEQ32: return "pcmpeqd"; 612 case Asse_CMPGT8S: return "pcmpgtb"; 613 case Asse_CMPGT16S: return "pcmpgtw"; 614 case Asse_CMPGT32S: return "pcmpgtd"; 615 case Asse_SHL16: return "psllw"; 616 case Asse_SHL32: return "pslld"; 617 case Asse_SHL64: return "psllq"; 618 case Asse_SHR16: return "psrlw"; 619 case Asse_SHR32: return "psrld"; 620 case Asse_SHR64: return "psrlq"; 621 case Asse_SAR16: return "psraw"; 622 case Asse_SAR32: return "psrad"; 623 case Asse_PACKSSD: return "packssdw"; 624 case Asse_PACKSSW: return "packsswb"; 625 case Asse_PACKUSW: return "packuswb"; 626 case Asse_UNPCKHB: return "punpckhb"; 627 case Asse_UNPCKHW: return "punpckhw"; 628 case Asse_UNPCKHD: return "punpckhd"; 629 case Asse_UNPCKHQ: return "punpckhq"; 630 case Asse_UNPCKLB: return "punpcklb"; 631 case Asse_UNPCKLW: return "punpcklw"; 632 case Asse_UNPCKLD: return "punpckld"; 633 case Asse_UNPCKLQ: return "punpcklq"; 634 default: vpanic("showAMD64SseOp"); 635 } 636 } 637 638 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) { 639 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 640 i->tag = Ain_Imm64; 641 i->Ain.Imm64.imm64 = imm64; 642 i->Ain.Imm64.dst = dst; 643 return i; 644 } 645 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { 646 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 647 i->tag = Ain_Alu64R; 648 i->Ain.Alu64R.op = op; 649 i->Ain.Alu64R.src = src; 650 i->Ain.Alu64R.dst = dst; 651 return i; 652 } 653 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) { 654 AMD64Instr* i = 
LibVEX_Alloc(sizeof(AMD64Instr)); 655 i->tag = Ain_Alu64M; 656 i->Ain.Alu64M.op = op; 657 i->Ain.Alu64M.src = src; 658 i->Ain.Alu64M.dst = dst; 659 vassert(op != Aalu_MUL); 660 return i; 661 } 662 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) { 663 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 664 i->tag = Ain_Sh64; 665 i->Ain.Sh64.op = op; 666 i->Ain.Sh64.src = src; 667 i->Ain.Sh64.dst = dst; 668 return i; 669 } 670 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) { 671 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 672 i->tag = Ain_Test64; 673 i->Ain.Test64.imm32 = imm32; 674 i->Ain.Test64.dst = dst; 675 return i; 676 } 677 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) { 678 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 679 i->tag = Ain_Unary64; 680 i->Ain.Unary64.op = op; 681 i->Ain.Unary64.dst = dst; 682 return i; 683 } 684 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) { 685 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 686 i->tag = Ain_Lea64; 687 i->Ain.Lea64.am = am; 688 i->Ain.Lea64.dst = dst; 689 return i; 690 } 691 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { 692 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 693 i->tag = Ain_Alu32R; 694 i->Ain.Alu32R.op = op; 695 i->Ain.Alu32R.src = src; 696 i->Ain.Alu32R.dst = dst; 697 switch (op) { 698 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP: 699 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break; 700 default: vassert(0); 701 } 702 return i; 703 } 704 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) { 705 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 706 i->tag = Ain_MulL; 707 i->Ain.MulL.syned = syned; 708 i->Ain.MulL.src = src; 709 return i; 710 } 711 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) { 712 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 713 i->tag = Ain_Div; 714 i->Ain.Div.syned = syned; 715 i->Ain.Div.sz = sz; 716 i->Ain.Div.src = src; 717 vassert(sz == 4 || sz == 8); 718 return i; 719 } 720 //.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) { 721 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 722 //.. i->tag = Xin_Sh3232; 723 //.. i->Xin.Sh3232.op = op; 724 //.. i->Xin.Sh3232.amt = amt; 725 //.. i->Xin.Sh3232.src = src; 726 //.. i->Xin.Sh3232.dst = dst; 727 //.. vassert(op == Xsh_SHL || op == Xsh_SHR); 728 //.. return i; 729 //.. 
} 730 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) { 731 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 732 i->tag = Ain_Push; 733 i->Ain.Push.src = src; 734 return i; 735 } 736 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) { 737 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 738 i->tag = Ain_Call; 739 i->Ain.Call.cond = cond; 740 i->Ain.Call.target = target; 741 i->Ain.Call.regparms = regparms; 742 vassert(regparms >= 0 && regparms <= 6); 743 return i; 744 } 745 AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) { 746 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 747 i->tag = Ain_Goto; 748 i->Ain.Goto.cond = cond; 749 i->Ain.Goto.dst = dst; 750 i->Ain.Goto.jk = jk; 751 return i; 752 } 753 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) { 754 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 755 i->tag = Ain_CMov64; 756 i->Ain.CMov64.cond = cond; 757 i->Ain.CMov64.src = src; 758 i->Ain.CMov64.dst = dst; 759 vassert(cond != Acc_ALWAYS); 760 return i; 761 } 762 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) { 763 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 764 i->tag = Ain_MovxLQ; 765 i->Ain.MovxLQ.syned = syned; 766 i->Ain.MovxLQ.src = src; 767 i->Ain.MovxLQ.dst = dst; 768 return i; 769 } 770 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, 771 AMD64AMode* src, HReg dst ) { 772 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 773 i->tag = Ain_LoadEX; 774 i->Ain.LoadEX.szSmall = szSmall; 775 i->Ain.LoadEX.syned = syned; 776 i->Ain.LoadEX.src = src; 777 i->Ain.LoadEX.dst = dst; 778 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4); 779 return i; 780 } 781 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) { 782 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 783 i->tag = Ain_Store; 784 i->Ain.Store.sz = sz; 785 i->Ain.Store.src = src; 786 i->Ain.Store.dst = dst; 787 vassert(sz == 1 || sz == 2 || sz == 4); 788 return i; 789 } 790 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) { 791 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 792 i->tag = Ain_Set64; 793 i->Ain.Set64.cond = cond; 794 i->Ain.Set64.dst = dst; 795 return i; 796 } 797 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) { 798 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 799 i->tag = Ain_Bsfr64; 800 i->Ain.Bsfr64.isFwds = isFwds; 801 i->Ain.Bsfr64.src = src; 802 i->Ain.Bsfr64.dst = dst; 803 return i; 804 } 805 AMD64Instr* AMD64Instr_MFence ( void ) { 806 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 807 i->tag = Ain_MFence; 808 return i; 809 } 810 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) { 811 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 812 i->tag = Ain_ACAS; 813 i->Ain.ACAS.addr = addr; 814 i->Ain.ACAS.sz = sz; 815 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 816 return i; 817 } 818 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) { 819 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 820 i->tag = Ain_DACAS; 821 i->Ain.DACAS.addr = addr; 822 i->Ain.DACAS.sz = sz; 823 vassert(sz == 8 || sz == 4); 824 return i; 825 } 826 827 AMD64Instr* AMD64Instr_A87Free ( Int nregs ) 828 { 829 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 830 i->tag = Ain_A87Free; 831 i->Ain.A87Free.nregs = nregs; 832 vassert(nregs >= 1 && nregs <= 7); 833 return i; 834 } 835 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB ) 836 { 837 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 838 
i->tag = Ain_A87PushPop; 839 i->Ain.A87PushPop.addr = addr; 840 i->Ain.A87PushPop.isPush = isPush; 841 i->Ain.A87PushPop.szB = szB; 842 vassert(szB == 8 || szB == 4); 843 return i; 844 } 845 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ) 846 { 847 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 848 i->tag = Ain_A87FpOp; 849 i->Ain.A87FpOp.op = op; 850 return i; 851 } 852 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ) 853 { 854 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 855 i->tag = Ain_A87LdCW; 856 i->Ain.A87LdCW.addr = addr; 857 return i; 858 } 859 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ) 860 { 861 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 862 i->tag = Ain_A87StSW; 863 i->Ain.A87StSW.addr = addr; 864 return i; 865 } 866 867 //.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) { 868 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 869 //.. i->tag = Xin_FpUnary; 870 //.. i->Xin.FpUnary.op = op; 871 //.. i->Xin.FpUnary.src = src; 872 //.. i->Xin.FpUnary.dst = dst; 873 //.. return i; 874 //.. } 875 //.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) { 876 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 877 //.. i->tag = Xin_FpBinary; 878 //.. i->Xin.FpBinary.op = op; 879 //.. i->Xin.FpBinary.srcL = srcL; 880 //.. i->Xin.FpBinary.srcR = srcR; 881 //.. i->Xin.FpBinary.dst = dst; 882 //.. return i; 883 //.. } 884 //.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) { 885 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 886 //.. i->tag = Xin_FpLdSt; 887 //.. i->Xin.FpLdSt.isLoad = isLoad; 888 //.. i->Xin.FpLdSt.sz = sz; 889 //.. i->Xin.FpLdSt.reg = reg; 890 //.. i->Xin.FpLdSt.addr = addr; 891 //.. vassert(sz == 4 || sz == 8); 892 //.. return i; 893 //.. } 894 //.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, 895 //.. HReg reg, AMD64AMode* addr ) { 896 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 897 //.. i->tag = Xin_FpLdStI; 898 //.. i->Xin.FpLdStI.isLoad = isLoad; 899 //.. i->Xin.FpLdStI.sz = sz; 900 //.. i->Xin.FpLdStI.reg = reg; 901 //.. i->Xin.FpLdStI.addr = addr; 902 //.. vassert(sz == 2 || sz == 4 || sz == 8); 903 //.. return i; 904 //.. } 905 //.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) { 906 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 907 //.. i->tag = Xin_Fp64to32; 908 //.. i->Xin.Fp64to32.src = src; 909 //.. i->Xin.Fp64to32.dst = dst; 910 //.. return i; 911 //.. } 912 //.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) { 913 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 914 //.. i->tag = Xin_FpCMov; 915 //.. i->Xin.FpCMov.cond = cond; 916 //.. i->Xin.FpCMov.src = src; 917 //.. i->Xin.FpCMov.dst = dst; 918 //.. vassert(cond != Xcc_ALWAYS); 919 //.. return i; 920 //.. } 921 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) { 922 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 923 i->tag = Ain_LdMXCSR; 924 i->Ain.LdMXCSR.addr = addr; 925 return i; 926 } 927 //.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) { 928 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 929 //.. i->tag = Xin_FpStSW_AX; 930 //.. return i; 931 //.. 
} 932 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) { 933 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 934 i->tag = Ain_SseUComIS; 935 i->Ain.SseUComIS.sz = toUChar(sz); 936 i->Ain.SseUComIS.srcL = srcL; 937 i->Ain.SseUComIS.srcR = srcR; 938 i->Ain.SseUComIS.dst = dst; 939 vassert(sz == 4 || sz == 8); 940 return i; 941 } 942 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) { 943 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 944 i->tag = Ain_SseSI2SF; 945 i->Ain.SseSI2SF.szS = toUChar(szS); 946 i->Ain.SseSI2SF.szD = toUChar(szD); 947 i->Ain.SseSI2SF.src = src; 948 i->Ain.SseSI2SF.dst = dst; 949 vassert(szS == 4 || szS == 8); 950 vassert(szD == 4 || szD == 8); 951 return i; 952 } 953 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) { 954 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 955 i->tag = Ain_SseSF2SI; 956 i->Ain.SseSF2SI.szS = toUChar(szS); 957 i->Ain.SseSF2SI.szD = toUChar(szD); 958 i->Ain.SseSF2SI.src = src; 959 i->Ain.SseSF2SI.dst = dst; 960 vassert(szS == 4 || szS == 8); 961 vassert(szD == 4 || szD == 8); 962 return i; 963 } 964 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ) 965 { 966 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 967 i->tag = Ain_SseSDSS; 968 i->Ain.SseSDSS.from64 = from64; 969 i->Ain.SseSDSS.src = src; 970 i->Ain.SseSDSS.dst = dst; 971 return i; 972 } 973 974 //.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) { 975 //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 976 //.. i->tag = Xin_SseConst; 977 //.. i->Xin.SseConst.con = con; 978 //.. i->Xin.SseConst.dst = dst; 979 //.. vassert(hregClass(dst) == HRcVec128); 980 //.. return i; 981 //.. } 982 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, 983 HReg reg, AMD64AMode* addr ) { 984 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 985 i->tag = Ain_SseLdSt; 986 i->Ain.SseLdSt.isLoad = isLoad; 987 i->Ain.SseLdSt.sz = toUChar(sz); 988 i->Ain.SseLdSt.reg = reg; 989 i->Ain.SseLdSt.addr = addr; 990 vassert(sz == 4 || sz == 8 || sz == 16); 991 return i; 992 } 993 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr ) 994 { 995 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 996 i->tag = Ain_SseLdzLO; 997 i->Ain.SseLdzLO.sz = sz; 998 i->Ain.SseLdzLO.reg = reg; 999 i->Ain.SseLdzLO.addr = addr; 1000 vassert(sz == 4 || sz == 8); 1001 return i; 1002 } 1003 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) { 1004 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1005 i->tag = Ain_Sse32Fx4; 1006 i->Ain.Sse32Fx4.op = op; 1007 i->Ain.Sse32Fx4.src = src; 1008 i->Ain.Sse32Fx4.dst = dst; 1009 vassert(op != Asse_MOV); 1010 return i; 1011 } 1012 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) { 1013 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1014 i->tag = Ain_Sse32FLo; 1015 i->Ain.Sse32FLo.op = op; 1016 i->Ain.Sse32FLo.src = src; 1017 i->Ain.Sse32FLo.dst = dst; 1018 vassert(op != Asse_MOV); 1019 return i; 1020 } 1021 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) { 1022 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1023 i->tag = Ain_Sse64Fx2; 1024 i->Ain.Sse64Fx2.op = op; 1025 i->Ain.Sse64Fx2.src = src; 1026 i->Ain.Sse64Fx2.dst = dst; 1027 vassert(op != Asse_MOV); 1028 return i; 1029 } 1030 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) { 1031 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1032 i->tag = Ain_Sse64FLo; 1033 i->Ain.Sse64FLo.op = op; 1034 i->Ain.Sse64FLo.src = src; 
1035 i->Ain.Sse64FLo.dst = dst; 1036 vassert(op != Asse_MOV); 1037 return i; 1038 } 1039 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) { 1040 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1041 i->tag = Ain_SseReRg; 1042 i->Ain.SseReRg.op = op; 1043 i->Ain.SseReRg.src = re; 1044 i->Ain.SseReRg.dst = rg; 1045 return i; 1046 } 1047 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) { 1048 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1049 i->tag = Ain_SseCMov; 1050 i->Ain.SseCMov.cond = cond; 1051 i->Ain.SseCMov.src = src; 1052 i->Ain.SseCMov.dst = dst; 1053 vassert(cond != Acc_ALWAYS); 1054 return i; 1055 } 1056 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) { 1057 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); 1058 i->tag = Ain_SseShuf; 1059 i->Ain.SseShuf.order = order; 1060 i->Ain.SseShuf.src = src; 1061 i->Ain.SseShuf.dst = dst; 1062 vassert(order >= 0 && order <= 0xFF); 1063 return i; 1064 } 1065 1066 void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) 1067 { 1068 vassert(mode64 == True); 1069 switch (i->tag) { 1070 case Ain_Imm64: 1071 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64); 1072 ppHRegAMD64(i->Ain.Imm64.dst); 1073 return; 1074 case Ain_Alu64R: 1075 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op)); 1076 ppAMD64RMI(i->Ain.Alu64R.src); 1077 vex_printf(","); 1078 ppHRegAMD64(i->Ain.Alu64R.dst); 1079 return; 1080 case Ain_Alu64M: 1081 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op)); 1082 ppAMD64RI(i->Ain.Alu64M.src); 1083 vex_printf(","); 1084 ppAMD64AMode(i->Ain.Alu64M.dst); 1085 return; 1086 case Ain_Sh64: 1087 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op)); 1088 if (i->Ain.Sh64.src == 0) 1089 vex_printf("%%cl,"); 1090 else 1091 vex_printf("$%d,", (Int)i->Ain.Sh64.src); 1092 ppHRegAMD64(i->Ain.Sh64.dst); 1093 return; 1094 case Ain_Test64: 1095 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32); 1096 ppHRegAMD64(i->Ain.Test64.dst); 1097 return; 1098 case Ain_Unary64: 1099 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op)); 1100 ppHRegAMD64(i->Ain.Unary64.dst); 1101 return; 1102 case Ain_Lea64: 1103 vex_printf("leaq "); 1104 ppAMD64AMode(i->Ain.Lea64.am); 1105 vex_printf(","); 1106 ppHRegAMD64(i->Ain.Lea64.dst); 1107 return; 1108 case Ain_Alu32R: 1109 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op)); 1110 ppAMD64RMI_lo32(i->Ain.Alu32R.src); 1111 vex_printf(","); 1112 ppHRegAMD64_lo32(i->Ain.Alu32R.dst); 1113 return; 1114 case Ain_MulL: 1115 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u'); 1116 ppAMD64RM(i->Ain.MulL.src); 1117 return; 1118 case Ain_Div: 1119 vex_printf("%cdiv%s ", 1120 i->Ain.Div.syned ? 's' : 'u', 1121 showAMD64ScalarSz(i->Ain.Div.sz)); 1122 ppAMD64RM(i->Ain.Div.src); 1123 return; 1124 //.. case Xin_Sh3232: 1125 //.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op)); 1126 //.. if (i->Xin.Sh3232.amt == 0) 1127 //.. vex_printf(" %%cl,"); 1128 //.. else 1129 //.. vex_printf(" $%d,", i->Xin.Sh3232.amt); 1130 //.. ppHRegAMD64(i->Xin.Sh3232.src); 1131 //.. vex_printf(","); 1132 //.. ppHRegAMD64(i->Xin.Sh3232.dst); 1133 //.. return; 1134 case Ain_Push: 1135 vex_printf("pushq "); 1136 ppAMD64RMI(i->Ain.Push.src); 1137 return; 1138 case Ain_Call: 1139 vex_printf("call%s[%d] ", 1140 i->Ain.Call.cond==Acc_ALWAYS 1141 ? 
"" : showAMD64CondCode(i->Ain.Call.cond), 1142 i->Ain.Call.regparms ); 1143 vex_printf("0x%llx", i->Ain.Call.target); 1144 break; 1145 case Ain_Goto: 1146 if (i->Ain.Goto.cond != Acc_ALWAYS) { 1147 vex_printf("if (%%rflags.%s) { ", 1148 showAMD64CondCode(i->Ain.Goto.cond)); 1149 } 1150 if (i->Ain.Goto.jk != Ijk_Boring 1151 && i->Ain.Goto.jk != Ijk_Call 1152 && i->Ain.Goto.jk != Ijk_Ret) { 1153 vex_printf("movl $"); 1154 ppIRJumpKind(i->Ain.Goto.jk); 1155 vex_printf(",%%ebp ; "); 1156 } 1157 vex_printf("movq "); 1158 ppAMD64RI(i->Ain.Goto.dst); 1159 vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx"); 1160 if (i->Ain.Goto.cond != Acc_ALWAYS) { 1161 vex_printf(" }"); 1162 } 1163 return; 1164 case Ain_CMov64: 1165 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond)); 1166 ppAMD64RM(i->Ain.CMov64.src); 1167 vex_printf(","); 1168 ppHRegAMD64(i->Ain.CMov64.dst); 1169 return; 1170 case Ain_MovxLQ: 1171 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z'); 1172 ppHRegAMD64_lo32(i->Ain.MovxLQ.src); 1173 vex_printf(","); 1174 ppHRegAMD64(i->Ain.MovxLQ.dst); 1175 return; 1176 case Ain_LoadEX: 1177 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) { 1178 vex_printf("movl "); 1179 ppAMD64AMode(i->Ain.LoadEX.src); 1180 vex_printf(","); 1181 ppHRegAMD64_lo32(i->Ain.LoadEX.dst); 1182 } else { 1183 vex_printf("mov%c%cq ", 1184 i->Ain.LoadEX.syned ? 's' : 'z', 1185 i->Ain.LoadEX.szSmall==1 1186 ? 'b' 1187 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l')); 1188 ppAMD64AMode(i->Ain.LoadEX.src); 1189 vex_printf(","); 1190 ppHRegAMD64(i->Ain.LoadEX.dst); 1191 } 1192 return; 1193 case Ain_Store: 1194 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b' 1195 : (i->Ain.Store.sz==2 ? 'w' : 'l')); 1196 ppHRegAMD64(i->Ain.Store.src); 1197 vex_printf(","); 1198 ppAMD64AMode(i->Ain.Store.dst); 1199 return; 1200 case Ain_Set64: 1201 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond)); 1202 ppHRegAMD64(i->Ain.Set64.dst); 1203 return; 1204 case Ain_Bsfr64: 1205 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r'); 1206 ppHRegAMD64(i->Ain.Bsfr64.src); 1207 vex_printf(","); 1208 ppHRegAMD64(i->Ain.Bsfr64.dst); 1209 return; 1210 case Ain_MFence: 1211 vex_printf("mfence" ); 1212 return; 1213 case Ain_ACAS: 1214 vex_printf("lock cmpxchg%c ", 1215 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w' 1216 : i->Ain.ACAS.sz==4 ? 'l' : 'q' ); 1217 vex_printf("{%%rax->%%rbx},"); 1218 ppAMD64AMode(i->Ain.ACAS.addr); 1219 return; 1220 case Ain_DACAS: 1221 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},", 1222 (Int)(2 * i->Ain.DACAS.sz)); 1223 ppAMD64AMode(i->Ain.DACAS.addr); 1224 return; 1225 case Ain_A87Free: 1226 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs ); 1227 break; 1228 case Ain_A87PushPop: 1229 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ", 1230 i->Ain.A87PushPop.szB == 4 ? 's' : 'l'); 1231 ppAMD64AMode(i->Ain.A87PushPop.addr); 1232 break; 1233 case Ain_A87FpOp: 1234 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op)); 1235 break; 1236 case Ain_A87LdCW: 1237 vex_printf("fldcw "); 1238 ppAMD64AMode(i->Ain.A87LdCW.addr); 1239 break; 1240 case Ain_A87StSW: 1241 vex_printf("fstsw "); 1242 ppAMD64AMode(i->Ain.A87StSW.addr); 1243 break; 1244 //.. case Xin_FpUnary: 1245 //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op)); 1246 //.. ppHRegAMD64(i->Xin.FpUnary.src); 1247 //.. vex_printf(","); 1248 //.. ppHRegAMD64(i->Xin.FpUnary.dst); 1249 //.. break; 1250 //.. case Xin_FpBinary: 1251 //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op)); 1252 //.. 
ppHRegAMD64(i->Xin.FpBinary.srcL); 1253 //.. vex_printf(","); 1254 //.. ppHRegAMD64(i->Xin.FpBinary.srcR); 1255 //.. vex_printf(","); 1256 //.. ppHRegAMD64(i->Xin.FpBinary.dst); 1257 //.. break; 1258 //.. case Xin_FpLdSt: 1259 //.. if (i->Xin.FpLdSt.isLoad) { 1260 //.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); 1261 //.. ppAMD64AMode(i->Xin.FpLdSt.addr); 1262 //.. vex_printf(", "); 1263 //.. ppHRegAMD64(i->Xin.FpLdSt.reg); 1264 //.. } else { 1265 //.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); 1266 //.. ppHRegAMD64(i->Xin.FpLdSt.reg); 1267 //.. vex_printf(", "); 1268 //.. ppAMD64AMode(i->Xin.FpLdSt.addr); 1269 //.. } 1270 //.. return; 1271 //.. case Xin_FpLdStI: 1272 //.. if (i->Xin.FpLdStI.isLoad) { 1273 //.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1274 //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1275 //.. ppAMD64AMode(i->Xin.FpLdStI.addr); 1276 //.. vex_printf(", "); 1277 //.. ppHRegAMD64(i->Xin.FpLdStI.reg); 1278 //.. } else { 1279 //.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : 1280 //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); 1281 //.. ppHRegAMD64(i->Xin.FpLdStI.reg); 1282 //.. vex_printf(", "); 1283 //.. ppAMD64AMode(i->Xin.FpLdStI.addr); 1284 //.. } 1285 //.. return; 1286 //.. case Xin_Fp64to32: 1287 //.. vex_printf("gdtof "); 1288 //.. ppHRegAMD64(i->Xin.Fp64to32.src); 1289 //.. vex_printf(","); 1290 //.. ppHRegAMD64(i->Xin.Fp64to32.dst); 1291 //.. return; 1292 //.. case Xin_FpCMov: 1293 //.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond)); 1294 //.. ppHRegAMD64(i->Xin.FpCMov.src); 1295 //.. vex_printf(","); 1296 //.. ppHRegAMD64(i->Xin.FpCMov.dst); 1297 //.. return; 1298 //.. case Xin_FpLdStCW: 1299 //.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw "); 1300 //.. ppAMD64AMode(i->Xin.FpLdStCW.addr); 1301 //.. return; 1302 //.. case Xin_FpStSW_AX: 1303 //.. vex_printf("fstsw %%ax"); 1304 //.. return; 1305 case Ain_LdMXCSR: 1306 vex_printf("ldmxcsr "); 1307 ppAMD64AMode(i->Ain.LdMXCSR.addr); 1308 break; 1309 case Ain_SseUComIS: 1310 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d"); 1311 ppHRegAMD64(i->Ain.SseUComIS.srcL); 1312 vex_printf(","); 1313 ppHRegAMD64(i->Ain.SseUComIS.srcR); 1314 vex_printf(" ; pushfq ; popq "); 1315 ppHRegAMD64(i->Ain.SseUComIS.dst); 1316 break; 1317 case Ain_SseSI2SF: 1318 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d"); 1319 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) 1320 (i->Ain.SseSI2SF.src); 1321 vex_printf(","); 1322 ppHRegAMD64(i->Ain.SseSI2SF.dst); 1323 break; 1324 case Ain_SseSF2SI: 1325 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d"); 1326 ppHRegAMD64(i->Ain.SseSF2SI.src); 1327 vex_printf(","); 1328 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) 1329 (i->Ain.SseSF2SI.dst); 1330 break; 1331 case Ain_SseSDSS: 1332 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd "); 1333 ppHRegAMD64(i->Ain.SseSDSS.src); 1334 vex_printf(","); 1335 ppHRegAMD64(i->Ain.SseSDSS.dst); 1336 break; 1337 //.. case Xin_SseConst: 1338 //.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); 1339 //.. ppHRegAMD64(i->Xin.SseConst.dst); 1340 //.. 
break; 1341 case Ain_SseLdSt: 1342 switch (i->Ain.SseLdSt.sz) { 1343 case 4: vex_printf("movss "); break; 1344 case 8: vex_printf("movsd "); break; 1345 case 16: vex_printf("movups "); break; 1346 default: vassert(0); 1347 } 1348 if (i->Ain.SseLdSt.isLoad) { 1349 ppAMD64AMode(i->Ain.SseLdSt.addr); 1350 vex_printf(","); 1351 ppHRegAMD64(i->Ain.SseLdSt.reg); 1352 } else { 1353 ppHRegAMD64(i->Ain.SseLdSt.reg); 1354 vex_printf(","); 1355 ppAMD64AMode(i->Ain.SseLdSt.addr); 1356 } 1357 return; 1358 case Ain_SseLdzLO: 1359 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d"); 1360 ppAMD64AMode(i->Ain.SseLdzLO.addr); 1361 vex_printf(","); 1362 ppHRegAMD64(i->Ain.SseLdzLO.reg); 1363 return; 1364 case Ain_Sse32Fx4: 1365 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op)); 1366 ppHRegAMD64(i->Ain.Sse32Fx4.src); 1367 vex_printf(","); 1368 ppHRegAMD64(i->Ain.Sse32Fx4.dst); 1369 return; 1370 case Ain_Sse32FLo: 1371 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op)); 1372 ppHRegAMD64(i->Ain.Sse32FLo.src); 1373 vex_printf(","); 1374 ppHRegAMD64(i->Ain.Sse32FLo.dst); 1375 return; 1376 case Ain_Sse64Fx2: 1377 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op)); 1378 ppHRegAMD64(i->Ain.Sse64Fx2.src); 1379 vex_printf(","); 1380 ppHRegAMD64(i->Ain.Sse64Fx2.dst); 1381 return; 1382 case Ain_Sse64FLo: 1383 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op)); 1384 ppHRegAMD64(i->Ain.Sse64FLo.src); 1385 vex_printf(","); 1386 ppHRegAMD64(i->Ain.Sse64FLo.dst); 1387 return; 1388 case Ain_SseReRg: 1389 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op)); 1390 ppHRegAMD64(i->Ain.SseReRg.src); 1391 vex_printf(","); 1392 ppHRegAMD64(i->Ain.SseReRg.dst); 1393 return; 1394 case Ain_SseCMov: 1395 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond)); 1396 ppHRegAMD64(i->Ain.SseCMov.src); 1397 vex_printf(","); 1398 ppHRegAMD64(i->Ain.SseCMov.dst); 1399 return; 1400 case Ain_SseShuf: 1401 vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order); 1402 ppHRegAMD64(i->Ain.SseShuf.src); 1403 vex_printf(","); 1404 ppHRegAMD64(i->Ain.SseShuf.dst); 1405 return; 1406 1407 default: 1408 vpanic("ppAMD64Instr"); 1409 } 1410 } 1411 1412 /* --------- Helpers for register allocation. 
--------- */ 1413 1414 void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) 1415 { 1416 Bool unary; 1417 vassert(mode64 == True); 1418 initHRegUsage(u); 1419 switch (i->tag) { 1420 case Ain_Imm64: 1421 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst); 1422 return; 1423 case Ain_Alu64R: 1424 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src); 1425 if (i->Ain.Alu64R.op == Aalu_MOV) { 1426 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst); 1427 return; 1428 } 1429 if (i->Ain.Alu64R.op == Aalu_CMP) { 1430 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst); 1431 return; 1432 } 1433 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst); 1434 return; 1435 case Ain_Alu64M: 1436 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src); 1437 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst); 1438 return; 1439 case Ain_Sh64: 1440 addHRegUse(u, HRmModify, i->Ain.Sh64.dst); 1441 if (i->Ain.Sh64.src == 0) 1442 addHRegUse(u, HRmRead, hregAMD64_RCX()); 1443 return; 1444 case Ain_Test64: 1445 addHRegUse(u, HRmRead, i->Ain.Test64.dst); 1446 return; 1447 case Ain_Unary64: 1448 addHRegUse(u, HRmModify, i->Ain.Unary64.dst); 1449 return; 1450 case Ain_Lea64: 1451 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am); 1452 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst); 1453 return; 1454 case Ain_Alu32R: 1455 vassert(i->Ain.Alu32R.op != Aalu_MOV); 1456 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src); 1457 if (i->Ain.Alu32R.op == Aalu_CMP) { 1458 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst); 1459 return; 1460 } 1461 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst); 1462 return; 1463 case Ain_MulL: 1464 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead); 1465 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1466 addHRegUse(u, HRmWrite, hregAMD64_RDX()); 1467 return; 1468 case Ain_Div: 1469 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead); 1470 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1471 addHRegUse(u, HRmModify, hregAMD64_RDX()); 1472 return; 1473 //.. case Xin_Sh3232: 1474 //.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src); 1475 //.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); 1476 //.. if (i->Xin.Sh3232.amt == 0) 1477 //.. addHRegUse(u, HRmRead, hregAMD64_ECX()); 1478 //.. return; 1479 case Ain_Push: 1480 addRegUsage_AMD64RMI(u, i->Ain.Push.src); 1481 addHRegUse(u, HRmModify, hregAMD64_RSP()); 1482 return; 1483 case Ain_Call: 1484 /* This is a bit subtle. */ 1485 /* First off, claim it trashes all the caller-saved regs 1486 which fall within the register allocator's jurisdiction. 1487 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11 1488 and all the xmm registers. 
1489 */ 1490 addHRegUse(u, HRmWrite, hregAMD64_RAX()); 1491 addHRegUse(u, HRmWrite, hregAMD64_RCX()); 1492 addHRegUse(u, HRmWrite, hregAMD64_RDX()); 1493 addHRegUse(u, HRmWrite, hregAMD64_RSI()); 1494 addHRegUse(u, HRmWrite, hregAMD64_RDI()); 1495 addHRegUse(u, HRmWrite, hregAMD64_R8()); 1496 addHRegUse(u, HRmWrite, hregAMD64_R9()); 1497 addHRegUse(u, HRmWrite, hregAMD64_R10()); 1498 addHRegUse(u, HRmWrite, hregAMD64_R11()); 1499 addHRegUse(u, HRmWrite, hregAMD64_XMM0()); 1500 addHRegUse(u, HRmWrite, hregAMD64_XMM1()); 1501 addHRegUse(u, HRmWrite, hregAMD64_XMM2()); 1502 addHRegUse(u, HRmWrite, hregAMD64_XMM3()); 1503 addHRegUse(u, HRmWrite, hregAMD64_XMM4()); 1504 addHRegUse(u, HRmWrite, hregAMD64_XMM5()); 1505 addHRegUse(u, HRmWrite, hregAMD64_XMM6()); 1506 addHRegUse(u, HRmWrite, hregAMD64_XMM7()); 1507 addHRegUse(u, HRmWrite, hregAMD64_XMM8()); 1508 addHRegUse(u, HRmWrite, hregAMD64_XMM9()); 1509 addHRegUse(u, HRmWrite, hregAMD64_XMM10()); 1510 addHRegUse(u, HRmWrite, hregAMD64_XMM11()); 1511 addHRegUse(u, HRmWrite, hregAMD64_XMM12()); 1512 addHRegUse(u, HRmWrite, hregAMD64_XMM13()); 1513 addHRegUse(u, HRmWrite, hregAMD64_XMM14()); 1514 addHRegUse(u, HRmWrite, hregAMD64_XMM15()); 1515 1516 /* Now we have to state any parameter-carrying registers 1517 which might be read. This depends on the regparmness. */ 1518 switch (i->Ain.Call.regparms) { 1519 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/ 1520 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/ 1521 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/ 1522 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/ 1523 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/ 1524 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break; 1525 case 0: break; 1526 default: vpanic("getRegUsage_AMD64Instr:Call:regparms"); 1527 } 1528 /* Finally, there is the issue that the insn trashes a 1529 register because the literal target address has to be 1530 loaded into a register. Fortunately, r11 is stated in the 1531 ABI as a scratch register, and so seems a suitable victim. */ 1532 addHRegUse(u, HRmWrite, hregAMD64_R11()); 1533 /* Upshot of this is that the assembler really must use r11, 1534 and no other, as a destination temporary. */ 1535 return; 1536 case Ain_Goto: 1537 addRegUsage_AMD64RI(u, i->Ain.Goto.dst); 1538 addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */ 1539 addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */ 1540 if (i->Ain.Goto.jk != Ijk_Boring 1541 && i->Ain.Goto.jk != Ijk_Call 1542 && i->Ain.Goto.jk != Ijk_Ret) 1543 /* note, this is irrelevant since rbp is not actually 1544 available to the allocator. But still .. 
*/ 1545 addHRegUse(u, HRmWrite, hregAMD64_RBP()); 1546 return; 1547 case Ain_CMov64: 1548 addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead); 1549 addHRegUse(u, HRmModify, i->Ain.CMov64.dst); 1550 return; 1551 case Ain_MovxLQ: 1552 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src); 1553 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst); 1554 return; 1555 case Ain_LoadEX: 1556 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src); 1557 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst); 1558 return; 1559 case Ain_Store: 1560 addHRegUse(u, HRmRead, i->Ain.Store.src); 1561 addRegUsage_AMD64AMode(u, i->Ain.Store.dst); 1562 return; 1563 case Ain_Set64: 1564 addHRegUse(u, HRmWrite, i->Ain.Set64.dst); 1565 return; 1566 case Ain_Bsfr64: 1567 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src); 1568 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst); 1569 return; 1570 case Ain_MFence: 1571 return; 1572 case Ain_ACAS: 1573 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr); 1574 addHRegUse(u, HRmRead, hregAMD64_RBX()); 1575 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1576 return; 1577 case Ain_DACAS: 1578 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr); 1579 addHRegUse(u, HRmRead, hregAMD64_RCX()); 1580 addHRegUse(u, HRmRead, hregAMD64_RBX()); 1581 addHRegUse(u, HRmModify, hregAMD64_RDX()); 1582 addHRegUse(u, HRmModify, hregAMD64_RAX()); 1583 return; 1584 case Ain_A87Free: 1585 return; 1586 case Ain_A87PushPop: 1587 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr); 1588 return; 1589 case Ain_A87FpOp: 1590 return; 1591 case Ain_A87LdCW: 1592 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr); 1593 return; 1594 case Ain_A87StSW: 1595 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr); 1596 return; 1597 //.. case Xin_FpUnary: 1598 //.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src); 1599 //.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); 1600 //.. return; 1601 //.. case Xin_FpBinary: 1602 //.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); 1603 //.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); 1604 //.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); 1605 //.. return; 1606 //.. case Xin_FpLdSt: 1607 //.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr); 1608 //.. addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, 1609 //.. i->Xin.FpLdSt.reg); 1610 //.. return; 1611 //.. case Xin_FpLdStI: 1612 //.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr); 1613 //.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, 1614 //.. i->Xin.FpLdStI.reg); 1615 //.. return; 1616 //.. case Xin_Fp64to32: 1617 //.. addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); 1618 //.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); 1619 //.. return; 1620 //.. case Xin_FpCMov: 1621 //.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src); 1622 //.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); 1623 //.. return; 1624 case Ain_LdMXCSR: 1625 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr); 1626 return; 1627 //.. case Xin_FpStSW_AX: 1628 //.. addHRegUse(u, HRmWrite, hregAMD64_EAX()); 1629 //.. 
return; 1630 case Ain_SseUComIS: 1631 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL); 1632 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR); 1633 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst); 1634 return; 1635 case Ain_SseSI2SF: 1636 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src); 1637 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst); 1638 return; 1639 case Ain_SseSF2SI: 1640 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src); 1641 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst); 1642 return; 1643 case Ain_SseSDSS: 1644 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src); 1645 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst); 1646 return; 1647 case Ain_SseLdSt: 1648 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr); 1649 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead, 1650 i->Ain.SseLdSt.reg); 1651 return; 1652 case Ain_SseLdzLO: 1653 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr); 1654 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg); 1655 return; 1656 //.. case Xin_SseConst: 1657 //.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); 1658 //.. return; 1659 case Ain_Sse32Fx4: 1660 vassert(i->Ain.Sse32Fx4.op != Asse_MOV); 1661 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF 1662 || i->Ain.Sse32Fx4.op == Asse_RSQRTF 1663 || i->Ain.Sse32Fx4.op == Asse_SQRTF ); 1664 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src); 1665 addHRegUse(u, unary ? HRmWrite : HRmModify, 1666 i->Ain.Sse32Fx4.dst); 1667 return; 1668 case Ain_Sse32FLo: 1669 vassert(i->Ain.Sse32FLo.op != Asse_MOV); 1670 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF 1671 || i->Ain.Sse32FLo.op == Asse_RSQRTF 1672 || i->Ain.Sse32FLo.op == Asse_SQRTF ); 1673 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src); 1674 addHRegUse(u, unary ? HRmWrite : HRmModify, 1675 i->Ain.Sse32FLo.dst); 1676 return; 1677 case Ain_Sse64Fx2: 1678 vassert(i->Ain.Sse64Fx2.op != Asse_MOV); 1679 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF 1680 || i->Ain.Sse64Fx2.op == Asse_RSQRTF 1681 || i->Ain.Sse64Fx2.op == Asse_SQRTF ); 1682 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src); 1683 addHRegUse(u, unary ? HRmWrite : HRmModify, 1684 i->Ain.Sse64Fx2.dst); 1685 return; 1686 case Ain_Sse64FLo: 1687 vassert(i->Ain.Sse64FLo.op != Asse_MOV); 1688 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF 1689 || i->Ain.Sse64FLo.op == Asse_RSQRTF 1690 || i->Ain.Sse64FLo.op == Asse_SQRTF ); 1691 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src); 1692 addHRegUse(u, unary ? HRmWrite : HRmModify, 1693 i->Ain.Sse64FLo.dst); 1694 return; 1695 case Ain_SseReRg: 1696 if ( (i->Ain.SseReRg.op == Asse_XOR 1697 || i->Ain.SseReRg.op == Asse_CMPEQ32) 1698 && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) { 1699 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd 1700 r,r' as a write of a value to r, and independent of any 1701 previous value in r */ 1702 /* (as opposed to a rite of passage :-) */ 1703 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst); 1704 } else { 1705 addHRegUse(u, HRmRead, i->Ain.SseReRg.src); 1706 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV 1707 ? 
HRmWrite : HRmModify, 1708 i->Ain.SseReRg.dst); 1709 } 1710 return; 1711 case Ain_SseCMov: 1712 addHRegUse(u, HRmRead, i->Ain.SseCMov.src); 1713 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst); 1714 return; 1715 case Ain_SseShuf: 1716 addHRegUse(u, HRmRead, i->Ain.SseShuf.src); 1717 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst); 1718 return; 1719 default: 1720 ppAMD64Instr(i, mode64); 1721 vpanic("getRegUsage_AMD64Instr"); 1722 } 1723 } 1724 1725 /* local helper */ 1726 static inline void mapReg(HRegRemap* m, HReg* r) 1727 { 1728 *r = lookupHRegRemap(m, *r); 1729 } 1730 1731 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) 1732 { 1733 vassert(mode64 == True); 1734 switch (i->tag) { 1735 case Ain_Imm64: 1736 mapReg(m, &i->Ain.Imm64.dst); 1737 return; 1738 case Ain_Alu64R: 1739 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src); 1740 mapReg(m, &i->Ain.Alu64R.dst); 1741 return; 1742 case Ain_Alu64M: 1743 mapRegs_AMD64RI(m, i->Ain.Alu64M.src); 1744 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst); 1745 return; 1746 case Ain_Sh64: 1747 mapReg(m, &i->Ain.Sh64.dst); 1748 return; 1749 case Ain_Test64: 1750 mapReg(m, &i->Ain.Test64.dst); 1751 return; 1752 case Ain_Unary64: 1753 mapReg(m, &i->Ain.Unary64.dst); 1754 return; 1755 case Ain_Lea64: 1756 mapRegs_AMD64AMode(m, i->Ain.Lea64.am); 1757 mapReg(m, &i->Ain.Lea64.dst); 1758 return; 1759 case Ain_Alu32R: 1760 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src); 1761 mapReg(m, &i->Ain.Alu32R.dst); 1762 return; 1763 case Ain_MulL: 1764 mapRegs_AMD64RM(m, i->Ain.MulL.src); 1765 return; 1766 case Ain_Div: 1767 mapRegs_AMD64RM(m, i->Ain.Div.src); 1768 return; 1769 //.. case Xin_Sh3232: 1770 //.. mapReg(m, &i->Xin.Sh3232.src); 1771 //.. mapReg(m, &i->Xin.Sh3232.dst); 1772 //.. return; 1773 case Ain_Push: 1774 mapRegs_AMD64RMI(m, i->Ain.Push.src); 1775 return; 1776 case Ain_Call: 1777 return; 1778 case Ain_Goto: 1779 mapRegs_AMD64RI(m, i->Ain.Goto.dst); 1780 return; 1781 case Ain_CMov64: 1782 mapRegs_AMD64RM(m, i->Ain.CMov64.src); 1783 mapReg(m, &i->Ain.CMov64.dst); 1784 return; 1785 case Ain_MovxLQ: 1786 mapReg(m, &i->Ain.MovxLQ.src); 1787 mapReg(m, &i->Ain.MovxLQ.dst); 1788 return; 1789 case Ain_LoadEX: 1790 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src); 1791 mapReg(m, &i->Ain.LoadEX.dst); 1792 return; 1793 case Ain_Store: 1794 mapReg(m, &i->Ain.Store.src); 1795 mapRegs_AMD64AMode(m, i->Ain.Store.dst); 1796 return; 1797 case Ain_Set64: 1798 mapReg(m, &i->Ain.Set64.dst); 1799 return; 1800 case Ain_Bsfr64: 1801 mapReg(m, &i->Ain.Bsfr64.src); 1802 mapReg(m, &i->Ain.Bsfr64.dst); 1803 return; 1804 case Ain_MFence: 1805 return; 1806 case Ain_ACAS: 1807 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr); 1808 return; 1809 case Ain_DACAS: 1810 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr); 1811 return; 1812 case Ain_A87Free: 1813 return; 1814 case Ain_A87PushPop: 1815 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr); 1816 return; 1817 case Ain_A87FpOp: 1818 return; 1819 case Ain_A87LdCW: 1820 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr); 1821 return; 1822 case Ain_A87StSW: 1823 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr); 1824 return; 1825 //.. case Xin_FpUnary: 1826 //.. mapReg(m, &i->Xin.FpUnary.src); 1827 //.. mapReg(m, &i->Xin.FpUnary.dst); 1828 //.. return; 1829 //.. case Xin_FpBinary: 1830 //.. mapReg(m, &i->Xin.FpBinary.srcL); 1831 //.. mapReg(m, &i->Xin.FpBinary.srcR); 1832 //.. mapReg(m, &i->Xin.FpBinary.dst); 1833 //.. return; 1834 //.. case Xin_FpLdSt: 1835 //.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr); 1836 //.. mapReg(m, &i->Xin.FpLdSt.reg); 1837 //.. return; 1838 //.. 
case Xin_FpLdStI: 1839 //.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr); 1840 //.. mapReg(m, &i->Xin.FpLdStI.reg); 1841 //.. return; 1842 //.. case Xin_Fp64to32: 1843 //.. mapReg(m, &i->Xin.Fp64to32.src); 1844 //.. mapReg(m, &i->Xin.Fp64to32.dst); 1845 //.. return; 1846 //.. case Xin_FpCMov: 1847 //.. mapReg(m, &i->Xin.FpCMov.src); 1848 //.. mapReg(m, &i->Xin.FpCMov.dst); 1849 //.. return; 1850 case Ain_LdMXCSR: 1851 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr); 1852 return; 1853 //.. case Xin_FpStSW_AX: 1854 //.. return; 1855 case Ain_SseUComIS: 1856 mapReg(m, &i->Ain.SseUComIS.srcL); 1857 mapReg(m, &i->Ain.SseUComIS.srcR); 1858 mapReg(m, &i->Ain.SseUComIS.dst); 1859 return; 1860 case Ain_SseSI2SF: 1861 mapReg(m, &i->Ain.SseSI2SF.src); 1862 mapReg(m, &i->Ain.SseSI2SF.dst); 1863 return; 1864 case Ain_SseSF2SI: 1865 mapReg(m, &i->Ain.SseSF2SI.src); 1866 mapReg(m, &i->Ain.SseSF2SI.dst); 1867 return; 1868 case Ain_SseSDSS: 1869 mapReg(m, &i->Ain.SseSDSS.src); 1870 mapReg(m, &i->Ain.SseSDSS.dst); 1871 return; 1872 //.. case Xin_SseConst: 1873 //.. mapReg(m, &i->Xin.SseConst.dst); 1874 //.. return; 1875 case Ain_SseLdSt: 1876 mapReg(m, &i->Ain.SseLdSt.reg); 1877 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr); 1878 break; 1879 case Ain_SseLdzLO: 1880 mapReg(m, &i->Ain.SseLdzLO.reg); 1881 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr); 1882 break; 1883 case Ain_Sse32Fx4: 1884 mapReg(m, &i->Ain.Sse32Fx4.src); 1885 mapReg(m, &i->Ain.Sse32Fx4.dst); 1886 return; 1887 case Ain_Sse32FLo: 1888 mapReg(m, &i->Ain.Sse32FLo.src); 1889 mapReg(m, &i->Ain.Sse32FLo.dst); 1890 return; 1891 case Ain_Sse64Fx2: 1892 mapReg(m, &i->Ain.Sse64Fx2.src); 1893 mapReg(m, &i->Ain.Sse64Fx2.dst); 1894 return; 1895 case Ain_Sse64FLo: 1896 mapReg(m, &i->Ain.Sse64FLo.src); 1897 mapReg(m, &i->Ain.Sse64FLo.dst); 1898 return; 1899 case Ain_SseReRg: 1900 mapReg(m, &i->Ain.SseReRg.src); 1901 mapReg(m, &i->Ain.SseReRg.dst); 1902 return; 1903 case Ain_SseCMov: 1904 mapReg(m, &i->Ain.SseCMov.src); 1905 mapReg(m, &i->Ain.SseCMov.dst); 1906 return; 1907 case Ain_SseShuf: 1908 mapReg(m, &i->Ain.SseShuf.src); 1909 mapReg(m, &i->Ain.SseShuf.dst); 1910 return; 1911 default: 1912 ppAMD64Instr(i, mode64); 1913 vpanic("mapRegs_AMD64Instr"); 1914 } 1915 } 1916 1917 /* Figure out if i represents a reg-reg move, and if so assign the 1918 source and destination to *src and *dst. If in doubt say No. Used 1919 by the register allocator to do move coalescing. 1920 */ 1921 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst ) 1922 { 1923 /* Moves between integer regs */ 1924 if (i->tag == Ain_Alu64R) { 1925 if (i->Ain.Alu64R.op != Aalu_MOV) 1926 return False; 1927 if (i->Ain.Alu64R.src->tag != Armi_Reg) 1928 return False; 1929 *src = i->Ain.Alu64R.src->Armi.Reg.reg; 1930 *dst = i->Ain.Alu64R.dst; 1931 return True; 1932 } 1933 /* Moves between vector regs */ 1934 if (i->tag == Ain_SseReRg) { 1935 if (i->Ain.SseReRg.op != Asse_MOV) 1936 return False; 1937 *src = i->Ain.SseReRg.src; 1938 *dst = i->Ain.SseReRg.dst; 1939 return True; 1940 } 1941 return False; 1942 } 1943 1944 1945 /* Generate amd64 spill/reload instructions under the direction of the 1946 register allocator. Note it's critical these don't write the 1947 condition codes. 
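   For that reason only plain moves are generated here: for example, a
   64-bit integer spill to slot offsetB is just 'movq %reg, offsetB(%rbp)'
   and the matching reload is 'movq offsetB(%rbp), %reg', while vector
   registers are saved and restored with a 16-byte movups of the same
   slot.  None of these touch %rflags.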
*/ 1948 1949 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1950 HReg rreg, Int offsetB, Bool mode64 ) 1951 { 1952 AMD64AMode* am; 1953 vassert(offsetB >= 0); 1954 vassert(!hregIsVirtual(rreg)); 1955 vassert(mode64 == True); 1956 *i1 = *i2 = NULL; 1957 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1958 switch (hregClass(rreg)) { 1959 case HRcInt64: 1960 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am ); 1961 return; 1962 case HRcVec128: 1963 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am ); 1964 return; 1965 default: 1966 ppHRegClass(hregClass(rreg)); 1967 vpanic("genSpill_AMD64: unimplemented regclass"); 1968 } 1969 } 1970 1971 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1972 HReg rreg, Int offsetB, Bool mode64 ) 1973 { 1974 AMD64AMode* am; 1975 vassert(offsetB >= 0); 1976 vassert(!hregIsVirtual(rreg)); 1977 vassert(mode64 == True); 1978 *i1 = *i2 = NULL; 1979 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1980 switch (hregClass(rreg)) { 1981 case HRcInt64: 1982 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg ); 1983 return; 1984 case HRcVec128: 1985 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am ); 1986 return; 1987 default: 1988 ppHRegClass(hregClass(rreg)); 1989 vpanic("genReload_AMD64: unimplemented regclass"); 1990 } 1991 } 1992 1993 1994 /* --------- The amd64 assembler (bleh.) --------- */ 1995 1996 /* Produce the low three bits of an integer register number. */ 1997 static UChar iregBits210 ( HReg r ) 1998 { 1999 UInt n; 2000 vassert(hregClass(r) == HRcInt64); 2001 vassert(!hregIsVirtual(r)); 2002 n = hregNumber(r); 2003 vassert(n <= 15); 2004 return toUChar(n & 7); 2005 } 2006 2007 /* Produce bit 3 of an integer register number. */ 2008 static UChar iregBit3 ( HReg r ) 2009 { 2010 UInt n; 2011 vassert(hregClass(r) == HRcInt64); 2012 vassert(!hregIsVirtual(r)); 2013 n = hregNumber(r); 2014 vassert(n <= 15); 2015 return toUChar((n >> 3) & 1); 2016 } 2017 2018 /* Produce a complete 4-bit integer register number. */ 2019 static UChar iregBits3210 ( HReg r ) 2020 { 2021 UInt n; 2022 vassert(hregClass(r) == HRcInt64); 2023 vassert(!hregIsVirtual(r)); 2024 n = hregNumber(r); 2025 vassert(n <= 15); 2026 return toUChar(n); 2027 } 2028 2029 /* Given an xmm (128bit V-class) register number, produce the 2030 equivalent numbered register in 64-bit I-class. This is a bit of 2031 fakery which facilitates using functions that work on integer 2032 register numbers to be used when assembling SSE instructions 2033 too. 
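   Only the 4-bit register number matters to the encoding helpers, so,
   for example, %xmm9 is presented to them as if it were the integer
   register with encoding number 9.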
*/ 2034 static UInt vreg2ireg ( HReg r ) 2035 { 2036 UInt n; 2037 vassert(hregClass(r) == HRcVec128); 2038 vassert(!hregIsVirtual(r)); 2039 n = hregNumber(r); 2040 vassert(n <= 15); 2041 return mkHReg(n, HRcInt64, False); 2042 } 2043 2044 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem ) 2045 { 2046 return toUChar( ((mod & 3) << 6) 2047 | ((reg & 7) << 3) 2048 | (regmem & 7) ); 2049 } 2050 2051 static UChar mkSIB ( Int shift, Int regindex, Int regbase ) 2052 { 2053 return toUChar( ((shift & 3) << 6) 2054 | ((regindex & 7) << 3) 2055 | (regbase & 7) ); 2056 } 2057 2058 static UChar* emit32 ( UChar* p, UInt w32 ) 2059 { 2060 *p++ = toUChar((w32) & 0x000000FF); 2061 *p++ = toUChar((w32 >> 8) & 0x000000FF); 2062 *p++ = toUChar((w32 >> 16) & 0x000000FF); 2063 *p++ = toUChar((w32 >> 24) & 0x000000FF); 2064 return p; 2065 } 2066 2067 static UChar* emit64 ( UChar* p, ULong w64 ) 2068 { 2069 p = emit32(p, toUInt(w64 & 0xFFFFFFFF)); 2070 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF)); 2071 return p; 2072 } 2073 2074 /* Does a sign-extend of the lowest 8 bits give 2075 the original number? */ 2076 static Bool fits8bits ( UInt w32 ) 2077 { 2078 Int i32 = (Int)w32; 2079 return toBool(i32 == ((i32 << 24) >> 24)); 2080 } 2081 /* Can the lower 32 bits be signedly widened to produce the whole 2082 64-bit value? In other words, are the top 33 bits either all 0 or 2083 all 1 ? */ 2084 static Bool fitsIn32Bits ( ULong x ) 2085 { 2086 Long y0 = (Long)x; 2087 Long y1 = y0; 2088 y1 <<= 32; 2089 y1 >>=/*s*/ 32; 2090 return toBool(x == y1); 2091 } 2092 2093 2094 /* Forming mod-reg-rm bytes and scale-index-base bytes. 2095 2096 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13 2097 = 00 greg ereg 2098 2099 greg, d8(ereg) | ereg is neither of: RSP R12 2100 = 01 greg ereg, d8 2101 2102 greg, d32(ereg) | ereg is neither of: RSP R12 2103 = 10 greg ereg, d32 2104 2105 greg, d8(ereg) | ereg is either: RSP R12 2106 = 01 greg 100, 0x24, d8 2107 (lowest bit of rex distinguishes R12/RSP) 2108 2109 greg, d32(ereg) | ereg is either: RSP R12 2110 = 10 greg 100, 0x24, d32 2111 (lowest bit of rex distinguishes R12/RSP) 2112 2113 ----------------------------------------------- 2114 2115 greg, d8(base,index,scale) 2116 | index != RSP 2117 = 01 greg 100, scale index base, d8 2118 2119 greg, d32(base,index,scale) 2120 | index != RSP 2121 = 10 greg 100, scale index base, d32 2122 */ 2123 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am ) 2124 { 2125 if (am->tag == Aam_IR) { 2126 if (am->Aam.IR.imm == 0 2127 && am->Aam.IR.reg != hregAMD64_RSP() 2128 && am->Aam.IR.reg != hregAMD64_RBP() 2129 && am->Aam.IR.reg != hregAMD64_R12() 2130 && am->Aam.IR.reg != hregAMD64_R13() 2131 ) { 2132 *p++ = mkModRegRM(0, iregBits210(greg), 2133 iregBits210(am->Aam.IR.reg)); 2134 return p; 2135 } 2136 if (fits8bits(am->Aam.IR.imm) 2137 && am->Aam.IR.reg != hregAMD64_RSP() 2138 && am->Aam.IR.reg != hregAMD64_R12() 2139 ) { 2140 *p++ = mkModRegRM(1, iregBits210(greg), 2141 iregBits210(am->Aam.IR.reg)); 2142 *p++ = toUChar(am->Aam.IR.imm & 0xFF); 2143 return p; 2144 } 2145 if (am->Aam.IR.reg != hregAMD64_RSP() 2146 && am->Aam.IR.reg != hregAMD64_R12() 2147 ) { 2148 *p++ = mkModRegRM(2, iregBits210(greg), 2149 iregBits210(am->Aam.IR.reg)); 2150 p = emit32(p, am->Aam.IR.imm); 2151 return p; 2152 } 2153 if ((am->Aam.IR.reg == hregAMD64_RSP() 2154 || am->Aam.IR.reg == hregAMD64_R12()) 2155 && fits8bits(am->Aam.IR.imm)) { 2156 *p++ = mkModRegRM(1, iregBits210(greg), 4); 2157 *p++ = 0x24; 2158 *p++ = toUChar(am->Aam.IR.imm & 0xFF); 
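         /* Illustrative example: with greg = %rax and am = 8(%rsp) this
            emits 0x44 0x24 0x08; together with the caller's REX and
            opcode bytes (0x48 0x89 for a 64-bit store) that completes
            'movq %rax, 8(%rsp)'. */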
2159 return p; 2160 } 2161 if (/* (am->Aam.IR.reg == hregAMD64_RSP() 2162 || wait for test case for RSP case */ 2163 am->Aam.IR.reg == hregAMD64_R12()) { 2164 *p++ = mkModRegRM(2, iregBits210(greg), 4); 2165 *p++ = 0x24; 2166 p = emit32(p, am->Aam.IR.imm); 2167 return p; 2168 } 2169 ppAMD64AMode(am); 2170 vpanic("doAMode_M: can't emit amode IR"); 2171 /*NOTREACHED*/ 2172 } 2173 if (am->tag == Aam_IRRS) { 2174 if (fits8bits(am->Aam.IRRS.imm) 2175 && am->Aam.IRRS.index != hregAMD64_RSP()) { 2176 *p++ = mkModRegRM(1, iregBits210(greg), 4); 2177 *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index, 2178 am->Aam.IRRS.base); 2179 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF); 2180 return p; 2181 } 2182 if (am->Aam.IRRS.index != hregAMD64_RSP()) { 2183 *p++ = mkModRegRM(2, iregBits210(greg), 4); 2184 *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index, 2185 am->Aam.IRRS.base); 2186 p = emit32(p, am->Aam.IRRS.imm); 2187 return p; 2188 } 2189 ppAMD64AMode(am); 2190 vpanic("doAMode_M: can't emit amode IRRS"); 2191 /*NOTREACHED*/ 2192 } 2193 vpanic("doAMode_M: unknown amode"); 2194 /*NOTREACHED*/ 2195 } 2196 2197 2198 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ 2199 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) 2200 { 2201 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg)); 2202 return p; 2203 } 2204 2205 2206 /* Clear the W bit on a REX byte, thereby changing the operand size 2207 back to whatever that instruction's default operand size is. */ 2208 static inline UChar clearWBit ( UChar rex ) 2209 { 2210 return toUChar(rex & ~(1<<3)); 2211 } 2212 2213 2214 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */ 2215 static UChar rexAMode_M ( HReg greg, AMD64AMode* am ) 2216 { 2217 if (am->tag == Aam_IR) { 2218 UChar W = 1; /* we want 64-bit mode */ 2219 UChar R = iregBit3(greg); 2220 UChar X = 0; /* not relevant */ 2221 UChar B = iregBit3(am->Aam.IR.reg); 2222 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2223 } 2224 if (am->tag == Aam_IRRS) { 2225 UChar W = 1; /* we want 64-bit mode */ 2226 UChar R = iregBit3(greg); 2227 UChar X = iregBit3(am->Aam.IRRS.index); 2228 UChar B = iregBit3(am->Aam.IRRS.base); 2229 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2230 } 2231 vassert(0); 2232 return 0; /*NOTREACHED*/ 2233 } 2234 2235 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */ 2236 static UChar rexAMode_R ( HReg greg, HReg ereg ) 2237 { 2238 UChar W = 1; /* we want 64-bit mode */ 2239 UChar R = iregBit3(greg); 2240 UChar X = 0; /* not relevant */ 2241 UChar B = iregBit3(ereg); 2242 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2243 } 2244 2245 2246 /* Emit ffree %st(N) */ 2247 static UChar* do_ffree_st ( UChar* p, Int n ) 2248 { 2249 vassert(n >= 0 && n <= 7); 2250 *p++ = 0xDD; 2251 *p++ = toUChar(0xC0 + n); 2252 return p; 2253 } 2254 2255 //.. /* Emit fstp %st(i), 1 <= i <= 7 */ 2256 //.. static UChar* do_fstp_st ( UChar* p, Int i ) 2257 //.. { 2258 //.. vassert(1 <= i && i <= 7); 2259 //.. *p++ = 0xDD; 2260 //.. *p++ = 0xD8+i; 2261 //.. return p; 2262 //.. } 2263 //.. 2264 //.. /* Emit fld %st(i), 0 <= i <= 6 */ 2265 //.. static UChar* do_fld_st ( UChar* p, Int i ) 2266 //.. { 2267 //.. vassert(0 <= i && i <= 6); 2268 //.. *p++ = 0xD9; 2269 //.. *p++ = 0xC0+i; 2270 //.. return p; 2271 //.. } 2272 //.. 2273 //.. /* Emit f<op> %st(0) */ 2274 //.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op ) 2275 //.. { 2276 //.. switch (op) { 2277 //.. 
case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; 2278 //.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; 2279 //.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 2280 //.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 2281 //.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 2282 //.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 2283 //.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 2284 //.. case Xfp_MOV: break; 2285 //.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ 2286 //.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */ 2287 //.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ 2288 //.. break; 2289 //.. default: vpanic("do_fop1_st: unknown op"); 2290 //.. } 2291 //.. return p; 2292 //.. } 2293 //.. 2294 //.. /* Emit f<op> %st(i), 1 <= i <= 5 */ 2295 //.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i ) 2296 //.. { 2297 //.. # define fake(_n) mkHReg((_n), HRcInt32, False) 2298 //.. Int subopc; 2299 //.. switch (op) { 2300 //.. case Xfp_ADD: subopc = 0; break; 2301 //.. case Xfp_SUB: subopc = 4; break; 2302 //.. case Xfp_MUL: subopc = 1; break; 2303 //.. case Xfp_DIV: subopc = 6; break; 2304 //.. default: vpanic("do_fop2_st: unknown op"); 2305 //.. } 2306 //.. *p++ = 0xD8; 2307 //.. p = doAMode_R(p, fake(subopc), fake(i)); 2308 //.. return p; 2309 //.. # undef fake 2310 //.. } 2311 //.. 2312 //.. /* Push a 32-bit word on the stack. The word depends on tags[3:0]; 2313 //.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[]. 2314 //.. */ 2315 //.. static UChar* push_word_from_tags ( UChar* p, UShort tags ) 2316 //.. { 2317 //.. UInt w; 2318 //.. vassert(0 == (tags & ~0xF)); 2319 //.. if (tags == 0) { 2320 //.. /* pushl $0x00000000 */ 2321 //.. *p++ = 0x6A; 2322 //.. *p++ = 0x00; 2323 //.. } 2324 //.. else 2325 //.. /* pushl $0xFFFFFFFF */ 2326 //.. if (tags == 0xF) { 2327 //.. *p++ = 0x6A; 2328 //.. *p++ = 0xFF; 2329 //.. } else { 2330 //.. vassert(0); /* awaiting test case */ 2331 //.. w = 0; 2332 //.. if (tags & 1) w |= 0x000000FF; 2333 //.. if (tags & 2) w |= 0x0000FF00; 2334 //.. if (tags & 4) w |= 0x00FF0000; 2335 //.. if (tags & 8) w |= 0xFF000000; 2336 //.. *p++ = 0x68; 2337 //.. p = emit32(p, w); 2338 //.. } 2339 //.. return p; 2340 //.. } 2341 2342 /* Emit an instruction into buf and return the number of bytes used. 2343 Note that buf is not the insn's final place, and therefore it is 2344 imperative to emit position-independent code. */ 2345 2346 Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, 2347 Bool mode64, 2348 void* dispatch_unassisted, 2349 void* dispatch_assisted ) 2350 { 2351 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2352 UInt xtra; 2353 UInt reg; 2354 UChar rex; 2355 UChar* p = &buf[0]; 2356 UChar* ptmp; 2357 Int j; 2358 vassert(nbuf >= 32); 2359 vassert(mode64 == True); 2360 2361 /* Wrap an integer as a int register, for use assembling 2362 GrpN insns, in which the greg field is used as a sub-opcode 2363 and does not really contain a register. */ 2364 # define fake(_n) mkHReg((_n), HRcInt64, False) 2365 2366 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */ 2367 2368 switch (i->tag) { 2369 2370 case Ain_Imm64: 2371 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) { 2372 /* Use the short form (load into 32 bit reg, + default 2373 widening rule) for constants under 1 million. We could 2374 use this form for the range 0 to 0x7FFFFFFF inclusive, but 2375 limit it to a smaller range for verifiability purposes. 
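            For example, an Imm64 of 0x1234 destined for %rcx is emitted
            as the 5-byte 'movl $0x1234, %ecx' (B9 34 12 00 00); the
            implicit zero-extension of 32-bit writes then clears the
            upper half of %rcx.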
*/ 2376 if (1 & iregBit3(i->Ain.Imm64.dst)) 2377 *p++ = 0x41; 2378 *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst); 2379 p = emit32(p, (UInt)i->Ain.Imm64.imm64); 2380 } else { 2381 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst))); 2382 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst)); 2383 p = emit64(p, i->Ain.Imm64.imm64); 2384 } 2385 goto done; 2386 2387 case Ain_Alu64R: 2388 /* Deal specially with MOV */ 2389 if (i->Ain.Alu64R.op == Aalu_MOV) { 2390 switch (i->Ain.Alu64R.src->tag) { 2391 case Armi_Imm: 2392 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) { 2393 /* Actually we could use this form for constants in 2394 the range 0 through 0x7FFFFFFF inclusive, but 2395 limit it to a small range for verifiability 2396 purposes. */ 2397 /* Generate "movl $imm32, 32-bit-register" and let 2398 the default zero-extend rule cause the upper half 2399 of the dst to be zeroed out too. This saves 1 2400 and sometimes 2 bytes compared to the more 2401 obvious encoding in the 'else' branch. */ 2402 if (1 & iregBit3(i->Ain.Alu64R.dst)) 2403 *p++ = 0x41; 2404 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst); 2405 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2406 } else { 2407 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst))); 2408 *p++ = 0xC7; 2409 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst)); 2410 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2411 } 2412 goto done; 2413 case Armi_Reg: 2414 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2415 i->Ain.Alu64R.dst ); 2416 *p++ = 0x89; 2417 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2418 i->Ain.Alu64R.dst); 2419 goto done; 2420 case Armi_Mem: 2421 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2422 i->Ain.Alu64R.src->Armi.Mem.am); 2423 *p++ = 0x8B; 2424 p = doAMode_M(p, i->Ain.Alu64R.dst, 2425 i->Ain.Alu64R.src->Armi.Mem.am); 2426 goto done; 2427 default: 2428 goto bad; 2429 } 2430 } 2431 /* MUL */ 2432 if (i->Ain.Alu64R.op == Aalu_MUL) { 2433 switch (i->Ain.Alu64R.src->tag) { 2434 case Armi_Reg: 2435 *p++ = rexAMode_R( i->Ain.Alu64R.dst, 2436 i->Ain.Alu64R.src->Armi.Reg.reg); 2437 *p++ = 0x0F; 2438 *p++ = 0xAF; 2439 p = doAMode_R(p, i->Ain.Alu64R.dst, 2440 i->Ain.Alu64R.src->Armi.Reg.reg); 2441 goto done; 2442 case Armi_Mem: 2443 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2444 i->Ain.Alu64R.src->Armi.Mem.am); 2445 *p++ = 0x0F; 2446 *p++ = 0xAF; 2447 p = doAMode_M(p, i->Ain.Alu64R.dst, 2448 i->Ain.Alu64R.src->Armi.Mem.am); 2449 goto done; 2450 case Armi_Imm: 2451 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2452 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2453 *p++ = 0x6B; 2454 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2455 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2456 } else { 2457 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2458 *p++ = 0x69; 2459 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2460 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2461 } 2462 goto done; 2463 default: 2464 goto bad; 2465 } 2466 } 2467 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2468 opc = opc_rr = subopc_imm = opc_imma = 0; 2469 switch (i->Ain.Alu64R.op) { 2470 case Aalu_ADC: opc = 0x13; opc_rr = 0x11; 2471 subopc_imm = 2; opc_imma = 0x15; break; 2472 case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 2473 subopc_imm = 0; opc_imma = 0x05; break; 2474 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 2475 subopc_imm = 5; opc_imma = 0x2D; break; 2476 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19; 2477 subopc_imm = 3; opc_imma = 0x1D; break; 2478 case Aalu_AND: opc = 0x23; opc_rr = 0x21; 2479 
subopc_imm = 4; opc_imma = 0x25; break; 2480 case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 2481 subopc_imm = 6; opc_imma = 0x35; break; 2482 case Aalu_OR: opc = 0x0B; opc_rr = 0x09; 2483 subopc_imm = 1; opc_imma = 0x0D; break; 2484 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 2485 subopc_imm = 7; opc_imma = 0x3D; break; 2486 default: goto bad; 2487 } 2488 switch (i->Ain.Alu64R.src->tag) { 2489 case Armi_Imm: 2490 if (i->Ain.Alu64R.dst == hregAMD64_RAX() 2491 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2492 goto bad; /* FIXME: awaiting test case */ 2493 *p++ = toUChar(opc_imma); 2494 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2495 } else 2496 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2497 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst ); 2498 *p++ = 0x83; 2499 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); 2500 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2501 } else { 2502 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst); 2503 *p++ = 0x81; 2504 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); 2505 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2506 } 2507 goto done; 2508 case Armi_Reg: 2509 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2510 i->Ain.Alu64R.dst); 2511 *p++ = toUChar(opc_rr); 2512 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2513 i->Ain.Alu64R.dst); 2514 goto done; 2515 case Armi_Mem: 2516 *p++ = rexAMode_M( i->Ain.Alu64R.dst, 2517 i->Ain.Alu64R.src->Armi.Mem.am); 2518 *p++ = toUChar(opc); 2519 p = doAMode_M(p, i->Ain.Alu64R.dst, 2520 i->Ain.Alu64R.src->Armi.Mem.am); 2521 goto done; 2522 default: 2523 goto bad; 2524 } 2525 break; 2526 2527 case Ain_Alu64M: 2528 /* Deal specially with MOV */ 2529 if (i->Ain.Alu64M.op == Aalu_MOV) { 2530 switch (i->Ain.Alu64M.src->tag) { 2531 case Ari_Reg: 2532 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg, 2533 i->Ain.Alu64M.dst); 2534 *p++ = 0x89; 2535 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg, 2536 i->Ain.Alu64M.dst); 2537 goto done; 2538 case Ari_Imm: 2539 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst); 2540 *p++ = 0xC7; 2541 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst); 2542 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32); 2543 goto done; 2544 default: 2545 goto bad; 2546 } 2547 } 2548 //.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not 2549 //.. allowed here. */ 2550 //.. opc = subopc_imm = opc_imma = 0; 2551 //.. switch (i->Xin.Alu32M.op) { 2552 //.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; 2553 //.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; 2554 //.. default: goto bad; 2555 //.. } 2556 //.. switch (i->Xin.Alu32M.src->tag) { 2557 //.. case Xri_Reg: 2558 //.. *p++ = opc; 2559 //.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, 2560 //.. i->Xin.Alu32M.dst); 2561 //.. goto done; 2562 //.. case Xri_Imm: 2563 //.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { 2564 //.. *p++ = 0x83; 2565 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2566 //.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32; 2567 //.. goto done; 2568 //.. } else { 2569 //.. *p++ = 0x81; 2570 //.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); 2571 //.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); 2572 //.. goto done; 2573 //.. } 2574 //.. default: 2575 //.. goto bad; 2576 //.. 
} 2577 break; 2578 2579 case Ain_Sh64: 2580 opc_cl = opc_imm = subopc = 0; 2581 switch (i->Ain.Sh64.op) { 2582 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2583 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2584 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2585 default: goto bad; 2586 } 2587 if (i->Ain.Sh64.src == 0) { 2588 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2589 *p++ = toUChar(opc_cl); 2590 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2591 goto done; 2592 } else { 2593 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2594 *p++ = toUChar(opc_imm); 2595 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2596 *p++ = (UChar)(i->Ain.Sh64.src); 2597 goto done; 2598 } 2599 break; 2600 2601 case Ain_Test64: 2602 /* testq sign-extend($imm32), %reg */ 2603 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst); 2604 *p++ = 0xF7; 2605 p = doAMode_R(p, fake(0), i->Ain.Test64.dst); 2606 p = emit32(p, i->Ain.Test64.imm32); 2607 goto done; 2608 2609 case Ain_Unary64: 2610 if (i->Ain.Unary64.op == Aun_NOT) { 2611 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2612 *p++ = 0xF7; 2613 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst); 2614 goto done; 2615 } 2616 if (i->Ain.Unary64.op == Aun_NEG) { 2617 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2618 *p++ = 0xF7; 2619 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst); 2620 goto done; 2621 } 2622 break; 2623 2624 case Ain_Lea64: 2625 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am); 2626 *p++ = 0x8D; 2627 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am); 2628 goto done; 2629 2630 case Ain_Alu32R: 2631 /* ADD/SUB/AND/OR/XOR/CMP */ 2632 opc = opc_rr = subopc_imm = opc_imma = 0; 2633 switch (i->Ain.Alu32R.op) { 2634 case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 2635 subopc_imm = 0; opc_imma = 0x05; break; 2636 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 2637 subopc_imm = 5; opc_imma = 0x2D; break; 2638 case Aalu_AND: opc = 0x23; opc_rr = 0x21; 2639 subopc_imm = 4; opc_imma = 0x25; break; 2640 case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 2641 subopc_imm = 6; opc_imma = 0x35; break; 2642 case Aalu_OR: opc = 0x0B; opc_rr = 0x09; 2643 subopc_imm = 1; opc_imma = 0x0D; break; 2644 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 2645 subopc_imm = 7; opc_imma = 0x3D; break; 2646 default: goto bad; 2647 } 2648 switch (i->Ain.Alu32R.src->tag) { 2649 case Armi_Imm: 2650 if (i->Ain.Alu32R.dst == hregAMD64_RAX() 2651 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { 2652 goto bad; /* FIXME: awaiting test case */ 2653 *p++ = toUChar(opc_imma); 2654 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); 2655 } else 2656 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { 2657 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) ); 2658 if (rex != 0x40) *p++ = rex; 2659 *p++ = 0x83; 2660 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); 2661 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32); 2662 } else { 2663 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) ); 2664 if (rex != 0x40) *p++ = rex; 2665 *p++ = 0x81; 2666 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); 2667 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); 2668 } 2669 goto done; 2670 case Armi_Reg: 2671 rex = clearWBit( 2672 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg, 2673 i->Ain.Alu32R.dst) ); 2674 if (rex != 0x40) *p++ = rex; 2675 *p++ = toUChar(opc_rr); 2676 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg, 2677 i->Ain.Alu32R.dst); 2678 goto done; 2679 case Armi_Mem: 2680 rex = clearWBit( 2681 rexAMode_M( i->Ain.Alu32R.dst, 2682 
i->Ain.Alu32R.src->Armi.Mem.am) ); 2683 if (rex != 0x40) *p++ = rex; 2684 *p++ = toUChar(opc); 2685 p = doAMode_M(p, i->Ain.Alu32R.dst, 2686 i->Ain.Alu32R.src->Armi.Mem.am); 2687 goto done; 2688 default: 2689 goto bad; 2690 } 2691 break; 2692 2693 case Ain_MulL: 2694 subopc = i->Ain.MulL.syned ? 5 : 4; 2695 switch (i->Ain.MulL.src->tag) { 2696 case Arm_Mem: 2697 *p++ = rexAMode_M( fake(0), 2698 i->Ain.MulL.src->Arm.Mem.am); 2699 *p++ = 0xF7; 2700 p = doAMode_M(p, fake(subopc), 2701 i->Ain.MulL.src->Arm.Mem.am); 2702 goto done; 2703 case Arm_Reg: 2704 *p++ = rexAMode_R(fake(0), 2705 i->Ain.MulL.src->Arm.Reg.reg); 2706 *p++ = 0xF7; 2707 p = doAMode_R(p, fake(subopc), 2708 i->Ain.MulL.src->Arm.Reg.reg); 2709 goto done; 2710 default: 2711 goto bad; 2712 } 2713 break; 2714 2715 case Ain_Div: 2716 subopc = i->Ain.Div.syned ? 7 : 6; 2717 if (i->Ain.Div.sz == 4) { 2718 switch (i->Ain.Div.src->tag) { 2719 case Arm_Mem: 2720 goto bad; 2721 /*FIXME*/ 2722 *p++ = 0xF7; 2723 p = doAMode_M(p, fake(subopc), 2724 i->Ain.Div.src->Arm.Mem.am); 2725 goto done; 2726 case Arm_Reg: 2727 *p++ = clearWBit( 2728 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg)); 2729 *p++ = 0xF7; 2730 p = doAMode_R(p, fake(subopc), 2731 i->Ain.Div.src->Arm.Reg.reg); 2732 goto done; 2733 default: 2734 goto bad; 2735 } 2736 } 2737 if (i->Ain.Div.sz == 8) { 2738 switch (i->Ain.Div.src->tag) { 2739 case Arm_Mem: 2740 *p++ = rexAMode_M( fake(0), 2741 i->Ain.Div.src->Arm.Mem.am); 2742 *p++ = 0xF7; 2743 p = doAMode_M(p, fake(subopc), 2744 i->Ain.Div.src->Arm.Mem.am); 2745 goto done; 2746 case Arm_Reg: 2747 *p++ = rexAMode_R( fake(0), 2748 i->Ain.Div.src->Arm.Reg.reg); 2749 *p++ = 0xF7; 2750 p = doAMode_R(p, fake(subopc), 2751 i->Ain.Div.src->Arm.Reg.reg); 2752 goto done; 2753 default: 2754 goto bad; 2755 } 2756 } 2757 break; 2758 2759 //.. case Xin_Sh3232: 2760 //.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); 2761 //.. if (i->Xin.Sh3232.amt == 0) { 2762 //.. /* shldl/shrdl by %cl */ 2763 //.. *p++ = 0x0F; 2764 //.. if (i->Xin.Sh3232.op == Xsh_SHL) { 2765 //.. *p++ = 0xA5; 2766 //.. } else { 2767 //.. *p++ = 0xAD; 2768 //.. } 2769 //.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); 2770 //.. goto done; 2771 //.. } 2772 //.. break; 2773 2774 case Ain_Push: 2775 switch (i->Ain.Push.src->tag) { 2776 case Armi_Mem: 2777 *p++ = clearWBit( 2778 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am)); 2779 *p++ = 0xFF; 2780 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am); 2781 goto done; 2782 case Armi_Imm: 2783 *p++ = 0x68; 2784 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32); 2785 goto done; 2786 case Armi_Reg: 2787 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg))); 2788 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg)); 2789 goto done; 2790 default: 2791 goto bad; 2792 } 2793 2794 case Ain_Call: { 2795 /* As per detailed comment for Ain_Call in 2796 getRegUsage_AMD64Instr above, %r11 is used as an address 2797 temporary. */ 2798 /* jump over the following two insns if the condition does not 2799 hold */ 2800 Bool shortImm = fitsIn32Bits(i->Ain.Call.target); 2801 if (i->Ain.Call.cond != Acc_ALWAYS) { 2802 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1))); 2803 *p++ = shortImm ? 
10 : 13; 2804 /* 10 or 13 bytes in the next two insns */ 2805 } 2806 if (shortImm) { 2807 /* 7 bytes: movl sign-extend(imm32), %r11 */ 2808 *p++ = 0x49; 2809 *p++ = 0xC7; 2810 *p++ = 0xC3; 2811 p = emit32(p, (UInt)i->Ain.Call.target); 2812 } else { 2813 /* 10 bytes: movabsq $target, %r11 */ 2814 *p++ = 0x49; 2815 *p++ = 0xBB; 2816 p = emit64(p, i->Ain.Call.target); 2817 } 2818 /* 3 bytes: call *%r11 */ 2819 *p++ = 0x41; 2820 *p++ = 0xFF; 2821 *p++ = 0xD3; 2822 goto done; 2823 } 2824 2825 case Ain_Goto: { 2826 void* dispatch_to_use = NULL; 2827 vassert(dispatch_unassisted != NULL); 2828 vassert(dispatch_assisted != NULL); 2829 2830 /* Use ptmp for backpatching conditional jumps. */ 2831 ptmp = NULL; 2832 2833 /* First off, if this is conditional, create a conditional 2834 jump over the rest of it. */ 2835 if (i->Ain.Goto.cond != Acc_ALWAYS) { 2836 /* jmp fwds if !condition */ 2837 *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1)); 2838 ptmp = p; /* fill in this bit later */ 2839 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2840 } 2841 2842 /* If a non-boring, set %rbp (the guest state pointer) 2843 appropriately. Since these numbers are all small positive 2844 integers, we can get away with "movl $N, %ebp" rather than 2845 the longer "movq $N, %rbp". Also, decide which dispatcher we 2846 need to use. */ 2847 dispatch_to_use = dispatch_assisted; 2848 2849 /* movl $magic_number, %ebp */ 2850 switch (i->Ain.Goto.jk) { 2851 case Ijk_ClientReq: 2852 *p++ = 0xBD; 2853 p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break; 2854 case Ijk_Sys_syscall: 2855 *p++ = 0xBD; 2856 p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break; 2857 case Ijk_Sys_int32: 2858 *p++ = 0xBD; 2859 p = emit32(p, VEX_TRC_JMP_SYS_INT32); break; 2860 case Ijk_Yield: 2861 *p++ = 0xBD; 2862 p = emit32(p, VEX_TRC_JMP_YIELD); break; 2863 case Ijk_YieldNoRedir: 2864 *p++ = 0xBD; 2865 p = emit32(p, VEX_TRC_JMP_YIELD_NOREDIR); break; 2866 case Ijk_EmWarn: 2867 *p++ = 0xBD; 2868 p = emit32(p, VEX_TRC_JMP_EMWARN); break; 2869 case Ijk_MapFail: 2870 *p++ = 0xBD; 2871 p = emit32(p, VEX_TRC_JMP_MAPFAIL); break; 2872 case Ijk_NoDecode: 2873 *p++ = 0xBD; 2874 p = emit32(p, VEX_TRC_JMP_NODECODE); break; 2875 case Ijk_TInval: 2876 *p++ = 0xBD; 2877 p = emit32(p, VEX_TRC_JMP_TINVAL); break; 2878 case Ijk_NoRedir: 2879 *p++ = 0xBD; 2880 p = emit32(p, VEX_TRC_JMP_NOREDIR); break; 2881 case Ijk_SigTRAP: 2882 *p++ = 0xBD; 2883 p = emit32(p, VEX_TRC_JMP_SIGTRAP); break; 2884 case Ijk_SigSEGV: 2885 *p++ = 0xBD; 2886 p = emit32(p, VEX_TRC_JMP_SIGSEGV); break; 2887 case Ijk_Ret: 2888 case Ijk_Call: 2889 case Ijk_Boring: 2890 dispatch_to_use = dispatch_unassisted; 2891 break; 2892 default: 2893 ppIRJumpKind(i->Ain.Goto.jk); 2894 vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind"); 2895 } 2896 2897 /* Get the destination address into %rax */ 2898 if (i->Ain.Goto.dst->tag == Ari_Imm) { 2899 /* movl sign-ext($immediate), %rax ; ret */ 2900 *p++ = 0x48; 2901 *p++ = 0xC7; 2902 *p++ = 0xC0; 2903 p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32); 2904 } else { 2905 vassert(i->Ain.Goto.dst->tag == Ari_Reg); 2906 /* movq %reg, %rax ; ret */ 2907 if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) { 2908 *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); 2909 *p++ = 0x89; 2910 p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); 2911 } 2912 } 2913 2914 /* Get the dispatcher address into %rdx. This has to happen 2915 after the load of %rax since %rdx might be carrying the value 2916 destined for %rax immediately prior to this Ain_Goto. 
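         Concretely, the code below produces 'movq $dispatcher, %rdx ;
         jmp *%rdx', using the short sign-extended 32-bit form of the
         move whenever the dispatcher address fits in 32 bits.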
*/ 2917 vassert(sizeof(ULong) == sizeof(void*)); 2918 2919 if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) { 2920 /* movl sign-extend(imm32), %rdx */ 2921 *p++ = 0x48; 2922 *p++ = 0xC7; 2923 *p++ = 0xC2; 2924 p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use)); 2925 } else { 2926 /* movabsq $imm64, %rdx */ 2927 *p++ = 0x48; 2928 *p++ = 0xBA; 2929 p = emit64(p, Ptr_to_ULong(dispatch_to_use)); 2930 } 2931 /* jmp *%rdx */ 2932 *p++ = 0xFF; 2933 *p++ = 0xE2; 2934 2935 /* Fix up the conditional jump, if there was one. */ 2936 if (i->Ain.Goto.cond != Acc_ALWAYS) { 2937 Int delta = p - ptmp; 2938 vassert(delta > 0 && delta < 30); 2939 *ptmp = toUChar(delta-1); 2940 } 2941 goto done; 2942 } 2943 2944 case Ain_CMov64: 2945 vassert(i->Ain.CMov64.cond != Acc_ALWAYS); 2946 if (i->Ain.CMov64.src->tag == Arm_Reg) { 2947 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2948 *p++ = 0x0F; 2949 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2950 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2951 goto done; 2952 } 2953 if (i->Ain.CMov64.src->tag == Arm_Mem) { 2954 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2955 *p++ = 0x0F; 2956 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2957 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2958 goto done; 2959 } 2960 break; 2961 2962 case Ain_MovxLQ: 2963 /* No, _don't_ ask me why the sense of the args has to be 2964 different in the S vs Z case. I don't know. */ 2965 if (i->Ain.MovxLQ.syned) { 2966 /* Need REX.W = 1 here, but rexAMode_R does that for us. */ 2967 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2968 *p++ = 0x63; 2969 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2970 } else { 2971 /* Produce a 32-bit reg-reg move, since the implicit 2972 zero-extend does what we want. */ 2973 *p++ = clearWBit ( 2974 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst)); 2975 *p++ = 0x89; 2976 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst); 2977 } 2978 goto done; 2979 2980 case Ain_LoadEX: 2981 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) { 2982 /* movzbq */ 2983 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2984 *p++ = 0x0F; 2985 *p++ = 0xB6; 2986 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2987 goto done; 2988 } 2989 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) { 2990 /* movzwq */ 2991 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2992 *p++ = 0x0F; 2993 *p++ = 0xB7; 2994 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2995 goto done; 2996 } 2997 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) { 2998 /* movzlq */ 2999 /* This isn't really an existing AMD64 instruction per se. 3000 Rather, we have to do a 32-bit load. Because a 32-bit 3001 write implicitly clears the upper 32 bits of the target 3002 register, we get what we want. */ 3003 *p++ = clearWBit( 3004 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src)); 3005 *p++ = 0x8B; 3006 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 3007 goto done; 3008 } 3009 break; 3010 3011 case Ain_Set64: 3012 /* Make the destination register be 1 or 0, depending on whether 3013 the relevant condition holds. Complication: the top 56 bits 3014 of the destination should be forced to zero, but doing 'xorq 3015 %r,%r' kills the flag(s) we are about to read. Sigh. So 3016 start off my moving $0 into the dest. */ 3017 reg = iregBits3210(i->Ain.Set64.dst); 3018 vassert(reg < 16); 3019 3020 /* movq $0, %dst */ 3021 *p++ = toUChar(reg >= 8 ? 
0x49 : 0x48); 3022 *p++ = 0xC7; 3023 *p++ = toUChar(0xC0 + (reg & 7)); 3024 p = emit32(p, 0); 3025 3026 /* setb lo8(%dst) */ 3027 /* note, 8-bit register rex trickyness. Be careful here. */ 3028 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40); 3029 *p++ = 0x0F; 3030 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond)); 3031 *p++ = toUChar(0xC0 + (reg & 7)); 3032 goto done; 3033 3034 case Ain_Bsfr64: 3035 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 3036 *p++ = 0x0F; 3037 if (i->Ain.Bsfr64.isFwds) { 3038 *p++ = 0xBC; 3039 } else { 3040 *p++ = 0xBD; 3041 } 3042 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 3043 goto done; 3044 3045 case Ain_MFence: 3046 /* mfence */ 3047 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; 3048 goto done; 3049 3050 case Ain_ACAS: 3051 /* lock */ 3052 *p++ = 0xF0; 3053 if (i->Ain.ACAS.sz == 2) *p++ = 0x66; 3054 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value 3055 in %rbx. The new-value register is hardwired to be %rbx 3056 since dealing with byte integer registers is too much hassle, 3057 so we force the register operand to %rbx (could equally be 3058 %rcx or %rdx). */ 3059 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr ); 3060 if (i->Ain.ACAS.sz != 8) 3061 rex = clearWBit(rex); 3062 3063 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */ 3064 *p++ = 0x0F; 3065 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 3066 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr); 3067 goto done; 3068 3069 case Ain_DACAS: 3070 /* lock */ 3071 *p++ = 0xF0; 3072 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new 3073 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so 3074 aren't encoded in the insn. */ 3075 rex = rexAMode_M( fake(1), i->Ain.ACAS.addr ); 3076 if (i->Ain.ACAS.sz != 8) 3077 rex = clearWBit(rex); 3078 *p++ = rex; 3079 *p++ = 0x0F; 3080 *p++ = 0xC7; 3081 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr); 3082 goto done; 3083 3084 case Ain_A87Free: 3085 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7); 3086 for (j = 0; j < i->Ain.A87Free.nregs; j++) { 3087 p = do_ffree_st(p, 7-j); 3088 } 3089 goto done; 3090 3091 case Ain_A87PushPop: 3092 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4); 3093 if (i->Ain.A87PushPop.isPush) { 3094 /* Load from memory into %st(0): flds/fldl amode */ 3095 *p++ = clearWBit( 3096 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) ); 3097 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; 3098 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr); 3099 } else { 3100 /* Dump %st(0) to memory: fstps/fstpl amode */ 3101 *p++ = clearWBit( 3102 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) ); 3103 *p++ = i->Ain.A87PushPop.szB == 4 ? 
0xD9 : 0xDD; 3104 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr); 3105 goto done; 3106 } 3107 goto done; 3108 3109 case Ain_A87FpOp: 3110 switch (i->Ain.A87FpOp.op) { 3111 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 3112 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 3113 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 3114 case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break; 3115 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 3116 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 3117 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; 3118 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break; 3119 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break; 3120 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; 3121 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; 3122 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; 3123 default: goto bad; 3124 } 3125 goto done; 3126 3127 case Ain_A87LdCW: 3128 *p++ = clearWBit( 3129 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) ); 3130 *p++ = 0xD9; 3131 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr); 3132 goto done; 3133 3134 case Ain_A87StSW: 3135 *p++ = clearWBit( 3136 rexAMode_M(fake(7), i->Ain.A87StSW.addr) ); 3137 *p++ = 0xDD; 3138 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr); 3139 goto done; 3140 3141 case Ain_Store: 3142 if (i->Ain.Store.sz == 2) { 3143 /* This just goes to show the crazyness of the instruction 3144 set encoding. We have to insert two prefix bytes, but be 3145 careful to avoid a conflict in what the size should be, by 3146 ensuring that REX.W = 0. */ 3147 *p++ = 0x66; /* override to 16-bits */ 3148 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3149 *p++ = 0x89; 3150 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3151 goto done; 3152 } 3153 if (i->Ain.Store.sz == 4) { 3154 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3155 *p++ = 0x89; 3156 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3157 goto done; 3158 } 3159 if (i->Ain.Store.sz == 1) { 3160 /* This is one place where it would be wrong to skip emitting 3161 a rex byte of 0x40, since the mere presence of rex changes 3162 the meaning of the byte register access. Be careful. */ 3163 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3164 *p++ = 0x88; 3165 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3166 goto done; 3167 } 3168 break; 3169 3170 //.. case Xin_FpUnary: 3171 //.. /* gop %src, %dst 3172 //.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) 3173 //.. */ 3174 //.. p = do_ffree_st7(p); 3175 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src)); 3176 //.. p = do_fop1_st(p, i->Xin.FpUnary.op); 3177 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst)); 3178 //.. goto done; 3179 //.. 3180 //.. case Xin_FpBinary: 3181 //.. if (i->Xin.FpBinary.op == Xfp_YL2X 3182 //.. || i->Xin.FpBinary.op == Xfp_YL2XP1) { 3183 //.. /* Have to do this specially. */ 3184 //.. /* ffree %st7 ; fld %st(srcL) ; 3185 //.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ 3186 //.. p = do_ffree_st7(p); 3187 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3188 //.. p = do_ffree_st7(p); 3189 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 3190 //.. *p++ = 0xD9; 3191 //.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9; 3192 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3193 //.. goto done; 3194 //.. } 3195 //.. if (i->Xin.FpBinary.op == Xfp_ATAN) { 3196 //.. /* Have to do this specially. */ 3197 //.. 
/* ffree %st7 ; fld %st(srcL) ; 3198 //.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ 3199 //.. p = do_ffree_st7(p); 3200 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3201 //.. p = do_ffree_st7(p); 3202 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); 3203 //.. *p++ = 0xD9; *p++ = 0xF3; 3204 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3205 //.. goto done; 3206 //.. } 3207 //.. if (i->Xin.FpBinary.op == Xfp_PREM 3208 //.. || i->Xin.FpBinary.op == Xfp_PREM1 3209 //.. || i->Xin.FpBinary.op == Xfp_SCALE) { 3210 //.. /* Have to do this specially. */ 3211 //.. /* ffree %st7 ; fld %st(srcR) ; 3212 //.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; 3213 //.. fincstp ; ffree %st7 */ 3214 //.. p = do_ffree_st7(p); 3215 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR)); 3216 //.. p = do_ffree_st7(p); 3217 //.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL)); 3218 //.. *p++ = 0xD9; 3219 //.. switch (i->Xin.FpBinary.op) { 3220 //.. case Xfp_PREM: *p++ = 0xF8; break; 3221 //.. case Xfp_PREM1: *p++ = 0xF5; break; 3222 //.. case Xfp_SCALE: *p++ = 0xFD; break; 3223 //.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)"); 3224 //.. } 3225 //.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst)); 3226 //.. *p++ = 0xD9; *p++ = 0xF7; 3227 //.. p = do_ffree_st7(p); 3228 //.. goto done; 3229 //.. } 3230 //.. /* General case */ 3231 //.. /* gop %srcL, %srcR, %dst 3232 //.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) 3233 //.. */ 3234 //.. p = do_ffree_st7(p); 3235 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); 3236 //.. p = do_fop2_st(p, i->Xin.FpBinary.op, 3237 //.. 1+hregNumber(i->Xin.FpBinary.srcR)); 3238 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); 3239 //.. goto done; 3240 //.. 3241 //.. case Xin_FpLdSt: 3242 //.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8); 3243 //.. if (i->Xin.FpLdSt.isLoad) { 3244 //.. /* Load from memory into %fakeN. 3245 //.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1) 3246 //.. */ 3247 //.. p = do_ffree_st7(p); 3248 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; 3249 //.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); 3250 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg)); 3251 //.. goto done; 3252 //.. } else { 3253 //.. /* Store from %fakeN into memory. 3254 //.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode 3255 //.. */ 3256 //.. p = do_ffree_st7(p); 3257 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg)); 3258 //.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; 3259 //.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); 3260 //.. goto done; 3261 //.. } 3262 //.. break; 3263 //.. 3264 //.. case Xin_FpLdStI: 3265 //.. if (i->Xin.FpLdStI.isLoad) { 3266 //.. /* Load from memory into %fakeN, converting from an int. 3267 //.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) 3268 //.. */ 3269 //.. switch (i->Xin.FpLdStI.sz) { 3270 //.. case 8: opc = 0xDF; subopc_imm = 5; break; 3271 //.. case 4: opc = 0xDB; subopc_imm = 0; break; 3272 //.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; 3273 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)"); 3274 //.. } 3275 //.. p = do_ffree_st7(p); 3276 //.. *p++ = opc; 3277 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 3278 //.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg)); 3279 //.. goto done; 3280 //.. } else { 3281 //.. /* Store from %fakeN into memory, converting to an int. 3282 //.. 
--> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode 3283 //.. */ 3284 //.. switch (i->Xin.FpLdStI.sz) { 3285 //.. case 8: opc = 0xDF; subopc_imm = 7; break; 3286 //.. case 4: opc = 0xDB; subopc_imm = 3; break; 3287 //.. case 2: opc = 0xDF; subopc_imm = 3; break; 3288 //.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)"); 3289 //.. } 3290 //.. p = do_ffree_st7(p); 3291 //.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg)); 3292 //.. *p++ = opc; 3293 //.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); 3294 //.. goto done; 3295 //.. } 3296 //.. break; 3297 //.. 3298 //.. case Xin_Fp64to32: 3299 //.. /* ffree %st7 ; fld %st(src) */ 3300 //.. p = do_ffree_st7(p); 3301 //.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src)); 3302 //.. /* subl $4, %esp */ 3303 //.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; 3304 //.. /* fstps (%esp) */ 3305 //.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; 3306 //.. /* flds (%esp) */ 3307 //.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; 3308 //.. /* addl $4, %esp */ 3309 //.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; 3310 //.. /* fstp %st(1+dst) */ 3311 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst)); 3312 //.. goto done; 3313 //.. 3314 //.. case Xin_FpCMov: 3315 //.. /* jmp fwds if !condition */ 3316 //.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1); 3317 //.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3318 //.. ptmp = p; 3319 //.. 3320 //.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ 3321 //.. p = do_ffree_st7(p); 3322 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src)); 3323 //.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst)); 3324 //.. 3325 //.. /* Fill in the jump offset. */ 3326 //.. *(ptmp-1) = p - ptmp; 3327 //.. goto done; 3328 3329 case Ain_LdMXCSR: 3330 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr)); 3331 *p++ = 0x0F; 3332 *p++ = 0xAE; 3333 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr); 3334 goto done; 3335 3336 //.. case Xin_FpStSW_AX: 3337 //.. /* note, this emits fnstsw %ax, not fstsw %ax */ 3338 //.. *p++ = 0xDF; 3339 //.. *p++ = 0xE0; 3340 //.. goto done; 3341 3342 case Ain_SseUComIS: 3343 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */ 3344 /* ucomi[sd] %srcL, %srcR */ 3345 if (i->Ain.SseUComIS.sz == 8) { 3346 *p++ = 0x66; 3347 } else { 3348 goto bad; 3349 vassert(i->Ain.SseUComIS.sz == 4); 3350 } 3351 *p++ = clearWBit ( 3352 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL), 3353 vreg2ireg(i->Ain.SseUComIS.srcR) )); 3354 *p++ = 0x0F; 3355 *p++ = 0x2E; 3356 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL), 3357 vreg2ireg(i->Ain.SseUComIS.srcR) ); 3358 /* pushfq */ 3359 *p++ = 0x9C; 3360 /* popq %dst */ 3361 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst))); 3362 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst)); 3363 goto done; 3364 3365 case Ain_SseSI2SF: 3366 /* cvssi2s[sd] %src, %dst */ 3367 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst), 3368 i->Ain.SseSI2SF.src ); 3369 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2); 3370 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex); 3371 *p++ = 0x0F; 3372 *p++ = 0x2A; 3373 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst), 3374 i->Ain.SseSI2SF.src ); 3375 goto done; 3376 3377 case Ain_SseSF2SI: 3378 /* cvss[sd]2si %src, %dst */ 3379 rex = rexAMode_R( i->Ain.SseSF2SI.dst, 3380 vreg2ireg(i->Ain.SseSF2SI.src) ); 3381 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2); 3382 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? 
clearWBit(rex) : rex); 3383 *p++ = 0x0F; 3384 *p++ = 0x2D; 3385 p = doAMode_R( p, i->Ain.SseSF2SI.dst, 3386 vreg2ireg(i->Ain.SseSF2SI.src) ); 3387 goto done; 3388 3389 case Ain_SseSDSS: 3390 /* cvtsd2ss/cvtss2sd %src, %dst */ 3391 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3); 3392 *p++ = clearWBit( 3393 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst), 3394 vreg2ireg(i->Ain.SseSDSS.src) )); 3395 *p++ = 0x0F; 3396 *p++ = 0x5A; 3397 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst), 3398 vreg2ireg(i->Ain.SseSDSS.src) ); 3399 goto done; 3400 3401 //.. 3402 //.. case Xin_FpCmp: 3403 //.. /* gcmp %fL, %fR, %dst 3404 //.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; 3405 //.. fnstsw %ax ; movl %eax, %dst 3406 //.. */ 3407 //.. /* ffree %st7 */ 3408 //.. p = do_ffree_st7(p); 3409 //.. /* fpush %fL */ 3410 //.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL)); 3411 //.. /* fucomp %(fR+1) */ 3412 //.. *p++ = 0xDD; 3413 //.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))); 3414 //.. /* fnstsw %ax */ 3415 //.. *p++ = 0xDF; 3416 //.. *p++ = 0xE0; 3417 //.. /* movl %eax, %dst */ 3418 //.. *p++ = 0x89; 3419 //.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst); 3420 //.. goto done; 3421 //.. 3422 //.. case Xin_SseConst: { 3423 //.. UShort con = i->Xin.SseConst.con; 3424 //.. p = push_word_from_tags(p, (con >> 12) & 0xF); 3425 //.. p = push_word_from_tags(p, (con >> 8) & 0xF); 3426 //.. p = push_word_from_tags(p, (con >> 4) & 0xF); 3427 //.. p = push_word_from_tags(p, con & 0xF); 3428 //.. /* movl (%esp), %xmm-dst */ 3429 //.. *p++ = 0x0F; 3430 //.. *p++ = 0x10; 3431 //.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)); 3432 //.. *p++ = 0x24; 3433 //.. /* addl $16, %esp */ 3434 //.. *p++ = 0x83; 3435 //.. *p++ = 0xC4; 3436 //.. *p++ = 0x10; 3437 //.. goto done; 3438 //.. } 3439 3440 case Ain_SseLdSt: 3441 if (i->Ain.SseLdSt.sz == 8) { 3442 *p++ = 0xF2; 3443 } else 3444 if (i->Ain.SseLdSt.sz == 4) { 3445 *p++ = 0xF3; 3446 } else 3447 if (i->Ain.SseLdSt.sz != 16) { 3448 vassert(0); 3449 } 3450 *p++ = clearWBit( 3451 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr)); 3452 *p++ = 0x0F; 3453 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11); 3454 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr); 3455 goto done; 3456 3457 case Ain_SseLdzLO: 3458 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8); 3459 /* movs[sd] amode, %xmm-dst */ 3460 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 
0xF3 : 0xF2); 3461 *p++ = clearWBit( 3462 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg), 3463 i->Ain.SseLdzLO.addr)); 3464 *p++ = 0x0F; 3465 *p++ = 0x10; 3466 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg), 3467 i->Ain.SseLdzLO.addr); 3468 goto done; 3469 3470 case Ain_Sse32Fx4: 3471 xtra = 0; 3472 *p++ = clearWBit( 3473 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst), 3474 vreg2ireg(i->Ain.Sse32Fx4.src) )); 3475 *p++ = 0x0F; 3476 switch (i->Ain.Sse32Fx4.op) { 3477 case Asse_ADDF: *p++ = 0x58; break; 3478 case Asse_DIVF: *p++ = 0x5E; break; 3479 case Asse_MAXF: *p++ = 0x5F; break; 3480 case Asse_MINF: *p++ = 0x5D; break; 3481 case Asse_MULF: *p++ = 0x59; break; 3482 case Asse_RCPF: *p++ = 0x53; break; 3483 case Asse_RSQRTF: *p++ = 0x52; break; 3484 case Asse_SQRTF: *p++ = 0x51; break; 3485 case Asse_SUBF: *p++ = 0x5C; break; 3486 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3487 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3488 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3489 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3490 default: goto bad; 3491 } 3492 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst), 3493 vreg2ireg(i->Ain.Sse32Fx4.src) ); 3494 if (xtra & 0x100) 3495 *p++ = toUChar(xtra & 0xFF); 3496 goto done; 3497 3498 case Ain_Sse64Fx2: 3499 xtra = 0; 3500 *p++ = 0x66; 3501 *p++ = clearWBit( 3502 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst), 3503 vreg2ireg(i->Ain.Sse64Fx2.src) )); 3504 *p++ = 0x0F; 3505 switch (i->Ain.Sse64Fx2.op) { 3506 case Asse_ADDF: *p++ = 0x58; break; 3507 case Asse_DIVF: *p++ = 0x5E; break; 3508 case Asse_MAXF: *p++ = 0x5F; break; 3509 case Asse_MINF: *p++ = 0x5D; break; 3510 case Asse_MULF: *p++ = 0x59; break; 3511 //.. case Xsse_RCPF: *p++ = 0x53; break; 3512 //.. case Xsse_RSQRTF: *p++ = 0x52; break; 3513 case Asse_SQRTF: *p++ = 0x51; break; 3514 case Asse_SUBF: *p++ = 0x5C; break; 3515 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3516 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3517 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3518 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3519 default: goto bad; 3520 } 3521 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst), 3522 vreg2ireg(i->Ain.Sse64Fx2.src) ); 3523 if (xtra & 0x100) 3524 *p++ = toUChar(xtra & 0xFF); 3525 goto done; 3526 3527 case Ain_Sse32FLo: 3528 xtra = 0; 3529 *p++ = 0xF3; 3530 *p++ = clearWBit( 3531 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst), 3532 vreg2ireg(i->Ain.Sse32FLo.src) )); 3533 *p++ = 0x0F; 3534 switch (i->Ain.Sse32FLo.op) { 3535 case Asse_ADDF: *p++ = 0x58; break; 3536 case Asse_DIVF: *p++ = 0x5E; break; 3537 case Asse_MAXF: *p++ = 0x5F; break; 3538 case Asse_MINF: *p++ = 0x5D; break; 3539 case Asse_MULF: *p++ = 0x59; break; 3540 case Asse_RCPF: *p++ = 0x53; break; 3541 case Asse_RSQRTF: *p++ = 0x52; break; 3542 case Asse_SQRTF: *p++ = 0x51; break; 3543 case Asse_SUBF: *p++ = 0x5C; break; 3544 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3545 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3546 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3547 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3548 default: goto bad; 3549 } 3550 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst), 3551 vreg2ireg(i->Ain.Sse32FLo.src) ); 3552 if (xtra & 0x100) 3553 *p++ = toUChar(xtra & 0xFF); 3554 goto done; 3555 3556 case Ain_Sse64FLo: 3557 xtra = 0; 3558 *p++ = 0xF2; 3559 *p++ = clearWBit( 3560 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst), 3561 vreg2ireg(i->Ain.Sse64FLo.src) )); 3562 *p++ = 0x0F; 3563 switch (i->Ain.Sse64FLo.op) { 3564 case 
Asse_ADDF: *p++ = 0x58; break; 3565 case Asse_DIVF: *p++ = 0x5E; break; 3566 case Asse_MAXF: *p++ = 0x5F; break; 3567 case Asse_MINF: *p++ = 0x5D; break; 3568 case Asse_MULF: *p++ = 0x59; break; 3569 //.. case Xsse_RCPF: *p++ = 0x53; break; 3570 //.. case Xsse_RSQRTF: *p++ = 0x52; break; 3571 case Asse_SQRTF: *p++ = 0x51; break; 3572 case Asse_SUBF: *p++ = 0x5C; break; 3573 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3574 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3575 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3576 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3577 default: goto bad; 3578 } 3579 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst), 3580 vreg2ireg(i->Ain.Sse64FLo.src) ); 3581 if (xtra & 0x100) 3582 *p++ = toUChar(xtra & 0xFF); 3583 goto done; 3584 3585 case Ain_SseReRg: 3586 # define XX(_n) *p++ = (_n) 3587 3588 rex = clearWBit( 3589 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst), 3590 vreg2ireg(i->Ain.SseReRg.src) )); 3591 3592 switch (i->Ain.SseReRg.op) { 3593 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break; 3594 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break; 3595 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break; 3596 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break; 3597 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break; 3598 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break; 3599 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break; 3600 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break; 3601 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break; 3602 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break; 3603 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break; 3604 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break; 3605 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break; 3606 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break; 3607 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break; 3608 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break; 3609 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break; 3610 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break; 3611 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break; 3612 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break; 3613 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break; 3614 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break; 3615 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break; 3616 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break; 3617 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break; 3618 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break; 3619 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break; 3620 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break; 3621 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break; 3622 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break; 3623 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break; 3624 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break; 3625 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break; 3626 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break; 3627 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break; 3628 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break; 3629 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break; 3630 case Asse_SHR32: XX(0x66); XX(rex); 
XX(0x0F); XX(0xD2); break; 3631 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break; 3632 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break; 3633 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break; 3634 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break; 3635 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break; 3636 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break; 3637 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break; 3638 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break; 3639 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break; 3640 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break; 3641 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break; 3642 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break; 3643 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break; 3644 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break; 3645 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break; 3646 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break; 3647 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break; 3648 default: goto bad; 3649 } 3650 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst), 3651 vreg2ireg(i->Ain.SseReRg.src) ); 3652 # undef XX 3653 goto done; 3654 3655 case Ain_SseCMov: 3656 /* jmp fwds if !condition */ 3657 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1)); 3658 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3659 ptmp = p; 3660 3661 /* movaps %src, %dst */ 3662 *p++ = clearWBit( 3663 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst), 3664 vreg2ireg(i->Ain.SseCMov.src) )); 3665 *p++ = 0x0F; 3666 *p++ = 0x28; 3667 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst), 3668 vreg2ireg(i->Ain.SseCMov.src) ); 3669 3670 /* Fill in the jump offset. */ 3671 *(ptmp-1) = toUChar(p - ptmp); 3672 goto done; 3673 3674 case Ain_SseShuf: 3675 *p++ = 0x66; 3676 *p++ = clearWBit( 3677 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst), 3678 vreg2ireg(i->Ain.SseShuf.src) )); 3679 *p++ = 0x0F; 3680 *p++ = 0x70; 3681 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst), 3682 vreg2ireg(i->Ain.SseShuf.src) ); 3683 *p++ = (UChar)(i->Ain.SseShuf.order); 3684 goto done; 3685 3686 default: 3687 goto bad; 3688 } 3689 3690 bad: 3691 ppAMD64Instr(i, mode64); 3692 vpanic("emit_AMD64Instr"); 3693 /*NOTREACHED*/ 3694 3695 done: 3696 vassert(p - &buf[0] <= 32); 3697 return p - &buf[0]; 3698 3699 # undef fake 3700 } 3701 3702 /*---------------------------------------------------------------*/ 3703 /*--- end host_amd64_defs.c ---*/ 3704 /*---------------------------------------------------------------*/ 3705