/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"


/* --------- Registers. --------- */

void ppHRegAMD64 ( HReg reg )
{
   Int r;
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg64_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegAMD64");
   }
}

static void ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      vex_printf("d");
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg32_names[r]);
         return;
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}

HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }

HReg hregAMD64_XMM0  ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1  ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM3  ( void ) { return mkHReg( 3, HRcVec128, False); }
HReg hregAMD64_XMM4  ( void ) { return mkHReg( 4, HRcVec128, False); }
HReg hregAMD64_XMM5  ( void ) { return mkHReg( 5, HRcVec128, False); }
HReg hregAMD64_XMM6  ( void ) { return mkHReg( 6, HRcVec128, False); }
HReg hregAMD64_XMM7  ( void ) { return mkHReg( 7, HRcVec128, False); }
HReg hregAMD64_XMM8  ( void ) { return mkHReg( 8, HRcVec128, False); }
HReg hregAMD64_XMM9  ( void ) { return mkHReg( 9, HRcVec128, False); }
HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }

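/* Note on the allocable set below (reasons inferred from uses
   elsewhere in this file): %rsp is the stack pointer and %rbp the
   baseblock (guest state) pointer; %r11 is used as a scratch register
   by Ain_Call, Ain_ProfInc and the XDirect/XIndir/XAssisted exits;
   %rax/%rdx are fixed operands of Ain_MulL/Ain_Div and %rcx of
   variable-count Ain_Sh64; so none of those are handed to the
   allocator.  %xmm0 and %xmm1 are likewise kept back, presumably as
   temporaries for the instruction selector. */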
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
   *nregs = 6;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_RBX();

   (*arr)[ 3] = hregAMD64_XMM7();
   (*arr)[ 4] = hregAMD64_XMM8();
   (*arr)[ 5] = hregAMD64_XMM9();
#endif
#if 1
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_R8();
   (*arr)[ 3] = hregAMD64_R9();
   (*arr)[ 4] = hregAMD64_R12();
   (*arr)[ 5] = hregAMD64_R13();
   (*arr)[ 6] = hregAMD64_R14();
   (*arr)[ 7] = hregAMD64_R15();
   (*arr)[ 8] = hregAMD64_RBX();

   (*arr)[ 9] = hregAMD64_XMM3();
   (*arr)[10] = hregAMD64_XMM4();
   (*arr)[11] = hregAMD64_XMM5();
   (*arr)[12] = hregAMD64_XMM6();
   (*arr)[13] = hregAMD64_XMM7();
   (*arr)[14] = hregAMD64_XMM8();
   (*arr)[15] = hregAMD64_XMM9();
   (*arr)[16] = hregAMD64_XMM10();
   (*arr)[17] = hregAMD64_XMM11();
   (*arr)[18] = hregAMD64_XMM12();
   (*arr)[19] = hregAMD64_R10();
#endif
}


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("ppAMD64CondCode");
   }
}


/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

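/* Illustration: Aam_IR denotes imm(reg), e.g. 0x18(%rbp); Aam_IRRS
   denotes imm(base,index,1<<shift), so AMD64AMode_IRRS(0x10, rBase,
   rIndex, 2) stands for the address base + (index << 2) + 0x10 and is
   printed below as 0x10(base,index,4). */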
void ppAMD64AMode ( AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         if (am->Aam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Aam.IR.imm);
         ppHRegAMD64(am->Aam.IR.reg);
         vex_printf(")");
         return;
      case Aam_IRRS:
         vex_printf("0x%x(", am->Aam.IRRS.imm);
         ppHRegAMD64(am->Aam.IRRS.base);
         vex_printf(",");
         ppHRegAMD64(am->Aam.IRRS.index);
         vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
         return;
      default:
         vpanic("ppAMD64AMode");
   }
}

static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         addHRegUse(u, HRmRead, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         addHRegUse(u, HRmRead, am->Aam.IRRS.base);
         addHRegUse(u, HRmRead, am->Aam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_AMD64AMode");
   }
}

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         am->Aam.IRRS.base  = lookupHRegRemap(m, am->Aam.IRRS.base);
         am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_AMD64AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
   switch (op->tag) {
      case Armi_Imm:
         vex_printf("$0x%x", op->Armi.Imm.imm32);
         return;
      case Armi_Reg:
         if (lo32)
            ppHRegAMD64_lo32(op->Armi.Reg.reg);
         else
            ppHRegAMD64(op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         ppAMD64AMode(op->Armi.Mem.am);
         return;
      default:
         vpanic("ppAMD64RMI");
   }
}
void ppAMD64RMI ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, False/*!lo32*/);
}
void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, True/*lo32*/);
}

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         addHRegUse(u, HRmRead, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_AMD64RMI");
   }
}

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         mapRegs_AMD64AMode(m, op->Armi.Mem.am);
         return;
      default:
         vpanic("mapRegs_AMD64RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         vex_printf("$0x%x", op->Ari.Imm.imm32);
         return;
      case Ari_Reg:
         ppHRegAMD64(op->Ari.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RI");
   }
}

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         addHRegUse(u, HRmRead, op->Ari.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RI");
   }
}

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
   switch (op->tag) {
      case Arm_Mem:
         ppAMD64AMode(op->Arm.Mem.am);
         return;
      case Arm_Reg:
         ppHRegAMD64(op->Arm.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RM");
   }
}

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Arm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Arm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RM");
   }
}

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   switch (op->tag) {
      case Arm_Mem:
         mapRegs_AMD64AMode(m, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RM");
   }
}


/* --------- Instructions. --------- */

static const HChar* showAMD64ScalarSz ( Int sz ) {
   switch (sz) {
      case 2: return "w";
      case 4: return "l";
      case 8: return "q";
      default: vpanic("showAMD64ScalarSz");
   }
}

const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
   switch (op) {
      case Aun_NOT: return "not";
      case Aun_NEG: return "neg";
      default: vpanic("showAMD64UnaryOp");
   }
}

const HChar* showAMD64AluOp ( AMD64AluOp op ) {
   switch (op) {
      case Aalu_MOV: return "mov";
      case Aalu_CMP: return "cmp";
      case Aalu_ADD: return "add";
      case Aalu_SUB: return "sub";
      case Aalu_ADC: return "adc";
      case Aalu_SBB: return "sbb";
      case Aalu_AND: return "and";
      case Aalu_OR:  return "or";
      case Aalu_XOR: return "xor";
      case Aalu_MUL: return "imul";
      default: vpanic("showAMD64AluOp");
   }
}

const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
   switch (op) {
      case Ash_SHL: return "shl";
      case Ash_SHR: return "shr";
      case Ash_SAR: return "sar";
      default: vpanic("showAMD64ShiftOp");
   }
}

const HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}

const HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showAMD64SseOp");
   }
}

AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64R;
   i->Ain.Alu64R.op  = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64M;
   i->Ain.Alu64M.op  = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Sh64;
   i->Ain.Sh64.op  = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Unary64;
   i->Ain.Unary64.op  = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Lea64;
   i->Ain.Lea64.am  = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu32R;
   i->Ain.Alu32R.op  = op;
   i->Ain.Alu32R.src = src;
   i->Ain.Alu32R.dst = dst;
   switch (op) {
      case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
      case Aalu_AND: case Aalu_OR:  case Aalu_XOR: break;
      default: vassert(0);
   }
   return i;
}
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src   = src;
   return i;
}
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz    = sz;
   i->Ain.Div.src   = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
                              RetLoc rloc ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Call;
   i->Ain.Call.cond     = cond;
   i->Ain.Call.target   = target;
   i->Ain.Call.regparms = regparms;
   i->Ain.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 6);
   vassert(is_sane_RetLoc(rloc));
   return i;
}

AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
                                 AMD64CondCode cond, Bool toFastEP ) {
   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                  = Ain_XDirect;
   i->Ain.XDirect.dstGA    = dstGA;
   i->Ain.XDirect.amRIP    = amRIP;
   i->Ain.XDirect.cond     = cond;
   i->Ain.XDirect.toFastEP = toFastEP;
   return i;
}
AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
                                AMD64CondCode cond ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_XIndir;
   i->Ain.XIndir.dstGA = dstGA;
   i->Ain.XIndir.amRIP = amRIP;
   i->Ain.XIndir.cond  = cond;
   return i;
}
AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                   AMD64CondCode cond, IRJumpKind jk ) {
   AMD64Instr* i          = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                 = Ain_XAssisted;
   i->Ain.XAssisted.dstGA = dstGA;
   i->Ain.XAssisted.amRIP = amRIP;
   i->Ain.XAssisted.cond  = cond;
   i->Ain.XAssisted.jk    = jk;
   return i;
}

AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src  = src;
   i->Ain.CMov64.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_MovxLQ;
   i->Ain.MovxLQ.syned = syned;
   i->Ain.MovxLQ.src   = src;
   i->Ain.MovxLQ.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned   = syned;
   i->Ain.LoadEX.src     = src;
   i->Ain.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Store;
   i->Ain.Store.sz  = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst  = dst;
   return i;
}
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src    = src;
   i->Ain.Bsfr64.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_MFence;
   return i;
}
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz   = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz   = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}

AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
   AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                   = Ain_A87PushPop;
   i->Ain.A87PushPop.addr   = addr;
   i->Ain.A87PushPop.isPush = isPush;
   i->Ain.A87PushPop.szB    = szB;
   vassert(szB == 8 || szB == 4);
   return i;
}
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseUComIS;
   i->Ain.SseUComIS.sz   = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst  = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src    = src;
   i->Ain.SseSDSS.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseLdSt;
   i->Ain.SseLdSt.isLoad = isLoad;
   i->Ain.SseLdSt.sz     = toUChar(sz);
   i->Ain.SseLdSt.reg    = reg;
   i->Ain.SseLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 16);
   return i;
}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseLdzLO;
   i->Ain.SseLdzLO.sz   = sz;
   i->Ain.SseLdzLO.reg  = reg;
   i->Ain.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32Fx4;
   i->Ain.Sse32Fx4.op  = op;
   i->Ain.Sse32Fx4.src = src;
   i->Ain.Sse32Fx4.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32FLo;
   i->Ain.Sse32FLo.op  = op;
   i->Ain.Sse32FLo.src = src;
   i->Ain.Sse32FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64Fx2;
   i->Ain.Sse64Fx2.op  = op;
   i->Ain.Sse64Fx2.src = src;
   i->Ain.Sse64Fx2.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64FLo;
   i->Ain.Sse64FLo.op  = op;
   i->Ain.Sse64FLo.src = src;
   i->Ain.Sse64FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_SseReRg;
   i->Ain.SseReRg.op  = op;
   i->Ain.SseReRg.src = re;
   i->Ain.SseReRg.dst = rg;
   return i;
}
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseCMov;
   i->Ain.SseCMov.cond = cond;
   i->Ain.SseCMov.src  = src;
   i->Ain.SseCMov.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseShuf;
   i->Ain.SseShuf.order = order;
   i->Ain.SseShuf.src   = src;
   i->Ain.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
//uu                                  HReg reg, AMD64AMode* addr ) {
//uu    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
//uu    i->tag                = Ain_AvxLdSt;
//uu    i->Ain.AvxLdSt.isLoad = isLoad;
//uu    i->Ain.AvxLdSt.reg    = reg;
//uu    i->Ain.AvxLdSt.addr   = addr;
//uu    return i;
//uu }
//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
//uu    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
//uu    i->tag             = Ain_AvxReRg;
//uu    i->Ain.AvxReRg.op  = op;
//uu    i->Ain.AvxReRg.src = re;
//uu    i->Ain.AvxReRg.dst = rg;
//uu    return i;
//uu }
AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
                                 AMD64AMode* amFailAddr ) {
   AMD64Instr* i             = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                    = Ain_EvCheck;
   i->Ain.EvCheck.amCounter  = amCounter;
   i->Ain.EvCheck.amFailAddr = amFailAddr;
   return i;
}
AMD64Instr* AMD64Instr_ProfInc ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_ProfInc;
   return i;
}

void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
         ppHRegAMD64(i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
         ppAMD64RMI(i->Ain.Alu64R.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
         ppAMD64RI(i->Ain.Alu64M.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
         if (i->Ain.Sh64.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Ain.Sh64.src);
         ppHRegAMD64(i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
         ppHRegAMD64(i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
         ppHRegAMD64(i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         vex_printf("leaq ");
         ppAMD64AMode(i->Ain.Lea64.am);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
         ppAMD64RMI_lo32(i->Ain.Alu32R.src);
         vex_printf(",");
         ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
         ppAMD64RM(i->Ain.MulL.src);
         return;
      case Ain_Div:
         vex_printf("%cdiv%s ",
                    i->Ain.Div.syned ? 's' : 'u',
                    showAMD64ScalarSz(i->Ain.Div.sz));
         ppAMD64RM(i->Ain.Div.src);
         return;
      case Ain_Push:
         vex_printf("pushq ");
         ppAMD64RMI(i->Ain.Push.src);
         return;
      case Ain_Call:
         vex_printf("call%s[%d,",
                    i->Ain.Call.cond==Acc_ALWAYS
                       ? "" : showAMD64CondCode(i->Ain.Call.cond),
                    i->Ain.Call.regparms );
         ppRetLoc(i->Ain.Call.rloc);
         vex_printf("] 0x%llx", i->Ain.Call.target);
         break;

      case Ain_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XDirect.cond));
         vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
         vex_printf("movq %%r11,");
         ppAMD64AMode(i->Ain.XDirect.amRIP);
         vex_printf("; ");
         vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
                    i->Ain.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Ain_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XIndir.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XIndir.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XIndir.amRIP);
         vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
         return;
      case Ain_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XAssisted.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XAssisted.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XAssisted.amRIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
                    (Int)i->Ain.XAssisted.jk);
         vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
         return;

      case Ain_CMov64:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
         ppAMD64RM(i->Ain.CMov64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
            vex_printf("movl ");
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
         } else {
            vex_printf("mov%c%cq ",
                       i->Ain.LoadEX.syned ? 's' : 'z',
                       i->Ain.LoadEX.szSmall==1
                          ? 'b'
                          : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64(i->Ain.LoadEX.dst);
         }
         return;
      case Ain_Store:
         vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                              : (i->Ain.Store.sz==2 ? 'w' : 'l'));
         ppHRegAMD64(i->Ain.Store.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Store.dst);
         return;
      case Ain_Set64:
         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
         ppHRegAMD64(i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
         ppHRegAMD64(i->Ain.Bsfr64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         vex_printf("mfence" );
         return;
      case Ain_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                    : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
         vex_printf("{%%rax->%%rbx},");
         ppAMD64AMode(i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                    (Int)(2 * i->Ain.DACAS.sz));
         ppAMD64AMode(i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
         break;
      case Ain_A87PushPop:
         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
         ppAMD64AMode(i->Ain.A87PushPop.addr);
         break;
      case Ain_A87FpOp:
         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
         break;
      case Ain_A87LdCW:
         vex_printf("fldcw ");
         ppAMD64AMode(i->Ain.A87LdCW.addr);
         break;
      case Ain_A87StSW:
         vex_printf("fstsw ");
         ppAMD64AMode(i->Ain.A87StSW.addr);
         break;
      case Ain_LdMXCSR:
         vex_printf("ldmxcsr ");
         ppAMD64AMode(i->Ain.LdMXCSR.addr);
         break;
      case Ain_SseUComIS:
         vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseUComIS.srcL);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseUComIS.srcR);
         vex_printf(" ; pushfq ; popq ");
         ppHRegAMD64(i->Ain.SseUComIS.dst);
         break;
      case Ain_SseSI2SF:
         vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
         (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSI2SF.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSI2SF.dst);
         break;
      case Ain_SseSF2SI:
         vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseSF2SI.src);
         vex_printf(",");
         (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSF2SI.dst);
         break;
      case Ain_SseSDSS:
         vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
         ppHRegAMD64(i->Ain.SseSDSS.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSDSS.dst);
         break;
      case Ain_SseLdSt:
         switch (i->Ain.SseLdSt.sz) {
            case 4:  vex_printf("movss "); break;
            case 8:  vex_printf("movsd "); break;
            case 16: vex_printf("movups "); break;
            default: vassert(0);
         }
         if (i->Ain.SseLdSt.isLoad) {
            ppAMD64AMode(i->Ain.SseLdSt.addr);
            vex_printf(",");
            ppHRegAMD64(i->Ain.SseLdSt.reg);
         } else {
            ppHRegAMD64(i->Ain.SseLdSt.reg);
            vex_printf(",");
            ppAMD64AMode(i->Ain.SseLdSt.addr);
         }
         return;
      case Ain_SseLdzLO:
         vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
         ppAMD64AMode(i->Ain.SseLdzLO.addr);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
         ppHRegAMD64(i->Ain.Sse32Fx4.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
         ppHRegAMD64(i->Ain.Sse32FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
         ppHRegAMD64(i->Ain.Sse64Fx2.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
         ppHRegAMD64(i->Ain.Sse64FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
         ppHRegAMD64(i->Ain.SseReRg.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
         ppHRegAMD64(i->Ain.SseCMov.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
         ppHRegAMD64(i->Ain.SseShuf.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    vex_printf("vmovups ");
      //uu    if (i->Ain.AvxLdSt.isLoad) {
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu       vex_printf(",");
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu    } else {
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu       vex_printf(",");
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu    }
      //uu    return;
      //uu case Ain_AvxReRg:
      //uu    vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
      //uu    ppHRegAMD64(i->Ain.AvxReRg.src);
      //uu    vex_printf(",");
      //uu    ppHRegAMD64(i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         vex_printf("(evCheck) decl ");
         ppAMD64AMode(i->Ain.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Ain_ProfInc:
         vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
         return;
      default:
         vpanic("ppAMD64Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

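/* The two routines below drive the register allocator: for each
   instruction, getRegUsage records whether each register involved is
   read (HRmRead), written (HRmWrite) or both (HRmModify), and
   mapRegs then rewrites virtual registers to the real registers the
   allocator chose. */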
void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
{
   Bool unary;
   vassert(mode64 == True);
   initHRegUsage(u);
   switch (i->tag) {
      case Ain_Imm64:
         addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
         if (i->Ain.Alu64R.op == Aalu_MOV) {
            addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
            return;
         }
         if (i->Ain.Alu64R.op == Aalu_CMP) {
            addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
         addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
         if (i->Ain.Sh64.src == 0)
            addHRegUse(u, HRmRead, hregAMD64_RCX());
         return;
      case Ain_Test64:
         addHRegUse(u, HRmRead, i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
         addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vassert(i->Ain.Alu32R.op != Aalu_MOV);
         addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
         if (i->Ain.Alu32R.op == Aalu_CMP) {
            addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         return;
      case Ain_Div:
         addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         return;
      case Ain_Push:
         addRegUsage_AMD64RMI(u, i->Ain.Push.src);
         addHRegUse(u, HRmModify, hregAMD64_RSP());
         return;
      case Ain_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
            and all the xmm registers. */
         addHRegUse(u, HRmWrite, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RCX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         addHRegUse(u, HRmWrite, hregAMD64_RSI());
         addHRegUse(u, HRmWrite, hregAMD64_RDI());
         addHRegUse(u, HRmWrite, hregAMD64_R8());
         addHRegUse(u, HRmWrite, hregAMD64_R9());
         addHRegUse(u, HRmWrite, hregAMD64_R10());
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         addHRegUse(u, HRmWrite, hregAMD64_XMM0());
         addHRegUse(u, HRmWrite, hregAMD64_XMM1());
         addHRegUse(u, HRmWrite, hregAMD64_XMM3());
         addHRegUse(u, HRmWrite, hregAMD64_XMM4());
         addHRegUse(u, HRmWrite, hregAMD64_XMM5());
         addHRegUse(u, HRmWrite, hregAMD64_XMM6());
         addHRegUse(u, HRmWrite, hregAMD64_XMM7());
         addHRegUse(u, HRmWrite, hregAMD64_XMM8());
         addHRegUse(u, HRmWrite, hregAMD64_XMM9());
         addHRegUse(u, HRmWrite, hregAMD64_XMM10());
         addHRegUse(u, HRmWrite, hregAMD64_XMM11());
         addHRegUse(u, HRmWrite, hregAMD64_XMM12());

         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Ain.Call.regparms) {
            case 6: addHRegUse(u, HRmRead, hregAMD64_R9());  /*fallthru*/
            case 5: addHRegUse(u, HRmRead, hregAMD64_R8());  /*fallthru*/
            case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
            case 0: break;
            default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, r11 is stated in the
            ABI as a scratch register, and so seems a suitable victim. */
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         /* Upshot of this is that the assembler really must use r11,
            and no other, as a destination temporary. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Ain_XDirect:
         /* Don't bother to mention the write to %r11, since it is not
            available to the allocator. */
         addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
         return;
      case Ain_XIndir:
         /* Ditto re %r11 */
         addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
         addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
         return;
      case Ain_XAssisted:
         /* Ditto re %r11 and %rbp (the baseblock ptr) */
         addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
         addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
         return;
      case Ain_CMov64:
         addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
         addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         addHRegUse(u, HRmRead,  i->Ain.MovxLQ.src);
         addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         addHRegUse(u, HRmRead, i->Ain.Store.src);
         addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
         addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_DACAS:
         addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RCX());
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
         return;
      case Ain_LdMXCSR:
         addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
         return;
      case Ain_SseUComIS:
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcL);
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcR);
         addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         addHRegUse(u, HRmRead,  i->Ain.SseSI2SF.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         addHRegUse(u, HRmRead,  i->Ain.SseSF2SI.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         addHRegUse(u, HRmRead,  i->Ain.SseSDSS.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
         return;
      case Ain_SseLdSt:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
         addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Ain.SseLdSt.reg);
         return;
      case Ain_SseLdzLO:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
                         || i->Ain.Sse32Fx4.op == Asse_RSQRTF
                         || i->Ain.Sse32Fx4.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vassert(i->Ain.Sse32FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
                         || i->Ain.Sse32FLo.op == Asse_RSQRTF
                         || i->Ain.Sse32FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
                         || i->Ain.Sse64Fx2.op == Asse_RSQRTF
                         || i->Ain.Sse64Fx2.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vassert(i->Ain.Sse64FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
                         || i->Ain.Sse64FLo.op == Asse_RSQRTF
                         || i->Ain.Sse64FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         if ( (i->Ain.SseReRg.op == Asse_XOR
               || i->Ain.SseReRg.op == Asse_CMPEQ32)
              && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
            /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
               r,r' as a write of a value to r, and independent of any
               previous value in r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
            addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
                             ? HRmWrite : HRmModify,
                          i->Ain.SseReRg.dst);
         }
         return;
      case Ain_SseCMov:
         addHRegUse(u, HRmRead,   i->Ain.SseCMov.src);
         addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         addHRegUse(u, HRmRead,  i->Ain.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
      //uu    addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
      //uu                  i->Ain.AvxLdSt.reg);
      //uu    return;
      //uu case Ain_AvxReRg:
      //uu    if ( (i->Ain.AvxReRg.op == Asse_XOR
      //uu          || i->Ain.AvxReRg.op == Asse_CMPEQ32)
      //uu         && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
      //uu       /* See comments on the case for Ain_SseReRg. */
      //uu       addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
      //uu    } else {
      //uu       addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
      //uu       addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
      //uu                        ? HRmWrite : HRmModify,
      //uu                     i->Ain.AvxReRg.dst);
      //uu    }
      //uu    return;
      case Ain_EvCheck:
         /* We expect both amodes only to mention %rbp, so this is in
            fact pointless, since %rbp isn't allocatable, but anyway.. */
         addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
         addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
         return;
      case Ain_ProfInc:
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("getRegUsage_AMD64Instr");
   }
}

/* local helper */
static inline void mapReg(HRegRemap* m, HReg* r)
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         mapReg(m, &i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
         mapReg(m, &i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
         mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         mapReg(m, &i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         mapReg(m, &i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
         mapReg(m, &i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
         mapReg(m, &i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         mapRegs_AMD64RM(m, i->Ain.MulL.src);
         return;
      case Ain_Div:
         mapRegs_AMD64RM(m, i->Ain.Div.src);
         return;
      case Ain_Push:
         mapRegs_AMD64RMI(m, i->Ain.Push.src);
         return;
      case Ain_Call:
         return;
      case Ain_XDirect:
         mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
         return;
      case Ain_XIndir:
         mapReg(m, &i->Ain.XIndir.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
         return;
      case Ain_XAssisted:
         mapReg(m, &i->Ain.XAssisted.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
         return;
      case Ain_CMov64:
         mapRegs_AMD64RM(m, i->Ain.CMov64.src);
         mapReg(m, &i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         mapReg(m, &i->Ain.MovxLQ.src);
         mapReg(m, &i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
         mapReg(m, &i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         mapReg(m, &i->Ain.Store.src);
         mapRegs_AMD64AMode(m, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         mapReg(m, &i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         mapReg(m, &i->Ain.Bsfr64.src);
         mapReg(m, &i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
         return;
      case Ain_LdMXCSR:
         mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
         return;
      case Ain_SseUComIS:
         mapReg(m, &i->Ain.SseUComIS.srcL);
         mapReg(m, &i->Ain.SseUComIS.srcR);
         mapReg(m, &i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         mapReg(m, &i->Ain.SseSI2SF.src);
         mapReg(m, &i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         mapReg(m, &i->Ain.SseSF2SI.src);
         mapReg(m, &i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         mapReg(m, &i->Ain.SseSDSS.src);
         mapReg(m, &i->Ain.SseSDSS.dst);
         return;
      case Ain_SseLdSt:
         mapReg(m, &i->Ain.SseLdSt.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
         break;
      case Ain_SseLdzLO:
         mapReg(m, &i->Ain.SseLdzLO.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
         break;
      case Ain_Sse32Fx4:
         mapReg(m, &i->Ain.Sse32Fx4.src);
         mapReg(m, &i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         mapReg(m, &i->Ain.Sse32FLo.src);
         mapReg(m, &i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         mapReg(m, &i->Ain.Sse64Fx2.src);
         mapReg(m, &i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         mapReg(m, &i->Ain.Sse64FLo.src);
         mapReg(m, &i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         mapReg(m, &i->Ain.SseReRg.src);
         mapReg(m, &i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         mapReg(m, &i->Ain.SseCMov.src);
         mapReg(m, &i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         mapReg(m, &i->Ain.SseShuf.src);
         mapReg(m, &i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    mapReg(m, &i->Ain.AvxLdSt.reg);
      //uu    mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
      //uu    break;
      //uu case Ain_AvxReRg:
      //uu    mapReg(m, &i->Ain.AvxReRg.src);
      //uu    mapReg(m, &i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         /* We expect both amodes only to mention %rbp, so this is in
            fact pointless, since %rbp isn't allocatable, but anyway.. */
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
         return;
      case Ain_ProfInc:
         /* hardwires r11 -- nothing to modify. */
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("mapRegs_AMD64Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing. */
Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
{
   switch (i->tag) {
      case Ain_Alu64R:
         /* Moves between integer regs */
         if (i->Ain.Alu64R.op != Aalu_MOV)
            return False;
         if (i->Ain.Alu64R.src->tag != Armi_Reg)
            return False;
         *src = i->Ain.Alu64R.src->Armi.Reg.reg;
         *dst = i->Ain.Alu64R.dst;
         return True;
      case Ain_SseReRg:
         /* Moves between SSE regs */
         if (i->Ain.SseReRg.op != Asse_MOV)
            return False;
         *src = i->Ain.SseReRg.src;
         *dst = i->Ain.SseReRg.dst;
         return True;
      //uu case Ain_AvxReRg:
      //uu    /* Moves between AVX regs */
      //uu    if (i->Ain.AvxReRg.op != Asse_MOV)
      //uu       return False;
      //uu    *src = i->Ain.AvxReRg.src;
      //uu    *dst = i->Ain.AvxReRg.dst;
      //uu    return True;
      default:
         return False;
   }
   /*NOTREACHED*/
}

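/* Note: the offsetB handed to the spill/reload generators below is
   interpreted relative to %rbp, the baseblock (guest state) pointer,
   not %rsp; spill slots therefore live off the baseblock rather than
   on the stack. */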
1820 */ 1821 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst ) 1822 { 1823 switch (i->tag) { 1824 case Ain_Alu64R: 1825 /* Moves between integer regs */ 1826 if (i->Ain.Alu64R.op != Aalu_MOV) 1827 return False; 1828 if (i->Ain.Alu64R.src->tag != Armi_Reg) 1829 return False; 1830 *src = i->Ain.Alu64R.src->Armi.Reg.reg; 1831 *dst = i->Ain.Alu64R.dst; 1832 return True; 1833 case Ain_SseReRg: 1834 /* Moves between SSE regs */ 1835 if (i->Ain.SseReRg.op != Asse_MOV) 1836 return False; 1837 *src = i->Ain.SseReRg.src; 1838 *dst = i->Ain.SseReRg.dst; 1839 return True; 1840 //uu case Ain_AvxReRg: 1841 //uu /* Moves between AVX regs */ 1842 //uu if (i->Ain.AvxReRg.op != Asse_MOV) 1843 //uu return False; 1844 //uu *src = i->Ain.AvxReRg.src; 1845 //uu *dst = i->Ain.AvxReRg.dst; 1846 //uu return True; 1847 default: 1848 return False; 1849 } 1850 /*NOTREACHED*/ 1851 } 1852 1853 1854 /* Generate amd64 spill/reload instructions under the direction of the 1855 register allocator. Note it's critical these don't write the 1856 condition codes. */ 1857 1858 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1859 HReg rreg, Int offsetB, Bool mode64 ) 1860 { 1861 AMD64AMode* am; 1862 vassert(offsetB >= 0); 1863 vassert(!hregIsVirtual(rreg)); 1864 vassert(mode64 == True); 1865 *i1 = *i2 = NULL; 1866 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1867 switch (hregClass(rreg)) { 1868 case HRcInt64: 1869 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am ); 1870 return; 1871 case HRcVec128: 1872 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am ); 1873 return; 1874 default: 1875 ppHRegClass(hregClass(rreg)); 1876 vpanic("genSpill_AMD64: unimplemented regclass"); 1877 } 1878 } 1879 1880 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, 1881 HReg rreg, Int offsetB, Bool mode64 ) 1882 { 1883 AMD64AMode* am; 1884 vassert(offsetB >= 0); 1885 vassert(!hregIsVirtual(rreg)); 1886 vassert(mode64 == True); 1887 *i1 = *i2 = NULL; 1888 am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); 1889 switch (hregClass(rreg)) { 1890 case HRcInt64: 1891 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg ); 1892 return; 1893 case HRcVec128: 1894 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am ); 1895 return; 1896 default: 1897 ppHRegClass(hregClass(rreg)); 1898 vpanic("genReload_AMD64: unimplemented regclass"); 1899 } 1900 } 1901 1902 1903 /* --------- The amd64 assembler (bleh.) --------- */ 1904 1905 /* Produce the low three bits of an integer register number. */ 1906 static UChar iregBits210 ( HReg r ) 1907 { 1908 UInt n; 1909 vassert(hregClass(r) == HRcInt64); 1910 vassert(!hregIsVirtual(r)); 1911 n = hregNumber(r); 1912 vassert(n <= 15); 1913 return toUChar(n & 7); 1914 } 1915 1916 /* Produce bit 3 of an integer register number. */ 1917 static UChar iregBit3 ( HReg r ) 1918 { 1919 UInt n; 1920 vassert(hregClass(r) == HRcInt64); 1921 vassert(!hregIsVirtual(r)); 1922 n = hregNumber(r); 1923 vassert(n <= 15); 1924 return toUChar((n >> 3) & 1); 1925 } 1926 1927 /* Produce a complete 4-bit integer register number. */ 1928 static UChar iregBits3210 ( HReg r ) 1929 { 1930 UInt n; 1931 vassert(hregClass(r) == HRcInt64); 1932 vassert(!hregIsVirtual(r)); 1933 n = hregNumber(r); 1934 vassert(n <= 15); 1935 return toUChar(n); 1936 } 1937 1938 /* Given an xmm (128bit V-class) register number, produce the 1939 equivalent numbered register in 64-bit I-class. 
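So, for encoding purposes only, %xmm9 is treated as integer register number 9, i.e. it contributes the same ModRM/REX bits as %r9 would.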
This is a bit of 1940 fakery which facilitates using functions that work on integer 1941 register numbers to be used when assembling SSE instructions 1942 too. */ 1943 static HReg vreg2ireg ( HReg r ) 1944 { 1945 UInt n; 1946 vassert(hregClass(r) == HRcVec128); 1947 vassert(!hregIsVirtual(r)); 1948 n = hregNumber(r); 1949 vassert(n <= 15); 1950 return mkHReg(n, HRcInt64, False); 1951 } 1952 1953 //uu /* Ditto for ymm regs. */ 1954 //uu static HReg dvreg2ireg ( HReg r ) 1955 //uu { 1956 //uu UInt n; 1957 //uu vassert(hregClass(r) == HRcVec256); 1958 //uu vassert(!hregIsVirtual(r)); 1959 //uu n = hregNumber(r); 1960 //uu vassert(n <= 15); 1961 //uu return mkHReg(n, HRcInt64, False); 1962 //uu } 1963 1964 static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem ) 1965 { 1966 vassert(mod < 4); 1967 vassert((reg|regmem) < 8); 1968 return toUChar( ((mod & 3) << 6) 1969 | ((reg & 7) << 3) 1970 | (regmem & 7) ); 1971 } 1972 1973 static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase ) 1974 { 1975 vassert(shift < 4); 1976 vassert((regindex|regbase) < 8); 1977 return toUChar( ((shift & 3) << 6) 1978 | ((regindex & 7) << 3) 1979 | (regbase & 7) ); 1980 } 1981 1982 static UChar* emit32 ( UChar* p, UInt w32 ) 1983 { 1984 *p++ = toUChar((w32) & 0x000000FF); 1985 *p++ = toUChar((w32 >> 8) & 0x000000FF); 1986 *p++ = toUChar((w32 >> 16) & 0x000000FF); 1987 *p++ = toUChar((w32 >> 24) & 0x000000FF); 1988 return p; 1989 } 1990 1991 static UChar* emit64 ( UChar* p, ULong w64 ) 1992 { 1993 p = emit32(p, toUInt(w64 & 0xFFFFFFFF)); 1994 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF)); 1995 return p; 1996 } 1997 1998 /* Does a sign-extend of the lowest 8 bits give 1999 the original number? */ 2000 static Bool fits8bits ( UInt w32 ) 2001 { 2002 Int i32 = (Int)w32; 2003 return toBool(i32 == ((i32 << 24) >> 24)); 2004 } 2005 /* Can the lower 32 bits be signedly widened to produce the whole 2006 64-bit value? In other words, are the top 33 bits either all 0 or 2007 all 1 ? */ 2008 static Bool fitsIn32Bits ( ULong x ) 2009 { 2010 Long y0 = (Long)x; 2011 Long y1 = y0; 2012 y1 <<= 32; 2013 y1 >>=/*s*/ 32; 2014 return toBool(x == y1); 2015 } 2016 2017 2018 /* Forming mod-reg-rm bytes and scale-index-base bytes. 2019 2020 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13 2021 = 00 greg ereg 2022 2023 greg, d8(ereg) | ereg is neither of: RSP R12 2024 = 01 greg ereg, d8 2025 2026 greg, d32(ereg) | ereg is neither of: RSP R12 2027 = 10 greg ereg, d32 2028 2029 greg, d8(ereg) | ereg is either: RSP R12 2030 = 01 greg 100, 0x24, d8 2031 (lowest bit of rex distinguishes R12/RSP) 2032 2033 greg, d32(ereg) | ereg is either: RSP R12 2034 = 10 greg 100, 0x24, d32 2035 (lowest bit of rex distinguishes R12/RSP) 2036 2037 ----------------------------------------------- 2038 2039 greg, d8(base,index,scale) 2040 | index != RSP 2041 = 01 greg 100, scale index base, d8 2042 2043 greg, d32(base,index,scale) 2044 | index != RSP 2045 = 10 greg 100, scale index base, d32 2046 */ 2047 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am ) 2048 { 2049 if (am->tag == Aam_IR) { 2050 if (am->Aam.IR.imm == 0 2051 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP()) 2052 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP()) 2053 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12()) 2054 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13()) 2055 ) { 2056 *p++ = mkModRegRM(0, iregBits210(greg), 2057 iregBits210(am->Aam.IR.reg)); 2058 return p; 2059 } 2060 if (fits8bits(am->Aam.IR.imm) 2061 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP()) 2062 && ! 
sameHReg(am->Aam.IR.reg, hregAMD64_R12()) 2063 ) { 2064 *p++ = mkModRegRM(1, iregBits210(greg), 2065 iregBits210(am->Aam.IR.reg)); 2066 *p++ = toUChar(am->Aam.IR.imm & 0xFF); 2067 return p; 2068 } 2069 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP()) 2070 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12()) 2071 ) { 2072 *p++ = mkModRegRM(2, iregBits210(greg), 2073 iregBits210(am->Aam.IR.reg)); 2074 p = emit32(p, am->Aam.IR.imm); 2075 return p; 2076 } 2077 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP()) 2078 || sameHReg(am->Aam.IR.reg, hregAMD64_R12())) 2079 && fits8bits(am->Aam.IR.imm)) { 2080 *p++ = mkModRegRM(1, iregBits210(greg), 4); 2081 *p++ = 0x24; 2082 *p++ = toUChar(am->Aam.IR.imm & 0xFF); 2083 return p; 2084 } 2085 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP()) 2086 || wait for test case for RSP case */ 2087 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) { 2088 *p++ = mkModRegRM(2, iregBits210(greg), 4); 2089 *p++ = 0x24; 2090 p = emit32(p, am->Aam.IR.imm); 2091 return p; 2092 } 2093 ppAMD64AMode(am); 2094 vpanic("doAMode_M: can't emit amode IR"); 2095 /*NOTREACHED*/ 2096 } 2097 if (am->tag == Aam_IRRS) { 2098 if (fits8bits(am->Aam.IRRS.imm) 2099 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) { 2100 *p++ = mkModRegRM(1, iregBits210(greg), 4); 2101 *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index), 2102 iregBits210(am->Aam.IRRS.base)); 2103 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF); 2104 return p; 2105 } 2106 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) { 2107 *p++ = mkModRegRM(2, iregBits210(greg), 4); 2108 *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index), 2109 iregBits210(am->Aam.IRRS.base)); 2110 p = emit32(p, am->Aam.IRRS.imm); 2111 return p; 2112 } 2113 ppAMD64AMode(am); 2114 vpanic("doAMode_M: can't emit amode IRRS"); 2115 /*NOTREACHED*/ 2116 } 2117 vpanic("doAMode_M: unknown amode"); 2118 /*NOTREACHED*/ 2119 } 2120 2121 2122 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ 2123 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) 2124 { 2125 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg)); 2126 return p; 2127 } 2128 2129 2130 /* Clear the W bit on a REX byte, thereby changing the operand size 2131 back to whatever that instruction's default operand size is. */ 2132 static inline UChar clearWBit ( UChar rex ) 2133 { 2134 return toUChar(rex & ~(1<<3)); 2135 } 2136 2137 2138 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */ 2139 static UChar rexAMode_M ( HReg greg, AMD64AMode* am ) 2140 { 2141 if (am->tag == Aam_IR) { 2142 UChar W = 1; /* we want 64-bit mode */ 2143 UChar R = iregBit3(greg); 2144 UChar X = 0; /* not relevant */ 2145 UChar B = iregBit3(am->Aam.IR.reg); 2146 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2147 } 2148 if (am->tag == Aam_IRRS) { 2149 UChar W = 1; /* we want 64-bit mode */ 2150 UChar R = iregBit3(greg); 2151 UChar X = iregBit3(am->Aam.IRRS.index); 2152 UChar B = iregBit3(am->Aam.IRRS.base); 2153 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2154 } 2155 vassert(0); 2156 return 0; /*NOTREACHED*/ 2157 } 2158 2159 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. 
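Worked example (illustrative): for greg = %rcx (number 1) and ereg = %r12 (number 12), W=1, R=0, X=0, B=1, giving 0x40 + 0b1001 = 0x49.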
*/ 2160 static UChar rexAMode_R ( HReg greg, HReg ereg ) 2161 { 2162 UChar W = 1; /* we want 64-bit mode */ 2163 UChar R = iregBit3(greg); 2164 UChar X = 0; /* not relevant */ 2165 UChar B = iregBit3(ereg); 2166 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); 2167 } 2168 2169 2170 //uu /* May 2012: this VEX prefix stuff is currently unused, but has 2171 //uu verified correct (I reckon). Certainly it has been known to 2172 //uu produce correct VEX prefixes during testing. */ 2173 //uu 2174 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and 2175 //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go 2176 //uu in verbatim. There's no range checking on the bits. */ 2177 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB, 2178 //uu UInt mmmmm, UInt rexW, UInt notVvvv, 2179 //uu UInt L, UInt pp ) 2180 //uu { 2181 //uu UChar byte0 = 0; 2182 //uu UChar byte1 = 0; 2183 //uu UChar byte2 = 0; 2184 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) { 2185 //uu /* 2 byte encoding is possible. */ 2186 //uu byte0 = 0xC5; 2187 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3) 2188 //uu | (L << 2) | pp; 2189 //uu } else { 2190 //uu /* 3 byte encoding is needed. */ 2191 //uu byte0 = 0xC4; 2192 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6) 2193 //uu | ((rexB ^ 1) << 5) | mmmmm; 2194 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp; 2195 //uu } 2196 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0); 2197 //uu } 2198 //uu 2199 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits 2200 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in 2201 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to 2202 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and 2203 //uu vvvv=1111 (unused 3rd reg). */ 2204 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am ) 2205 //uu { 2206 //uu UChar L = 1; /* size = 256 */ 2207 //uu UChar pp = 0; /* no SIMD prefix */ 2208 //uu UChar mmmmm = 1; /* 0F */ 2209 //uu UChar notVvvv = 0; /* unused */ 2210 //uu UChar rexW = 0; 2211 //uu UChar rexR = 0; 2212 //uu UChar rexX = 0; 2213 //uu UChar rexB = 0; 2214 //uu /* Same logic as in rexAMode_M. */ 2215 //uu if (am->tag == Aam_IR) { 2216 //uu rexR = iregBit3(greg); 2217 //uu rexX = 0; /* not relevant */ 2218 //uu rexB = iregBit3(am->Aam.IR.reg); 2219 //uu } 2220 //uu else if (am->tag == Aam_IRRS) { 2221 //uu rexR = iregBit3(greg); 2222 //uu rexX = iregBit3(am->Aam.IRRS.index); 2223 //uu rexB = iregBit3(am->Aam.IRRS.base); 2224 //uu } else { 2225 //uu vassert(0); 2226 //uu } 2227 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp ); 2228 //uu } 2229 //uu 2230 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex ) 2231 //uu { 2232 //uu switch (vex & 0xFF) { 2233 //uu case 0xC5: 2234 //uu *p++ = 0xC5; 2235 //uu *p++ = (vex >> 8) & 0xFF; 2236 //uu vassert(0 == (vex >> 16)); 2237 //uu break; 2238 //uu case 0xC4: 2239 //uu *p++ = 0xC4; 2240 //uu *p++ = (vex >> 8) & 0xFF; 2241 //uu *p++ = (vex >> 16) & 0xFF; 2242 //uu vassert(0 == (vex >> 24)); 2243 //uu break; 2244 //uu default: 2245 //uu vassert(0); 2246 //uu } 2247 //uu return p; 2248 //uu } 2249 2250 2251 /* Emit ffree %st(N) */ 2252 static UChar* do_ffree_st ( UChar* p, Int n ) 2253 { 2254 vassert(n >= 0 && n <= 7); 2255 *p++ = 0xDD; 2256 *p++ = toUChar(0xC0 + n); 2257 return p; 2258 } 2259 2260 /* Emit an instruction into buf and return the number of bytes used. 
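The caller must supply at least 32 bytes of buffer space; no single instruction emitted here should be longer than that, and both facts are asserted (on entry, and again on the length returned at the end).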
2261 Note that buf is not the insn's final place, and therefore it is 2262 imperative to emit position-independent code. If the emitted 2263 instruction was a profiler inc, set *is_profInc to True, else 2264 leave it unchanged. */ 2265 2266 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, 2267 UChar* buf, Int nbuf, AMD64Instr* i, 2268 Bool mode64, 2269 void* disp_cp_chain_me_to_slowEP, 2270 void* disp_cp_chain_me_to_fastEP, 2271 void* disp_cp_xindir, 2272 void* disp_cp_xassisted ) 2273 { 2274 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; 2275 UInt xtra; 2276 UInt reg; 2277 UChar rex; 2278 UChar* p = &buf[0]; 2279 UChar* ptmp; 2280 Int j; 2281 vassert(nbuf >= 32); 2282 vassert(mode64 == True); 2283 2284 /* Wrap an integer as a int register, for use assembling 2285 GrpN insns, in which the greg field is used as a sub-opcode 2286 and does not really contain a register. */ 2287 # define fake(_n) mkHReg((_n), HRcInt64, False) 2288 2289 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */ 2290 2291 switch (i->tag) { 2292 2293 case Ain_Imm64: 2294 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) { 2295 /* Use the short form (load into 32 bit reg, + default 2296 widening rule) for constants under 1 million. We could 2297 use this form for the range 0 to 0x7FFFFFFF inclusive, but 2298 limit it to a smaller range for verifiability purposes. */ 2299 if (1 & iregBit3(i->Ain.Imm64.dst)) 2300 *p++ = 0x41; 2301 *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst); 2302 p = emit32(p, (UInt)i->Ain.Imm64.imm64); 2303 } else { 2304 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst))); 2305 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst)); 2306 p = emit64(p, i->Ain.Imm64.imm64); 2307 } 2308 goto done; 2309 2310 case Ain_Alu64R: 2311 /* Deal specially with MOV */ 2312 if (i->Ain.Alu64R.op == Aalu_MOV) { 2313 switch (i->Ain.Alu64R.src->tag) { 2314 case Armi_Imm: 2315 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) { 2316 /* Actually we could use this form for constants in 2317 the range 0 through 0x7FFFFFFF inclusive, but 2318 limit it to a small range for verifiability 2319 purposes. */ 2320 /* Generate "movl $imm32, 32-bit-register" and let 2321 the default zero-extend rule cause the upper half 2322 of the dst to be zeroed out too. This saves 1 2323 and sometimes 2 bytes compared to the more 2324 obvious encoding in the 'else' branch. 
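Illustrative example: with this scheme "movq $0x1234, %rdx" is emitted as BA 34 12 00 00 (movl into %edx, 5 bytes) rather than 48 C7 C2 34 12 00 00 (7 bytes); the saving is only 1 byte when the destination is %r8..%r15, since a REX prefix (0x41) is still needed then.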
*/ 2325 if (1 & iregBit3(i->Ain.Alu64R.dst)) 2326 *p++ = 0x41; 2327 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst); 2328 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2329 } else { 2330 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst))); 2331 *p++ = 0xC7; 2332 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst)); 2333 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2334 } 2335 goto done; 2336 case Armi_Reg: 2337 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2338 i->Ain.Alu64R.dst ); 2339 *p++ = 0x89; 2340 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2341 i->Ain.Alu64R.dst); 2342 goto done; 2343 case Armi_Mem: 2344 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2345 i->Ain.Alu64R.src->Armi.Mem.am); 2346 *p++ = 0x8B; 2347 p = doAMode_M(p, i->Ain.Alu64R.dst, 2348 i->Ain.Alu64R.src->Armi.Mem.am); 2349 goto done; 2350 default: 2351 goto bad; 2352 } 2353 } 2354 /* MUL */ 2355 if (i->Ain.Alu64R.op == Aalu_MUL) { 2356 switch (i->Ain.Alu64R.src->tag) { 2357 case Armi_Reg: 2358 *p++ = rexAMode_R( i->Ain.Alu64R.dst, 2359 i->Ain.Alu64R.src->Armi.Reg.reg); 2360 *p++ = 0x0F; 2361 *p++ = 0xAF; 2362 p = doAMode_R(p, i->Ain.Alu64R.dst, 2363 i->Ain.Alu64R.src->Armi.Reg.reg); 2364 goto done; 2365 case Armi_Mem: 2366 *p++ = rexAMode_M(i->Ain.Alu64R.dst, 2367 i->Ain.Alu64R.src->Armi.Mem.am); 2368 *p++ = 0x0F; 2369 *p++ = 0xAF; 2370 p = doAMode_M(p, i->Ain.Alu64R.dst, 2371 i->Ain.Alu64R.src->Armi.Mem.am); 2372 goto done; 2373 case Armi_Imm: 2374 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2375 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2376 *p++ = 0x6B; 2377 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2378 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2379 } else { 2380 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2381 *p++ = 0x69; 2382 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); 2383 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2384 } 2385 goto done; 2386 default: 2387 goto bad; 2388 } 2389 } 2390 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ 2391 opc = opc_rr = subopc_imm = opc_imma = 0; 2392 switch (i->Ain.Alu64R.op) { 2393 case Aalu_ADC: opc = 0x13; opc_rr = 0x11; 2394 subopc_imm = 2; opc_imma = 0x15; break; 2395 case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 2396 subopc_imm = 0; opc_imma = 0x05; break; 2397 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 2398 subopc_imm = 5; opc_imma = 0x2D; break; 2399 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19; 2400 subopc_imm = 3; opc_imma = 0x1D; break; 2401 case Aalu_AND: opc = 0x23; opc_rr = 0x21; 2402 subopc_imm = 4; opc_imma = 0x25; break; 2403 case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 2404 subopc_imm = 6; opc_imma = 0x35; break; 2405 case Aalu_OR: opc = 0x0B; opc_rr = 0x09; 2406 subopc_imm = 1; opc_imma = 0x0D; break; 2407 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 2408 subopc_imm = 7; opc_imma = 0x3D; break; 2409 default: goto bad; 2410 } 2411 switch (i->Ain.Alu64R.src->tag) { 2412 case Armi_Imm: 2413 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX()) 2414 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2415 goto bad; /* FIXME: awaiting test case */ 2416 *p++ = toUChar(opc_imma); 2417 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2418 } else 2419 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { 2420 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst ); 2421 *p++ = 0x83; 2422 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); 2423 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); 2424 } else { 2425 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst); 2426 *p++ = 0x81; 2427 p = doAMode_R(p, 
fake(subopc_imm), i->Ain.Alu64R.dst); 2428 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); 2429 } 2430 goto done; 2431 case Armi_Reg: 2432 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, 2433 i->Ain.Alu64R.dst); 2434 *p++ = toUChar(opc_rr); 2435 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, 2436 i->Ain.Alu64R.dst); 2437 goto done; 2438 case Armi_Mem: 2439 *p++ = rexAMode_M( i->Ain.Alu64R.dst, 2440 i->Ain.Alu64R.src->Armi.Mem.am); 2441 *p++ = toUChar(opc); 2442 p = doAMode_M(p, i->Ain.Alu64R.dst, 2443 i->Ain.Alu64R.src->Armi.Mem.am); 2444 goto done; 2445 default: 2446 goto bad; 2447 } 2448 break; 2449 2450 case Ain_Alu64M: 2451 /* Deal specially with MOV */ 2452 if (i->Ain.Alu64M.op == Aalu_MOV) { 2453 switch (i->Ain.Alu64M.src->tag) { 2454 case Ari_Reg: 2455 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg, 2456 i->Ain.Alu64M.dst); 2457 *p++ = 0x89; 2458 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg, 2459 i->Ain.Alu64M.dst); 2460 goto done; 2461 case Ari_Imm: 2462 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst); 2463 *p++ = 0xC7; 2464 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst); 2465 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32); 2466 goto done; 2467 default: 2468 goto bad; 2469 } 2470 } 2471 break; 2472 2473 case Ain_Sh64: 2474 opc_cl = opc_imm = subopc = 0; 2475 switch (i->Ain.Sh64.op) { 2476 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; 2477 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; 2478 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; 2479 default: goto bad; 2480 } 2481 if (i->Ain.Sh64.src == 0) { 2482 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2483 *p++ = toUChar(opc_cl); 2484 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2485 goto done; 2486 } else { 2487 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); 2488 *p++ = toUChar(opc_imm); 2489 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); 2490 *p++ = (UChar)(i->Ain.Sh64.src); 2491 goto done; 2492 } 2493 break; 2494 2495 case Ain_Test64: 2496 /* testq sign-extend($imm32), %reg */ 2497 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst); 2498 *p++ = 0xF7; 2499 p = doAMode_R(p, fake(0), i->Ain.Test64.dst); 2500 p = emit32(p, i->Ain.Test64.imm32); 2501 goto done; 2502 2503 case Ain_Unary64: 2504 if (i->Ain.Unary64.op == Aun_NOT) { 2505 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2506 *p++ = 0xF7; 2507 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst); 2508 goto done; 2509 } 2510 if (i->Ain.Unary64.op == Aun_NEG) { 2511 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); 2512 *p++ = 0xF7; 2513 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst); 2514 goto done; 2515 } 2516 break; 2517 2518 case Ain_Lea64: 2519 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am); 2520 *p++ = 0x8D; 2521 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am); 2522 goto done; 2523 2524 case Ain_Alu32R: 2525 /* ADD/SUB/AND/OR/XOR/CMP */ 2526 opc = opc_rr = subopc_imm = opc_imma = 0; 2527 switch (i->Ain.Alu32R.op) { 2528 case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 2529 subopc_imm = 0; opc_imma = 0x05; break; 2530 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 2531 subopc_imm = 5; opc_imma = 0x2D; break; 2532 case Aalu_AND: opc = 0x23; opc_rr = 0x21; 2533 subopc_imm = 4; opc_imma = 0x25; break; 2534 case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 2535 subopc_imm = 6; opc_imma = 0x35; break; 2536 case Aalu_OR: opc = 0x0B; opc_rr = 0x09; 2537 subopc_imm = 1; opc_imma = 0x0D; break; 2538 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 2539 subopc_imm = 7; opc_imma = 0x3D; break; 2540 default: goto bad; 2541 } 2542 switch 
(i->Ain.Alu32R.src->tag) { 2543 case Armi_Imm: 2544 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX()) 2545 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { 2546 goto bad; /* FIXME: awaiting test case */ 2547 *p++ = toUChar(opc_imma); 2548 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); 2549 } else 2550 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { 2551 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) ); 2552 if (rex != 0x40) *p++ = rex; 2553 *p++ = 0x83; 2554 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); 2555 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32); 2556 } else { 2557 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) ); 2558 if (rex != 0x40) *p++ = rex; 2559 *p++ = 0x81; 2560 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); 2561 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); 2562 } 2563 goto done; 2564 case Armi_Reg: 2565 rex = clearWBit( 2566 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg, 2567 i->Ain.Alu32R.dst) ); 2568 if (rex != 0x40) *p++ = rex; 2569 *p++ = toUChar(opc_rr); 2570 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg, 2571 i->Ain.Alu32R.dst); 2572 goto done; 2573 case Armi_Mem: 2574 rex = clearWBit( 2575 rexAMode_M( i->Ain.Alu32R.dst, 2576 i->Ain.Alu32R.src->Armi.Mem.am) ); 2577 if (rex != 0x40) *p++ = rex; 2578 *p++ = toUChar(opc); 2579 p = doAMode_M(p, i->Ain.Alu32R.dst, 2580 i->Ain.Alu32R.src->Armi.Mem.am); 2581 goto done; 2582 default: 2583 goto bad; 2584 } 2585 break; 2586 2587 case Ain_MulL: 2588 subopc = i->Ain.MulL.syned ? 5 : 4; 2589 switch (i->Ain.MulL.src->tag) { 2590 case Arm_Mem: 2591 *p++ = rexAMode_M( fake(0), 2592 i->Ain.MulL.src->Arm.Mem.am); 2593 *p++ = 0xF7; 2594 p = doAMode_M(p, fake(subopc), 2595 i->Ain.MulL.src->Arm.Mem.am); 2596 goto done; 2597 case Arm_Reg: 2598 *p++ = rexAMode_R(fake(0), 2599 i->Ain.MulL.src->Arm.Reg.reg); 2600 *p++ = 0xF7; 2601 p = doAMode_R(p, fake(subopc), 2602 i->Ain.MulL.src->Arm.Reg.reg); 2603 goto done; 2604 default: 2605 goto bad; 2606 } 2607 break; 2608 2609 case Ain_Div: 2610 subopc = i->Ain.Div.syned ? 
7 : 6; 2611 if (i->Ain.Div.sz == 4) { 2612 switch (i->Ain.Div.src->tag) { 2613 case Arm_Mem: 2614 goto bad; 2615 /*FIXME*/ 2616 *p++ = 0xF7; 2617 p = doAMode_M(p, fake(subopc), 2618 i->Ain.Div.src->Arm.Mem.am); 2619 goto done; 2620 case Arm_Reg: 2621 *p++ = clearWBit( 2622 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg)); 2623 *p++ = 0xF7; 2624 p = doAMode_R(p, fake(subopc), 2625 i->Ain.Div.src->Arm.Reg.reg); 2626 goto done; 2627 default: 2628 goto bad; 2629 } 2630 } 2631 if (i->Ain.Div.sz == 8) { 2632 switch (i->Ain.Div.src->tag) { 2633 case Arm_Mem: 2634 *p++ = rexAMode_M( fake(0), 2635 i->Ain.Div.src->Arm.Mem.am); 2636 *p++ = 0xF7; 2637 p = doAMode_M(p, fake(subopc), 2638 i->Ain.Div.src->Arm.Mem.am); 2639 goto done; 2640 case Arm_Reg: 2641 *p++ = rexAMode_R( fake(0), 2642 i->Ain.Div.src->Arm.Reg.reg); 2643 *p++ = 0xF7; 2644 p = doAMode_R(p, fake(subopc), 2645 i->Ain.Div.src->Arm.Reg.reg); 2646 goto done; 2647 default: 2648 goto bad; 2649 } 2650 } 2651 break; 2652 2653 case Ain_Push: 2654 switch (i->Ain.Push.src->tag) { 2655 case Armi_Mem: 2656 *p++ = clearWBit( 2657 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am)); 2658 *p++ = 0xFF; 2659 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am); 2660 goto done; 2661 case Armi_Imm: 2662 *p++ = 0x68; 2663 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32); 2664 goto done; 2665 case Armi_Reg: 2666 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg))); 2667 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg)); 2668 goto done; 2669 default: 2670 goto bad; 2671 } 2672 2673 case Ain_Call: { 2674 if (i->Ain.Call.cond != Acc_ALWAYS 2675 && i->Ain.Call.rloc.pri != RLPri_None) { 2676 /* The call might not happen (it isn't unconditional) and it 2677 returns a result. In this case we will need to generate a 2678 control flow diamond to put 0x555..555 in the return 2679 register(s) in the case where the call doesn't happen. If 2680 this ever becomes necessary, maybe copy code from the ARM 2681 equivalent. Until that day, just give up. */ 2682 goto bad; 2683 } 2684 /* As per detailed comment for Ain_Call in 2685 getRegUsage_AMD64Instr above, %r11 is used as an address 2686 temporary. */ 2687 /* jump over the following two insns if the condition does not 2688 hold */ 2689 Bool shortImm = fitsIn32Bits(i->Ain.Call.target); 2690 if (i->Ain.Call.cond != Acc_ALWAYS) { 2691 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1))); 2692 *p++ = shortImm ? 10 : 13; 2693 /* 10 or 13 bytes in the next two insns */ 2694 } 2695 if (shortImm) { 2696 /* 7 bytes: movl sign-extend(imm32), %r11 */ 2697 *p++ = 0x49; 2698 *p++ = 0xC7; 2699 *p++ = 0xC3; 2700 p = emit32(p, (UInt)i->Ain.Call.target); 2701 } else { 2702 /* 10 bytes: movabsq $target, %r11 */ 2703 *p++ = 0x49; 2704 *p++ = 0xBB; 2705 p = emit64(p, i->Ain.Call.target); 2706 } 2707 /* 3 bytes: call *%r11 */ 2708 *p++ = 0x41; 2709 *p++ = 0xFF; 2710 *p++ = 0xD3; 2711 goto done; 2712 } 2713 2714 case Ain_XDirect: { 2715 /* NB: what goes on here has to be very closely coordinated with the 2716 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */ 2717 /* We're generating chain-me requests here, so we need to be 2718 sure this is actually allowed -- no-redir translations can't 2719 use chain-me's. Hence: */ 2720 vassert(disp_cp_chain_me_to_slowEP != NULL); 2721 vassert(disp_cp_chain_me_to_fastEP != NULL); 2722 2723 HReg r11 = hregAMD64_R11(); 2724 2725 /* Use ptmp for backpatching conditional jumps. 
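It is left pointing at the Jcc's 8-bit displacement byte, which gets filled in once the length of the code being jumped over is known.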
*/ 2726 ptmp = NULL; 2727 2728 /* First off, if this is conditional, create a conditional 2729 jump over the rest of it. */ 2730 if (i->Ain.XDirect.cond != Acc_ALWAYS) { 2731 /* jmp fwds if !condition */ 2732 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1))); 2733 ptmp = p; /* fill in this bit later */ 2734 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2735 } 2736 2737 /* Update the guest RIP. */ 2738 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) { 2739 /* use a shorter encoding */ 2740 /* movl sign-extend(dstGA), %r11 */ 2741 *p++ = 0x49; 2742 *p++ = 0xC7; 2743 *p++ = 0xC3; 2744 p = emit32(p, (UInt)i->Ain.XDirect.dstGA); 2745 } else { 2746 /* movabsq $dstGA, %r11 */ 2747 *p++ = 0x49; 2748 *p++ = 0xBB; 2749 p = emit64(p, i->Ain.XDirect.dstGA); 2750 } 2751 2752 /* movq %r11, amRIP */ 2753 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP); 2754 *p++ = 0x89; 2755 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP); 2756 2757 /* --- FIRST PATCHABLE BYTE follows --- */ 2758 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling 2759 to) backs up the return address, so as to find the address of 2760 the first patchable byte. So: don't change the length of the 2761 two instructions below. */ 2762 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */ 2763 *p++ = 0x49; 2764 *p++ = 0xBB; 2765 void* disp_cp_chain_me 2766 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP 2767 : disp_cp_chain_me_to_slowEP; 2768 p = emit64(p, Ptr_to_ULong(disp_cp_chain_me)); 2769 /* call *%r11 */ 2770 *p++ = 0x41; 2771 *p++ = 0xFF; 2772 *p++ = 0xD3; 2773 /* --- END of PATCHABLE BYTES --- */ 2774 2775 /* Fix up the conditional jump, if there was one. */ 2776 if (i->Ain.XDirect.cond != Acc_ALWAYS) { 2777 Int delta = p - ptmp; 2778 vassert(delta > 0 && delta < 40); 2779 *ptmp = toUChar(delta-1); 2780 } 2781 goto done; 2782 } 2783 2784 case Ain_XIndir: { 2785 /* We're generating transfers that could lead indirectly to a 2786 chain-me, so we need to be sure this is actually allowed -- 2787 no-redir translations are not allowed to reach normal 2788 translations without going through the scheduler. That means 2789 no XDirects or XIndirs out from no-redir translations. 2790 Hence: */ 2791 vassert(disp_cp_xindir != NULL); 2792 2793 /* Use ptmp for backpatching conditional jumps. */ 2794 ptmp = NULL; 2795 2796 /* First off, if this is conditional, create a conditional 2797 jump over the rest of it. */ 2798 if (i->Ain.XIndir.cond != Acc_ALWAYS) { 2799 /* jmp fwds if !condition */ 2800 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1))); 2801 ptmp = p; /* fill in this bit later */ 2802 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2803 } 2804 2805 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ 2806 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); 2807 *p++ = 0x89; 2808 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); 2809 2810 /* get $disp_cp_xindir into %r11 */ 2811 if (fitsIn32Bits(Ptr_to_ULong(disp_cp_xindir))) { 2812 /* use a shorter encoding */ 2813 /* movl sign-extend(disp_cp_xindir), %r11 */ 2814 *p++ = 0x49; 2815 *p++ = 0xC7; 2816 *p++ = 0xC3; 2817 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir)); 2818 } else { 2819 /* movabsq $disp_cp_xindir, %r11 */ 2820 *p++ = 0x49; 2821 *p++ = 0xBB; 2822 p = emit64(p, Ptr_to_ULong(disp_cp_xindir)); 2823 } 2824 2825 /* jmp *%r11 */ 2826 *p++ = 0x41; 2827 *p++ = 0xFF; 2828 *p++ = 0xE3; 2829 2830 /* Fix up the conditional jump, if there was one. 
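Note the -1: delta is measured from the displacement byte itself, whereas a Jcc rel8 is relative to the first byte after the two-byte jump instruction.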
*/ 2831 if (i->Ain.XIndir.cond != Acc_ALWAYS) { 2832 Int delta = p - ptmp; 2833 vassert(delta > 0 && delta < 40); 2834 *ptmp = toUChar(delta-1); 2835 } 2836 goto done; 2837 } 2838 2839 case Ain_XAssisted: { 2840 /* Use ptmp for backpatching conditional jumps. */ 2841 ptmp = NULL; 2842 2843 /* First off, if this is conditional, create a conditional 2844 jump over the rest of it. */ 2845 if (i->Ain.XAssisted.cond != Acc_ALWAYS) { 2846 /* jmp fwds if !condition */ 2847 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1))); 2848 ptmp = p; /* fill in this bit later */ 2849 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ 2850 } 2851 2852 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ 2853 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); 2854 *p++ = 0x89; 2855 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); 2856 /* movl $magic_number, %ebp. Since these numbers are all small positive 2857 integers, we can get away with "movl $N, %ebp" rather than 2858 the longer "movq $N, %rbp". */ 2859 UInt trcval = 0; 2860 switch (i->Ain.XAssisted.jk) { 2861 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; 2862 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; 2863 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break; 2864 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; 2865 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; 2866 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; 2867 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; 2868 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break; 2869 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; 2870 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; 2871 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; 2872 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; 2873 /* We don't expect to see the following being assisted. */ 2874 case Ijk_Ret: 2875 case Ijk_Call: 2876 /* fallthrough */ 2877 default: 2878 ppIRJumpKind(i->Ain.XAssisted.jk); 2879 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind"); 2880 } 2881 vassert(trcval != 0); 2882 *p++ = 0xBD; 2883 p = emit32(p, trcval); 2884 /* movabsq $disp_assisted, %r11 */ 2885 *p++ = 0x49; 2886 *p++ = 0xBB; 2887 p = emit64(p, Ptr_to_ULong(disp_cp_xassisted)); 2888 /* jmp *%r11 */ 2889 *p++ = 0x41; 2890 *p++ = 0xFF; 2891 *p++ = 0xE3; 2892 2893 /* Fix up the conditional jump, if there was one. */ 2894 if (i->Ain.XAssisted.cond != Acc_ALWAYS) { 2895 Int delta = p - ptmp; 2896 vassert(delta > 0 && delta < 40); 2897 *ptmp = toUChar(delta-1); 2898 } 2899 goto done; 2900 } 2901 2902 case Ain_CMov64: 2903 vassert(i->Ain.CMov64.cond != Acc_ALWAYS); 2904 if (i->Ain.CMov64.src->tag == Arm_Reg) { 2905 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2906 *p++ = 0x0F; 2907 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2908 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); 2909 goto done; 2910 } 2911 if (i->Ain.CMov64.src->tag == Arm_Mem) { 2912 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2913 *p++ = 0x0F; 2914 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); 2915 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); 2916 goto done; 2917 } 2918 break; 2919 2920 case Ain_MovxLQ: 2921 /* No, _don't_ ask me why the sense of the args has to be 2922 different in the S vs Z case. I don't know. */ 2923 if (i->Ain.MovxLQ.syned) { 2924 /* Need REX.W = 1 here, but rexAMode_R does that for us. 
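The 0x63 opcode emitted below is movslq (movsxd); with REX.W set it sign-extends the 32-bit source into the 64-bit destination.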
*/ 2925 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2926 *p++ = 0x63; 2927 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); 2928 } else { 2929 /* Produce a 32-bit reg-reg move, since the implicit 2930 zero-extend does what we want. */ 2931 *p++ = clearWBit ( 2932 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst)); 2933 *p++ = 0x89; 2934 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst); 2935 } 2936 goto done; 2937 2938 case Ain_LoadEX: 2939 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) { 2940 /* movzbq */ 2941 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2942 *p++ = 0x0F; 2943 *p++ = 0xB6; 2944 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2945 goto done; 2946 } 2947 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) { 2948 /* movzwq */ 2949 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2950 *p++ = 0x0F; 2951 *p++ = 0xB7; 2952 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2953 goto done; 2954 } 2955 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) { 2956 /* movzlq */ 2957 /* This isn't really an existing AMD64 instruction per se. 2958 Rather, we have to do a 32-bit load. Because a 32-bit 2959 write implicitly clears the upper 32 bits of the target 2960 register, we get what we want. */ 2961 *p++ = clearWBit( 2962 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src)); 2963 *p++ = 0x8B; 2964 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); 2965 goto done; 2966 } 2967 break; 2968 2969 case Ain_Set64: 2970 /* Make the destination register be 1 or 0, depending on whether 2971 the relevant condition holds. Complication: the top 56 bits 2972 of the destination should be forced to zero, but doing 'xorq 2973 %r,%r' kills the flag(s) we are about to read. Sigh. So 2974 start off my moving $0 into the dest. */ 2975 reg = iregBits3210(i->Ain.Set64.dst); 2976 vassert(reg < 16); 2977 2978 /* movq $0, %dst */ 2979 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48); 2980 *p++ = 0xC7; 2981 *p++ = toUChar(0xC0 + (reg & 7)); 2982 p = emit32(p, 0); 2983 2984 /* setb lo8(%dst) */ 2985 /* note, 8-bit register rex trickyness. Be careful here. */ 2986 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40); 2987 *p++ = 0x0F; 2988 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond)); 2989 *p++ = toUChar(0xC0 + (reg & 7)); 2990 goto done; 2991 2992 case Ain_Bsfr64: 2993 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 2994 *p++ = 0x0F; 2995 if (i->Ain.Bsfr64.isFwds) { 2996 *p++ = 0xBC; 2997 } else { 2998 *p++ = 0xBD; 2999 } 3000 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); 3001 goto done; 3002 3003 case Ain_MFence: 3004 /* mfence */ 3005 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; 3006 goto done; 3007 3008 case Ain_ACAS: 3009 /* lock */ 3010 *p++ = 0xF0; 3011 if (i->Ain.ACAS.sz == 2) *p++ = 0x66; 3012 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value 3013 in %rbx. The new-value register is hardwired to be %rbx 3014 since dealing with byte integer registers is too much hassle, 3015 so we force the register operand to %rbx (could equally be 3016 %rcx or %rdx). */ 3017 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr ); 3018 if (i->Ain.ACAS.sz != 8) 3019 rex = clearWBit(rex); 3020 3021 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */ 3022 *p++ = 0x0F; 3023 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; 3024 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr); 3025 goto done; 3026 3027 case Ain_DACAS: 3028 /* lock */ 3029 *p++ = 0xF0; 3030 /* cmpxchg{8,16}b m{64,128}. 
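The encoding is 0F C7 /1, with REX.W=1 selecting the 16-byte cmpxchg16b form.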
Expected-value in %rdx:%rax, new 3031 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so 3032 aren't encoded in the insn. */ 3033 rex = rexAMode_M( fake(1), i->Ain.ACAS.addr ); 3034 if (i->Ain.ACAS.sz != 8) 3035 rex = clearWBit(rex); 3036 *p++ = rex; 3037 *p++ = 0x0F; 3038 *p++ = 0xC7; 3039 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr); 3040 goto done; 3041 3042 case Ain_A87Free: 3043 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7); 3044 for (j = 0; j < i->Ain.A87Free.nregs; j++) { 3045 p = do_ffree_st(p, 7-j); 3046 } 3047 goto done; 3048 3049 case Ain_A87PushPop: 3050 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4); 3051 if (i->Ain.A87PushPop.isPush) { 3052 /* Load from memory into %st(0): flds/fldl amode */ 3053 *p++ = clearWBit( 3054 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) ); 3055 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; 3056 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr); 3057 } else { 3058 /* Dump %st(0) to memory: fstps/fstpl amode */ 3059 *p++ = clearWBit( 3060 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) ); 3061 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; 3062 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr); 3063 goto done; 3064 } 3065 goto done; 3066 3067 case Ain_A87FpOp: 3068 switch (i->Ain.A87FpOp.op) { 3069 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; 3070 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; 3071 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; 3072 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; 3073 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; 3074 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; 3075 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break; 3076 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break; 3077 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; 3078 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; 3079 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; 3080 case Afp_TAN: 3081 /* fptan pushes 1.0 on the FP stack, except when the 3082 argument is out of range. Hence we have to do the 3083 instruction, then inspect C2 to see if there is an out 3084 of range condition. If there is, we skip the fincstp 3085 that is used by the in-range case to get rid of this 3086 extra 1.0 value. */ 3087 *p++ = 0xD9; *p++ = 0xF2; // fptan 3088 *p++ = 0x50; // pushq %rax 3089 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax 3090 *p++ = 0x66; *p++ = 0xA9; 3091 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax 3092 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp 3093 *p++ = 0xD9; *p++ = 0xF7; // fincstp 3094 *p++ = 0x58; // after_fincstp: popq %rax 3095 break; 3096 default: 3097 goto bad; 3098 } 3099 goto done; 3100 3101 case Ain_A87LdCW: 3102 *p++ = clearWBit( 3103 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) ); 3104 *p++ = 0xD9; 3105 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr); 3106 goto done; 3107 3108 case Ain_A87StSW: 3109 *p++ = clearWBit( 3110 rexAMode_M(fake(7), i->Ain.A87StSW.addr) ); 3111 *p++ = 0xDD; 3112 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr); 3113 goto done; 3114 3115 case Ain_Store: 3116 if (i->Ain.Store.sz == 2) { 3117 /* This just goes to show the crazyness of the instruction 3118 set encoding. We have to insert two prefix bytes, but be 3119 careful to avoid a conflict in what the size should be, by 3120 ensuring that REX.W = 0. 
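Illustrative example: storing %cx to 0(%rdi) comes out as 66 40 89 0F -- the 0x66 prefix selects 16-bit operand size, and the REX byte (here the do-nothing 0x40) has W cleared so the two cannot disagree about the size.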
*/ 3121 *p++ = 0x66; /* override to 16-bits */ 3122 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3123 *p++ = 0x89; 3124 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3125 goto done; 3126 } 3127 if (i->Ain.Store.sz == 4) { 3128 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3129 *p++ = 0x89; 3130 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3131 goto done; 3132 } 3133 if (i->Ain.Store.sz == 1) { 3134 /* This is one place where it would be wrong to skip emitting 3135 a rex byte of 0x40, since the mere presence of rex changes 3136 the meaning of the byte register access. Be careful. */ 3137 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); 3138 *p++ = 0x88; 3139 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); 3140 goto done; 3141 } 3142 break; 3143 3144 case Ain_LdMXCSR: 3145 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr)); 3146 *p++ = 0x0F; 3147 *p++ = 0xAE; 3148 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr); 3149 goto done; 3150 3151 case Ain_SseUComIS: 3152 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */ 3153 /* ucomi[sd] %srcL, %srcR */ 3154 if (i->Ain.SseUComIS.sz == 8) { 3155 *p++ = 0x66; 3156 } else { 3157 goto bad; 3158 vassert(i->Ain.SseUComIS.sz == 4); 3159 } 3160 *p++ = clearWBit ( 3161 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL), 3162 vreg2ireg(i->Ain.SseUComIS.srcR) )); 3163 *p++ = 0x0F; 3164 *p++ = 0x2E; 3165 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL), 3166 vreg2ireg(i->Ain.SseUComIS.srcR) ); 3167 /* pushfq */ 3168 *p++ = 0x9C; 3169 /* popq %dst */ 3170 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst))); 3171 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst)); 3172 goto done; 3173 3174 case Ain_SseSI2SF: 3175 /* cvssi2s[sd] %src, %dst */ 3176 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst), 3177 i->Ain.SseSI2SF.src ); 3178 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2); 3179 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex); 3180 *p++ = 0x0F; 3181 *p++ = 0x2A; 3182 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst), 3183 i->Ain.SseSI2SF.src ); 3184 goto done; 3185 3186 case Ain_SseSF2SI: 3187 /* cvss[sd]2si %src, %dst */ 3188 rex = rexAMode_R( i->Ain.SseSF2SI.dst, 3189 vreg2ireg(i->Ain.SseSF2SI.src) ); 3190 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2); 3191 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex); 3192 *p++ = 0x0F; 3193 *p++ = 0x2D; 3194 p = doAMode_R( p, i->Ain.SseSF2SI.dst, 3195 vreg2ireg(i->Ain.SseSF2SI.src) ); 3196 goto done; 3197 3198 case Ain_SseSDSS: 3199 /* cvtsd2ss/cvtss2sd %src, %dst */ 3200 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3); 3201 *p++ = clearWBit( 3202 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst), 3203 vreg2ireg(i->Ain.SseSDSS.src) )); 3204 *p++ = 0x0F; 3205 *p++ = 0x5A; 3206 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst), 3207 vreg2ireg(i->Ain.SseSDSS.src) ); 3208 goto done; 3209 3210 case Ain_SseLdSt: 3211 if (i->Ain.SseLdSt.sz == 8) { 3212 *p++ = 0xF2; 3213 } else 3214 if (i->Ain.SseLdSt.sz == 4) { 3215 *p++ = 0xF3; 3216 } else 3217 if (i->Ain.SseLdSt.sz != 16) { 3218 vassert(0); 3219 } 3220 *p++ = clearWBit( 3221 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr)); 3222 *p++ = 0x0F; 3223 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 
0x10 : 0x11); 3224 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr); 3225 goto done; 3226 3227 case Ain_SseLdzLO: 3228 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8); 3229 /* movs[sd] amode, %xmm-dst */ 3230 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2); 3231 *p++ = clearWBit( 3232 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg), 3233 i->Ain.SseLdzLO.addr)); 3234 *p++ = 0x0F; 3235 *p++ = 0x10; 3236 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg), 3237 i->Ain.SseLdzLO.addr); 3238 goto done; 3239 3240 case Ain_Sse32Fx4: 3241 xtra = 0; 3242 *p++ = clearWBit( 3243 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst), 3244 vreg2ireg(i->Ain.Sse32Fx4.src) )); 3245 *p++ = 0x0F; 3246 switch (i->Ain.Sse32Fx4.op) { 3247 case Asse_ADDF: *p++ = 0x58; break; 3248 case Asse_DIVF: *p++ = 0x5E; break; 3249 case Asse_MAXF: *p++ = 0x5F; break; 3250 case Asse_MINF: *p++ = 0x5D; break; 3251 case Asse_MULF: *p++ = 0x59; break; 3252 case Asse_RCPF: *p++ = 0x53; break; 3253 case Asse_RSQRTF: *p++ = 0x52; break; 3254 case Asse_SQRTF: *p++ = 0x51; break; 3255 case Asse_SUBF: *p++ = 0x5C; break; 3256 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3257 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3258 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3259 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3260 default: goto bad; 3261 } 3262 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst), 3263 vreg2ireg(i->Ain.Sse32Fx4.src) ); 3264 if (xtra & 0x100) 3265 *p++ = toUChar(xtra & 0xFF); 3266 goto done; 3267 3268 case Ain_Sse64Fx2: 3269 xtra = 0; 3270 *p++ = 0x66; 3271 *p++ = clearWBit( 3272 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst), 3273 vreg2ireg(i->Ain.Sse64Fx2.src) )); 3274 *p++ = 0x0F; 3275 switch (i->Ain.Sse64Fx2.op) { 3276 case Asse_ADDF: *p++ = 0x58; break; 3277 case Asse_DIVF: *p++ = 0x5E; break; 3278 case Asse_MAXF: *p++ = 0x5F; break; 3279 case Asse_MINF: *p++ = 0x5D; break; 3280 case Asse_MULF: *p++ = 0x59; break; 3281 case Asse_SQRTF: *p++ = 0x51; break; 3282 case Asse_SUBF: *p++ = 0x5C; break; 3283 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3284 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3285 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3286 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3287 default: goto bad; 3288 } 3289 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst), 3290 vreg2ireg(i->Ain.Sse64Fx2.src) ); 3291 if (xtra & 0x100) 3292 *p++ = toUChar(xtra & 0xFF); 3293 goto done; 3294 3295 case Ain_Sse32FLo: 3296 xtra = 0; 3297 *p++ = 0xF3; 3298 *p++ = clearWBit( 3299 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst), 3300 vreg2ireg(i->Ain.Sse32FLo.src) )); 3301 *p++ = 0x0F; 3302 switch (i->Ain.Sse32FLo.op) { 3303 case Asse_ADDF: *p++ = 0x58; break; 3304 case Asse_DIVF: *p++ = 0x5E; break; 3305 case Asse_MAXF: *p++ = 0x5F; break; 3306 case Asse_MINF: *p++ = 0x5D; break; 3307 case Asse_MULF: *p++ = 0x59; break; 3308 case Asse_RCPF: *p++ = 0x53; break; 3309 case Asse_RSQRTF: *p++ = 0x52; break; 3310 case Asse_SQRTF: *p++ = 0x51; break; 3311 case Asse_SUBF: *p++ = 0x5C; break; 3312 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3313 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3314 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3315 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3316 default: goto bad; 3317 } 3318 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst), 3319 vreg2ireg(i->Ain.Sse32FLo.src) ); 3320 if (xtra & 0x100) 3321 *p++ = toUChar(xtra & 0xFF); 3322 goto done; 3323 3324 case Ain_Sse64FLo: 3325 xtra = 0; 3326 *p++ = 
0xF2; 3327 *p++ = clearWBit( 3328 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst), 3329 vreg2ireg(i->Ain.Sse64FLo.src) )); 3330 *p++ = 0x0F; 3331 switch (i->Ain.Sse64FLo.op) { 3332 case Asse_ADDF: *p++ = 0x58; break; 3333 case Asse_DIVF: *p++ = 0x5E; break; 3334 case Asse_MAXF: *p++ = 0x5F; break; 3335 case Asse_MINF: *p++ = 0x5D; break; 3336 case Asse_MULF: *p++ = 0x59; break; 3337 case Asse_SQRTF: *p++ = 0x51; break; 3338 case Asse_SUBF: *p++ = 0x5C; break; 3339 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; 3340 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; 3341 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; 3342 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; 3343 default: goto bad; 3344 } 3345 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst), 3346 vreg2ireg(i->Ain.Sse64FLo.src) ); 3347 if (xtra & 0x100) 3348 *p++ = toUChar(xtra & 0xFF); 3349 goto done; 3350 3351 case Ain_SseReRg: 3352 # define XX(_n) *p++ = (_n) 3353 3354 rex = clearWBit( 3355 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst), 3356 vreg2ireg(i->Ain.SseReRg.src) )); 3357 3358 switch (i->Ain.SseReRg.op) { 3359 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break; 3360 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break; 3361 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break; 3362 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break; 3363 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break; 3364 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break; 3365 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break; 3366 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break; 3367 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break; 3368 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break; 3369 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break; 3370 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break; 3371 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break; 3372 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break; 3373 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break; 3374 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break; 3375 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break; 3376 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break; 3377 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break; 3378 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break; 3379 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break; 3380 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break; 3381 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break; 3382 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break; 3383 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break; 3384 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break; 3385 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break; 3386 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break; 3387 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break; 3388 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break; 3389 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break; 3390 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break; 3391 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break; 3392 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break; 3393 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break; 3394 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break; 3395 case Asse_SHR16: 
XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break; 3396 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break; 3397 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break; 3398 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break; 3399 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break; 3400 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break; 3401 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break; 3402 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break; 3403 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break; 3404 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break; 3405 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break; 3406 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break; 3407 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break; 3408 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break; 3409 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break; 3410 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break; 3411 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break; 3412 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break; 3413 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break; 3414 default: goto bad; 3415 } 3416 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst), 3417 vreg2ireg(i->Ain.SseReRg.src) ); 3418 # undef XX 3419 goto done; 3420 3421 case Ain_SseCMov: 3422 /* jmp fwds if !condition */ 3423 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1)); 3424 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ 3425 ptmp = p; 3426 3427 /* movaps %src, %dst */ 3428 *p++ = clearWBit( 3429 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst), 3430 vreg2ireg(i->Ain.SseCMov.src) )); 3431 *p++ = 0x0F; 3432 *p++ = 0x28; 3433 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst), 3434 vreg2ireg(i->Ain.SseCMov.src) ); 3435 3436 /* Fill in the jump offset. */ 3437 *(ptmp-1) = toUChar(p - ptmp); 3438 goto done; 3439 3440 case Ain_SseShuf: 3441 *p++ = 0x66; 3442 *p++ = clearWBit( 3443 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst), 3444 vreg2ireg(i->Ain.SseShuf.src) )); 3445 *p++ = 0x0F; 3446 *p++ = 0x70; 3447 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst), 3448 vreg2ireg(i->Ain.SseShuf.src) ); 3449 *p++ = (UChar)(i->Ain.SseShuf.order); 3450 goto done; 3451 3452 //uu case Ain_AvxLdSt: { 3453 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg), 3454 //uu i->Ain.AvxLdSt.addr ); 3455 //uu p = emitVexPrefix(p, vex); 3456 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11); 3457 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr); 3458 //uu goto done; 3459 //uu } 3460 3461 case Ain_EvCheck: { 3462 /* We generate: 3463 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER) 3464 (2 bytes) jns nofail expected taken 3465 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR) 3466 nofail: 3467 */ 3468 /* This is heavily asserted re instruction lengths. It needs to 3469 be. If we get given unexpected forms of .amCounter or 3470 .amFailAddr -- basically, anything that's not of the form 3471 uimm7(%rbp) -- they are likely to fail. */ 3472 /* Note also that after the decl we must be very careful not to 3473 read the carry flag, else we get a partial flags stall. 3474 js/jns avoids that, though. 
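(decl writes SF, ZF, OF, AF and PF but leaves CF untouched; jns tests only SF, so it never asks for the one flag the decl did not produce.)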
*/ 3475 UChar* p0 = p; 3476 /* --- decl 8(%rbp) --- */ 3477 /* Need to compute the REX byte for the decl in order to prove 3478 that we don't need it, since this is a 32-bit inc and all 3479 registers involved in the amode are < r8. "fake(1)" because 3480 there's no register in this encoding; instead the register 3481 field is used as a sub opcode. The encoding for "decl r/m32" 3482 is FF /1, hence the fake(1). */ 3483 rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter)); 3484 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */ 3485 *p++ = 0xFF; 3486 p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter); 3487 vassert(p - p0 == 3); 3488 /* --- jns nofail --- */ 3489 *p++ = 0x79; 3490 *p++ = 0x03; /* need to check this 0x03 after the next insn */ 3491 vassert(p - p0 == 5); 3492 /* --- jmp* 0(%rbp) --- */ 3493 /* Once again, verify we don't need REX. The encoding is FF /4. 3494 We don't need REX.W since by default FF /4 in 64-bit mode 3495 implies a 64 bit load. */ 3496 rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr)); 3497 if (rex != 0x40) goto bad; 3498 *p++ = 0xFF; 3499 p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr); 3500 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ 3501 /* And crosscheck .. */ 3502 vassert(evCheckSzB_AMD64() == 8); 3503 goto done; 3504 } 3505 3506 case Ain_ProfInc: { 3507 /* We generate movabsq $0, %r11 3508 incq (%r11) 3509 in the expectation that a later call to LibVEX_patchProfCtr 3510 will be used to fill in the immediate field once the right 3511 value is known. 3512 49 BB 00 00 00 00 00 00 00 00 3513 49 FF 03 3514 */ 3515 *p++ = 0x49; *p++ = 0xBB; 3516 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3517 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; 3518 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03; 3519 /* Tell the caller .. */ 3520 vassert(!(*is_profInc)); 3521 *is_profInc = True; 3522 goto done; 3523 } 3524 3525 default: 3526 goto bad; 3527 } 3528 3529 bad: 3530 ppAMD64Instr(i, mode64); 3531 vpanic("emit_AMD64Instr"); 3532 /*NOTREACHED*/ 3533 3534 done: 3535 vassert(p - &buf[0] <= 32); 3536 return p - &buf[0]; 3537 3538 # undef fake 3539 } 3540 3541 3542 /* How big is an event check? See case for Ain_EvCheck in 3543 emit_AMD64Instr just above. That crosschecks what this returns, so 3544 we can tell if we're inconsistent. */ 3545 Int evCheckSzB_AMD64 ( void ) 3546 { 3547 return 8; 3548 } 3549 3550 3551 /* NB: what goes on here has to be very closely coordinated with the 3552 emitInstr case for XDirect, above. */ 3553 VexInvalRange chainXDirect_AMD64 ( void* place_to_chain, 3554 void* disp_cp_chain_me_EXPECTED, 3555 void* place_to_jump_to ) 3556 { 3557 /* What we're expecting to see is: 3558 movabsq $disp_cp_chain_me_EXPECTED, %r11 3559 call *%r11 3560 viz 3561 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED> 3562 41 FF D3 3563 */ 3564 UChar* p = (UChar*)place_to_chain; 3565 vassert(p[0] == 0x49); 3566 vassert(p[1] == 0xBB); 3567 vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); 3568 vassert(p[10] == 0x41); 3569 vassert(p[11] == 0xFF); 3570 vassert(p[12] == 0xD3); 3571 /* And what we want to change it to is either: 3572 (general case): 3573 movabsq $place_to_jump_to, %r11 3574 jmpq *%r11 3575 viz 3576 49 BB <8 bytes value == place_to_jump_to> 3577 41 FF E3 3578 So it's the same length (convenient, huh) and we don't 3579 need to change all the bits. 
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
                                   void* disp_cp_chain_me_EXPECTED,
                                   void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movabsq $disp_cp_chain_me_EXPECTED, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
        41 FF D3
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
   vassert(p[10] == 0x41);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0xD3);
   /* And what we want to change it to is either:
        (general case):
          movabsq $place_to_jump_to, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to>
          41 FF E3
        So it's the same length (convenient, huh) and we don't
        need to change all the bits.
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq disp32   where disp32 is relative to the next insn
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- one billion, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5. */
   Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
   Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;

   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
                       "using long jmp\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      p[0]  = 0xE9;
      p[1]  = (delta >> 0) & 0xFF;
      p[2]  = (delta >> 8) & 0xFF;
      p[3]  = (delta >> 16) & 0xFF;
      p[4]  = (delta >> 24) & 0xFF;
      p[5]  = 0x0F; p[6]  = 0x0B;
      p[7]  = 0x0F; p[8]  = 0x0B;
      p[9]  = 0x0F; p[10] = 0x0B;
      p[11] = 0x0F; p[12] = 0x0B;
      /* sanity check on the delta -- top 32 are all 0 or all 1 */
      delta >>= 32;
      vassert(delta == 0LL || delta == -1LL);
   } else {
      /* Minimal modifications from the starting sequence. */
      *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
      p[12] = 0xE3;
   }
   VexInvalRange vir = { (HWord)place_to_chain, 13 };
   return vir;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
                                     void* place_to_jump_to_EXPECTED,
                                     void* disp_cp_chain_me )
{
   /* What we're expecting to see is either:
        (general case)
          movabsq $place_to_jump_to_EXPECTED, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to_EXPECTED>
          41 FF E3
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq d32
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   Bool valid = False;
   if (p[0] == 0x49 && p[1] == 0xBB
       && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
       && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
      /* it's the long form */
      valid = True;
   }
   else
   if (p[0] == 0xE9
       && p[5]  == 0x0F && p[6]  == 0x0B
       && p[7]  == 0x0F && p[8]  == 0x0B
       && p[9]  == 0x0F && p[10] == 0x0B
       && p[11] == 0x0F && p[12] == 0x0B) {
      /* It's the short form.  Check the offset is right. */
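      /* Restating the check below, for clarity: a jmp-rel32 at
         address A transfers control to A + 5 + sign_extend(disp32),
         the 5 being the length of the E9 xx xx xx xx insn itself.
         So the site is only accepted if p + 5 + s64 equals
         place_to_jump_to_EXPECTED. */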
      Int  s32 = *(Int*)(&p[1]);
      Long s64 = (Long)s32;
      if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
      }
   }
   vassert(valid);
   /* And what we want to change it to is:
        movabsq $disp_cp_chain_me, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me>
        41 FF D3
      So it's the same length (convenient, huh).
   */
   p[0] = 0x49;
   p[1] = 0xBB;
   *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
   p[10] = 0x41;
   p[11] = 0xFF;
   p[12] = 0xD3;
   VexInvalRange vir = { (HWord)place_to_unchain, 13 };
   return vir;
}


/* Patch the counter address into a profile inc point, as previously
   created by the Ain_ProfInc case for emit_AMD64Instr. */
VexInvalRange patchProfInc_AMD64 ( void*  place_to_patch,
                                   ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 8);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(p[2] == 0x00);
   vassert(p[3] == 0x00);
   vassert(p[4] == 0x00);
   vassert(p[5] == 0x00);
   vassert(p[6] == 0x00);
   vassert(p[7] == 0x00);
   vassert(p[8] == 0x00);
   vassert(p[9] == 0x00);
   vassert(p[10] == 0x49);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0x03);
   ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
   p[2] = imm64 & 0xFF; imm64 >>= 8;
   p[3] = imm64 & 0xFF; imm64 >>= 8;
   p[4] = imm64 & 0xFF; imm64 >>= 8;
   p[5] = imm64 & 0xFF; imm64 >>= 8;
   p[6] = imm64 & 0xFF; imm64 >>= 8;
   p[7] = imm64 & 0xFF; imm64 >>= 8;
   p[8] = imm64 & 0xFF; imm64 >>= 8;
   p[9] = imm64 & 0xFF; imm64 >>= 8;
   VexInvalRange vir = { (HWord)place_to_patch, 13 };
   return vir;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/