1 //===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// 12 /// This code emitter outputs bytecode that is understood by the r600g driver 13 /// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, 14 /// but it still needs to be run through a finalizer in order to be executed 15 /// by the GPU. 16 /// 17 /// [1] http://www.mesa3d.org/ 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "R600Defines.h" 22 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" 23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 24 #include "llvm/MC/MCCodeEmitter.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCInstrInfo.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <stdio.h> 32 33 #define SRC_BYTE_COUNT 11 34 #define DST_BYTE_COUNT 5 35 36 using namespace llvm; 37 38 namespace { 39 40 class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { 41 R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; 42 void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; 43 const MCInstrInfo &MCII; 44 const MCRegisterInfo &MRI; 45 const MCSubtargetInfo &STI; 46 MCContext &Ctx; 47 48 public: 49 50 R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, 51 const MCSubtargetInfo &sti, MCContext &ctx) 52 : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } 53 54 /// \brief Encode the instruction and write it to the OS. 55 virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, 56 SmallVectorImpl<MCFixup> &Fixups) const; 57 58 /// \returns the encoding for an MCOperand. 59 virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, 60 SmallVectorImpl<MCFixup> &Fixups) const; 61 private: 62 63 void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, 64 raw_ostream &OS) const; 65 void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; 66 void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, 67 raw_ostream &OS) const; 68 void EmitDst(const MCInst &MI, raw_ostream &OS) const; 69 void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, 70 raw_ostream &OS) const; 71 void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; 72 73 void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; 74 75 void EmitByte(unsigned int byte, raw_ostream &OS) const; 76 77 void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; 78 79 void Emit(uint32_t value, raw_ostream &OS) const; 80 void Emit(uint64_t value, raw_ostream &OS) const; 81 82 unsigned getHWRegChan(unsigned reg) const; 83 unsigned getHWReg(unsigned regNo) const; 84 85 bool isFCOp(unsigned opcode) const; 86 bool isTexOp(unsigned opcode) const; 87 bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; 88 89 }; 90 91 } // End anonymous namespace 92 93 enum RegElement { 94 ELEMENT_X = 0, 95 ELEMENT_Y, 96 ELEMENT_Z, 97 ELEMENT_W 98 }; 99 100 enum InstrTypes { 101 INSTR_ALU = 0, 102 INSTR_TEX, 103 INSTR_FC, 104 INSTR_NATIVE, 105 INSTR_VTX, 106 INSTR_EXPORT 107 }; 108 109 enum FCInstr { 110 FC_IF_PREDICATE = 0, 111 FC_ELSE, 112 FC_ENDIF, 113 FC_BGNLOOP, 114 FC_ENDLOOP, 115 FC_BREAK_PREDICATE, 116 FC_CONTINUE 117 }; 118 119 enum TextureTypes { 120 TEXTURE_1D = 1, 121 TEXTURE_2D, 122 TEXTURE_3D, 123 TEXTURE_CUBE, 124 TEXTURE_RECT, 125 TEXTURE_SHADOW1D, 126 TEXTURE_SHADOW2D, 127 TEXTURE_SHADOWRECT, 128 TEXTURE_1D_ARRAY, 129 TEXTURE_2D_ARRAY, 130 TEXTURE_SHADOW1D_ARRAY, 131 TEXTURE_SHADOW2D_ARRAY 132 }; 133 134 MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, 135 const MCRegisterInfo &MRI, 136 const MCSubtargetInfo &STI, 137 MCContext &Ctx) { 138 return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); 139 } 140 141 void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, 142 SmallVectorImpl<MCFixup> &Fixups) const { 143 if (isTexOp(MI.getOpcode())) { 144 EmitTexInstr(MI, Fixups, OS); 145 } else if (isFCOp(MI.getOpcode())){ 146 EmitFCInstr(MI, OS); 147 } else if (MI.getOpcode() == AMDGPU::RETURN || 148 MI.getOpcode() == AMDGPU::BUNDLE || 149 MI.getOpcode() == AMDGPU::KILL) { 150 return; 151 } else { 152 switch(MI.getOpcode()) { 153 case AMDGPU::RAT_WRITE_CACHELESS_32_eg: 154 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { 155 uint64_t inst = getBinaryCodeForInstr(MI, Fixups); 156 EmitByte(INSTR_NATIVE, OS); 157 Emit(inst, OS); 158 break; 159 } 160 case AMDGPU::CONSTANT_LOAD_eg: 161 case AMDGPU::VTX_READ_PARAM_8_eg: 162 case AMDGPU::VTX_READ_PARAM_16_eg: 163 case AMDGPU::VTX_READ_PARAM_32_eg: 164 case AMDGPU::VTX_READ_PARAM_128_eg: 165 case AMDGPU::VTX_READ_GLOBAL_8_eg: 166 case AMDGPU::VTX_READ_GLOBAL_32_eg: 167 case AMDGPU::VTX_READ_GLOBAL_128_eg: 168 case AMDGPU::TEX_VTX_CONSTBUF: 169 case AMDGPU::TEX_VTX_TEXBUF : { 170 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); 171 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset 172 173 EmitByte(INSTR_VTX, OS); 174 Emit(InstWord01, OS); 175 Emit(InstWord2, OS); 176 break; 177 } 178 case AMDGPU::EG_ExportSwz: 179 case AMDGPU::R600_ExportSwz: 180 case AMDGPU::EG_ExportBuf: 181 case AMDGPU::R600_ExportBuf: { 182 uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); 183 EmitByte(INSTR_EXPORT, OS); 184 Emit(Inst, OS); 185 break; 186 } 187 188 default: 189 EmitALUInstr(MI, Fixups, OS); 190 break; 191 } 192 } 193 } 194 195 void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, 196 SmallVectorImpl<MCFixup> &Fixups, 197 raw_ostream &OS) const { 198 const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); 199 200 // Emit instruction type 201 EmitByte(INSTR_ALU, OS); 202 203 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); 204 205 //older alu have different encoding for instructions with one or two src 206 //parameters. 207 if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && 208 !(MCDesc.TSFlags & R600_InstFlag::OP3)) { 209 uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); 210 InstWord01 &= ~(0x3FFULL << 39); 211 InstWord01 |= ISAOpCode << 1; 212 } 213 214 unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : 215 MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; 216 217 EmitByte(SrcNum, OS); 218 219 const unsigned SrcOps[3][2] = { 220 {R600Operands::SRC0, R600Operands::SRC0_SEL}, 221 {R600Operands::SRC1, R600Operands::SRC1_SEL}, 222 {R600Operands::SRC2, R600Operands::SRC2_SEL} 223 }; 224 225 for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { 226 unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; 227 unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; 228 EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); 229 } 230 231 Emit(InstWord01, OS); 232 return; 233 } 234 235 void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, 236 raw_ostream &OS) const { 237 const MCOperand &MO = MI.getOperand(OpIdx); 238 union { 239 float f; 240 uint32_t i; 241 } Value; 242 Value.i = 0; 243 // Emit the source select (2 bytes). For GPRs, this is the register index. 244 // For other potential instruction operands, (e.g. constant registers) the 245 // value of the source select is defined in the r600isa docs. 246 if (MO.isReg()) { 247 unsigned reg = MO.getReg(); 248 EmitTwoBytes(getHWReg(reg), OS); 249 if (reg == AMDGPU::ALU_LITERAL_X) { 250 unsigned ImmOpIndex = MI.getNumOperands() - 1; 251 MCOperand ImmOp = MI.getOperand(ImmOpIndex); 252 if (ImmOp.isFPImm()) { 253 Value.f = ImmOp.getFPImm(); 254 } else { 255 assert(ImmOp.isImm()); 256 Value.i = ImmOp.getImm(); 257 } 258 } 259 } else { 260 // XXX: Handle other operand types. 261 EmitTwoBytes(0, OS); 262 } 263 264 // Emit the source channel (1 byte) 265 if (MO.isReg()) { 266 EmitByte(getHWRegChan(MO.getReg()), OS); 267 } else { 268 EmitByte(0, OS); 269 } 270 271 // XXX: Emit isNegated (1 byte) 272 if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) 273 && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || 274 (MO.isReg() && 275 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ 276 EmitByte(1, OS); 277 } else { 278 EmitByte(0, OS); 279 } 280 281 // Emit isAbsolute (1 byte) 282 if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { 283 EmitByte(1, OS); 284 } else { 285 EmitByte(0, OS); 286 } 287 288 // XXX: Emit relative addressing mode (1 byte) 289 EmitByte(0, OS); 290 291 // Emit kc_bank, This will be adjusted later by r600_asm 292 EmitByte(0, OS); 293 294 // Emit the literal value, if applicable (4 bytes). 295 Emit(Value.i, OS); 296 297 } 298 299 void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, 300 unsigned SelOpIdx, raw_ostream &OS) const { 301 const MCOperand &RegMO = MI.getOperand(RegOpIdx); 302 const MCOperand &SelMO = MI.getOperand(SelOpIdx); 303 304 union { 305 float f; 306 uint32_t i; 307 } InlineConstant; 308 InlineConstant.i = 0; 309 // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0 310 // and select is 0 (GPR index is encoded in the instr encoding. For constants 311 // type is 1 and select is the original const select passed from the driver. 312 unsigned Reg = RegMO.getReg(); 313 if (Reg == AMDGPU::ALU_CONST) { 314 EmitByte(1, OS); 315 uint32_t Sel = SelMO.getImm(); 316 Emit(Sel, OS); 317 } else { 318 EmitByte(0, OS); 319 Emit((uint32_t)0, OS); 320 } 321 322 if (Reg == AMDGPU::ALU_LITERAL_X) { 323 unsigned ImmOpIndex = MI.getNumOperands() - 1; 324 MCOperand ImmOp = MI.getOperand(ImmOpIndex); 325 if (ImmOp.isFPImm()) { 326 InlineConstant.f = ImmOp.getFPImm(); 327 } else { 328 assert(ImmOp.isImm()); 329 InlineConstant.i = ImmOp.getImm(); 330 } 331 } 332 333 // Emit the literal value, if applicable (4 bytes). 334 Emit(InlineConstant.i, OS); 335 } 336 337 void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, 338 SmallVectorImpl<MCFixup> &Fixups, 339 raw_ostream &OS) const { 340 341 unsigned Opcode = MI.getOpcode(); 342 bool hasOffsets = (Opcode == AMDGPU::TEX_LD); 343 unsigned OpOffset = hasOffsets ? 3 : 0; 344 int64_t Resource = MI.getOperand(OpOffset + 2).getImm(); 345 int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); 346 int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); 347 unsigned srcSelect[4] = {0, 1, 2, 3}; 348 349 // Emit instruction type 350 EmitByte(1, OS); 351 352 // Emit instruction 353 EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); 354 355 // Emit resource id 356 EmitByte(Resource, OS); 357 358 // Emit source register 359 EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); 360 361 // XXX: Emit src isRelativeAddress 362 EmitByte(0, OS); 363 364 // Emit destination register 365 EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); 366 367 // XXX: Emit dst isRealtiveAddress 368 EmitByte(0, OS); 369 370 // XXX: Emit dst select 371 EmitByte(0, OS); // X 372 EmitByte(1, OS); // Y 373 EmitByte(2, OS); // Z 374 EmitByte(3, OS); // W 375 376 // XXX: Emit lod bias 377 EmitByte(0, OS); 378 379 // XXX: Emit coord types 380 unsigned coordType[4] = {1, 1, 1, 1}; 381 382 if (TextureType == TEXTURE_RECT 383 || TextureType == TEXTURE_SHADOWRECT) { 384 coordType[ELEMENT_X] = 0; 385 coordType[ELEMENT_Y] = 0; 386 } 387 388 if (TextureType == TEXTURE_1D_ARRAY 389 || TextureType == TEXTURE_SHADOW1D_ARRAY) { 390 if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) { 391 coordType[ELEMENT_Y] = 0; 392 } else { 393 coordType[ELEMENT_Z] = 0; 394 srcSelect[ELEMENT_Z] = ELEMENT_Y; 395 } 396 } else if (TextureType == TEXTURE_2D_ARRAY 397 || TextureType == TEXTURE_SHADOW2D_ARRAY) { 398 coordType[ELEMENT_Z] = 0; 399 } 400 401 for (unsigned i = 0; i < 4; i++) { 402 EmitByte(coordType[i], OS); 403 } 404 405 // XXX: Emit offsets 406 if (hasOffsets) 407 for (unsigned i = 2; i < 5; i++) 408 EmitByte(MI.getOperand(i).getImm()<<1, OS); 409 else 410 EmitNullBytes(3, OS); 411 412 // Emit sampler id 413 EmitByte(Sampler, OS); 414 415 // XXX:Emit source select 416 if ((TextureType == TEXTURE_SHADOW1D 417 || TextureType == TEXTURE_SHADOW2D 418 || TextureType == TEXTURE_SHADOWRECT 419 || TextureType == TEXTURE_SHADOW1D_ARRAY) 420 && Opcode != AMDGPU::TEX_SAMPLE_C_L 421 && Opcode != AMDGPU::TEX_SAMPLE_C_LB) { 422 srcSelect[ELEMENT_W] = ELEMENT_Z; 423 } 424 425 for (unsigned i = 0; i < 4; i++) { 426 EmitByte(srcSelect[i], OS); 427 } 428 } 429 430 void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { 431 432 // Emit instruction type 433 EmitByte(INSTR_FC, OS); 434 435 // Emit SRC 436 unsigned NumOperands = MI.getNumOperands(); 437 if (NumOperands > 0) { 438 assert(NumOperands == 1); 439 EmitSrc(MI, 0, OS); 440 } else { 441 EmitNullBytes(SRC_BYTE_COUNT, OS); 442 } 443 444 // Emit FC Instruction 445 enum FCInstr instr; 446 switch (MI.getOpcode()) { 447 case AMDGPU::PREDICATED_BREAK: 448 instr = FC_BREAK_PREDICATE; 449 break; 450 case AMDGPU::CONTINUE: 451 instr = FC_CONTINUE; 452 break; 453 case AMDGPU::IF_PREDICATE_SET: 454 instr = FC_IF_PREDICATE; 455 break; 456 case AMDGPU::ELSE: 457 instr = FC_ELSE; 458 break; 459 case AMDGPU::ENDIF: 460 instr = FC_ENDIF; 461 break; 462 case AMDGPU::ENDLOOP: 463 instr = FC_ENDLOOP; 464 break; 465 case AMDGPU::WHILELOOP: 466 instr = FC_BGNLOOP; 467 break; 468 default: 469 abort(); 470 break; 471 } 472 EmitByte(instr, OS); 473 } 474 475 void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, 476 raw_ostream &OS) const { 477 478 for (unsigned int i = 0; i < ByteCount; i++) { 479 EmitByte(0, OS); 480 } 481 } 482 483 void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { 484 OS.write((uint8_t) Byte & 0xff); 485 } 486 487 void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, 488 raw_ostream &OS) const { 489 OS.write((uint8_t) (Bytes & 0xff)); 490 OS.write((uint8_t) ((Bytes >> 8) & 0xff)); 491 } 492 493 void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { 494 for (unsigned i = 0; i < 4; i++) { 495 OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); 496 } 497 } 498 499 void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { 500 for (unsigned i = 0; i < 8; i++) { 501 EmitByte((Value >> (8 * i)) & 0xff, OS); 502 } 503 } 504 505 unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { 506 return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT; 507 } 508 509 unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { 510 return MRI.getEncodingValue(RegNo) & HW_REG_MASK; 511 } 512 513 uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, 514 const MCOperand &MO, 515 SmallVectorImpl<MCFixup> &Fixup) const { 516 if (MO.isReg()) { 517 if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { 518 return MRI.getEncodingValue(MO.getReg()); 519 } else { 520 return getHWReg(MO.getReg()); 521 } 522 } else if (MO.isImm()) { 523 return MO.getImm(); 524 } else { 525 assert(0); 526 return 0; 527 } 528 } 529 530 //===----------------------------------------------------------------------===// 531 // Encoding helper functions 532 //===----------------------------------------------------------------------===// 533 534 bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { 535 switch(opcode) { 536 default: return false; 537 case AMDGPU::PREDICATED_BREAK: 538 case AMDGPU::CONTINUE: 539 case AMDGPU::IF_PREDICATE_SET: 540 case AMDGPU::ELSE: 541 case AMDGPU::ENDIF: 542 case AMDGPU::ENDLOOP: 543 case AMDGPU::WHILELOOP: 544 return true; 545 } 546 } 547 548 bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { 549 switch(opcode) { 550 default: return false; 551 case AMDGPU::TEX_LD: 552 case AMDGPU::TEX_GET_TEXTURE_RESINFO: 553 case AMDGPU::TEX_SAMPLE: 554 case AMDGPU::TEX_SAMPLE_C: 555 case AMDGPU::TEX_SAMPLE_L: 556 case AMDGPU::TEX_SAMPLE_C_L: 557 case AMDGPU::TEX_SAMPLE_LB: 558 case AMDGPU::TEX_SAMPLE_C_LB: 559 case AMDGPU::TEX_SAMPLE_G: 560 case AMDGPU::TEX_SAMPLE_C_G: 561 case AMDGPU::TEX_GET_GRADIENTS_H: 562 case AMDGPU::TEX_GET_GRADIENTS_V: 563 case AMDGPU::TEX_SET_GRADIENTS_H: 564 case AMDGPU::TEX_SET_GRADIENTS_V: 565 return true; 566 } 567 } 568 569 bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, 570 unsigned Flag) const { 571 const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); 572 unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); 573 if (FlagIndex == 0) { 574 return false; 575 } 576 assert(MI.getOperand(FlagIndex).isImm()); 577 return !!((MI.getOperand(FlagIndex).getImm() >> 578 (NUM_MO_FLAGS * Operand)) & Flag); 579 } 580 581 #include "AMDGPUGenMCCodeEmitter.inc" 582