1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "codegen/nv50_ir.h" 24 #include "codegen/nv50_ir_target.h" 25 26 namespace nv50_ir { 27 28 const uint8_t Target::operationSrcNr[] = 29 { 30 0, 0, // NOP, PHI 31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 32 1, 1, 2, // MOV, LOAD, STORE 33 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD 34 1, 1, 1, // ABS, NEG, NOT 35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 36 2, 2, 1, // MAX, MIN, SAT 37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT 39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW 41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 42 0, 0, 0, // PRERET,CONT,BREAK 43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 44 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP 45 1, 1, // EMIT, RESTART 46 1, 1, 1, // TEX, TXB, TXL, 47 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 48 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA 49 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 50 0, // TEXBAR 51 1, 1, // DFDX, DFDY 52 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 53 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT 54 2, 2, // ATOM, BAR 55 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 56 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 57 3, // SHFL 58 1, // VOTE 59 1, // BUFQ 60 0 61 }; 62 63 const OpClass Target::operationClass[] = 64 { 65 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT 66 OPCLASS_OTHER, 67 OPCLASS_PSEUDO, 68 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, 69 // MOV; LOAD; STORE 70 OPCLASS_MOVE, 71 OPCLASS_LOAD, 72 OPCLASS_STORE, 73 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD 74 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 75 OPCLASS_ARITH, OPCLASS_ARITH, 76 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 77 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR 78 OPCLASS_CONVERT, OPCLASS_CONVERT, 79 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, 80 OPCLASS_SHIFT, OPCLASS_SHIFT, 81 // MAX, MIN 82 OPCLASS_COMPARE, OPCLASS_COMPARE, 83 // SAT, CEIL, FLOOR, TRUNC; CVT 84 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, 85 OPCLASS_CONVERT, 86 // SET(AND,OR,XOR); SELP, SLCT 87 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, 88 OPCLASS_COMPARE, OPCLASS_COMPARE, 89 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW 90 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 91 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 92 OPCLASS_SFU, OPCLASS_SFU, 93 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN 94 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 95 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 96 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 97 // DISCARD, EXIT 98 OPCLASS_FLOW, OPCLASS_FLOW, 99 // MEMBAR 100 OPCLASS_CONTROL, 101 // VFETCH, PFETCH, AFETCH, EXPORT 102 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, 103 // LINTERP, PINTERP 104 OPCLASS_SFU, OPCLASS_SFU, 105 // EMIT, RESTART 106 OPCLASS_CONTROL, OPCLASS_CONTROL, 107 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP 108 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 109 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 110 OPCLASS_TEXTURE, OPCLASS_TEXTURE, 111 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA 112 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, 113 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, 114 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 115 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, 116 // TEXBAR 117 OPCLASS_OTHER, 118 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP 119 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 120 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, 121 // POPCNT, INSBF, EXTBF, BFIND; PERMT 122 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 123 OPCLASS_BITFIELD, 124 // ATOM, BAR 125 OPCLASS_ATOMIC, OPCLASS_CONTROL, 126 // VADD, VAVG, VMIN, VMAX 127 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 128 // VSAD, VSET, VSHR, VSHL 129 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 130 // VSEL, CCTL 131 OPCLASS_VECTOR, OPCLASS_CONTROL, 132 // SHFL 133 OPCLASS_OTHER, 134 // VOTE 135 OPCLASS_OTHER, 136 // BUFQ 137 OPCLASS_OTHER, 138 OPCLASS_PSEUDO // LAST 139 }; 140 141 142 extern Target *getTargetGM107(unsigned int chipset); 143 extern Target *getTargetNVC0(unsigned int chipset); 144 extern Target *getTargetNV50(unsigned int chipset); 145 146 Target *Target::create(unsigned int chipset) 147 { 148 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); 149 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); 150 switch (chipset & ~0xf) { 151 case 0x110: 152 case 0x120: 153 case 0x130: 154 return getTargetGM107(chipset); 155 case 0xc0: 156 case 0xd0: 157 case 0xe0: 158 case 0xf0: 159 case 0x100: 160 return getTargetNVC0(chipset); 161 case 0x50: 162 case 0x80: 163 case 0x90: 164 case 0xa0: 165 return getTargetNV50(chipset); 166 default: 167 ERROR("unsupported target: NV%x\n", chipset); 168 return 0; 169 } 170 } 171 172 void Target::destroy(Target *targ) 173 { 174 delete targ; 175 } 176 177 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL) 178 { 179 } 180 181 void 182 CodeEmitter::setCodeLocation(void *ptr, uint32_t size) 183 { 184 code = reinterpret_cast<uint32_t *>(ptr); 185 codeSize = 0; 186 codeSizeLimit = size; 187 } 188 189 void 190 CodeEmitter::printBinary() const 191 { 192 uint32_t *bin = code - codeSize / 4; 193 INFO("program binary (%u bytes)", codeSize); 194 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { 195 if ((pos % 8) == 0) 196 INFO("\n"); 197 INFO("%08x ", bin[pos]); 198 } 199 INFO("\n"); 200 } 201 202 static inline uint32_t sizeToBundlesNVE4(uint32_t size) 203 { 204 return (size + 55) / 56; 205 } 206 207 void 208 CodeEmitter::prepareEmission(Program *prog) 209 { 210 for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 211 !fi.end(); fi.next()) { 212 Function *func = reinterpret_cast<Function *>(fi.get()); 213 func->binPos = prog->binSize; 214 prepareEmission(func); 215 216 // adjust sizes & positions for schedulding info: 217 if (prog->getTarget()->hasSWSched) { 218 uint32_t adjPos = func->binPos; 219 BasicBlock *bb = NULL; 220 for (int i = 0; i < func->bbCount; ++i) { 221 bb = func->bbArray[i]; 222 int32_t adjSize = bb->binSize; 223 if (adjPos % 64) { 224 adjSize -= 64 - adjPos % 64; 225 if (adjSize < 0) 226 adjSize = 0; 227 } 228 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; 229 bb->binPos = adjPos; 230 bb->binSize = adjSize; 231 adjPos += adjSize; 232 } 233 if (bb) 234 func->binSize = adjPos - func->binPos; 235 } 236 237 prog->binSize += func->binSize; 238 } 239 } 240 241 void 242 CodeEmitter::prepareEmission(Function *func) 243 { 244 func->bbCount = 0; 245 func->bbArray = new BasicBlock * [func->cfg.getSize()]; 246 247 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; 248 249 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) 250 prepareEmission(BasicBlock::get(*it)); 251 } 252 253 void 254 CodeEmitter::prepareEmission(BasicBlock *bb) 255 { 256 Instruction *i, *next; 257 Function *func = bb->getFunction(); 258 int j; 259 unsigned int nShort; 260 261 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); 262 263 for (; j >= 0; --j) { 264 BasicBlock *in = func->bbArray[j]; 265 Instruction *exit = in->getExit(); 266 267 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { 268 in->binSize -= 8; 269 func->binSize -= 8; 270 271 for (++j; j < func->bbCount; ++j) 272 func->bbArray[j]->binPos -= 8; 273 274 in->remove(exit); 275 } 276 bb->binPos = in->binPos + in->binSize; 277 if (in->binSize) // no more no-op branches to bb 278 break; 279 } 280 func->bbArray[func->bbCount++] = bb; 281 282 if (!bb->getExit()) 283 return; 284 285 // determine encoding size, try to group short instructions 286 nShort = 0; 287 for (i = bb->getEntry(); i; i = next) { 288 next = i->next; 289 290 if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) { 291 bb->remove(i); 292 continue; 293 } 294 295 i->encSize = getMinEncodingSize(i); 296 if (next && i->encSize < 8) 297 ++nShort; 298 else 299 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { 300 if (i->isCommutationLegal(i->next)) { 301 bb->permuteAdjacent(i, next); 302 next->encSize = 4; 303 next = i; 304 i = i->prev; 305 ++nShort; 306 } else 307 if (i->isCommutationLegal(i->prev) && next->next) { 308 bb->permuteAdjacent(i->prev, i); 309 next->encSize = 4; 310 next = next->next; 311 bb->binSize += 4; 312 ++nShort; 313 } else { 314 i->encSize = 8; 315 i->prev->encSize = 8; 316 bb->binSize += 4; 317 nShort = 0; 318 } 319 } else { 320 i->encSize = 8; 321 if (nShort & 1) { 322 i->prev->encSize = 8; 323 bb->binSize += 4; 324 } 325 nShort = 0; 326 } 327 bb->binSize += i->encSize; 328 } 329 330 if (bb->getExit()->encSize == 4) { 331 assert(nShort); 332 bb->getExit()->encSize = 8; 333 bb->binSize += 4; 334 335 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { 336 bb->binSize += 8; 337 bb->getExit()->prev->encSize = 8; 338 } 339 } 340 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); 341 342 func->binSize += bb->binSize; 343 } 344 345 void 346 Program::emitSymbolTable(struct nv50_ir_prog_info *info) 347 { 348 unsigned int n = 0, nMax = allFuncs.getSize(); 349 350 info->bin.syms = 351 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); 352 353 for (ArrayList::Iterator fi = allFuncs.iterator(); 354 !fi.end(); 355 fi.next(), ++n) { 356 Function *f = (Function *)fi.get(); 357 assert(n < nMax); 358 359 info->bin.syms[n].label = f->getLabel(); 360 info->bin.syms[n].offset = f->binPos; 361 } 362 363 info->bin.numSyms = n; 364 } 365 366 bool 367 Program::emitBinary(struct nv50_ir_prog_info *info) 368 { 369 CodeEmitter *emit = target->getCodeEmitter(progType); 370 371 emit->prepareEmission(this); 372 373 if (dbgFlags & NV50_IR_DEBUG_BASIC) 374 this->print(); 375 376 if (!binSize) { 377 code = NULL; 378 return false; 379 } 380 code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); 381 if (!code) 382 return false; 383 emit->setCodeLocation(code, binSize); 384 info->bin.instructions = 0; 385 386 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { 387 Function *fn = reinterpret_cast<Function *>(fi.get()); 388 389 assert(emit->getCodeSize() == fn->binPos); 390 391 for (int b = 0; b < fn->bbCount; ++b) { 392 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) { 393 emit->emitInstruction(i); 394 info->bin.instructions++; 395 if (i->sType == TYPE_F64 || i->dType == TYPE_F64) 396 info->io.fp64 = true; 397 } 398 } 399 } 400 info->bin.relocData = emit->getRelocInfo(); 401 info->bin.fixupData = emit->getFixupInfo(); 402 403 emitSymbolTable(info); 404 405 // the nvc0 driver will print the binary iself together with the header 406 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) 407 emit->printBinary(); 408 409 delete emit; 410 return true; 411 } 412 413 #define RELOC_ALLOC_INCREMENT 8 414 415 bool 416 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, 417 int s) 418 { 419 unsigned int n = relocInfo ? relocInfo->count : 0; 420 421 if (!(n % RELOC_ALLOC_INCREMENT)) { 422 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); 423 relocInfo = reinterpret_cast<RelocInfo *>( 424 REALLOC(relocInfo, n ? size : 0, 425 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); 426 if (!relocInfo) 427 return false; 428 if (n == 0) 429 memset(relocInfo, 0, sizeof(RelocInfo)); 430 } 431 ++relocInfo->count; 432 433 relocInfo->entry[n].data = data; 434 relocInfo->entry[n].mask = m; 435 relocInfo->entry[n].offset = codeSize + w * 4; 436 relocInfo->entry[n].bitPos = s; 437 relocInfo->entry[n].type = ty; 438 439 return true; 440 } 441 442 bool 443 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply) 444 { 445 unsigned int n = fixupInfo ? fixupInfo->count : 0; 446 447 if (!(n % RELOC_ALLOC_INCREMENT)) { 448 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry); 449 fixupInfo = reinterpret_cast<FixupInfo *>( 450 REALLOC(fixupInfo, n ? size : 0, 451 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry))); 452 if (!fixupInfo) 453 return false; 454 if (n == 0) 455 memset(fixupInfo, 0, sizeof(FixupInfo)); 456 } 457 ++fixupInfo->count; 458 459 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2); 460 461 return true; 462 } 463 464 void 465 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const 466 { 467 uint32_t value = 0; 468 469 switch (type) { 470 case TYPE_CODE: value = info->codePos; break; 471 case TYPE_BUILTIN: value = info->libPos; break; 472 case TYPE_DATA: value = info->dataPos; break; 473 default: 474 assert(0); 475 break; 476 } 477 value += data; 478 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); 479 480 binary[offset / 4] &= ~mask; 481 binary[offset / 4] |= value & mask; 482 } 483 484 } // namespace nv50_ir 485 486 487 #include "codegen/nv50_ir_driver.h" 488 489 extern "C" { 490 491 void 492 nv50_ir_relocate_code(void *relocData, uint32_t *code, 493 uint32_t codePos, 494 uint32_t libPos, 495 uint32_t dataPos) 496 { 497 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); 498 499 info->codePos = codePos; 500 info->libPos = libPos; 501 info->dataPos = dataPos; 502 503 for (unsigned int i = 0; i < info->count; ++i) 504 info->entry[i].apply(code, info); 505 } 506 507 void 508 nv50_ir_apply_fixups(void *fixupData, uint32_t *code, 509 bool force_persample_interp, bool flatshade, 510 uint8_t alphatest) 511 { 512 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>( 513 fixupData); 514 515 // force_persample_interp: all non-flat -> per-sample 516 // flatshade: all color -> flat 517 // alphatest: PIPE_FUNC_* to use with alphatest 518 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest); 519 for (unsigned i = 0; i < info->count; ++i) 520 info->entry[i].apply(&info->entry[i], code, data); 521 } 522 523 void 524 nv50_ir_get_target_library(uint32_t chipset, 525 const uint32_t **code, uint32_t *size) 526 { 527 nv50_ir::Target *targ = nv50_ir::Target::create(chipset); 528 targ->getBuiltinCode(code, size); 529 nv50_ir::Target::destroy(targ); 530 } 531 532 } 533