1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 #include "nv50_ir_target_nvc0.h" 24 25 namespace nv50_ir { 26 27 // Argh, all these assertions ... 28 29 class CodeEmitterNVC0 : public CodeEmitter 30 { 31 public: 32 CodeEmitterNVC0(const TargetNVC0 *); 33 34 virtual bool emitInstruction(Instruction *); 35 virtual uint32_t getMinEncodingSize(const Instruction *) const; 36 virtual void prepareEmission(Function *); 37 38 inline void setProgramType(Program::Type pType) { progType = pType; } 39 40 private: 41 const TargetNVC0 *targ; 42 43 Program::Type progType; 44 45 const bool writeIssueDelays; 46 47 private: 48 void emitForm_A(const Instruction *, uint64_t); 49 void emitForm_B(const Instruction *, uint64_t); 50 void emitForm_S(const Instruction *, uint32_t, bool pred); 51 52 void emitPredicate(const Instruction *); 53 54 void setAddress16(const ValueRef&); 55 void setImmediate(const Instruction *, const int s); // needs op already set 56 void setImmediateS8(const ValueRef&); 57 58 void emitCondCode(CondCode cc, int pos); 59 void emitInterpMode(const Instruction *); 60 void emitLoadStoreType(DataType ty); 61 void emitCachingMode(CacheMode c); 62 63 void emitShortSrc2(const ValueRef&); 64 65 inline uint8_t getSRegEncoding(const ValueRef&); 66 67 void roundMode_A(const Instruction *); 68 void roundMode_C(const Instruction *); 69 void roundMode_CS(const Instruction *); 70 71 void emitNegAbs12(const Instruction *); 72 73 void emitNOP(const Instruction *); 74 75 void emitLOAD(const Instruction *); 76 void emitSTORE(const Instruction *); 77 void emitMOV(const Instruction *); 78 79 void emitINTERP(const Instruction *); 80 void emitPFETCH(const Instruction *); 81 void emitVFETCH(const Instruction *); 82 void emitEXPORT(const Instruction *); 83 void emitOUT(const Instruction *); 84 85 void emitUADD(const Instruction *); 86 void emitFADD(const Instruction *); 87 void emitUMUL(const Instruction *); 88 void emitFMUL(const Instruction *); 89 void emitIMAD(const Instruction *); 90 void emitISAD(const Instruction *); 91 void emitFMAD(const Instruction *); 92 93 void emitNOT(Instruction *); 94 void emitLogicOp(const Instruction *, uint8_t subOp); 95 void emitPOPC(const Instruction *); 96 void emitINSBF(const Instruction *); 97 void emitShift(const Instruction *); 98 99 void emitSFnOp(const Instruction *, uint8_t subOp); 100 101 void emitCVT(Instruction *); 102 void emitMINMAX(const Instruction *); 103 void emitPreOp(const Instruction *); 104 105 void emitSET(const CmpInstruction *); 106 void emitSLCT(const CmpInstruction *); 107 void emitSELP(const Instruction *); 108 109 void emitTEXBAR(const Instruction *); 110 void emitTEX(const TexInstruction *); 111 void emitTEXCSAA(const TexInstruction *); 112 void emitTXQ(const TexInstruction *); 113 void emitPIXLD(const TexInstruction *); 114 115 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); 116 117 void emitFlow(const Instruction *); 118 119 inline void defId(const ValueDef&, const int pos); 120 inline void srcId(const ValueRef&, const int pos); 121 inline void srcId(const ValueRef *, const int pos); 122 inline void srcId(const Instruction *, int s, const int pos); 123 124 inline void srcAddr32(const ValueRef&, const int pos); // address / 4 125 126 inline bool isLIMM(const ValueRef&, DataType ty); 127 }; 128 129 // for better visibility 130 #define HEX64(h, l) 0x##h##l##ULL 131 132 #define SDATA(a) ((a).rep()->reg.data) 133 #define DDATA(a) ((a).rep()->reg.data) 134 135 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos) 136 { 137 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32); 138 } 139 140 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos) 141 { 142 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32); 143 } 144 145 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos) 146 { 147 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63; 148 code[pos / 32] |= r << (pos % 32); 149 } 150 151 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos) 152 { 153 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); 154 } 155 156 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos) 157 { 158 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32); 159 } 160 161 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty) 162 { 163 const ImmediateValue *imm = ref.get()->asImm(); 164 165 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); 166 } 167 168 void 169 CodeEmitterNVC0::roundMode_A(const Instruction *insn) 170 { 171 switch (insn->rnd) { 172 case ROUND_M: code[1] |= 1 << 23; break; 173 case ROUND_P: code[1] |= 2 << 23; break; 174 case ROUND_Z: code[1] |= 3 << 23; break; 175 default: 176 assert(insn->rnd == ROUND_N); 177 break; 178 } 179 } 180 181 void 182 CodeEmitterNVC0::emitNegAbs12(const Instruction *i) 183 { 184 if (i->src(1).mod.abs()) code[0] |= 1 << 6; 185 if (i->src(0).mod.abs()) code[0] |= 1 << 7; 186 if (i->src(1).mod.neg()) code[0] |= 1 << 8; 187 if (i->src(0).mod.neg()) code[0] |= 1 << 9; 188 } 189 190 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos) 191 { 192 uint8_t val; 193 194 switch (cc) { 195 case CC_LT: val = 0x1; break; 196 case CC_LTU: val = 0x9; break; 197 case CC_EQ: val = 0x2; break; 198 case CC_EQU: val = 0xa; break; 199 case CC_LE: val = 0x3; break; 200 case CC_LEU: val = 0xb; break; 201 case CC_GT: val = 0x4; break; 202 case CC_GTU: val = 0xc; break; 203 case CC_NE: val = 0x5; break; 204 case CC_NEU: val = 0xd; break; 205 case CC_GE: val = 0x6; break; 206 case CC_GEU: val = 0xe; break; 207 case CC_TR: val = 0xf; break; 208 case CC_FL: val = 0x0; break; 209 210 case CC_A: val = 0x14; break; 211 case CC_NA: val = 0x13; break; 212 case CC_S: val = 0x15; break; 213 case CC_NS: val = 0x12; break; 214 case CC_C: val = 0x16; break; 215 case CC_NC: val = 0x11; break; 216 case CC_O: val = 0x17; break; 217 case CC_NO: val = 0x10; break; 218 219 default: 220 val = 0; 221 assert(!"invalid condition code"); 222 break; 223 } 224 code[pos / 32] |= val << (pos % 32); 225 } 226 227 void 228 CodeEmitterNVC0::emitPredicate(const Instruction *i) 229 { 230 if (i->predSrc >= 0) { 231 assert(i->getPredicate()->reg.file == FILE_PREDICATE); 232 srcId(i->src(i->predSrc), 10); 233 if (i->cc == CC_NOT_P) 234 code[0] |= 0x2000; // negate 235 } else { 236 code[0] |= 0x1c00; 237 } 238 } 239 240 void 241 CodeEmitterNVC0::setAddress16(const ValueRef& src) 242 { 243 Symbol *sym = src.get()->asSym(); 244 245 assert(sym); 246 247 code[0] |= (sym->reg.data.offset & 0x003f) << 26; 248 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6; 249 } 250 251 void 252 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s) 253 { 254 const ImmediateValue *imm = i->src(s).get()->asImm(); 255 uint32_t u32; 256 257 assert(imm); 258 u32 = imm->reg.data.u32; 259 260 if ((code[0] & 0xf) == 0x2) { 261 // LIMM 262 code[0] |= (u32 & 0x3f) << 26; 263 code[1] |= u32 >> 6; 264 } else 265 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) { 266 // integer immediate 267 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); 268 assert(!(code[1] & 0xc000)); 269 u32 &= 0xfffff; 270 code[0] |= (u32 & 0x3f) << 26; 271 code[1] |= 0xc000 | (u32 >> 6); 272 } else { 273 // float immediate 274 assert(!(u32 & 0x00000fff)); 275 assert(!(code[1] & 0xc000)); 276 code[0] |= ((u32 >> 12) & 0x3f) << 26; 277 code[1] |= 0xc000 | (u32 >> 18); 278 } 279 } 280 281 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref) 282 { 283 const ImmediateValue *imm = ref.get()->asImm(); 284 285 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32); 286 287 assert(s8 == imm->reg.data.s32); 288 289 code[0] |= (s8 & 0x3f) << 26; 290 code[0] |= (s8 >> 6) << 8; 291 } 292 293 void 294 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) 295 { 296 code[0] = opc; 297 code[1] = opc >> 32; 298 299 emitPredicate(i); 300 301 defId(i->def(0), 14); 302 303 int s1 = 26; 304 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST) 305 s1 = 49; 306 307 for (int s = 0; s < 3 && i->srcExists(s); ++s) { 308 switch (i->getSrc(s)->reg.file) { 309 case FILE_MEMORY_CONST: 310 assert(!(code[1] & 0xc000)); 311 code[1] |= (s == 2) ? 0x8000 : 0x4000; 312 code[1] |= i->getSrc(s)->reg.fileIndex << 10; 313 setAddress16(i->src(s)); 314 break; 315 case FILE_IMMEDIATE: 316 assert(s == 1 || 317 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2); 318 assert(!(code[1] & 0xc000)); 319 setImmediate(i, s); 320 break; 321 case FILE_GPR: 322 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst 323 break; 324 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); 325 break; 326 default: 327 // ignore here, can be predicate or flags, but must not be address 328 break; 329 } 330 } 331 } 332 333 void 334 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc) 335 { 336 code[0] = opc; 337 code[1] = opc >> 32; 338 339 emitPredicate(i); 340 341 defId(i->def(0), 14); 342 343 switch (i->src(0).getFile()) { 344 case FILE_MEMORY_CONST: 345 assert(!(code[1] & 0xc000)); 346 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10); 347 setAddress16(i->src(0)); 348 break; 349 case FILE_IMMEDIATE: 350 assert(!(code[1] & 0xc000)); 351 setImmediate(i, 0); 352 break; 353 case FILE_GPR: 354 srcId(i->src(0), 26); 355 break; 356 default: 357 // ignore here, can be predicate or flags, but must not be address 358 break; 359 } 360 } 361 362 void 363 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred) 364 { 365 code[0] = opc; 366 367 int ss2a = 0; 368 if (opc == 0x0d || opc == 0x0e) 369 ss2a = 2; 370 371 defId(i->def(0), 14); 372 srcId(i->src(0), 20); 373 374 assert(pred || (i->predSrc < 0)); 375 if (pred) 376 emitPredicate(i); 377 378 for (int s = 1; s < 3 && i->srcExists(s); ++s) { 379 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) { 380 assert(!(code[0] & (0x300 >> ss2a))); 381 switch (i->src(s).get()->reg.fileIndex) { 382 case 0: code[0] |= 0x100 >> ss2a; break; 383 case 1: code[0] |= 0x200 >> ss2a; break; 384 case 16: code[0] |= 0x300 >> ss2a; break; 385 default: 386 ERROR("invalid c[] space for short form\n"); 387 break; 388 } 389 if (s == 1) 390 code[0] |= i->getSrc(s)->reg.data.offset << 24; 391 else 392 code[0] |= i->getSrc(s)->reg.data.offset << 6; 393 } else 394 if (i->src(s).getFile() == FILE_IMMEDIATE) { 395 assert(s == 1); 396 setImmediateS8(i->src(s)); 397 } else 398 if (i->src(s).getFile() == FILE_GPR) { 399 srcId(i->src(s), (s == 1) ? 26 : 8); 400 } 401 } 402 } 403 404 void 405 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src) 406 { 407 if (src.getFile() == FILE_MEMORY_CONST) { 408 switch (src.get()->reg.fileIndex) { 409 case 0: code[0] |= 0x100; break; 410 case 1: code[0] |= 0x200; break; 411 case 16: code[0] |= 0x300; break; 412 default: 413 assert(!"unsupported file index for short op"); 414 break; 415 } 416 srcAddr32(src, 20); 417 } else { 418 srcId(src, 20); 419 assert(src.getFile() == FILE_GPR); 420 } 421 } 422 423 void 424 CodeEmitterNVC0::emitNOP(const Instruction *i) 425 { 426 code[0] = 0x000001e4; 427 code[1] = 0x40000000; 428 emitPredicate(i); 429 } 430 431 void 432 CodeEmitterNVC0::emitFMAD(const Instruction *i) 433 { 434 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); 435 436 if (i->encSize == 8) { 437 if (isLIMM(i->src(1), TYPE_F32)) { 438 emitForm_A(i, HEX64(20000000, 00000002)); 439 } else { 440 emitForm_A(i, HEX64(30000000, 00000000)); 441 442 if (i->src(2).mod.neg()) 443 code[0] |= 1 << 8; 444 } 445 roundMode_A(i); 446 447 if (neg1) 448 code[0] |= 1 << 9; 449 450 if (i->saturate) 451 code[0] |= 1 << 5; 452 if (i->ftz) 453 code[0] |= 1 << 6; 454 } else { 455 assert(!i->saturate && !i->src(2).mod.neg()); 456 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e, 457 false); 458 if (neg1) 459 code[0] |= 1 << 4; 460 } 461 } 462 463 void 464 CodeEmitterNVC0::emitFMUL(const Instruction *i) 465 { 466 bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); 467 468 assert(i->postFactor >= -3 && i->postFactor <= 3); 469 470 if (i->encSize == 8) { 471 if (isLIMM(i->src(1), TYPE_F32)) { 472 assert(i->postFactor == 0); // constant folded, hopefully 473 emitForm_A(i, HEX64(30000000, 00000002)); 474 } else { 475 emitForm_A(i, HEX64(58000000, 00000000)); 476 roundMode_A(i); 477 code[1] |= ((i->postFactor > 0) ? 478 (7 - i->postFactor) : (0 - i->postFactor)) << 17; 479 } 480 if (neg) 481 code[1] ^= 1 << 25; // aliases with LIMM sign bit 482 483 if (i->saturate) 484 code[0] |= 1 << 5; 485 486 if (i->dnz) 487 code[0] |= 1 << 7; 488 else 489 if (i->ftz) 490 code[0] |= 1 << 6; 491 } else { 492 assert(!neg && !i->saturate && !i->ftz && !i->postFactor); 493 emitForm_S(i, 0xa8, true); 494 } 495 } 496 497 void 498 CodeEmitterNVC0::emitUMUL(const Instruction *i) 499 { 500 if (i->encSize == 8) { 501 if (i->src(1).getFile() == FILE_IMMEDIATE) { 502 emitForm_A(i, HEX64(10000000, 00000002)); 503 } else { 504 emitForm_A(i, HEX64(50000000, 00000003)); 505 } 506 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 507 code[0] |= 1 << 6; 508 if (i->sType == TYPE_S32) 509 code[0] |= 1 << 5; 510 if (i->dType == TYPE_S32) 511 code[0] |= 1 << 7; 512 } else { 513 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true); 514 515 if (i->sType == TYPE_S32) 516 code[0] |= 1 << 6; 517 } 518 } 519 520 void 521 CodeEmitterNVC0::emitFADD(const Instruction *i) 522 { 523 if (i->encSize == 8) { 524 if (isLIMM(i->src(1), TYPE_F32)) { 525 assert(!i->saturate); 526 emitForm_A(i, HEX64(28000000, 00000002)); 527 528 code[0] |= i->src(0).mod.abs() << 7; 529 code[0] |= i->src(0).mod.neg() << 9; 530 531 if (i->src(1).mod.abs()) 532 code[1] &= 0xfdffffff; 533 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg())) 534 code[1] ^= 0x02000000; 535 } else { 536 emitForm_A(i, HEX64(50000000, 00000000)); 537 538 roundMode_A(i); 539 if (i->saturate) 540 code[1] |= 1 << 17; 541 542 emitNegAbs12(i); 543 if (i->op == OP_SUB) code[0] ^= 1 << 8; 544 } 545 if (i->ftz) 546 code[0] |= 1 << 5; 547 } else { 548 assert(!i->saturate && i->op != OP_SUB && 549 !i->src(0).mod.abs() && 550 !i->src(1).mod.neg() && !i->src(1).mod.abs()); 551 552 emitForm_S(i, 0x49, true); 553 554 if (i->src(0).mod.neg()) 555 code[0] |= 1 << 7; 556 } 557 } 558 559 void 560 CodeEmitterNVC0::emitUADD(const Instruction *i) 561 { 562 uint32_t addOp = 0; 563 564 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); 565 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg()); 566 567 if (i->src(0).mod.neg()) 568 addOp |= 0x200; 569 if (i->src(1).mod.neg()) 570 addOp |= 0x100; 571 if (i->op == OP_SUB) { 572 addOp ^= 0x100; 573 assert(addOp != 0x300); // would be add-plus-one 574 } 575 576 if (i->encSize == 8) { 577 if (isLIMM(i->src(1), TYPE_U32)) { 578 emitForm_A(i, HEX64(08000000, 00000002)); 579 if (i->defExists(1)) 580 code[1] |= 1 << 26; // write carry 581 } else { 582 emitForm_A(i, HEX64(48000000, 00000003)); 583 if (i->defExists(1)) 584 code[1] |= 1 << 16; // write carry 585 } 586 code[0] |= addOp; 587 588 if (i->saturate) 589 code[0] |= 1 << 5; 590 if (i->flagsSrc >= 0) // add carry 591 code[0] |= 1 << 6; 592 } else { 593 assert(!(addOp & 0x100)); 594 emitForm_S(i, (addOp >> 3) | 595 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true); 596 } 597 } 598 599 // TODO: shl-add 600 void 601 CodeEmitterNVC0::emitIMAD(const Instruction *i) 602 { 603 assert(i->encSize == 8); 604 emitForm_A(i, HEX64(20000000, 00000003)); 605 606 if (isSignedType(i->dType)) 607 code[0] |= 1 << 7; 608 if (isSignedType(i->sType)) 609 code[0] |= 1 << 5; 610 611 code[1] |= i->saturate << 24; 612 613 if (i->flagsDef >= 0) code[1] |= 1 << 16; 614 if (i->flagsSrc >= 0) code[1] |= 1 << 23; 615 616 if (i->src(2).mod.neg()) code[0] |= 0x10; 617 if (i->src(1).mod.neg() ^ 618 i->src(0).mod.neg()) code[0] |= 0x20; 619 620 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 621 code[0] |= 1 << 6; 622 } 623 624 void 625 CodeEmitterNVC0::emitISAD(const Instruction *i) 626 { 627 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); 628 assert(i->encSize == 8); 629 630 emitForm_A(i, HEX64(38000000, 00000003)); 631 632 if (i->dType == TYPE_S32) 633 code[0] |= 1 << 5; 634 } 635 636 void 637 CodeEmitterNVC0::emitNOT(Instruction *i) 638 { 639 assert(i->encSize == 8); 640 i->setSrc(1, i->src(0)); 641 emitForm_A(i, HEX64(68000000, 000001c3)); 642 } 643 644 void 645 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp) 646 { 647 if (i->encSize == 8) { 648 if (isLIMM(i->src(1), TYPE_U32)) { 649 emitForm_A(i, HEX64(38000000, 00000002)); 650 651 if (i->srcExists(2)) 652 code[1] |= 1 << 26; 653 } else { 654 emitForm_A(i, HEX64(68000000, 00000003)); 655 656 if (i->srcExists(2)) 657 code[1] |= 1 << 16; 658 } 659 code[0] |= subOp << 6; 660 661 if (i->srcExists(2)) // carry 662 code[0] |= 1 << 5; 663 664 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; 665 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; 666 } else { 667 emitForm_S(i, (subOp << 5) | 668 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true); 669 } 670 } 671 672 void 673 CodeEmitterNVC0::emitPOPC(const Instruction *i) 674 { 675 emitForm_A(i, HEX64(54000000, 00000004)); 676 677 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; 678 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; 679 } 680 681 void 682 CodeEmitterNVC0::emitINSBF(const Instruction *i) 683 { 684 emitForm_A(i, HEX64(28000000, 30000000)); 685 } 686 687 void 688 CodeEmitterNVC0::emitShift(const Instruction *i) 689 { 690 if (i->op == OP_SHR) { 691 emitForm_A(i, HEX64(58000000, 00000003) 692 | (isSignedType(i->dType) ? 0x20 : 0x00)); 693 } else { 694 emitForm_A(i, HEX64(60000000, 00000003)); 695 } 696 697 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) 698 code[0] |= 1 << 9; 699 } 700 701 void 702 CodeEmitterNVC0::emitPreOp(const Instruction *i) 703 { 704 if (i->encSize == 8) { 705 emitForm_B(i, HEX64(60000000, 00000000)); 706 707 if (i->op == OP_PREEX2) 708 code[0] |= 0x20; 709 710 if (i->src(0).mod.abs()) code[0] |= 1 << 6; 711 if (i->src(0).mod.neg()) code[0] |= 1 << 8; 712 } else { 713 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true); 714 } 715 } 716 717 void 718 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp) 719 { 720 if (i->encSize == 8) { 721 code[0] = 0x00000000 | (subOp << 26); 722 code[1] = 0xc8000000; 723 724 emitPredicate(i); 725 726 defId(i->def(0), 14); 727 srcId(i->src(0), 20); 728 729 assert(i->src(0).getFile() == FILE_GPR); 730 731 if (i->saturate) code[0] |= 1 << 5; 732 733 if (i->src(0).mod.abs()) code[0] |= 1 << 7; 734 if (i->src(0).mod.neg()) code[0] |= 1 << 9; 735 } else { 736 emitForm_S(i, 0x80000008 | (subOp << 26), true); 737 738 assert(!i->src(0).mod.neg()); 739 if (i->src(0).mod.abs()) code[0] |= 1 << 30; 740 } 741 } 742 743 void 744 CodeEmitterNVC0::emitMINMAX(const Instruction *i) 745 { 746 uint64_t op; 747 748 assert(i->encSize == 8); 749 750 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL; 751 752 if (i->ftz) 753 op |= 1 << 5; 754 else 755 if (!isFloatType(i->dType)) 756 op |= isSignedType(i->dType) ? 0x23 : 0x03; 757 758 emitForm_A(i, op); 759 emitNegAbs12(i); 760 } 761 762 void 763 CodeEmitterNVC0::roundMode_C(const Instruction *i) 764 { 765 switch (i->rnd) { 766 case ROUND_M: code[1] |= 1 << 17; break; 767 case ROUND_P: code[1] |= 2 << 17; break; 768 case ROUND_Z: code[1] |= 3 << 17; break; 769 case ROUND_NI: code[0] |= 1 << 7; break; 770 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break; 771 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break; 772 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break; 773 case ROUND_N: break; 774 default: 775 assert(!"invalid round mode"); 776 break; 777 } 778 } 779 780 void 781 CodeEmitterNVC0::roundMode_CS(const Instruction *i) 782 { 783 switch (i->rnd) { 784 case ROUND_M: 785 case ROUND_MI: code[0] |= 1 << 16; break; 786 case ROUND_P: 787 case ROUND_PI: code[0] |= 2 << 16; break; 788 case ROUND_Z: 789 case ROUND_ZI: code[0] |= 3 << 16; break; 790 default: 791 break; 792 } 793 } 794 795 void 796 CodeEmitterNVC0::emitCVT(Instruction *i) 797 { 798 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); 799 800 switch (i->op) { 801 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break; 802 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break; 803 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break; 804 default: 805 break; 806 } 807 808 const bool sat = (i->op == OP_SAT) || i->saturate; 809 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs(); 810 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg(); 811 812 if (i->encSize == 8) { 813 emitForm_B(i, HEX64(10000000, 00000004)); 814 815 roundMode_C(i); 816 817 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size() 818 code[0] |= util_logbase2(typeSizeof(i->dType)) << 20; 819 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; 820 821 if (sat) 822 code[0] |= 0x20; 823 if (abs) 824 code[0] |= 1 << 6; 825 if (neg && i->op != OP_ABS) 826 code[0] |= 1 << 8; 827 828 if (i->ftz) 829 code[1] |= 1 << 23; 830 831 if (isSignedIntType(i->dType)) 832 code[0] |= 0x080; 833 if (isSignedIntType(i->sType)) 834 code[0] |= 0x200; 835 836 if (isFloatType(i->dType)) { 837 if (!isFloatType(i->sType)) 838 code[1] |= 0x08000000; 839 } else { 840 if (isFloatType(i->sType)) 841 code[1] |= 0x04000000; 842 else 843 code[1] |= 0x0c000000; 844 } 845 } else { 846 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) { 847 code[0] = 0x298; 848 } else 849 if (isFloatType(i->dType)) { 850 if (isFloatType(i->sType)) 851 code[0] = 0x098; 852 else 853 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0); 854 } else { 855 assert(isFloatType(i->sType)); 856 857 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0); 858 } 859 860 if (neg) code[0] |= 1 << 16; 861 if (sat) code[0] |= 1 << 18; 862 if (abs) code[0] |= 1 << 19; 863 864 roundMode_CS(i); 865 } 866 } 867 868 void 869 CodeEmitterNVC0::emitSET(const CmpInstruction *i) 870 { 871 uint32_t hi; 872 uint32_t lo = 0; 873 874 if (i->sType == TYPE_F64) 875 lo = 0x1; 876 else 877 if (!isFloatType(i->sType)) 878 lo = 0x3; 879 880 if (isFloatType(i->dType) || isSignedIntType(i->sType)) 881 lo |= 0x20; 882 883 switch (i->op) { 884 case OP_SET_AND: hi = 0x10000000; break; 885 case OP_SET_OR: hi = 0x10200000; break; 886 case OP_SET_XOR: hi = 0x10400000; break; 887 default: 888 hi = 0x100e0000; 889 break; 890 } 891 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo); 892 893 if (i->op != OP_SET) 894 srcId(i->src(2), 32 + 17); 895 896 if (i->def(0).getFile() == FILE_PREDICATE) { 897 if (i->sType == TYPE_F32) 898 code[1] += 0x10000000; 899 else 900 code[1] += 0x08000000; 901 902 code[0] &= ~0xfc000; 903 defId(i->def(0), 17); 904 if (i->defExists(1)) 905 defId(i->def(1), 14); 906 else 907 code[0] |= 0x1c000; 908 } 909 910 if (i->ftz) 911 code[1] |= 1 << 27; 912 913 emitCondCode(i->setCond, 32 + 23); 914 emitNegAbs12(i); 915 } 916 917 void 918 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) 919 { 920 uint64_t op; 921 922 switch (i->dType) { 923 case TYPE_S32: 924 op = HEX64(30000000, 00000023); 925 break; 926 case TYPE_U32: 927 op = HEX64(30000000, 00000003); 928 break; 929 case TYPE_F32: 930 op = HEX64(38000000, 00000000); 931 break; 932 default: 933 assert(!"invalid type for SLCT"); 934 op = 0; 935 break; 936 } 937 emitForm_A(i, op); 938 939 CondCode cc = i->setCond; 940 941 if (i->src(2).mod.neg()) 942 cc = reverseCondCode(cc); 943 944 emitCondCode(cc, 32 + 23); 945 946 if (i->ftz) 947 code[0] |= 1 << 5; 948 } 949 950 void CodeEmitterNVC0::emitSELP(const Instruction *i) 951 { 952 emitForm_A(i, HEX64(20000000, 00000004)); 953 954 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) 955 code[1] |= 1 << 20; 956 } 957 958 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) 959 { 960 code[0] = 0x00000006 | (i->subOp << 26); 961 code[1] = 0xf0000000; 962 emitPredicate(i); 963 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5); 964 } 965 966 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) 967 { 968 code[0] = 0x00000086; 969 code[1] = 0xd0000000; 970 971 code[1] |= i->tex.r; 972 code[1] |= i->tex.s << 8; 973 974 if (i->tex.liveOnly) 975 code[0] |= 1 << 9; 976 977 defId(i->def(0), 14); 978 srcId(i->src(0), 20); 979 } 980 981 static inline bool 982 isNextIndependentTex(const TexInstruction *i) 983 { 984 if (!i->next || !isTextureOp(i->next->op)) 985 return false; 986 if (i->getDef(0)->interfers(i->next->getSrc(0))) 987 return false; 988 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); 989 } 990 991 void 992 CodeEmitterNVC0::emitTEX(const TexInstruction *i) 993 { 994 code[0] = 0x00000006; 995 996 if (isNextIndependentTex(i)) 997 code[0] |= 0x080; // t mode 998 else 999 code[0] |= 0x100; // p mode 1000 1001 if (i->tex.liveOnly) 1002 code[0] |= 1 << 9; 1003 1004 switch (i->op) { 1005 case OP_TEX: code[1] = 0x80000000; break; 1006 case OP_TXB: code[1] = 0x84000000; break; 1007 case OP_TXL: code[1] = 0x86000000; break; 1008 case OP_TXF: code[1] = 0x90000000; break; 1009 case OP_TXG: code[1] = 0xa0000000; break; 1010 case OP_TXD: code[1] = 0xe0000000; break; 1011 default: 1012 assert(!"invalid texture op"); 1013 break; 1014 } 1015 if (i->op == OP_TXF) { 1016 if (!i->tex.levelZero) 1017 code[1] |= 0x02000000; 1018 } else 1019 if (i->tex.levelZero) { 1020 code[1] |= 0x02000000; 1021 } 1022 1023 if (i->op != OP_TXD && i->tex.derivAll) 1024 code[1] |= 1 << 13; 1025 1026 defId(i->def(0), 14); 1027 srcId(i->src(0), 20); 1028 1029 emitPredicate(i); 1030 1031 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; 1032 1033 code[1] |= i->tex.mask << 14; 1034 1035 code[1] |= i->tex.r; 1036 code[1] |= i->tex.s << 8; 1037 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) 1038 code[1] |= 1 << 18; // in 1st source (with array index) 1039 1040 // texture target: 1041 code[1] |= (i->tex.target.getDim() - 1) << 20; 1042 if (i->tex.target.isCube()) 1043 code[1] += 2 << 20; 1044 if (i->tex.target.isArray()) 1045 code[1] |= 1 << 19; 1046 if (i->tex.target.isShadow()) 1047 code[1] |= 1 << 24; 1048 1049 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) 1050 1051 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { 1052 // lzero 1053 if (i->op == OP_TXL) 1054 code[1] &= ~(1 << 26); 1055 else 1056 if (i->op == OP_TXF) 1057 code[1] &= ~(1 << 25); 1058 } 1059 if (i->tex.target == TEX_TARGET_2D_MS || 1060 i->tex.target == TEX_TARGET_2D_MS_ARRAY) 1061 code[1] |= 1 << 23; 1062 1063 if (i->tex.useOffsets) // in vecSrc0.w 1064 code[1] |= 1 << 22; 1065 1066 srcId(i, src1, 26); 1067 } 1068 1069 void 1070 CodeEmitterNVC0::emitTXQ(const TexInstruction *i) 1071 { 1072 code[0] = 0x00000086; 1073 code[1] = 0xc0000000; 1074 1075 switch (i->tex.query) { 1076 case TXQ_DIMS: code[1] |= 0 << 22; break; 1077 case TXQ_TYPE: code[1] |= 1 << 22; break; 1078 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break; 1079 case TXQ_FILTER: code[1] |= 3 << 22; break; 1080 case TXQ_LOD: code[1] |= 4 << 22; break; 1081 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break; 1082 default: 1083 assert(!"invalid texture query"); 1084 break; 1085 } 1086 1087 code[1] |= i->tex.mask << 14; 1088 1089 code[1] |= i->tex.r; 1090 code[1] |= i->tex.s << 8; 1091 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0) 1092 code[1] |= 1 << 18; 1093 1094 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) 1095 1096 defId(i->def(0), 14); 1097 srcId(i->src(0), 20); 1098 srcId(i, src1, 26); 1099 1100 emitPredicate(i); 1101 } 1102 1103 void 1104 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) 1105 { 1106 code[0] = 0x00000000 | (laneMask << 6); 1107 code[1] = 0x48000000 | qOp; 1108 1109 defId(i->def(0), 14); 1110 srcId(i->src(0), 20); 1111 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); 1112 1113 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) 1114 code[0] |= 1 << 9; // dall 1115 1116 emitPredicate(i); 1117 } 1118 1119 void 1120 CodeEmitterNVC0::emitFlow(const Instruction *i) 1121 { 1122 const FlowInstruction *f = i->asFlow(); 1123 1124 unsigned mask; // bit 0: predicate, bit 1: target 1125 1126 code[0] = 0x00000007; 1127 1128 switch (i->op) { 1129 case OP_BRA: 1130 code[1] = f->absolute ? 0x00000000 : 0x40000000; 1131 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) 1132 code[0] |= 0x4000; 1133 mask = 3; 1134 break; 1135 case OP_CALL: 1136 code[1] = f->absolute ? 0x10000000 : 0x50000000; 1137 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) 1138 code[0] |= 0x4000; 1139 mask = 2; 1140 break; 1141 1142 case OP_EXIT: code[1] = 0x80000000; mask = 1; break; 1143 case OP_RET: code[1] = 0x90000000; mask = 1; break; 1144 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break; 1145 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break; 1146 case OP_CONT: code[1] = 0xb0000000; mask = 1; break; 1147 1148 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break; 1149 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break; 1150 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break; 1151 case OP_PRERET: code[1] = 0x78000000; mask = 2; break; 1152 1153 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break; 1154 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break; 1155 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break; 1156 default: 1157 assert(!"invalid flow operation"); 1158 return; 1159 } 1160 1161 if (mask & 1) { 1162 emitPredicate(i); 1163 if (i->flagsSrc < 0) 1164 code[0] |= 0x1e0; 1165 } 1166 1167 if (!f) 1168 return; 1169 1170 if (f->allWarp) 1171 code[0] |= 1 << 15; 1172 if (f->limit) 1173 code[0] |= 1 << 16; 1174 1175 if (f->op == OP_CALL) { 1176 if (f->builtin) { 1177 assert(f->absolute); 1178 uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin); 1179 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26); 1180 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6); 1181 } else { 1182 assert(!f->absolute); 1183 int32_t pcRel = f->target.fn->binPos - (codeSize + 8); 1184 code[0] |= (pcRel & 0x3f) << 26; 1185 code[1] |= (pcRel >> 6) & 0x3ffff; 1186 } 1187 } else 1188 if (mask & 2) { 1189 int32_t pcRel = f->target.bb->binPos - (codeSize + 8); 1190 // currently we don't want absolute branches 1191 assert(!f->absolute); 1192 code[0] |= (pcRel & 0x3f) << 26; 1193 code[1] |= (pcRel >> 6) & 0x3ffff; 1194 } 1195 } 1196 1197 void 1198 CodeEmitterNVC0::emitPFETCH(const Instruction *i) 1199 { 1200 uint32_t prim = i->src(0).get()->reg.data.u32; 1201 1202 code[0] = 0x00000006 | ((prim & 0x3f) << 26); 1203 code[1] = 0x00000000 | (prim >> 6); 1204 1205 emitPredicate(i); 1206 1207 defId(i->def(0), 14); 1208 srcId(i->src(1), 20); 1209 } 1210 1211 void 1212 CodeEmitterNVC0::emitVFETCH(const Instruction *i) 1213 { 1214 code[0] = 0x00000006; 1215 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset; 1216 1217 if (i->perPatch) 1218 code[0] |= 0x100; 1219 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) 1220 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads 1221 1222 emitPredicate(i); 1223 1224 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5; 1225 1226 defId(i->def(0), 14); 1227 srcId(i->src(0).getIndirect(0), 20); 1228 srcId(i->src(0).getIndirect(1), 26); // vertex address 1229 } 1230 1231 void 1232 CodeEmitterNVC0::emitEXPORT(const Instruction *i) 1233 { 1234 unsigned int size = typeSizeof(i->dType); 1235 1236 code[0] = 0x00000006 | ((size / 4 - 1) << 5); 1237 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset; 1238 1239 assert(!(code[1] & ((size == 12) ? 15 : (size - 1)))); 1240 1241 if (i->perPatch) 1242 code[0] |= 0x100; 1243 1244 emitPredicate(i); 1245 1246 assert(i->src(1).getFile() == FILE_GPR); 1247 1248 srcId(i->src(0).getIndirect(0), 20); 1249 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address 1250 srcId(i->src(1), 26); 1251 } 1252 1253 void 1254 CodeEmitterNVC0::emitOUT(const Instruction *i) 1255 { 1256 code[0] = 0x00000006; 1257 code[1] = 0x1c000000; 1258 1259 emitPredicate(i); 1260 1261 defId(i->def(0), 14); // new secret address 1262 srcId(i->src(0), 20); // old secret address, should be 0 initially 1263 1264 assert(i->src(0).getFile() == FILE_GPR); 1265 1266 if (i->op == OP_EMIT) 1267 code[0] |= 1 << 5; 1268 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART) 1269 code[0] |= 1 << 6; 1270 1271 // vertex stream 1272 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1273 code[1] |= 0xc000; 1274 code[0] |= SDATA(i->src(1)).u32 << 26; 1275 } else { 1276 srcId(i->src(1), 26); 1277 } 1278 } 1279 1280 void 1281 CodeEmitterNVC0::emitInterpMode(const Instruction *i) 1282 { 1283 if (i->encSize == 8) { 1284 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID 1285 } else { 1286 if (i->getInterpMode() == NV50_IR_INTERP_SC) 1287 code[0] |= 0x80; 1288 assert(i->op == OP_PINTERP && i->getSampleMode() == 0); 1289 } 1290 } 1291 1292 void 1293 CodeEmitterNVC0::emitINTERP(const Instruction *i) 1294 { 1295 const uint32_t base = i->getSrc(0)->reg.data.offset; 1296 1297 if (i->encSize == 8) { 1298 code[0] = 0x00000000; 1299 code[1] = 0xc0000000 | (base & 0xffff); 1300 1301 if (i->saturate) 1302 code[0] |= 1 << 5; 1303 1304 if (i->op == OP_PINTERP) 1305 srcId(i->src(1), 26); 1306 else 1307 code[0] |= 0x3f << 26; 1308 1309 srcId(i->src(0).getIndirect(0), 20); 1310 } else { 1311 assert(i->op == OP_PINTERP); 1312 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26); 1313 srcId(i->src(1), 20); 1314 } 1315 emitInterpMode(i); 1316 1317 emitPredicate(i); 1318 defId(i->def(0), 14); 1319 1320 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) 1321 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17); 1322 else 1323 code[1] |= 0x3f << 17; 1324 } 1325 1326 void 1327 CodeEmitterNVC0::emitLoadStoreType(DataType ty) 1328 { 1329 uint8_t val; 1330 1331 switch (ty) { 1332 case TYPE_U8: 1333 val = 0x00; 1334 break; 1335 case TYPE_S8: 1336 val = 0x20; 1337 break; 1338 case TYPE_F16: 1339 case TYPE_U16: 1340 val = 0x40; 1341 break; 1342 case TYPE_S16: 1343 val = 0x60; 1344 break; 1345 case TYPE_F32: 1346 case TYPE_U32: 1347 case TYPE_S32: 1348 val = 0x80; 1349 break; 1350 case TYPE_F64: 1351 case TYPE_U64: 1352 case TYPE_S64: 1353 val = 0xa0; 1354 break; 1355 case TYPE_B128: 1356 val = 0xc0; 1357 break; 1358 default: 1359 val = 0x80; 1360 assert(!"invalid type"); 1361 break; 1362 } 1363 code[0] |= val; 1364 } 1365 1366 void 1367 CodeEmitterNVC0::emitCachingMode(CacheMode c) 1368 { 1369 uint32_t val; 1370 1371 switch (c) { 1372 case CACHE_CA: 1373 // case CACHE_WB: 1374 val = 0x000; 1375 break; 1376 case CACHE_CG: 1377 val = 0x100; 1378 break; 1379 case CACHE_CS: 1380 val = 0x200; 1381 break; 1382 case CACHE_CV: 1383 // case CACHE_WT: 1384 val = 0x300; 1385 break; 1386 default: 1387 val = 0; 1388 assert(!"invalid caching mode"); 1389 break; 1390 } 1391 code[0] |= val; 1392 } 1393 1394 void 1395 CodeEmitterNVC0::emitSTORE(const Instruction *i) 1396 { 1397 uint32_t opc; 1398 1399 switch (i->src(0).getFile()) { 1400 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break; 1401 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; 1402 case FILE_MEMORY_SHARED: opc = 0xc9000000; break; 1403 default: 1404 assert(!"invalid memory file"); 1405 opc = 0; 1406 break; 1407 } 1408 code[0] = 0x00000005; 1409 code[1] = opc; 1410 1411 setAddress16(i->src(0)); 1412 srcId(i->src(1), 14); 1413 srcId(i->src(0).getIndirect(0), 20); 1414 1415 emitPredicate(i); 1416 1417 emitLoadStoreType(i->dType); 1418 emitCachingMode(i->cache); 1419 } 1420 1421 void 1422 CodeEmitterNVC0::emitLOAD(const Instruction *i) 1423 { 1424 uint32_t opc; 1425 1426 code[0] = 0x00000005; 1427 1428 switch (i->src(0).getFile()) { 1429 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; 1430 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; 1431 case FILE_MEMORY_SHARED: opc = 0xc1000000; break; 1432 case FILE_MEMORY_CONST: 1433 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { 1434 emitMOV(i); // not sure if this is any better 1435 return; 1436 } 1437 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10); 1438 code[0] = 0x00000006 | (i->subOp << 8); 1439 break; 1440 default: 1441 assert(!"invalid memory file"); 1442 opc = 0; 1443 break; 1444 } 1445 code[1] = opc; 1446 1447 defId(i->def(0), 14); 1448 1449 setAddress16(i->src(0)); 1450 srcId(i->src(0).getIndirect(0), 20); 1451 1452 emitPredicate(i); 1453 1454 emitLoadStoreType(i->dType); 1455 emitCachingMode(i->cache); 1456 } 1457 1458 uint8_t 1459 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) 1460 { 1461 switch (SDATA(ref).sv.sv) { 1462 case SV_LANEID: return 0x00; 1463 case SV_PHYSID: return 0x03; 1464 case SV_VERTEX_COUNT: return 0x10; 1465 case SV_INVOCATION_ID: return 0x11; 1466 case SV_YDIR: return 0x12; 1467 case SV_TID: return 0x21 + SDATA(ref).sv.index; 1468 case SV_CTAID: return 0x25 + SDATA(ref).sv.index; 1469 case SV_NTID: return 0x29 + SDATA(ref).sv.index; 1470 case SV_GRIDID: return 0x2c; 1471 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; 1472 case SV_LBASE: return 0x34; 1473 case SV_SBASE: return 0x30; 1474 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; 1475 default: 1476 assert(!"no sreg for system value"); 1477 return 0; 1478 } 1479 } 1480 1481 void 1482 CodeEmitterNVC0::emitMOV(const Instruction *i) 1483 { 1484 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { 1485 uint8_t sr = getSRegEncoding(i->src(0)); 1486 1487 if (i->encSize == 8) { 1488 code[0] = 0x00000004 | (sr << 26); 1489 code[1] = 0x2c000000; 1490 } else { 1491 code[0] = 0x40000008 | (sr << 20); 1492 } 1493 defId(i->def(0), 14); 1494 1495 emitPredicate(i); 1496 } else 1497 if (i->encSize == 8) { 1498 uint64_t opc; 1499 1500 if (i->src(0).getFile() == FILE_IMMEDIATE) 1501 opc = HEX64(18000000, 000001e2); 1502 else 1503 if (i->src(0).getFile() == FILE_PREDICATE) 1504 opc = HEX64(080e0000, 1c000004); 1505 else 1506 opc = HEX64(28000000, 00000004); 1507 1508 opc |= i->lanes << 5; 1509 1510 emitForm_B(i, opc); 1511 } else { 1512 uint32_t imm; 1513 1514 if (i->src(0).getFile() == FILE_IMMEDIATE) { 1515 imm = SDATA(i->src(0)).u32; 1516 if (imm & 0xfff00000) { 1517 assert(!(imm & 0x000fffff)); 1518 code[0] = 0x00000318 | imm; 1519 } else { 1520 assert(imm < 0x800 || ((int32_t)imm >= -0x800)); 1521 code[0] = 0x00000118 | (imm << 20); 1522 } 1523 } else { 1524 code[0] = 0x0028; 1525 emitShortSrc2(i->src(0)); 1526 } 1527 defId(i->def(0), 14); 1528 1529 emitPredicate(i); 1530 } 1531 } 1532 1533 bool 1534 CodeEmitterNVC0::emitInstruction(Instruction *insn) 1535 { 1536 unsigned int size = insn->encSize; 1537 1538 if (writeIssueDelays && !(codeSize & 0x3f)) 1539 size += 8; 1540 1541 if (!insn->encSize) { 1542 ERROR("skipping unencodable instruction: "); insn->print(); 1543 return false; 1544 } else 1545 if (codeSize + size > codeSizeLimit) { 1546 ERROR("code emitter output buffer too small\n"); 1547 return false; 1548 } 1549 1550 if (writeIssueDelays) { 1551 if (!(codeSize & 0x3f)) { 1552 code[0] = 0x00000007; // cf issue delay "instruction" 1553 code[1] = 0x20000000; 1554 code += 2; 1555 codeSize += 8; 1556 } 1557 const unsigned int id = (codeSize & 0x3f) / 8 - 1; 1558 uint32_t *data = code - (id * 2 + 2); 1559 if (id <= 2) { 1560 data[0] |= insn->sched << (id * 8 + 4); 1561 } else 1562 if (id == 3) { 1563 data[0] |= insn->sched << 28; 1564 data[1] |= insn->sched >> 4; 1565 } else { 1566 data[1] |= insn->sched << ((id - 4) * 8 + 4); 1567 } 1568 } 1569 1570 // assert that instructions with multiple defs don't corrupt registers 1571 for (int d = 0; insn->defExists(d); ++d) 1572 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); 1573 1574 switch (insn->op) { 1575 case OP_MOV: 1576 case OP_RDSV: 1577 emitMOV(insn); 1578 break; 1579 case OP_NOP: 1580 break; 1581 case OP_LOAD: 1582 emitLOAD(insn); 1583 break; 1584 case OP_STORE: 1585 emitSTORE(insn); 1586 break; 1587 case OP_LINTERP: 1588 case OP_PINTERP: 1589 emitINTERP(insn); 1590 break; 1591 case OP_VFETCH: 1592 emitVFETCH(insn); 1593 break; 1594 case OP_EXPORT: 1595 emitEXPORT(insn); 1596 break; 1597 case OP_PFETCH: 1598 emitPFETCH(insn); 1599 break; 1600 case OP_EMIT: 1601 case OP_RESTART: 1602 emitOUT(insn); 1603 break; 1604 case OP_ADD: 1605 case OP_SUB: 1606 if (isFloatType(insn->dType)) 1607 emitFADD(insn); 1608 else 1609 emitUADD(insn); 1610 break; 1611 case OP_MUL: 1612 if (isFloatType(insn->dType)) 1613 emitFMUL(insn); 1614 else 1615 emitUMUL(insn); 1616 break; 1617 case OP_MAD: 1618 case OP_FMA: 1619 if (isFloatType(insn->dType)) 1620 emitFMAD(insn); 1621 else 1622 emitIMAD(insn); 1623 break; 1624 case OP_SAD: 1625 emitISAD(insn); 1626 break; 1627 case OP_NOT: 1628 emitNOT(insn); 1629 break; 1630 case OP_AND: 1631 emitLogicOp(insn, 0); 1632 break; 1633 case OP_OR: 1634 emitLogicOp(insn, 1); 1635 break; 1636 case OP_XOR: 1637 emitLogicOp(insn, 2); 1638 break; 1639 case OP_SHL: 1640 case OP_SHR: 1641 emitShift(insn); 1642 break; 1643 case OP_SET: 1644 case OP_SET_AND: 1645 case OP_SET_OR: 1646 case OP_SET_XOR: 1647 emitSET(insn->asCmp()); 1648 break; 1649 case OP_SELP: 1650 emitSELP(insn); 1651 break; 1652 case OP_SLCT: 1653 emitSLCT(insn->asCmp()); 1654 break; 1655 case OP_MIN: 1656 case OP_MAX: 1657 emitMINMAX(insn); 1658 break; 1659 case OP_ABS: 1660 case OP_NEG: 1661 case OP_CEIL: 1662 case OP_FLOOR: 1663 case OP_TRUNC: 1664 case OP_CVT: 1665 case OP_SAT: 1666 emitCVT(insn); 1667 break; 1668 case OP_RSQ: 1669 emitSFnOp(insn, 5); 1670 break; 1671 case OP_RCP: 1672 emitSFnOp(insn, 4); 1673 break; 1674 case OP_LG2: 1675 emitSFnOp(insn, 3); 1676 break; 1677 case OP_EX2: 1678 emitSFnOp(insn, 2); 1679 break; 1680 case OP_SIN: 1681 emitSFnOp(insn, 1); 1682 break; 1683 case OP_COS: 1684 emitSFnOp(insn, 0); 1685 break; 1686 case OP_PRESIN: 1687 case OP_PREEX2: 1688 emitPreOp(insn); 1689 break; 1690 case OP_TEX: 1691 case OP_TXB: 1692 case OP_TXL: 1693 case OP_TXD: 1694 case OP_TXF: 1695 emitTEX(insn->asTex()); 1696 break; 1697 case OP_TXQ: 1698 emitTXQ(insn->asTex()); 1699 break; 1700 case OP_TEXBAR: 1701 emitTEXBAR(insn); 1702 break; 1703 case OP_BRA: 1704 case OP_CALL: 1705 case OP_PRERET: 1706 case OP_RET: 1707 case OP_DISCARD: 1708 case OP_EXIT: 1709 case OP_PRECONT: 1710 case OP_CONT: 1711 case OP_PREBREAK: 1712 case OP_BREAK: 1713 case OP_JOINAT: 1714 case OP_BRKPT: 1715 case OP_QUADON: 1716 case OP_QUADPOP: 1717 emitFlow(insn); 1718 break; 1719 case OP_QUADOP: 1720 emitQUADOP(insn, insn->subOp, insn->lanes); 1721 break; 1722 case OP_DFDX: 1723 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); 1724 break; 1725 case OP_DFDY: 1726 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); 1727 break; 1728 case OP_POPCNT: 1729 emitPOPC(insn); 1730 break; 1731 case OP_JOIN: 1732 emitNOP(insn); 1733 insn->join = 1; 1734 break; 1735 case OP_PHI: 1736 case OP_UNION: 1737 case OP_CONSTRAINT: 1738 ERROR("operation should have been eliminated"); 1739 return false; 1740 case OP_EXP: 1741 case OP_LOG: 1742 case OP_SQRT: 1743 case OP_POW: 1744 ERROR("operation should have been lowered\n"); 1745 return false; 1746 default: 1747 ERROR("unknow op\n"); 1748 return false; 1749 } 1750 1751 if (insn->join) { 1752 code[0] |= 0x10; 1753 assert(insn->encSize == 8); 1754 } 1755 1756 code += insn->encSize / 4; 1757 codeSize += insn->encSize; 1758 return true; 1759 } 1760 1761 uint32_t 1762 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const 1763 { 1764 const Target::OpInfo &info = targ->getOpInfo(i); 1765 1766 if (writeIssueDelays || info.minEncSize == 8 || 1) 1767 return 8; 1768 1769 if (i->ftz || i->saturate || i->join) 1770 return 8; 1771 if (i->rnd != ROUND_N) 1772 return 8; 1773 if (i->predSrc >= 0 && i->op == OP_MAD) 1774 return 8; 1775 1776 if (i->op == OP_PINTERP) { 1777 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work 1778 return 8; 1779 } else 1780 if (i->op == OP_MOV && i->lanes != 0xf) { 1781 return 8; 1782 } 1783 1784 for (int s = 0; i->srcExists(s); ++s) { 1785 if (i->src(s).isIndirect(0)) 1786 return 8; 1787 1788 if (i->src(s).getFile() == FILE_MEMORY_CONST) { 1789 if (SDATA(i->src(s)).offset >= 0x100) 1790 return 8; 1791 if (i->getSrc(s)->reg.fileIndex > 1 && 1792 i->getSrc(s)->reg.fileIndex != 16) 1793 return 8; 1794 } else 1795 if (i->src(s).getFile() == FILE_IMMEDIATE) { 1796 if (i->dType == TYPE_F32) { 1797 if (SDATA(i->src(s)).u32 >= 0x100) 1798 return 8; 1799 } else { 1800 if (SDATA(i->src(s)).u32 > 0xff) 1801 return 8; 1802 } 1803 } 1804 1805 if (i->op == OP_CVT) 1806 continue; 1807 if (i->src(s).mod != Modifier(0)) { 1808 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS)) 1809 if (i->op != OP_RSQ) 1810 return 8; 1811 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG)) 1812 if (i->op != OP_ADD || s != 0) 1813 return 8; 1814 } 1815 } 1816 1817 return 4; 1818 } 1819 1820 // Simplified, erring on safe side. 1821 class SchedDataCalculator : public Pass 1822 { 1823 public: 1824 SchedDataCalculator(const Target *targ) : targ(targ) { } 1825 1826 private: 1827 struct RegScores 1828 { 1829 struct Resource { 1830 int st[DATA_FILE_COUNT]; // LD to LD delay 3 1831 int ld[DATA_FILE_COUNT]; // ST to ST delay 3 1832 int tex; // TEX to non-TEX delay 17 (0x11) 1833 int sfu; // SFU to SFU delay 3 (except PRE-ops) 1834 int imul; // integer MUL to MUL delay 3 1835 } res; 1836 struct ScoreData { 1837 int r[64]; 1838 int p[8]; 1839 int c; 1840 } rd, wr; 1841 int base; 1842 1843 void rebase(const int base) 1844 { 1845 const int delta = this->base - base; 1846 if (!delta) 1847 return; 1848 this->base = 0; 1849 1850 for (int i = 0; i < 64; ++i) { 1851 rd.r[i] += delta; 1852 wr.r[i] += delta; 1853 } 1854 for (int i = 0; i < 8; ++i) { 1855 rd.p[i] += delta; 1856 wr.p[i] += delta; 1857 } 1858 rd.c += delta; 1859 wr.c += delta; 1860 1861 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { 1862 res.ld[f] += delta; 1863 res.st[f] += delta; 1864 } 1865 res.sfu += delta; 1866 res.imul += delta; 1867 res.tex += delta; 1868 } 1869 void wipe() 1870 { 1871 memset(&rd, 0, sizeof(rd)); 1872 memset(&wr, 0, sizeof(wr)); 1873 memset(&res, 0, sizeof(res)); 1874 } 1875 int getLatest(const ScoreData& d) const 1876 { 1877 int max = 0; 1878 for (int i = 0; i < 64; ++i) 1879 if (d.r[i] > max) 1880 max = d.r[i]; 1881 for (int i = 0; i < 8; ++i) 1882 if (d.p[i] > max) 1883 max = d.p[i]; 1884 if (d.c > max) 1885 max = d.c; 1886 return max; 1887 } 1888 inline int getLatestRd() const 1889 { 1890 return getLatest(rd); 1891 } 1892 inline int getLatestWr() const 1893 { 1894 return getLatest(wr); 1895 } 1896 inline int getLatest() const 1897 { 1898 const int a = getLatestRd(); 1899 const int b = getLatestWr(); 1900 1901 int max = MAX2(a, b); 1902 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { 1903 max = MAX2(res.ld[f], max); 1904 max = MAX2(res.st[f], max); 1905 } 1906 max = MAX2(res.sfu, max); 1907 max = MAX2(res.imul, max); 1908 max = MAX2(res.tex, max); 1909 return max; 1910 } 1911 void setMax(const RegScores *that) 1912 { 1913 for (int i = 0; i < 64; ++i) { 1914 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); 1915 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); 1916 } 1917 for (int i = 0; i < 8; ++i) { 1918 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); 1919 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); 1920 } 1921 rd.c = MAX2(rd.c, that->rd.c); 1922 wr.c = MAX2(wr.c, that->wr.c); 1923 1924 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { 1925 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]); 1926 res.st[f] = MAX2(res.st[f], that->res.st[f]); 1927 } 1928 res.sfu = MAX2(res.sfu, that->res.sfu); 1929 res.imul = MAX2(res.imul, that->res.imul); 1930 res.tex = MAX2(res.tex, that->res.tex); 1931 } 1932 void print(int cycle) 1933 { 1934 for (int i = 0; i < 64; ++i) { 1935 if (rd.r[i] > cycle) 1936 INFO("rd $r%i @ %i\n", i, rd.r[i]); 1937 if (wr.r[i] > cycle) 1938 INFO("wr $r%i @ %i\n", i, wr.r[i]); 1939 } 1940 for (int i = 0; i < 8; ++i) { 1941 if (rd.p[i] > cycle) 1942 INFO("rd $p%i @ %i\n", i, rd.p[i]); 1943 if (wr.p[i] > cycle) 1944 INFO("wr $p%i @ %i\n", i, wr.p[i]); 1945 } 1946 if (rd.c > cycle) 1947 INFO("rd $c @ %i\n", rd.c); 1948 if (wr.c > cycle) 1949 INFO("wr $c @ %i\n", wr.c); 1950 if (res.sfu > cycle) 1951 INFO("sfu @ %i\n", res.sfu); 1952 if (res.imul > cycle) 1953 INFO("imul @ %i\n", res.imul); 1954 if (res.tex > cycle) 1955 INFO("tex @ %i\n", res.tex); 1956 } 1957 }; 1958 1959 RegScores *score; // for current BB 1960 std::vector<RegScores> scoreBoards; 1961 int cycle; 1962 int prevData; 1963 operation prevOp; 1964 1965 const Target *targ; 1966 1967 bool visit(Function *); 1968 bool visit(BasicBlock *); 1969 1970 void commitInsn(const Instruction *, int cycle); 1971 int calcDelay(const Instruction *, int cycle) const; 1972 void setDelay(Instruction *, int delay, Instruction *next); 1973 1974 void recordRd(const Value *, const int ready); 1975 void recordWr(const Value *, const int ready); 1976 void checkRd(const Value *, int cycle, int& delay) const; 1977 void checkWr(const Value *, int cycle, int& delay) const; 1978 1979 int getCycles(const Instruction *, int origDelay) const; 1980 }; 1981 1982 void 1983 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next) 1984 { 1985 if (insn->op == OP_EXIT) 1986 delay = MAX2(delay, 14); 1987 1988 if (insn->op == OP_TEXBAR) { 1989 // TODO: except if results not used before EXIT 1990 insn->sched = 0xc2; 1991 } else 1992 if (insn->op == OP_JOIN || insn->join) { 1993 insn->sched = 0x00; 1994 } else 1995 if (delay >= 0 || prevData == 0x04 || 1996 !next || !targ->canDualIssue(insn, next)) { 1997 insn->sched = static_cast<uint8_t>(MAX2(delay, 0)); 1998 if (prevOp == OP_EXPORT) 1999 insn->sched |= 0x40; 2000 else 2001 insn->sched |= 0x20; 2002 } else { 2003 insn->sched = 0x04; // dual-issue 2004 } 2005 2006 if (prevData != 0x04 || prevOp != OP_EXPORT) 2007 if (insn->sched != 0x04 || insn->op == OP_EXPORT) 2008 prevOp = insn->op; 2009 2010 prevData = insn->sched; 2011 } 2012 2013 int 2014 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const 2015 { 2016 if (insn->sched & 0x80) { 2017 int c = (insn->sched & 0x0f) * 2 + 1; 2018 if (insn->op == OP_TEXBAR && origDelay > 0) 2019 c += origDelay; 2020 return c; 2021 } 2022 if (insn->sched & 0x60) 2023 return (insn->sched & 0x1f) + 1; 2024 return (insn->sched == 0x04) ? 0 : 32; 2025 } 2026 2027 bool 2028 SchedDataCalculator::visit(Function *func) 2029 { 2030 scoreBoards.resize(func->cfg.getSize()); 2031 for (size_t i = 0; i < scoreBoards.size(); ++i) 2032 scoreBoards[i].wipe(); 2033 return true; 2034 } 2035 2036 bool 2037 SchedDataCalculator::visit(BasicBlock *bb) 2038 { 2039 Instruction *insn; 2040 Instruction *next = NULL; 2041 2042 int cycle = 0; 2043 2044 prevData = 0x00; 2045 prevOp = OP_NOP; 2046 score = &scoreBoards.at(bb->getId()); 2047 2048 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 2049 BasicBlock *in = BasicBlock::get(ei.getNode()); 2050 if (in->getExit()) { 2051 if (prevData != 0x04) 2052 prevData = in->getExit()->sched; 2053 prevOp = in->getExit()->op; 2054 } 2055 if (ei.getType() != Graph::Edge::BACK) 2056 score->setMax(&scoreBoards.at(in->getId())); 2057 // back branches will wait until all target dependencies are satisfied 2058 } 2059 if (bb->cfg.incidentCount() > 1) 2060 prevOp = OP_NOP; 2061 2062 #ifdef NVC0_DEBUG_SCHED_DATA 2063 INFO("=== BB:%i initial scores\n", bb->getId()); 2064 score->print(cycle); 2065 #endif 2066 2067 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) { 2068 next = insn->next; 2069 2070 commitInsn(insn, cycle); 2071 int delay = calcDelay(next, cycle); 2072 setDelay(insn, delay, next); 2073 cycle += getCycles(insn, delay); 2074 2075 #ifdef NVC0_DEBUG_SCHED_DATA 2076 INFO("cycle %i, sched %02x\n", cycle, insn->sched); 2077 insn->print(); 2078 next->print(); 2079 #endif 2080 } 2081 if (!insn) 2082 return true; 2083 commitInsn(insn, cycle); 2084 2085 int bbDelay = -1; 2086 2087 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 2088 BasicBlock *out = BasicBlock::get(ei.getNode()); 2089 2090 if (ei.getType() != Graph::Edge::BACK) { 2091 // only test the first instruction of the outgoing block 2092 next = out->getEntry(); 2093 if (next) 2094 bbDelay = MAX2(bbDelay, calcDelay(next, cycle)); 2095 } else { 2096 // wait until all dependencies are satisfied 2097 const int regsFree = score->getLatest(); 2098 next = out->getFirst(); 2099 for (int c = cycle; next && c < regsFree; next = next->next) { 2100 bbDelay = MAX2(bbDelay, calcDelay(next, c)); 2101 c += getCycles(next, bbDelay); 2102 } 2103 next = NULL; 2104 } 2105 } 2106 if (bb->cfg.outgoingCount() != 1) 2107 next = NULL; 2108 setDelay(insn, bbDelay, next); 2109 cycle += getCycles(insn, bbDelay); 2110 2111 score->rebase(cycle); // common base for initializing out blocks' scores 2112 return true; 2113 } 2114 2115 #define NVE4_MAX_ISSUE_DELAY 0x1f 2116 int 2117 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const 2118 { 2119 int delay = 0, ready = cycle; 2120 2121 for (int s = 0; insn->srcExists(s); ++s) 2122 checkRd(insn->getSrc(s), cycle, delay); 2123 // WAR & WAW don't seem to matter 2124 // for (int s = 0; insn->srcExists(s); ++s) 2125 // recordRd(insn->getSrc(s), cycle); 2126 2127 switch (Target::getOpClass(insn->op)) { 2128 case OPCLASS_SFU: 2129 ready = score->res.sfu; 2130 break; 2131 case OPCLASS_ARITH: 2132 if (insn->op == OP_MUL && !isFloatType(insn->dType)) 2133 ready = score->res.imul; 2134 break; 2135 case OPCLASS_TEXTURE: 2136 ready = score->res.tex; 2137 break; 2138 case OPCLASS_LOAD: 2139 ready = score->res.ld[insn->src(0).getFile()]; 2140 break; 2141 case OPCLASS_STORE: 2142 ready = score->res.st[insn->src(0).getFile()]; 2143 break; 2144 default: 2145 break; 2146 } 2147 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE) 2148 ready = MAX2(ready, score->res.tex); 2149 2150 delay = MAX2(delay, ready - cycle); 2151 2152 // if can issue next cycle, delay is 0, not 1 2153 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY); 2154 } 2155 2156 void 2157 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle) 2158 { 2159 const int ready = cycle + targ->getLatency(insn); 2160 2161 for (int d = 0; insn->defExists(d); ++d) 2162 recordWr(insn->getDef(d), ready); 2163 // WAR & WAW don't seem to matter 2164 // for (int s = 0; insn->srcExists(s); ++s) 2165 // recordRd(insn->getSrc(s), cycle); 2166 2167 switch (Target::getOpClass(insn->op)) { 2168 case OPCLASS_SFU: 2169 score->res.sfu = cycle + 4; 2170 break; 2171 case OPCLASS_ARITH: 2172 if (insn->op == OP_MUL && !isFloatType(insn->dType)) 2173 score->res.imul = cycle + 4; 2174 break; 2175 case OPCLASS_TEXTURE: 2176 score->res.tex = cycle + 18; 2177 break; 2178 case OPCLASS_LOAD: 2179 if (insn->src(0).getFile() == FILE_MEMORY_CONST) 2180 break; 2181 score->res.ld[insn->src(0).getFile()] = cycle + 4; 2182 score->res.st[insn->src(0).getFile()] = ready; 2183 break; 2184 case OPCLASS_STORE: 2185 score->res.st[insn->src(0).getFile()] = cycle + 4; 2186 score->res.ld[insn->src(0).getFile()] = ready; 2187 break; 2188 case OPCLASS_OTHER: 2189 if (insn->op == OP_TEXBAR) 2190 score->res.tex = cycle; 2191 break; 2192 default: 2193 break; 2194 } 2195 2196 #ifdef NVC0_DEBUG_SCHED_DATA 2197 score->print(cycle); 2198 #endif 2199 } 2200 2201 void 2202 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const 2203 { 2204 int ready = cycle; 2205 int a, b; 2206 2207 switch (v->reg.file) { 2208 case FILE_GPR: 2209 a = v->reg.data.id; 2210 b = a + v->reg.size / 4; 2211 for (int r = a; r < b; ++r) 2212 ready = MAX2(ready, score->rd.r[r]); 2213 break; 2214 case FILE_PREDICATE: 2215 ready = MAX2(ready, score->rd.p[v->reg.data.id]); 2216 break; 2217 case FILE_FLAGS: 2218 ready = MAX2(ready, score->rd.c); 2219 break; 2220 case FILE_SHADER_INPUT: 2221 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs 2222 case FILE_MEMORY_LOCAL: 2223 case FILE_MEMORY_CONST: 2224 case FILE_MEMORY_SHARED: 2225 case FILE_MEMORY_GLOBAL: 2226 case FILE_SYSTEM_VALUE: 2227 // TODO: any restrictions here ? 2228 break; 2229 case FILE_IMMEDIATE: 2230 break; 2231 default: 2232 assert(0); 2233 break; 2234 } 2235 if (cycle < ready) 2236 delay = MAX2(delay, ready - cycle); 2237 } 2238 2239 void 2240 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const 2241 { 2242 int ready = cycle; 2243 int a, b; 2244 2245 switch (v->reg.file) { 2246 case FILE_GPR: 2247 a = v->reg.data.id; 2248 b = a + v->reg.size / 4; 2249 for (int r = a; r < b; ++r) 2250 ready = MAX2(ready, score->wr.r[r]); 2251 break; 2252 case FILE_PREDICATE: 2253 ready = MAX2(ready, score->wr.p[v->reg.data.id]); 2254 break; 2255 default: 2256 assert(v->reg.file == FILE_FLAGS); 2257 ready = MAX2(ready, score->wr.c); 2258 break; 2259 } 2260 if (cycle < ready) 2261 delay = MAX2(delay, ready - cycle); 2262 } 2263 2264 void 2265 SchedDataCalculator::recordWr(const Value *v, const int ready) 2266 { 2267 int a = v->reg.data.id; 2268 2269 if (v->reg.file == FILE_GPR) { 2270 int b = a + v->reg.size / 4; 2271 for (int r = a; r < b; ++r) 2272 score->rd.r[r] = ready; 2273 } else 2274 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry) 2275 if (v->reg.file == FILE_PREDICATE) { 2276 score->rd.p[a] = ready + 4; 2277 } else { 2278 assert(v->reg.file == FILE_FLAGS); 2279 score->rd.c = ready + 4; 2280 } 2281 } 2282 2283 void 2284 SchedDataCalculator::recordRd(const Value *v, const int ready) 2285 { 2286 int a = v->reg.data.id; 2287 2288 if (v->reg.file == FILE_GPR) { 2289 int b = a + v->reg.size / 4; 2290 for (int r = a; r < b; ++r) 2291 score->wr.r[r] = ready; 2292 } else 2293 if (v->reg.file == FILE_PREDICATE) { 2294 score->wr.p[a] = ready; 2295 } else 2296 if (v->reg.file == FILE_FLAGS) { 2297 score->wr.c = ready; 2298 } 2299 } 2300 2301 void 2302 CodeEmitterNVC0::prepareEmission(Function *func) 2303 { 2304 const Target *targ = func->getProgram()->getTarget(); 2305 2306 CodeEmitter::prepareEmission(func); 2307 2308 if (targ->hasSWSched) { 2309 SchedDataCalculator sched(targ); 2310 sched.run(func, true, true); 2311 } 2312 } 2313 2314 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) 2315 : CodeEmitter(target), 2316 writeIssueDelays(target->hasSWSched) 2317 { 2318 code = NULL; 2319 codeSize = codeSizeLimit = 0; 2320 relocInfo = NULL; 2321 } 2322 2323 CodeEmitter * 2324 TargetNVC0::getCodeEmitter(Program::Type type) 2325 { 2326 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this); 2327 emit->setProgramType(type); 2328 return emit; 2329 } 2330 2331 } // namespace nv50_ir 2332