1 /* 2 * Copyright 2014 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Ben Skeggs <bskeggs (at) redhat.com> 23 */ 24 25 #include "codegen/nv50_ir_target_gm107.h" 26 27 //#define GM107_DEBUG_SCHED_DATA 28 29 namespace nv50_ir { 30 31 class CodeEmitterGM107 : public CodeEmitter 32 { 33 public: 34 CodeEmitterGM107(const TargetGM107 *); 35 36 virtual bool emitInstruction(Instruction *); 37 virtual uint32_t getMinEncodingSize(const Instruction *) const; 38 39 virtual void prepareEmission(Program *); 40 virtual void prepareEmission(Function *); 41 42 inline void setProgramType(Program::Type pType) { progType = pType; } 43 44 private: 45 const TargetGM107 *targGM107; 46 47 Program::Type progType; 48 49 const Instruction *insn; 50 const bool writeIssueDelays; 51 uint32_t *data; 52 53 private: 54 inline void emitField(uint32_t *, int, int, uint32_t); 55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); } 56 57 inline void emitInsn(uint32_t, bool); 58 inline void emitInsn(uint32_t o) { emitInsn(o, true); } 59 inline void emitPred(); 60 inline void emitGPR(int, const Value *); 61 inline void emitGPR(int pos) { 62 emitGPR(pos, (const Value *)NULL); 63 } 64 inline void emitGPR(int pos, const ValueRef &ref) { 65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); 66 } 67 inline void emitGPR(int pos, const ValueRef *ref) { 68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); 69 } 70 inline void emitGPR(int pos, const ValueDef &def) { 71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); 72 } 73 inline void emitSYS(int, const Value *); 74 inline void emitSYS(int pos, const ValueRef &ref) { 75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); 76 } 77 inline void emitPRED(int, const Value *); 78 inline void emitPRED(int pos) { 79 emitPRED(pos, (const Value *)NULL); 80 } 81 inline void emitPRED(int pos, const ValueRef &ref) { 82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); 83 } 84 inline void emitPRED(int pos, const ValueDef &def) { 85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); 86 } 87 inline void emitADDR(int, int, int, int, const ValueRef &); 88 inline void emitCBUF(int, int, int, int, int, const ValueRef &); 89 inline bool longIMMD(const ValueRef &); 90 inline void emitIMMD(int, int, const ValueRef &); 91 92 void emitCond3(int, CondCode); 93 void emitCond4(int, CondCode); 94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); } 95 inline void emitO(int); 96 inline void emitP(int); 97 inline void emitSAT(int); 98 inline void emitCC(int); 99 inline void emitX(int); 100 inline void emitABS(int, const ValueRef &); 101 inline void emitNEG(int, const ValueRef &); 102 inline void emitNEG2(int, const ValueRef &, const ValueRef &); 103 inline void emitFMZ(int, int); 104 inline void emitRND(int, RoundMode, int); 105 inline void emitRND(int pos) { 106 emitRND(pos, insn->rnd, -1); 107 } 108 inline void emitPDIV(int); 109 inline void emitINV(int, const ValueRef &); 110 111 void emitEXIT(); 112 void emitBRA(); 113 void emitCAL(); 114 void emitPCNT(); 115 void emitCONT(); 116 void emitPBK(); 117 void emitBRK(); 118 void emitPRET(); 119 void emitRET(); 120 void emitSSY(); 121 void emitSYNC(); 122 void emitSAM(); 123 void emitRAM(); 124 125 void emitMOV(); 126 void emitS2R(); 127 void emitF2F(); 128 void emitF2I(); 129 void emitI2F(); 130 void emitI2I(); 131 void emitSEL(); 132 void emitSHFL(); 133 134 void emitDADD(); 135 void emitDMUL(); 136 void emitDFMA(); 137 void emitDMNMX(); 138 void emitDSET(); 139 void emitDSETP(); 140 141 void emitFADD(); 142 void emitFMUL(); 143 void emitFFMA(); 144 void emitMUFU(); 145 void emitFMNMX(); 146 void emitRRO(); 147 void emitFCMP(); 148 void emitFSET(); 149 void emitFSETP(); 150 void emitFSWZADD(); 151 152 void emitLOP(); 153 void emitNOT(); 154 void emitIADD(); 155 void emitIMUL(); 156 void emitIMAD(); 157 void emitISCADD(); 158 void emitIMNMX(); 159 void emitICMP(); 160 void emitISET(); 161 void emitISETP(); 162 void emitSHL(); 163 void emitSHR(); 164 void emitPOPC(); 165 void emitBFI(); 166 void emitBFE(); 167 void emitFLO(); 168 169 void emitLDSTs(int, DataType); 170 void emitLDSTc(int); 171 void emitLDC(); 172 void emitLDL(); 173 void emitLDS(); 174 void emitLD(); 175 void emitSTL(); 176 void emitSTS(); 177 void emitST(); 178 void emitALD(); 179 void emitAST(); 180 void emitISBERD(); 181 void emitAL2P(); 182 void emitIPA(); 183 void emitATOM(); 184 void emitATOMS(); 185 void emitRED(); 186 void emitCCTL(); 187 188 void emitPIXLD(); 189 190 void emitTEXs(int); 191 void emitTEX(); 192 void emitTLD(); 193 void emitTLD4(); 194 void emitTXD(); 195 void emitTXQ(); 196 void emitTMML(); 197 void emitDEPBAR(); 198 199 void emitNOP(); 200 void emitKIL(); 201 void emitOUT(); 202 203 void emitBAR(); 204 void emitMEMBAR(); 205 206 void emitVOTE(); 207 208 void emitSUTarget(); 209 void emitSUHandle(const int s); 210 void emitSUSTx(); 211 void emitSULDx(); 212 void emitSUREDx(); 213 }; 214 215 /******************************************************************************* 216 * general instruction layout/fields 217 ******************************************************************************/ 218 219 void 220 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v) 221 { 222 if (b >= 0) { 223 uint32_t m = ((1ULL << s) - 1); 224 uint64_t d = (uint64_t)(v & m) << b; 225 assert(!(v & ~m) || (v & ~m) == ~m); 226 data[1] |= d >> 32; 227 data[0] |= d; 228 } 229 } 230 231 void 232 CodeEmitterGM107::emitPred() 233 { 234 if (insn->predSrc >= 0) { 235 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); 236 emitField(19, 1, insn->cc == CC_NOT_P); 237 } else { 238 emitField(16, 3, 7); 239 } 240 } 241 242 void 243 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred) 244 { 245 code[0] = 0x00000000; 246 code[1] = hi; 247 if (pred) 248 emitPred(); 249 } 250 251 void 252 CodeEmitterGM107::emitGPR(int pos, const Value *val) 253 { 254 emitField(pos, 8, val ? val->reg.data.id : 255); 255 } 256 257 void 258 CodeEmitterGM107::emitSYS(int pos, const Value *val) 259 { 260 int id = val ? val->reg.data.id : -1; 261 262 switch (id) { 263 case SV_LANEID : id = 0x00; break; 264 case SV_VERTEX_COUNT : id = 0x10; break; 265 case SV_INVOCATION_ID : id = 0x11; break; 266 case SV_THREAD_KILL : id = 0x13; break; 267 case SV_INVOCATION_INFO: id = 0x1d; break; 268 case SV_TID : id = 0x21 + val->reg.data.sv.index; break; 269 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; 270 default: 271 assert(!"invalid system value"); 272 id = 0; 273 break; 274 } 275 276 emitField(pos, 8, id); 277 } 278 279 void 280 CodeEmitterGM107::emitPRED(int pos, const Value *val) 281 { 282 emitField(pos, 3, val ? val->reg.data.id : 7); 283 } 284 285 void 286 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr, 287 const ValueRef &ref) 288 { 289 const Value *v = ref.get(); 290 assert(!(v->reg.data.offset & ((1 << shr) - 1))); 291 if (gpr >= 0) 292 emitGPR(gpr, ref.getIndirect(0)); 293 emitField(off, len, v->reg.data.offset >> shr); 294 } 295 296 void 297 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr, 298 const ValueRef &ref) 299 { 300 const Value *v = ref.get(); 301 const Symbol *s = v->asSym(); 302 303 assert(!(s->reg.data.offset & ((1 << shr) - 1))); 304 305 emitField(buf, 5, v->reg.fileIndex); 306 if (gpr >= 0) 307 emitGPR(gpr, ref.getIndirect(0)); 308 emitField(off, 16, s->reg.data.offset >> shr); 309 } 310 311 bool 312 CodeEmitterGM107::longIMMD(const ValueRef &ref) 313 { 314 if (ref.getFile() == FILE_IMMEDIATE) { 315 const ImmediateValue *imm = ref.get()->asImm(); 316 if (isFloatType(insn->sType)) { 317 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000) 318 return true; 319 } else { 320 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 && 321 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000) 322 return true; 323 } 324 } 325 return false; 326 } 327 328 void 329 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref) 330 { 331 const ImmediateValue *imm = ref.get()->asImm(); 332 uint32_t val = imm->reg.data.u32; 333 334 if (len == 19) { 335 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) { 336 assert(!(val & 0x00000fff)); 337 val >>= 12; 338 } else if (insn->sType == TYPE_F64) { 339 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL)); 340 val = imm->reg.data.u64 >> 44; 341 } 342 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000); 343 emitField( 56, 1, (val & 0x80000) >> 19); 344 emitField(pos, len, (val & 0x7ffff)); 345 } else { 346 emitField(pos, len, val); 347 } 348 } 349 350 /******************************************************************************* 351 * modifiers 352 ******************************************************************************/ 353 354 void 355 CodeEmitterGM107::emitCond3(int pos, CondCode code) 356 { 357 int data = 0; 358 359 switch (code) { 360 case CC_FL : data = 0x00; break; 361 case CC_LTU: 362 case CC_LT : data = 0x01; break; 363 case CC_EQU: 364 case CC_EQ : data = 0x02; break; 365 case CC_LEU: 366 case CC_LE : data = 0x03; break; 367 case CC_GTU: 368 case CC_GT : data = 0x04; break; 369 case CC_NEU: 370 case CC_NE : data = 0x05; break; 371 case CC_GEU: 372 case CC_GE : data = 0x06; break; 373 case CC_TR : data = 0x07; break; 374 default: 375 assert(!"invalid cond3"); 376 break; 377 } 378 379 emitField(pos, 3, data); 380 } 381 382 void 383 CodeEmitterGM107::emitCond4(int pos, CondCode code) 384 { 385 int data = 0; 386 387 switch (code) { 388 case CC_FL: data = 0x00; break; 389 case CC_LT: data = 0x01; break; 390 case CC_EQ: data = 0x02; break; 391 case CC_LE: data = 0x03; break; 392 case CC_GT: data = 0x04; break; 393 case CC_NE: data = 0x05; break; 394 case CC_GE: data = 0x06; break; 395 // case CC_NUM: data = 0x07; break; 396 // case CC_NAN: data = 0x08; break; 397 case CC_LTU: data = 0x09; break; 398 case CC_EQU: data = 0x0a; break; 399 case CC_LEU: data = 0x0b; break; 400 case CC_GTU: data = 0x0c; break; 401 case CC_NEU: data = 0x0d; break; 402 case CC_GEU: data = 0x0e; break; 403 case CC_TR: data = 0x0f; break; 404 default: 405 assert(!"invalid cond4"); 406 break; 407 } 408 409 emitField(pos, 4, data); 410 } 411 412 void 413 CodeEmitterGM107::emitO(int pos) 414 { 415 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); 416 } 417 418 void 419 CodeEmitterGM107::emitP(int pos) 420 { 421 emitField(pos, 1, insn->perPatch); 422 } 423 424 void 425 CodeEmitterGM107::emitSAT(int pos) 426 { 427 emitField(pos, 1, insn->saturate); 428 } 429 430 void 431 CodeEmitterGM107::emitCC(int pos) 432 { 433 emitField(pos, 1, insn->flagsDef >= 0); 434 } 435 436 void 437 CodeEmitterGM107::emitX(int pos) 438 { 439 emitField(pos, 1, insn->flagsSrc >= 0); 440 } 441 442 void 443 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref) 444 { 445 emitField(pos, 1, ref.mod.abs()); 446 } 447 448 void 449 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref) 450 { 451 emitField(pos, 1, ref.mod.neg()); 452 } 453 454 void 455 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b) 456 { 457 emitField(pos, 1, a.mod.neg() ^ b.mod.neg()); 458 } 459 460 void 461 CodeEmitterGM107::emitFMZ(int pos, int len) 462 { 463 emitField(pos, len, insn->dnz << 1 | insn->ftz); 464 } 465 466 void 467 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip) 468 { 469 int rm = 0, ri = 0; 470 switch (rnd) { 471 case ROUND_NI: ri = 1; 472 case ROUND_N : rm = 0; break; 473 case ROUND_MI: ri = 1; 474 case ROUND_M : rm = 1; break; 475 case ROUND_PI: ri = 1; 476 case ROUND_P : rm = 2; break; 477 case ROUND_ZI: ri = 1; 478 case ROUND_Z : rm = 3; break; 479 default: 480 assert(!"invalid round mode"); 481 break; 482 } 483 emitField(rip, 1, ri); 484 emitField(rmp, 2, rm); 485 } 486 487 void 488 CodeEmitterGM107::emitPDIV(int pos) 489 { 490 assert(insn->postFactor >= -3 && insn->postFactor <= 3); 491 if (insn->postFactor > 0) 492 emitField(pos, 3, 7 - insn->postFactor); 493 else 494 emitField(pos, 3, 0 - insn->postFactor); 495 } 496 497 void 498 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref) 499 { 500 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); 501 } 502 503 /******************************************************************************* 504 * control flow 505 ******************************************************************************/ 506 507 void 508 CodeEmitterGM107::emitEXIT() 509 { 510 emitInsn (0xe3000000); 511 emitCond5(0x00, CC_TR); 512 } 513 514 void 515 CodeEmitterGM107::emitBRA() 516 { 517 const FlowInstruction *insn = this->insn->asFlow(); 518 int gpr = -1; 519 520 if (insn->indirect) { 521 if (insn->absolute) 522 emitInsn(0xe2000000); // JMX 523 else 524 emitInsn(0xe2500000); // BRX 525 gpr = 0x08; 526 } else { 527 if (insn->absolute) 528 emitInsn(0xe2100000); // JMP 529 else 530 emitInsn(0xe2400000); // BRA 531 emitField(0x07, 1, insn->allWarp); 532 } 533 534 emitField(0x06, 1, insn->limit); 535 emitCond5(0x00, CC_TR); 536 537 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 538 int32_t pos = insn->target.bb->binPos; 539 if (writeIssueDelays && !(pos & 0x1f)) 540 pos += 8; 541 if (!insn->absolute) 542 emitField(0x14, 24, pos - (codeSize + 8)); 543 else 544 emitField(0x14, 32, pos); 545 } else { 546 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0)); 547 emitField(0x05, 1, 1); 548 } 549 } 550 551 void 552 CodeEmitterGM107::emitCAL() 553 { 554 const FlowInstruction *insn = this->insn->asFlow(); 555 556 if (insn->absolute) { 557 emitInsn(0xe2200000, 0); // JCAL 558 } else { 559 emitInsn(0xe2600000, 0); // CAL 560 } 561 562 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 563 if (!insn->absolute) 564 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); 565 else { 566 if (insn->builtin) { 567 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin); 568 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20); 569 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12); 570 } else { 571 emitField(0x14, 32, insn->target.bb->binPos); 572 } 573 } 574 } else { 575 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); 576 emitField(0x05, 1, 1); 577 } 578 } 579 580 void 581 CodeEmitterGM107::emitPCNT() 582 { 583 const FlowInstruction *insn = this->insn->asFlow(); 584 585 emitInsn(0xe2b00000, 0); 586 587 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 588 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); 589 } else { 590 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); 591 emitField(0x05, 1, 1); 592 } 593 } 594 595 void 596 CodeEmitterGM107::emitCONT() 597 { 598 emitInsn (0xe3500000); 599 emitCond5(0x00, CC_TR); 600 } 601 602 void 603 CodeEmitterGM107::emitPBK() 604 { 605 const FlowInstruction *insn = this->insn->asFlow(); 606 607 emitInsn(0xe2a00000, 0); 608 609 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 610 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); 611 } else { 612 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); 613 emitField(0x05, 1, 1); 614 } 615 } 616 617 void 618 CodeEmitterGM107::emitBRK() 619 { 620 emitInsn (0xe3400000); 621 emitCond5(0x00, CC_TR); 622 } 623 624 void 625 CodeEmitterGM107::emitPRET() 626 { 627 const FlowInstruction *insn = this->insn->asFlow(); 628 629 emitInsn(0xe2700000, 0); 630 631 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 632 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); 633 } else { 634 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); 635 emitField(0x05, 1, 1); 636 } 637 } 638 639 void 640 CodeEmitterGM107::emitRET() 641 { 642 emitInsn (0xe3200000); 643 emitCond5(0x00, CC_TR); 644 } 645 646 void 647 CodeEmitterGM107::emitSSY() 648 { 649 const FlowInstruction *insn = this->insn->asFlow(); 650 651 emitInsn(0xe2900000, 0); 652 653 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { 654 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); 655 } else { 656 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); 657 emitField(0x05, 1, 1); 658 } 659 } 660 661 void 662 CodeEmitterGM107::emitSYNC() 663 { 664 emitInsn (0xf0f80000); 665 emitCond5(0x00, CC_TR); 666 } 667 668 void 669 CodeEmitterGM107::emitSAM() 670 { 671 emitInsn(0xe3700000, 0); 672 } 673 674 void 675 CodeEmitterGM107::emitRAM() 676 { 677 emitInsn(0xe3800000, 0); 678 } 679 680 /******************************************************************************* 681 * predicate/cc 682 ******************************************************************************/ 683 684 /******************************************************************************* 685 * movement / conversion 686 ******************************************************************************/ 687 688 void 689 CodeEmitterGM107::emitMOV() 690 { 691 if (insn->src(0).getFile() != FILE_IMMEDIATE) { 692 switch (insn->src(0).getFile()) { 693 case FILE_GPR: 694 if (insn->def(0).getFile() == FILE_PREDICATE) { 695 emitInsn(0x5b6a0000); 696 emitGPR (0x08); 697 } else { 698 emitInsn(0x5c980000); 699 } 700 emitGPR (0x14, insn->src(0)); 701 break; 702 case FILE_MEMORY_CONST: 703 emitInsn(0x4c980000); 704 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 705 break; 706 case FILE_IMMEDIATE: 707 emitInsn(0x38980000); 708 emitIMMD(0x14, 19, insn->src(0)); 709 break; 710 case FILE_PREDICATE: 711 emitInsn(0x50880000); 712 emitPRED(0x0c, insn->src(0)); 713 emitPRED(0x1d); 714 emitPRED(0x27); 715 break; 716 default: 717 assert(!"bad src file"); 718 break; 719 } 720 if (insn->def(0).getFile() != FILE_PREDICATE && 721 insn->src(0).getFile() != FILE_PREDICATE) 722 emitField(0x27, 4, insn->lanes); 723 } else { 724 emitInsn (0x01000000); 725 emitIMMD (0x14, 32, insn->src(0)); 726 emitField(0x0c, 4, insn->lanes); 727 } 728 729 if (insn->def(0).getFile() == FILE_PREDICATE) { 730 emitPRED(0x27); 731 emitPRED(0x03, insn->def(0)); 732 emitPRED(0x00); 733 } else { 734 emitGPR(0x00, insn->def(0)); 735 } 736 } 737 738 void 739 CodeEmitterGM107::emitS2R() 740 { 741 emitInsn(0xf0c80000); 742 emitSYS (0x14, insn->src(0)); 743 emitGPR (0x00, insn->def(0)); 744 } 745 746 void 747 CodeEmitterGM107::emitF2F() 748 { 749 RoundMode rnd = insn->rnd; 750 751 switch (insn->op) { 752 case OP_FLOOR: rnd = ROUND_MI; break; 753 case OP_CEIL : rnd = ROUND_PI; break; 754 case OP_TRUNC: rnd = ROUND_ZI; break; 755 default: 756 break; 757 } 758 759 switch (insn->src(0).getFile()) { 760 case FILE_GPR: 761 emitInsn(0x5ca80000); 762 emitGPR (0x14, insn->src(0)); 763 break; 764 case FILE_MEMORY_CONST: 765 emitInsn(0x4ca80000); 766 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 767 break; 768 case FILE_IMMEDIATE: 769 emitInsn(0x38a80000); 770 emitIMMD(0x14, 19, insn->src(0)); 771 break; 772 default: 773 assert(!"bad src0 file"); 774 break; 775 } 776 777 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate); 778 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); 779 emitCC (0x2f); 780 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); 781 emitFMZ (0x2c, 1); 782 emitField(0x29, 1, insn->subOp); 783 emitRND (0x27, rnd, 0x2a); 784 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); 785 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); 786 emitGPR (0x00, insn->def(0)); 787 } 788 789 void 790 CodeEmitterGM107::emitF2I() 791 { 792 RoundMode rnd = insn->rnd; 793 794 switch (insn->op) { 795 case OP_FLOOR: rnd = ROUND_M; break; 796 case OP_CEIL : rnd = ROUND_P; break; 797 case OP_TRUNC: rnd = ROUND_Z; break; 798 default: 799 break; 800 } 801 802 switch (insn->src(0).getFile()) { 803 case FILE_GPR: 804 emitInsn(0x5cb00000); 805 emitGPR (0x14, insn->src(0)); 806 break; 807 case FILE_MEMORY_CONST: 808 emitInsn(0x4cb00000); 809 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 810 break; 811 case FILE_IMMEDIATE: 812 emitInsn(0x38b00000); 813 emitIMMD(0x14, 19, insn->src(0)); 814 break; 815 default: 816 assert(!"bad src0 file"); 817 break; 818 } 819 820 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); 821 emitCC (0x2f); 822 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); 823 emitFMZ (0x2c, 1); 824 emitRND (0x27, rnd, 0x2a); 825 emitField(0x0c, 1, isSignedType(insn->dType)); 826 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); 827 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); 828 emitGPR (0x00, insn->def(0)); 829 } 830 831 void 832 CodeEmitterGM107::emitI2F() 833 { 834 RoundMode rnd = insn->rnd; 835 836 switch (insn->op) { 837 case OP_FLOOR: rnd = ROUND_M; break; 838 case OP_CEIL : rnd = ROUND_P; break; 839 case OP_TRUNC: rnd = ROUND_Z; break; 840 default: 841 break; 842 } 843 844 switch (insn->src(0).getFile()) { 845 case FILE_GPR: 846 emitInsn(0x5cb80000); 847 emitGPR (0x14, insn->src(0)); 848 break; 849 case FILE_MEMORY_CONST: 850 emitInsn(0x4cb80000); 851 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 852 break; 853 case FILE_IMMEDIATE: 854 emitInsn(0x38b80000); 855 emitIMMD(0x14, 19, insn->src(0)); 856 break; 857 default: 858 assert(!"bad src0 file"); 859 break; 860 } 861 862 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); 863 emitCC (0x2f); 864 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); 865 emitField(0x29, 2, insn->subOp); 866 emitRND (0x27, rnd, -1); 867 emitField(0x0d, 1, isSignedType(insn->sType)); 868 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); 869 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); 870 emitGPR (0x00, insn->def(0)); 871 } 872 873 void 874 CodeEmitterGM107::emitI2I() 875 { 876 switch (insn->src(0).getFile()) { 877 case FILE_GPR: 878 emitInsn(0x5ce00000); 879 emitGPR (0x14, insn->src(0)); 880 break; 881 case FILE_MEMORY_CONST: 882 emitInsn(0x4ce00000); 883 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 884 break; 885 case FILE_IMMEDIATE: 886 emitInsn(0x38e00000); 887 emitIMMD(0x14, 19, insn->src(0)); 888 break; 889 default: 890 assert(!"bad src0 file"); 891 break; 892 } 893 894 emitSAT (0x32); 895 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); 896 emitCC (0x2f); 897 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); 898 emitField(0x29, 2, insn->subOp); 899 emitField(0x0d, 1, isSignedType(insn->sType)); 900 emitField(0x0c, 1, isSignedType(insn->dType)); 901 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); 902 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); 903 emitGPR (0x00, insn->def(0)); 904 } 905 906 static void 907 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) 908 { 909 int loc = entry->loc; 910 if (data.force_persample_interp) 911 code[loc + 1] |= 1 << 10; 912 else 913 code[loc + 1] &= ~(1 << 10); 914 } 915 916 void 917 CodeEmitterGM107::emitSEL() 918 { 919 switch (insn->src(1).getFile()) { 920 case FILE_GPR: 921 emitInsn(0x5ca00000); 922 emitGPR (0x14, insn->src(1)); 923 break; 924 case FILE_MEMORY_CONST: 925 emitInsn(0x4ca00000); 926 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 927 break; 928 case FILE_IMMEDIATE: 929 emitInsn(0x38a00000); 930 emitIMMD(0x14, 19, insn->src(1)); 931 break; 932 default: 933 assert(!"bad src1 file"); 934 break; 935 } 936 937 emitINV (0x2a, insn->src(2)); 938 emitPRED(0x27, insn->src(2)); 939 emitGPR (0x08, insn->src(0)); 940 emitGPR (0x00, insn->def(0)); 941 942 if (insn->subOp == 1) { 943 addInterp(0, 0, selpFlip); 944 } 945 } 946 947 void 948 CodeEmitterGM107::emitSHFL() 949 { 950 int type = 0; 951 952 emitInsn (0xef100000); 953 954 switch (insn->src(1).getFile()) { 955 case FILE_GPR: 956 emitGPR(0x14, insn->src(1)); 957 break; 958 case FILE_IMMEDIATE: 959 emitIMMD(0x14, 5, insn->src(1)); 960 type |= 1; 961 break; 962 default: 963 assert(!"invalid src1 file"); 964 break; 965 } 966 967 /*XXX: what is this arg? hardcode immediate for now */ 968 emitField(0x22, 13, 0x1c03); 969 type |= 2; 970 971 emitPRED (0x30); 972 emitField(0x1e, 2, insn->subOp); 973 emitField(0x1c, 2, type); 974 emitGPR (0x08, insn->src(0)); 975 emitGPR (0x00, insn->def(0)); 976 } 977 978 /******************************************************************************* 979 * double 980 ******************************************************************************/ 981 982 void 983 CodeEmitterGM107::emitDADD() 984 { 985 switch (insn->src(1).getFile()) { 986 case FILE_GPR: 987 emitInsn(0x5c700000); 988 emitGPR (0x14, insn->src(1)); 989 break; 990 case FILE_MEMORY_CONST: 991 emitInsn(0x4c700000); 992 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 993 break; 994 case FILE_IMMEDIATE: 995 emitInsn(0x38700000); 996 emitIMMD(0x14, 19, insn->src(1)); 997 break; 998 default: 999 assert(!"bad src1 file"); 1000 break; 1001 } 1002 emitABS(0x31, insn->src(1)); 1003 emitNEG(0x30, insn->src(0)); 1004 emitCC (0x2f); 1005 emitABS(0x2e, insn->src(0)); 1006 emitNEG(0x2d, insn->src(1)); 1007 1008 if (insn->op == OP_SUB) 1009 code[1] ^= 0x00002000; 1010 1011 emitGPR(0x08, insn->src(0)); 1012 emitGPR(0x00, insn->def(0)); 1013 } 1014 1015 void 1016 CodeEmitterGM107::emitDMUL() 1017 { 1018 switch (insn->src(1).getFile()) { 1019 case FILE_GPR: 1020 emitInsn(0x5c800000); 1021 emitGPR (0x14, insn->src(1)); 1022 break; 1023 case FILE_MEMORY_CONST: 1024 emitInsn(0x4c800000); 1025 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1026 break; 1027 case FILE_IMMEDIATE: 1028 emitInsn(0x38800000); 1029 emitIMMD(0x14, 19, insn->src(1)); 1030 break; 1031 default: 1032 assert(!"bad src1 file"); 1033 break; 1034 } 1035 1036 emitNEG2(0x30, insn->src(0), insn->src(1)); 1037 emitCC (0x2f); 1038 emitRND (0x27); 1039 emitGPR (0x08, insn->src(0)); 1040 emitGPR (0x00, insn->def(0)); 1041 } 1042 1043 void 1044 CodeEmitterGM107::emitDFMA() 1045 { 1046 switch(insn->src(2).getFile()) { 1047 case FILE_GPR: 1048 switch (insn->src(1).getFile()) { 1049 case FILE_GPR: 1050 emitInsn(0x5b700000); 1051 emitGPR (0x14, insn->src(1)); 1052 break; 1053 case FILE_MEMORY_CONST: 1054 emitInsn(0x4b700000); 1055 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1056 break; 1057 case FILE_IMMEDIATE: 1058 emitInsn(0x36700000); 1059 emitIMMD(0x14, 19, insn->src(1)); 1060 break; 1061 default: 1062 assert(!"bad src1 file"); 1063 break; 1064 } 1065 emitGPR (0x27, insn->src(2)); 1066 break; 1067 case FILE_MEMORY_CONST: 1068 emitInsn(0x53700000); 1069 emitGPR (0x27, insn->src(1)); 1070 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1071 break; 1072 default: 1073 assert(!"bad src2 file"); 1074 break; 1075 } 1076 1077 emitRND (0x32); 1078 emitNEG (0x31, insn->src(2)); 1079 emitNEG2(0x30, insn->src(0), insn->src(1)); 1080 emitCC (0x2f); 1081 emitGPR (0x08, insn->src(0)); 1082 emitGPR (0x00, insn->def(0)); 1083 } 1084 1085 void 1086 CodeEmitterGM107::emitDMNMX() 1087 { 1088 switch (insn->src(1).getFile()) { 1089 case FILE_GPR: 1090 emitInsn(0x5c500000); 1091 emitGPR (0x14, insn->src(1)); 1092 break; 1093 case FILE_MEMORY_CONST: 1094 emitInsn(0x4c500000); 1095 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1096 break; 1097 case FILE_IMMEDIATE: 1098 emitInsn(0x38500000); 1099 emitIMMD(0x14, 19, insn->src(1)); 1100 break; 1101 default: 1102 assert(!"bad src1 file"); 1103 break; 1104 } 1105 1106 emitABS (0x31, insn->src(1)); 1107 emitNEG (0x30, insn->src(0)); 1108 emitCC (0x2f); 1109 emitABS (0x2e, insn->src(0)); 1110 emitNEG (0x2d, insn->src(1)); 1111 emitField(0x2a, 1, insn->op == OP_MAX); 1112 emitPRED (0x27); 1113 emitGPR (0x08, insn->src(0)); 1114 emitGPR (0x00, insn->def(0)); 1115 } 1116 1117 void 1118 CodeEmitterGM107::emitDSET() 1119 { 1120 const CmpInstruction *insn = this->insn->asCmp(); 1121 1122 switch (insn->src(1).getFile()) { 1123 case FILE_GPR: 1124 emitInsn(0x59000000); 1125 emitGPR (0x14, insn->src(1)); 1126 break; 1127 case FILE_MEMORY_CONST: 1128 emitInsn(0x49000000); 1129 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1130 break; 1131 case FILE_IMMEDIATE: 1132 emitInsn(0x32000000); 1133 emitIMMD(0x14, 19, insn->src(1)); 1134 break; 1135 default: 1136 assert(!"bad src1 file"); 1137 break; 1138 } 1139 1140 if (insn->op != OP_SET) { 1141 switch (insn->op) { 1142 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1143 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1144 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1145 default: 1146 assert(!"invalid set op"); 1147 break; 1148 } 1149 emitPRED(0x27, insn->src(2)); 1150 } else { 1151 emitPRED(0x27); 1152 } 1153 1154 emitABS (0x36, insn->src(0)); 1155 emitNEG (0x35, insn->src(1)); 1156 emitField(0x34, 1, insn->dType == TYPE_F32); 1157 emitCond4(0x30, insn->setCond); 1158 emitCC (0x2f); 1159 emitABS (0x2c, insn->src(1)); 1160 emitNEG (0x2b, insn->src(0)); 1161 emitGPR (0x08, insn->src(0)); 1162 emitGPR (0x00, insn->def(0)); 1163 } 1164 1165 void 1166 CodeEmitterGM107::emitDSETP() 1167 { 1168 const CmpInstruction *insn = this->insn->asCmp(); 1169 1170 switch (insn->src(1).getFile()) { 1171 case FILE_GPR: 1172 emitInsn(0x5b800000); 1173 emitGPR (0x14, insn->src(1)); 1174 break; 1175 case FILE_MEMORY_CONST: 1176 emitInsn(0x4b800000); 1177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1178 break; 1179 case FILE_IMMEDIATE: 1180 emitInsn(0x36800000); 1181 emitIMMD(0x14, 19, insn->src(1)); 1182 break; 1183 default: 1184 assert(!"bad src1 file"); 1185 break; 1186 } 1187 1188 if (insn->op != OP_SET) { 1189 switch (insn->op) { 1190 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1191 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1192 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1193 default: 1194 assert(!"invalid set op"); 1195 break; 1196 } 1197 emitPRED(0x27, insn->src(2)); 1198 } else { 1199 emitPRED(0x27); 1200 } 1201 1202 emitCond4(0x30, insn->setCond); 1203 emitABS (0x2c, insn->src(1)); 1204 emitNEG (0x2b, insn->src(0)); 1205 emitGPR (0x08, insn->src(0)); 1206 emitABS (0x07, insn->src(0)); 1207 emitNEG (0x06, insn->src(1)); 1208 emitPRED (0x03, insn->def(0)); 1209 if (insn->defExists(1)) 1210 emitPRED(0x00, insn->def(1)); 1211 else 1212 emitPRED(0x00); 1213 } 1214 1215 /******************************************************************************* 1216 * float 1217 ******************************************************************************/ 1218 1219 void 1220 CodeEmitterGM107::emitFADD() 1221 { 1222 if (!longIMMD(insn->src(1))) { 1223 switch (insn->src(1).getFile()) { 1224 case FILE_GPR: 1225 emitInsn(0x5c580000); 1226 emitGPR (0x14, insn->src(1)); 1227 break; 1228 case FILE_MEMORY_CONST: 1229 emitInsn(0x4c580000); 1230 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1231 break; 1232 case FILE_IMMEDIATE: 1233 emitInsn(0x38580000); 1234 emitIMMD(0x14, 19, insn->src(1)); 1235 break; 1236 default: 1237 assert(!"bad src1 file"); 1238 break; 1239 } 1240 emitSAT(0x32); 1241 emitABS(0x31, insn->src(1)); 1242 emitNEG(0x30, insn->src(0)); 1243 emitCC (0x2f); 1244 emitABS(0x2e, insn->src(0)); 1245 emitNEG(0x2d, insn->src(1)); 1246 emitFMZ(0x2c, 1); 1247 1248 if (insn->op == OP_SUB) 1249 code[1] ^= 0x00002000; 1250 } else { 1251 emitInsn(0x08000000); 1252 emitABS(0x39, insn->src(1)); 1253 emitNEG(0x38, insn->src(0)); 1254 emitFMZ(0x37, 1); 1255 emitABS(0x36, insn->src(0)); 1256 emitNEG(0x35, insn->src(1)); 1257 emitCC (0x34); 1258 emitIMMD(0x14, 32, insn->src(1)); 1259 1260 if (insn->op == OP_SUB) 1261 code[1] ^= 0x00080000; 1262 } 1263 1264 emitGPR(0x08, insn->src(0)); 1265 emitGPR(0x00, insn->def(0)); 1266 } 1267 1268 void 1269 CodeEmitterGM107::emitFMUL() 1270 { 1271 if (!longIMMD(insn->src(1))) { 1272 switch (insn->src(1).getFile()) { 1273 case FILE_GPR: 1274 emitInsn(0x5c680000); 1275 emitGPR (0x14, insn->src(1)); 1276 break; 1277 case FILE_MEMORY_CONST: 1278 emitInsn(0x4c680000); 1279 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1280 break; 1281 case FILE_IMMEDIATE: 1282 emitInsn(0x38680000); 1283 emitIMMD(0x14, 19, insn->src(1)); 1284 break; 1285 default: 1286 assert(!"bad src1 file"); 1287 break; 1288 } 1289 emitSAT (0x32); 1290 emitNEG2(0x30, insn->src(0), insn->src(1)); 1291 emitCC (0x2f); 1292 emitFMZ (0x2c, 2); 1293 emitPDIV(0x29); 1294 emitRND (0x27); 1295 } else { 1296 emitInsn(0x1e000000); 1297 emitSAT (0x37); 1298 emitFMZ (0x35, 2); 1299 emitCC (0x34); 1300 emitIMMD(0x14, 32, insn->src(1)); 1301 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg()) 1302 code[1] ^= 0x00080000; /* flip immd sign bit */ 1303 } 1304 1305 emitGPR(0x08, insn->src(0)); 1306 emitGPR(0x00, insn->def(0)); 1307 } 1308 1309 void 1310 CodeEmitterGM107::emitFFMA() 1311 { 1312 /*XXX: ffma32i exists, but not using it as third src overlaps dst */ 1313 switch(insn->src(2).getFile()) { 1314 case FILE_GPR: 1315 switch (insn->src(1).getFile()) { 1316 case FILE_GPR: 1317 emitInsn(0x59800000); 1318 emitGPR (0x14, insn->src(1)); 1319 break; 1320 case FILE_MEMORY_CONST: 1321 emitInsn(0x49800000); 1322 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1323 break; 1324 case FILE_IMMEDIATE: 1325 emitInsn(0x32800000); 1326 emitIMMD(0x14, 19, insn->src(1)); 1327 break; 1328 default: 1329 assert(!"bad src1 file"); 1330 break; 1331 } 1332 emitGPR (0x27, insn->src(2)); 1333 break; 1334 case FILE_MEMORY_CONST: 1335 emitInsn(0x51800000); 1336 emitGPR (0x27, insn->src(1)); 1337 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1338 break; 1339 default: 1340 assert(!"bad src2 file"); 1341 break; 1342 } 1343 emitRND (0x33); 1344 emitSAT (0x32); 1345 emitNEG (0x31, insn->src(2)); 1346 emitNEG2(0x30, insn->src(0), insn->src(1)); 1347 emitCC (0x2f); 1348 1349 emitFMZ(0x35, 2); 1350 emitGPR(0x08, insn->src(0)); 1351 emitGPR(0x00, insn->def(0)); 1352 } 1353 1354 void 1355 CodeEmitterGM107::emitMUFU() 1356 { 1357 int mufu = 0; 1358 1359 switch (insn->op) { 1360 case OP_COS: mufu = 0; break; 1361 case OP_SIN: mufu = 1; break; 1362 case OP_EX2: mufu = 2; break; 1363 case OP_LG2: mufu = 3; break; 1364 case OP_RCP: mufu = 4 + 2 * insn->subOp; break; 1365 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break; 1366 default: 1367 assert(!"invalid mufu"); 1368 break; 1369 } 1370 1371 emitInsn (0x50800000); 1372 emitSAT (0x32); 1373 emitNEG (0x30, insn->src(0)); 1374 emitABS (0x2e, insn->src(0)); 1375 emitField(0x14, 3, mufu); 1376 emitGPR (0x08, insn->src(0)); 1377 emitGPR (0x00, insn->def(0)); 1378 } 1379 1380 void 1381 CodeEmitterGM107::emitFMNMX() 1382 { 1383 switch (insn->src(1).getFile()) { 1384 case FILE_GPR: 1385 emitInsn(0x5c600000); 1386 emitGPR (0x14, insn->src(1)); 1387 break; 1388 case FILE_MEMORY_CONST: 1389 emitInsn(0x4c600000); 1390 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1391 break; 1392 case FILE_IMMEDIATE: 1393 emitInsn(0x38600000); 1394 emitIMMD(0x14, 19, insn->src(1)); 1395 break; 1396 default: 1397 assert(!"bad src1 file"); 1398 break; 1399 } 1400 1401 emitField(0x2a, 1, insn->op == OP_MAX); 1402 emitPRED (0x27); 1403 1404 emitABS(0x31, insn->src(1)); 1405 emitNEG(0x30, insn->src(0)); 1406 emitCC (0x2f); 1407 emitABS(0x2e, insn->src(0)); 1408 emitNEG(0x2d, insn->src(1)); 1409 emitFMZ(0x2c, 1); 1410 emitGPR(0x08, insn->src(0)); 1411 emitGPR(0x00, insn->def(0)); 1412 } 1413 1414 void 1415 CodeEmitterGM107::emitRRO() 1416 { 1417 switch (insn->src(0).getFile()) { 1418 case FILE_GPR: 1419 emitInsn(0x5c900000); 1420 emitGPR (0x14, insn->src(0)); 1421 break; 1422 case FILE_MEMORY_CONST: 1423 emitInsn(0x4c900000); 1424 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 1425 break; 1426 case FILE_IMMEDIATE: 1427 emitInsn(0x38900000); 1428 emitIMMD(0x14, 19, insn->src(0)); 1429 break; 1430 default: 1431 assert(!"bad src file"); 1432 break; 1433 } 1434 1435 emitABS (0x31, insn->src(0)); 1436 emitNEG (0x2d, insn->src(0)); 1437 emitField(0x27, 1, insn->op == OP_PREEX2); 1438 emitGPR (0x00, insn->def(0)); 1439 } 1440 1441 void 1442 CodeEmitterGM107::emitFCMP() 1443 { 1444 const CmpInstruction *insn = this->insn->asCmp(); 1445 CondCode cc = insn->setCond; 1446 1447 if (insn->src(2).mod.neg()) 1448 cc = reverseCondCode(cc); 1449 1450 switch(insn->src(2).getFile()) { 1451 case FILE_GPR: 1452 switch (insn->src(1).getFile()) { 1453 case FILE_GPR: 1454 emitInsn(0x5ba00000); 1455 emitGPR (0x14, insn->src(1)); 1456 break; 1457 case FILE_MEMORY_CONST: 1458 emitInsn(0x4ba00000); 1459 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1460 break; 1461 case FILE_IMMEDIATE: 1462 emitInsn(0x36a00000); 1463 emitIMMD(0x14, 19, insn->src(1)); 1464 break; 1465 default: 1466 assert(!"bad src1 file"); 1467 break; 1468 } 1469 emitGPR (0x27, insn->src(2)); 1470 break; 1471 case FILE_MEMORY_CONST: 1472 emitInsn(0x53a00000); 1473 emitGPR (0x27, insn->src(1)); 1474 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1475 break; 1476 default: 1477 assert(!"bad src2 file"); 1478 break; 1479 } 1480 1481 emitCond4(0x30, cc); 1482 emitFMZ (0x2f, 1); 1483 emitGPR (0x08, insn->src(0)); 1484 emitGPR (0x00, insn->def(0)); 1485 } 1486 1487 void 1488 CodeEmitterGM107::emitFSET() 1489 { 1490 const CmpInstruction *insn = this->insn->asCmp(); 1491 1492 switch (insn->src(1).getFile()) { 1493 case FILE_GPR: 1494 emitInsn(0x58000000); 1495 emitGPR (0x14, insn->src(1)); 1496 break; 1497 case FILE_MEMORY_CONST: 1498 emitInsn(0x48000000); 1499 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1500 break; 1501 case FILE_IMMEDIATE: 1502 emitInsn(0x30000000); 1503 emitIMMD(0x14, 19, insn->src(1)); 1504 break; 1505 default: 1506 assert(!"bad src1 file"); 1507 break; 1508 } 1509 1510 if (insn->op != OP_SET) { 1511 switch (insn->op) { 1512 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1513 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1514 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1515 default: 1516 assert(!"invalid set op"); 1517 break; 1518 } 1519 emitPRED(0x27, insn->src(2)); 1520 } else { 1521 emitPRED(0x27); 1522 } 1523 1524 emitFMZ (0x37, 1); 1525 emitABS (0x36, insn->src(0)); 1526 emitNEG (0x35, insn->src(1)); 1527 emitField(0x34, 1, insn->dType == TYPE_F32); 1528 emitCond4(0x30, insn->setCond); 1529 emitCC (0x2f); 1530 emitABS (0x2c, insn->src(1)); 1531 emitNEG (0x2b, insn->src(0)); 1532 emitGPR (0x08, insn->src(0)); 1533 emitGPR (0x00, insn->def(0)); 1534 } 1535 1536 void 1537 CodeEmitterGM107::emitFSETP() 1538 { 1539 const CmpInstruction *insn = this->insn->asCmp(); 1540 1541 switch (insn->src(1).getFile()) { 1542 case FILE_GPR: 1543 emitInsn(0x5bb00000); 1544 emitGPR (0x14, insn->src(1)); 1545 break; 1546 case FILE_MEMORY_CONST: 1547 emitInsn(0x4bb00000); 1548 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1549 break; 1550 case FILE_IMMEDIATE: 1551 emitInsn(0x36b00000); 1552 emitIMMD(0x14, 19, insn->src(1)); 1553 break; 1554 default: 1555 assert(!"bad src1 file"); 1556 break; 1557 } 1558 1559 if (insn->op != OP_SET) { 1560 switch (insn->op) { 1561 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1562 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1563 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1564 default: 1565 assert(!"invalid set op"); 1566 break; 1567 } 1568 emitPRED(0x27, insn->src(2)); 1569 } else { 1570 emitPRED(0x27); 1571 } 1572 1573 emitCond4(0x30, insn->setCond); 1574 emitFMZ (0x2f, 1); 1575 emitABS (0x2c, insn->src(1)); 1576 emitNEG (0x2b, insn->src(0)); 1577 emitGPR (0x08, insn->src(0)); 1578 emitABS (0x07, insn->src(0)); 1579 emitNEG (0x06, insn->src(1)); 1580 emitPRED (0x03, insn->def(0)); 1581 if (insn->defExists(1)) 1582 emitPRED(0x00, insn->def(1)); 1583 else 1584 emitPRED(0x00); 1585 } 1586 1587 void 1588 CodeEmitterGM107::emitFSWZADD() 1589 { 1590 emitInsn (0x50f80000); 1591 emitCC (0x2f); 1592 emitFMZ (0x2c, 1); 1593 emitRND (0x27); 1594 emitField(0x26, 1, insn->lanes); /* abused for .ndv */ 1595 emitField(0x1c, 8, insn->subOp); 1596 if (insn->predSrc != 1) 1597 emitGPR (0x14, insn->src(1)); 1598 else 1599 emitGPR (0x14); 1600 emitGPR (0x08, insn->src(0)); 1601 emitGPR (0x00, insn->def(0)); 1602 } 1603 1604 /******************************************************************************* 1605 * integer 1606 ******************************************************************************/ 1607 1608 void 1609 CodeEmitterGM107::emitLOP() 1610 { 1611 int lop = 0; 1612 1613 switch (insn->op) { 1614 case OP_AND: lop = 0; break; 1615 case OP_OR : lop = 1; break; 1616 case OP_XOR: lop = 2; break; 1617 default: 1618 assert(!"invalid lop"); 1619 break; 1620 } 1621 1622 if (insn->src(1).getFile() != FILE_IMMEDIATE) { 1623 switch (insn->src(1).getFile()) { 1624 case FILE_GPR: 1625 emitInsn(0x5c400000); 1626 emitGPR (0x14, insn->src(1)); 1627 break; 1628 case FILE_MEMORY_CONST: 1629 emitInsn(0x4c400000); 1630 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1631 break; 1632 case FILE_IMMEDIATE: 1633 emitInsn(0x38400000); 1634 emitIMMD(0x14, 19, insn->src(1)); 1635 break; 1636 default: 1637 assert(!"bad src1 file"); 1638 break; 1639 } 1640 emitPRED (0x30); 1641 emitCC (0x2f); 1642 emitX (0x2b); 1643 emitField(0x29, 2, lop); 1644 emitINV (0x28, insn->src(1)); 1645 emitINV (0x27, insn->src(0)); 1646 } else { 1647 emitInsn (0x04000000); 1648 emitX (0x39); 1649 emitINV (0x38, insn->src(1)); 1650 emitINV (0x37, insn->src(0)); 1651 emitField(0x35, 2, lop); 1652 emitCC (0x34); 1653 emitIMMD (0x14, 32, insn->src(1)); 1654 } 1655 1656 emitGPR (0x08, insn->src(0)); 1657 emitGPR (0x00, insn->def(0)); 1658 } 1659 1660 /* special-case of emitLOP(): lop pass_b dst 0 ~src */ 1661 void 1662 CodeEmitterGM107::emitNOT() 1663 { 1664 if (!longIMMD(insn->src(0))) { 1665 switch (insn->src(0).getFile()) { 1666 case FILE_GPR: 1667 emitInsn(0x5c400700); 1668 emitGPR (0x14, insn->src(0)); 1669 break; 1670 case FILE_MEMORY_CONST: 1671 emitInsn(0x4c400700); 1672 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 1673 break; 1674 case FILE_IMMEDIATE: 1675 emitInsn(0x38400700); 1676 emitIMMD(0x14, 19, insn->src(0)); 1677 break; 1678 default: 1679 assert(!"bad src1 file"); 1680 break; 1681 } 1682 emitPRED (0x30); 1683 } else { 1684 emitInsn (0x05600000); 1685 emitIMMD (0x14, 32, insn->src(1)); 1686 } 1687 1688 emitGPR(0x08); 1689 emitGPR(0x00, insn->def(0)); 1690 } 1691 1692 void 1693 CodeEmitterGM107::emitIADD() 1694 { 1695 if (insn->src(1).getFile() != FILE_IMMEDIATE) { 1696 switch (insn->src(1).getFile()) { 1697 case FILE_GPR: 1698 emitInsn(0x5c100000); 1699 emitGPR (0x14, insn->src(1)); 1700 break; 1701 case FILE_MEMORY_CONST: 1702 emitInsn(0x4c100000); 1703 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1704 break; 1705 case FILE_IMMEDIATE: 1706 emitInsn(0x38100000); 1707 emitIMMD(0x14, 19, insn->src(1)); 1708 break; 1709 default: 1710 assert(!"bad src1 file"); 1711 break; 1712 } 1713 emitSAT(0x32); 1714 emitNEG(0x31, insn->src(0)); 1715 emitNEG(0x30, insn->src(1)); 1716 emitCC (0x2f); 1717 emitX (0x2b); 1718 } else { 1719 emitInsn(0x1c000000); 1720 emitNEG (0x38, insn->src(0)); 1721 emitSAT (0x36); 1722 emitX (0x35); 1723 emitCC (0x34); 1724 emitIMMD(0x14, 32, insn->src(1)); 1725 } 1726 1727 if (insn->op == OP_SUB) 1728 code[1] ^= 0x00010000; 1729 1730 emitGPR(0x08, insn->src(0)); 1731 emitGPR(0x00, insn->def(0)); 1732 } 1733 1734 void 1735 CodeEmitterGM107::emitIMUL() 1736 { 1737 if (insn->src(1).getFile() != FILE_IMMEDIATE) { 1738 switch (insn->src(1).getFile()) { 1739 case FILE_GPR: 1740 emitInsn(0x5c380000); 1741 emitGPR (0x14, insn->src(1)); 1742 break; 1743 case FILE_MEMORY_CONST: 1744 emitInsn(0x4c380000); 1745 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1746 break; 1747 case FILE_IMMEDIATE: 1748 emitInsn(0x38380000); 1749 emitIMMD(0x14, 19, insn->src(1)); 1750 break; 1751 default: 1752 assert(!"bad src1 file"); 1753 break; 1754 } 1755 emitCC (0x2f); 1756 emitField(0x29, 1, isSignedType(insn->sType)); 1757 emitField(0x28, 1, isSignedType(insn->dType)); 1758 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1759 } else { 1760 emitInsn (0x1f000000); 1761 emitField(0x37, 1, isSignedType(insn->sType)); 1762 emitField(0x36, 1, isSignedType(insn->dType)); 1763 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1764 emitCC (0x34); 1765 emitIMMD (0x14, 32, insn->src(1)); 1766 } 1767 1768 emitGPR(0x08, insn->src(0)); 1769 emitGPR(0x00, insn->def(0)); 1770 } 1771 1772 void 1773 CodeEmitterGM107::emitIMAD() 1774 { 1775 /*XXX: imad32i exists, but not using it as third src overlaps dst */ 1776 switch(insn->src(2).getFile()) { 1777 case FILE_GPR: 1778 switch (insn->src(1).getFile()) { 1779 case FILE_GPR: 1780 emitInsn(0x5a000000); 1781 emitGPR (0x14, insn->src(1)); 1782 break; 1783 case FILE_MEMORY_CONST: 1784 emitInsn(0x4a000000); 1785 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1786 break; 1787 case FILE_IMMEDIATE: 1788 emitInsn(0x34000000); 1789 emitIMMD(0x14, 19, insn->src(1)); 1790 break; 1791 default: 1792 assert(!"bad src1 file"); 1793 break; 1794 } 1795 emitGPR (0x27, insn->src(2)); 1796 break; 1797 case FILE_MEMORY_CONST: 1798 emitInsn(0x52000000); 1799 emitGPR (0x27, insn->src(1)); 1800 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1801 break; 1802 default: 1803 assert(!"bad src2 file"); 1804 break; 1805 } 1806 1807 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); 1808 emitField(0x35, 1, isSignedType(insn->sType)); 1809 emitNEG (0x34, insn->src(2)); 1810 emitNEG2 (0x33, insn->src(0), insn->src(1)); 1811 emitSAT (0x32); 1812 emitX (0x31); 1813 emitField(0x30, 1, isSignedType(insn->dType)); 1814 emitCC (0x2f); 1815 emitGPR (0x08, insn->src(0)); 1816 emitGPR (0x00, insn->def(0)); 1817 } 1818 1819 void 1820 CodeEmitterGM107::emitISCADD() 1821 { 1822 switch (insn->src(2).getFile()) { 1823 case FILE_GPR: 1824 emitInsn(0x5c180000); 1825 emitGPR (0x14, insn->src(2)); 1826 break; 1827 case FILE_MEMORY_CONST: 1828 emitInsn(0x4c180000); 1829 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1830 break; 1831 case FILE_IMMEDIATE: 1832 emitInsn(0x38180000); 1833 emitIMMD(0x14, 19, insn->src(2)); 1834 break; 1835 default: 1836 assert(!"bad src1 file"); 1837 break; 1838 } 1839 emitNEG (0x31, insn->src(0)); 1840 emitNEG (0x30, insn->src(2)); 1841 emitCC (0x2f); 1842 emitIMMD(0x27, 5, insn->src(1)); 1843 emitGPR (0x08, insn->src(0)); 1844 emitGPR (0x00, insn->def(0)); 1845 } 1846 1847 void 1848 CodeEmitterGM107::emitIMNMX() 1849 { 1850 switch (insn->src(1).getFile()) { 1851 case FILE_GPR: 1852 emitInsn(0x5c200000); 1853 emitGPR (0x14, insn->src(1)); 1854 break; 1855 case FILE_MEMORY_CONST: 1856 emitInsn(0x4c200000); 1857 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1858 break; 1859 case FILE_IMMEDIATE: 1860 emitInsn(0x38200000); 1861 emitIMMD(0x14, 19, insn->src(1)); 1862 break; 1863 default: 1864 assert(!"bad src1 file"); 1865 break; 1866 } 1867 1868 emitField(0x30, 1, isSignedType(insn->dType)); 1869 emitCC (0x2f); 1870 emitField(0x2a, 1, insn->op == OP_MAX); 1871 emitPRED (0x27); 1872 emitGPR (0x08, insn->src(0)); 1873 emitGPR (0x00, insn->def(0)); 1874 } 1875 1876 void 1877 CodeEmitterGM107::emitICMP() 1878 { 1879 const CmpInstruction *insn = this->insn->asCmp(); 1880 CondCode cc = insn->setCond; 1881 1882 if (insn->src(2).mod.neg()) 1883 cc = reverseCondCode(cc); 1884 1885 switch(insn->src(2).getFile()) { 1886 case FILE_GPR: 1887 switch (insn->src(1).getFile()) { 1888 case FILE_GPR: 1889 emitInsn(0x5b400000); 1890 emitGPR (0x14, insn->src(1)); 1891 break; 1892 case FILE_MEMORY_CONST: 1893 emitInsn(0x4b400000); 1894 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1895 break; 1896 case FILE_IMMEDIATE: 1897 emitInsn(0x36400000); 1898 emitIMMD(0x14, 19, insn->src(1)); 1899 break; 1900 default: 1901 assert(!"bad src1 file"); 1902 break; 1903 } 1904 emitGPR (0x27, insn->src(2)); 1905 break; 1906 case FILE_MEMORY_CONST: 1907 emitInsn(0x53400000); 1908 emitGPR (0x27, insn->src(1)); 1909 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 1910 break; 1911 default: 1912 assert(!"bad src2 file"); 1913 break; 1914 } 1915 1916 emitCond3(0x31, cc); 1917 emitField(0x30, 1, isSignedType(insn->sType)); 1918 emitGPR (0x08, insn->src(0)); 1919 emitGPR (0x00, insn->def(0)); 1920 } 1921 1922 void 1923 CodeEmitterGM107::emitISET() 1924 { 1925 const CmpInstruction *insn = this->insn->asCmp(); 1926 1927 switch (insn->src(1).getFile()) { 1928 case FILE_GPR: 1929 emitInsn(0x5b500000); 1930 emitGPR (0x14, insn->src(1)); 1931 break; 1932 case FILE_MEMORY_CONST: 1933 emitInsn(0x4b500000); 1934 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1935 break; 1936 case FILE_IMMEDIATE: 1937 emitInsn(0x36500000); 1938 emitIMMD(0x14, 19, insn->src(1)); 1939 break; 1940 default: 1941 assert(!"bad src1 file"); 1942 break; 1943 } 1944 1945 if (insn->op != OP_SET) { 1946 switch (insn->op) { 1947 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1948 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1949 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1950 default: 1951 assert(!"invalid set op"); 1952 break; 1953 } 1954 emitPRED(0x27, insn->src(2)); 1955 } else { 1956 emitPRED(0x27); 1957 } 1958 1959 emitCond3(0x31, insn->setCond); 1960 emitField(0x30, 1, isSignedType(insn->sType)); 1961 emitCC (0x2f); 1962 emitField(0x2c, 1, insn->dType == TYPE_F32); 1963 emitX (0x2b); 1964 emitGPR (0x08, insn->src(0)); 1965 emitGPR (0x00, insn->def(0)); 1966 } 1967 1968 void 1969 CodeEmitterGM107::emitISETP() 1970 { 1971 const CmpInstruction *insn = this->insn->asCmp(); 1972 1973 switch (insn->src(1).getFile()) { 1974 case FILE_GPR: 1975 emitInsn(0x5b600000); 1976 emitGPR (0x14, insn->src(1)); 1977 break; 1978 case FILE_MEMORY_CONST: 1979 emitInsn(0x4b600000); 1980 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 1981 break; 1982 case FILE_IMMEDIATE: 1983 emitInsn(0x36600000); 1984 emitIMMD(0x14, 19, insn->src(1)); 1985 break; 1986 default: 1987 assert(!"bad src1 file"); 1988 break; 1989 } 1990 1991 if (insn->op != OP_SET) { 1992 switch (insn->op) { 1993 case OP_SET_AND: emitField(0x2d, 2, 0); break; 1994 case OP_SET_OR : emitField(0x2d, 2, 1); break; 1995 case OP_SET_XOR: emitField(0x2d, 2, 2); break; 1996 default: 1997 assert(!"invalid set op"); 1998 break; 1999 } 2000 emitPRED(0x27, insn->src(2)); 2001 } else { 2002 emitPRED(0x27); 2003 } 2004 2005 emitCond3(0x31, insn->setCond); 2006 emitField(0x30, 1, isSignedType(insn->sType)); 2007 emitX (0x2b); 2008 emitGPR (0x08, insn->src(0)); 2009 emitPRED (0x03, insn->def(0)); 2010 if (insn->defExists(1)) 2011 emitPRED(0x00, insn->def(1)); 2012 else 2013 emitPRED(0x00); 2014 } 2015 2016 void 2017 CodeEmitterGM107::emitSHL() 2018 { 2019 switch (insn->src(1).getFile()) { 2020 case FILE_GPR: 2021 emitInsn(0x5c480000); 2022 emitGPR (0x14, insn->src(1)); 2023 break; 2024 case FILE_MEMORY_CONST: 2025 emitInsn(0x4c480000); 2026 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2027 break; 2028 case FILE_IMMEDIATE: 2029 emitInsn(0x38480000); 2030 emitIMMD(0x14, 19, insn->src(1)); 2031 break; 2032 default: 2033 assert(!"bad src1 file"); 2034 break; 2035 } 2036 2037 emitCC (0x2f); 2038 emitX (0x2b); 2039 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); 2040 emitGPR (0x08, insn->src(0)); 2041 emitGPR (0x00, insn->def(0)); 2042 } 2043 2044 void 2045 CodeEmitterGM107::emitSHR() 2046 { 2047 switch (insn->src(1).getFile()) { 2048 case FILE_GPR: 2049 emitInsn(0x5c280000); 2050 emitGPR (0x14, insn->src(1)); 2051 break; 2052 case FILE_MEMORY_CONST: 2053 emitInsn(0x4c280000); 2054 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2055 break; 2056 case FILE_IMMEDIATE: 2057 emitInsn(0x38280000); 2058 emitIMMD(0x14, 19, insn->src(1)); 2059 break; 2060 default: 2061 assert(!"bad src1 file"); 2062 break; 2063 } 2064 2065 emitField(0x30, 1, isSignedType(insn->dType)); 2066 emitCC (0x2f); 2067 emitX (0x2c); 2068 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); 2069 emitGPR (0x08, insn->src(0)); 2070 emitGPR (0x00, insn->def(0)); 2071 } 2072 2073 void 2074 CodeEmitterGM107::emitPOPC() 2075 { 2076 switch (insn->src(0).getFile()) { 2077 case FILE_GPR: 2078 emitInsn(0x5c080000); 2079 emitGPR (0x14, insn->src(0)); 2080 break; 2081 case FILE_MEMORY_CONST: 2082 emitInsn(0x4c080000); 2083 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 2084 break; 2085 case FILE_IMMEDIATE: 2086 emitInsn(0x38080000); 2087 emitIMMD(0x14, 19, insn->src(0)); 2088 break; 2089 default: 2090 assert(!"bad src1 file"); 2091 break; 2092 } 2093 2094 emitINV(0x28, insn->src(0)); 2095 emitGPR(0x00, insn->def(0)); 2096 } 2097 2098 void 2099 CodeEmitterGM107::emitBFI() 2100 { 2101 switch(insn->src(2).getFile()) { 2102 case FILE_GPR: 2103 switch (insn->src(1).getFile()) { 2104 case FILE_GPR: 2105 emitInsn(0x5bf00000); 2106 emitGPR (0x14, insn->src(1)); 2107 break; 2108 case FILE_MEMORY_CONST: 2109 emitInsn(0x4bf00000); 2110 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2111 break; 2112 case FILE_IMMEDIATE: 2113 emitInsn(0x36f00000); 2114 emitIMMD(0x14, 19, insn->src(1)); 2115 break; 2116 default: 2117 assert(!"bad src1 file"); 2118 break; 2119 } 2120 emitGPR (0x27, insn->src(2)); 2121 break; 2122 case FILE_MEMORY_CONST: 2123 emitInsn(0x53f00000); 2124 emitGPR (0x27, insn->src(1)); 2125 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); 2126 break; 2127 default: 2128 assert(!"bad src2 file"); 2129 break; 2130 } 2131 2132 emitCC (0x2f); 2133 emitGPR (0x08, insn->src(0)); 2134 emitGPR (0x00, insn->def(0)); 2135 } 2136 2137 void 2138 CodeEmitterGM107::emitBFE() 2139 { 2140 switch (insn->src(1).getFile()) { 2141 case FILE_GPR: 2142 emitInsn(0x5c000000); 2143 emitGPR (0x14, insn->src(1)); 2144 break; 2145 case FILE_MEMORY_CONST: 2146 emitInsn(0x4c000000); 2147 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2148 break; 2149 case FILE_IMMEDIATE: 2150 emitInsn(0x38000000); 2151 emitIMMD(0x14, 19, insn->src(1)); 2152 break; 2153 default: 2154 assert(!"bad src1 file"); 2155 break; 2156 } 2157 2158 emitField(0x30, 1, isSignedType(insn->dType)); 2159 emitCC (0x2f); 2160 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV); 2161 emitGPR (0x08, insn->src(0)); 2162 emitGPR (0x00, insn->def(0)); 2163 } 2164 2165 void 2166 CodeEmitterGM107::emitFLO() 2167 { 2168 switch (insn->src(0).getFile()) { 2169 case FILE_GPR: 2170 emitInsn(0x5c300000); 2171 emitGPR (0x14, insn->src(0)); 2172 break; 2173 case FILE_MEMORY_CONST: 2174 emitInsn(0x4c300000); 2175 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); 2176 break; 2177 case FILE_IMMEDIATE: 2178 emitInsn(0x38300000); 2179 emitIMMD(0x14, 19, insn->src(0)); 2180 break; 2181 default: 2182 assert(!"bad src1 file"); 2183 break; 2184 } 2185 2186 emitField(0x30, 1, isSignedType(insn->dType)); 2187 emitCC (0x2f); 2188 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); 2189 emitINV (0x28, insn->src(0)); 2190 emitGPR (0x00, insn->def(0)); 2191 } 2192 2193 /******************************************************************************* 2194 * memory 2195 ******************************************************************************/ 2196 2197 void 2198 CodeEmitterGM107::emitLDSTs(int pos, DataType type) 2199 { 2200 int data = 0; 2201 2202 switch (typeSizeof(type)) { 2203 case 1: data = isSignedType(type) ? 1 : 0; break; 2204 case 2: data = isSignedType(type) ? 3 : 2; break; 2205 case 4: data = 4; break; 2206 case 8: data = 5; break; 2207 case 16: data = 6; break; 2208 default: 2209 assert(!"bad type"); 2210 break; 2211 } 2212 2213 emitField(pos, 3, data); 2214 } 2215 2216 void 2217 CodeEmitterGM107::emitLDSTc(int pos) 2218 { 2219 int mode = 0; 2220 2221 switch (insn->cache) { 2222 case CACHE_CA: mode = 0; break; 2223 case CACHE_CG: mode = 1; break; 2224 case CACHE_CS: mode = 2; break; 2225 case CACHE_CV: mode = 3; break; 2226 default: 2227 assert(!"invalid caching mode"); 2228 break; 2229 } 2230 2231 emitField(pos, 2, mode); 2232 } 2233 2234 void 2235 CodeEmitterGM107::emitLDC() 2236 { 2237 emitInsn (0xef900000); 2238 emitLDSTs(0x30, insn->dType); 2239 emitField(0x2c, 2, insn->subOp); 2240 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0)); 2241 emitGPR (0x00, insn->def(0)); 2242 } 2243 2244 void 2245 CodeEmitterGM107::emitLDL() 2246 { 2247 emitInsn (0xef400000); 2248 emitLDSTs(0x30, insn->dType); 2249 emitLDSTc(0x2c); 2250 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2251 emitGPR (0x00, insn->def(0)); 2252 } 2253 2254 void 2255 CodeEmitterGM107::emitLDS() 2256 { 2257 emitInsn (0xef480000); 2258 emitLDSTs(0x30, insn->dType); 2259 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2260 emitGPR (0x00, insn->def(0)); 2261 } 2262 2263 void 2264 CodeEmitterGM107::emitLD() 2265 { 2266 emitInsn (0x80000000); 2267 emitPRED (0x3a); 2268 emitLDSTc(0x38); 2269 emitLDSTs(0x35, insn->dType); 2270 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2271 emitADDR (0x08, 0x14, 32, 0, insn->src(0)); 2272 emitGPR (0x00, insn->def(0)); 2273 } 2274 2275 void 2276 CodeEmitterGM107::emitSTL() 2277 { 2278 emitInsn (0xef500000); 2279 emitLDSTs(0x30, insn->dType); 2280 emitLDSTc(0x2c); 2281 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2282 emitGPR (0x00, insn->src(1)); 2283 } 2284 2285 void 2286 CodeEmitterGM107::emitSTS() 2287 { 2288 emitInsn (0xef580000); 2289 emitLDSTs(0x30, insn->dType); 2290 emitADDR (0x08, 0x14, 24, 0, insn->src(0)); 2291 emitGPR (0x00, insn->src(1)); 2292 } 2293 2294 void 2295 CodeEmitterGM107::emitST() 2296 { 2297 emitInsn (0xa0000000); 2298 emitPRED (0x3a); 2299 emitLDSTc(0x38); 2300 emitLDSTs(0x35, insn->dType); 2301 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2302 emitADDR (0x08, 0x14, 32, 0, insn->src(0)); 2303 emitGPR (0x00, insn->src(1)); 2304 } 2305 2306 void 2307 CodeEmitterGM107::emitALD() 2308 { 2309 emitInsn (0xefd80000); 2310 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); 2311 emitGPR (0x27, insn->src(0).getIndirect(1)); 2312 emitO (0x20); 2313 emitP (0x1f); 2314 emitADDR (0x08, 20, 10, 0, insn->src(0)); 2315 emitGPR (0x00, insn->def(0)); 2316 } 2317 2318 void 2319 CodeEmitterGM107::emitAST() 2320 { 2321 emitInsn (0xeff00000); 2322 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1); 2323 emitGPR (0x27, insn->src(0).getIndirect(1)); 2324 emitP (0x1f); 2325 emitADDR (0x08, 20, 10, 0, insn->src(0)); 2326 emitGPR (0x00, insn->src(1)); 2327 } 2328 2329 void 2330 CodeEmitterGM107::emitISBERD() 2331 { 2332 emitInsn(0xefd00000); 2333 emitGPR (0x08, insn->src(0)); 2334 emitGPR (0x00, insn->def(0)); 2335 } 2336 2337 void 2338 CodeEmitterGM107::emitAL2P() 2339 { 2340 emitInsn (0xefa00000); 2341 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); 2342 emitPRED (0x2c); 2343 emitO (0x20); 2344 emitField(0x14, 11, insn->src(0).get()->reg.data.offset); 2345 emitGPR (0x08, insn->src(0).getIndirect(0)); 2346 emitGPR (0x00, insn->def(0)); 2347 } 2348 2349 static void 2350 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) 2351 { 2352 int ipa = entry->ipa; 2353 int reg = entry->reg; 2354 int loc = entry->loc; 2355 2356 if (data.flatshade && 2357 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { 2358 ipa = NV50_IR_INTERP_FLAT; 2359 reg = 0xff; 2360 } else if (data.force_persample_interp && 2361 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && 2362 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { 2363 ipa |= NV50_IR_INTERP_CENTROID; 2364 } 2365 code[loc + 1] &= ~(0xf << 0x14); 2366 code[loc + 1] |= (ipa & 0x3) << 0x16; 2367 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2); 2368 code[loc + 0] &= ~(0xff << 0x14); 2369 code[loc + 0] |= reg << 0x14; 2370 } 2371 2372 void 2373 CodeEmitterGM107::emitIPA() 2374 { 2375 int ipam = 0, ipas = 0; 2376 2377 switch (insn->getInterpMode()) { 2378 case NV50_IR_INTERP_LINEAR : ipam = 0; break; 2379 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break; 2380 case NV50_IR_INTERP_FLAT : ipam = 2; break; 2381 case NV50_IR_INTERP_SC : ipam = 3; break; 2382 default: 2383 assert(!"invalid ipa mode"); 2384 break; 2385 } 2386 2387 switch (insn->getSampleMode()) { 2388 case NV50_IR_INTERP_DEFAULT : ipas = 0; break; 2389 case NV50_IR_INTERP_CENTROID: ipas = 1; break; 2390 case NV50_IR_INTERP_OFFSET : ipas = 2; break; 2391 default: 2392 assert(!"invalid ipa sample mode"); 2393 break; 2394 } 2395 2396 emitInsn (0xe0000000); 2397 emitField(0x36, 2, ipam); 2398 emitField(0x34, 2, ipas); 2399 emitSAT (0x33); 2400 emitField(0x2f, 3, 7); 2401 emitADDR (0x08, 0x1c, 10, 0, insn->src(0)); 2402 if ((code[0] & 0x0000ff00) != 0x0000ff00) 2403 code[1] |= 0x00000040; /* .idx */ 2404 emitGPR(0x00, insn->def(0)); 2405 2406 if (insn->op == OP_PINTERP) { 2407 emitGPR(0x14, insn->src(1)); 2408 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) 2409 emitGPR(0x27, insn->src(2)); 2410 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); 2411 } else { 2412 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) 2413 emitGPR(0x27, insn->src(1)); 2414 emitGPR(0x14); 2415 addInterp(insn->ipa, 0xff, interpApply); 2416 } 2417 2418 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) 2419 emitGPR(0x27); 2420 } 2421 2422 void 2423 CodeEmitterGM107::emitATOM() 2424 { 2425 unsigned dType, subOp; 2426 2427 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 2428 switch (insn->dType) { 2429 case TYPE_U32: dType = 0; break; 2430 case TYPE_U64: dType = 1; break; 2431 default: assert(!"unexpected dType"); dType = 0; break; 2432 } 2433 subOp = 15; 2434 2435 emitInsn (0xee000000); 2436 } else { 2437 switch (insn->dType) { 2438 case TYPE_U32: dType = 0; break; 2439 case TYPE_S32: dType = 1; break; 2440 case TYPE_U64: dType = 2; break; 2441 case TYPE_F32: dType = 3; break; 2442 case TYPE_B128: dType = 4; break; 2443 case TYPE_S64: dType = 5; break; 2444 default: assert(!"unexpected dType"); dType = 0; break; 2445 } 2446 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) 2447 subOp = 8; 2448 else 2449 subOp = insn->subOp; 2450 2451 emitInsn (0xed000000); 2452 } 2453 2454 emitField(0x34, 4, subOp); 2455 emitField(0x31, 3, dType); 2456 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2457 emitGPR (0x14, insn->src(1)); 2458 emitADDR (0x08, 0x1c, 20, 0, insn->src(0)); 2459 emitGPR (0x00, insn->def(0)); 2460 } 2461 2462 void 2463 CodeEmitterGM107::emitATOMS() 2464 { 2465 unsigned dType, subOp; 2466 2467 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 2468 switch (insn->dType) { 2469 case TYPE_U32: dType = 0; break; 2470 case TYPE_U64: dType = 1; break; 2471 default: assert(!"unexpected dType"); dType = 0; break; 2472 } 2473 subOp = 4; 2474 2475 emitInsn (0xee000000); 2476 emitField(0x34, 1, dType); 2477 } else { 2478 switch (insn->dType) { 2479 case TYPE_U32: dType = 0; break; 2480 case TYPE_S32: dType = 1; break; 2481 case TYPE_U64: dType = 2; break; 2482 case TYPE_S64: dType = 3; break; 2483 default: assert(!"unexpected dType"); dType = 0; break; 2484 } 2485 2486 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) 2487 subOp = 8; 2488 else 2489 subOp = insn->subOp; 2490 2491 emitInsn (0xec000000); 2492 emitField(0x1c, 3, dType); 2493 } 2494 2495 emitField(0x34, 4, subOp); 2496 emitGPR (0x14, insn->src(1)); 2497 emitADDR (0x08, 0x1e, 22, 2, insn->src(0)); 2498 emitGPR (0x00, insn->def(0)); 2499 } 2500 2501 void 2502 CodeEmitterGM107::emitRED() 2503 { 2504 unsigned dType; 2505 2506 switch (insn->dType) { 2507 case TYPE_U32: dType = 0; break; 2508 case TYPE_S32: dType = 1; break; 2509 case TYPE_U64: dType = 2; break; 2510 case TYPE_F32: dType = 3; break; 2511 case TYPE_B128: dType = 4; break; 2512 case TYPE_S64: dType = 5; break; 2513 default: assert(!"unexpected dType"); dType = 0; break; 2514 } 2515 2516 emitInsn (0xebf80000); 2517 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2518 emitField(0x17, 3, insn->subOp); 2519 emitField(0x14, 3, dType); 2520 emitADDR (0x08, 0x1c, 20, 0, insn->src(0)); 2521 emitGPR (0x00, insn->src(1)); 2522 } 2523 2524 void 2525 CodeEmitterGM107::emitCCTL() 2526 { 2527 unsigned width; 2528 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { 2529 emitInsn(0xef600000); 2530 width = 30; 2531 } else { 2532 emitInsn(0xef800000); 2533 width = 22; 2534 } 2535 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8); 2536 emitADDR (0x08, 0x16, width, 2, insn->src(0)); 2537 emitField(0x00, 4, insn->subOp); 2538 } 2539 2540 /******************************************************************************* 2541 * surface 2542 ******************************************************************************/ 2543 2544 void 2545 CodeEmitterGM107::emitPIXLD() 2546 { 2547 emitInsn (0xefe80000); 2548 emitPRED (0x2d); 2549 emitField(0x1f, 3, insn->subOp); 2550 emitGPR (0x08, insn->src(0)); 2551 emitGPR (0x00, insn->def(0)); 2552 } 2553 2554 /******************************************************************************* 2555 * texture 2556 ******************************************************************************/ 2557 2558 void 2559 CodeEmitterGM107::emitTEXs(int pos) 2560 { 2561 int src1 = insn->predSrc == 1 ? 2 : 1; 2562 if (insn->srcExists(src1)) 2563 emitGPR(pos, insn->src(src1)); 2564 else 2565 emitGPR(pos); 2566 } 2567 2568 void 2569 CodeEmitterGM107::emitTEX() 2570 { 2571 const TexInstruction *insn = this->insn->asTex(); 2572 int lodm = 0; 2573 2574 if (!insn->tex.levelZero) { 2575 switch (insn->op) { 2576 case OP_TEX: lodm = 0; break; 2577 case OP_TXB: lodm = 2; break; 2578 case OP_TXL: lodm = 3; break; 2579 default: 2580 assert(!"invalid tex op"); 2581 break; 2582 } 2583 } else { 2584 lodm = 1; 2585 } 2586 2587 if (insn->tex.rIndirectSrc >= 0) { 2588 emitInsn (0xdeb80000); 2589 emitField(0x25, 2, lodm); 2590 emitField(0x24, 1, insn->tex.useOffsets == 1); 2591 } else { 2592 emitInsn (0xc0380000); 2593 emitField(0x37, 2, lodm); 2594 emitField(0x36, 1, insn->tex.useOffsets == 1); 2595 emitField(0x24, 13, insn->tex.r); 2596 } 2597 2598 emitField(0x32, 1, insn->tex.target.isShadow()); 2599 emitField(0x31, 1, insn->tex.liveOnly); 2600 emitField(0x23, 1, insn->tex.derivAll); 2601 emitField(0x1f, 4, insn->tex.mask); 2602 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 2603 insn->tex.target.getDim() - 1); 2604 emitField(0x1c, 1, insn->tex.target.isArray()); 2605 emitTEXs (0x14); 2606 emitGPR (0x08, insn->src(0)); 2607 emitGPR (0x00, insn->def(0)); 2608 } 2609 2610 void 2611 CodeEmitterGM107::emitTLD() 2612 { 2613 const TexInstruction *insn = this->insn->asTex(); 2614 2615 if (insn->tex.rIndirectSrc >= 0) { 2616 emitInsn (0xdd380000); 2617 } else { 2618 emitInsn (0xdc380000); 2619 emitField(0x24, 13, insn->tex.r); 2620 } 2621 2622 emitField(0x37, 1, insn->tex.levelZero == 0); 2623 emitField(0x32, 1, insn->tex.target.isMS()); 2624 emitField(0x31, 1, insn->tex.liveOnly); 2625 emitField(0x23, 1, insn->tex.useOffsets == 1); 2626 emitField(0x1f, 4, insn->tex.mask); 2627 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 2628 insn->tex.target.getDim() - 1); 2629 emitField(0x1c, 1, insn->tex.target.isArray()); 2630 emitTEXs (0x14); 2631 emitGPR (0x08, insn->src(0)); 2632 emitGPR (0x00, insn->def(0)); 2633 } 2634 2635 void 2636 CodeEmitterGM107::emitTLD4() 2637 { 2638 const TexInstruction *insn = this->insn->asTex(); 2639 2640 if (insn->tex.rIndirectSrc >= 0) { 2641 emitInsn (0xdef80000); 2642 emitField(0x26, 2, insn->tex.gatherComp); 2643 emitField(0x25, 2, insn->tex.useOffsets == 4); 2644 emitField(0x24, 2, insn->tex.useOffsets == 1); 2645 } else { 2646 emitInsn (0xc8380000); 2647 emitField(0x38, 2, insn->tex.gatherComp); 2648 emitField(0x37, 2, insn->tex.useOffsets == 4); 2649 emitField(0x36, 2, insn->tex.useOffsets == 1); 2650 emitField(0x24, 13, insn->tex.r); 2651 } 2652 2653 emitField(0x32, 1, insn->tex.target.isShadow()); 2654 emitField(0x31, 1, insn->tex.liveOnly); 2655 emitField(0x23, 1, insn->tex.derivAll); 2656 emitField(0x1f, 4, insn->tex.mask); 2657 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 2658 insn->tex.target.getDim() - 1); 2659 emitField(0x1c, 1, insn->tex.target.isArray()); 2660 emitTEXs (0x14); 2661 emitGPR (0x08, insn->src(0)); 2662 emitGPR (0x00, insn->def(0)); 2663 } 2664 2665 void 2666 CodeEmitterGM107::emitTXD() 2667 { 2668 const TexInstruction *insn = this->insn->asTex(); 2669 2670 if (insn->tex.rIndirectSrc >= 0) { 2671 emitInsn (0xde780000); 2672 } else { 2673 emitInsn (0xde380000); 2674 emitField(0x24, 13, insn->tex.r); 2675 } 2676 2677 emitField(0x31, 1, insn->tex.liveOnly); 2678 emitField(0x23, 1, insn->tex.useOffsets == 1); 2679 emitField(0x1f, 4, insn->tex.mask); 2680 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 2681 insn->tex.target.getDim() - 1); 2682 emitField(0x1c, 1, insn->tex.target.isArray()); 2683 emitTEXs (0x14); 2684 emitGPR (0x08, insn->src(0)); 2685 emitGPR (0x00, insn->def(0)); 2686 } 2687 2688 void 2689 CodeEmitterGM107::emitTMML() 2690 { 2691 const TexInstruction *insn = this->insn->asTex(); 2692 2693 if (insn->tex.rIndirectSrc >= 0) { 2694 emitInsn (0xdf600000); 2695 } else { 2696 emitInsn (0xdf580000); 2697 emitField(0x24, 13, insn->tex.r); 2698 } 2699 2700 emitField(0x31, 1, insn->tex.liveOnly); 2701 emitField(0x23, 1, insn->tex.derivAll); 2702 emitField(0x1f, 4, insn->tex.mask); 2703 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : 2704 insn->tex.target.getDim() - 1); 2705 emitField(0x1c, 1, insn->tex.target.isArray()); 2706 emitTEXs (0x14); 2707 emitGPR (0x08, insn->src(0)); 2708 emitGPR (0x00, insn->def(0)); 2709 } 2710 2711 void 2712 CodeEmitterGM107::emitTXQ() 2713 { 2714 const TexInstruction *insn = this->insn->asTex(); 2715 int type = 0; 2716 2717 switch (insn->tex.query) { 2718 case TXQ_DIMS : type = 0x01; break; 2719 case TXQ_TYPE : type = 0x02; break; 2720 case TXQ_SAMPLE_POSITION: type = 0x05; break; 2721 case TXQ_FILTER : type = 0x10; break; 2722 case TXQ_LOD : type = 0x12; break; 2723 case TXQ_WRAP : type = 0x14; break; 2724 case TXQ_BORDER_COLOUR : type = 0x16; break; 2725 default: 2726 assert(!"invalid txq query"); 2727 break; 2728 } 2729 2730 if (insn->tex.rIndirectSrc >= 0) { 2731 emitInsn (0xdf500000); 2732 } else { 2733 emitInsn (0xdf480000); 2734 emitField(0x24, 13, insn->tex.r); 2735 } 2736 2737 emitField(0x31, 1, insn->tex.liveOnly); 2738 emitField(0x1f, 4, insn->tex.mask); 2739 emitField(0x16, 6, type); 2740 emitGPR (0x08, insn->src(0)); 2741 emitGPR (0x00, insn->def(0)); 2742 } 2743 2744 void 2745 CodeEmitterGM107::emitDEPBAR() 2746 { 2747 emitInsn (0xf0f00000); 2748 emitField(0x1d, 1, 1); /* le */ 2749 emitField(0x1a, 3, 5); 2750 emitField(0x14, 6, insn->subOp); 2751 emitField(0x00, 6, insn->subOp); 2752 } 2753 2754 /******************************************************************************* 2755 * misc 2756 ******************************************************************************/ 2757 2758 void 2759 CodeEmitterGM107::emitNOP() 2760 { 2761 emitInsn(0x50b00000); 2762 } 2763 2764 void 2765 CodeEmitterGM107::emitKIL() 2766 { 2767 emitInsn (0xe3300000); 2768 emitCond5(0x00, CC_TR); 2769 } 2770 2771 void 2772 CodeEmitterGM107::emitOUT() 2773 { 2774 const int cut = insn->op == OP_RESTART || insn->subOp; 2775 const int emit = insn->op == OP_EMIT; 2776 2777 switch (insn->src(1).getFile()) { 2778 case FILE_GPR: 2779 emitInsn(0xfbe00000); 2780 emitGPR (0x14, insn->src(1)); 2781 break; 2782 case FILE_IMMEDIATE: 2783 emitInsn(0xf6e00000); 2784 emitIMMD(0x14, 19, insn->src(1)); 2785 break; 2786 case FILE_MEMORY_CONST: 2787 emitInsn(0xebe00000); 2788 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); 2789 break; 2790 default: 2791 assert(!"bad src1 file"); 2792 break; 2793 } 2794 2795 emitField(0x27, 2, (cut << 1) | emit); 2796 emitGPR (0x08, insn->src(0)); 2797 emitGPR (0x00, insn->def(0)); 2798 } 2799 2800 void 2801 CodeEmitterGM107::emitBAR() 2802 { 2803 uint8_t subop; 2804 2805 emitInsn (0xf0a80000); 2806 2807 switch (insn->subOp) { 2808 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break; 2809 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break; 2810 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break; 2811 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break; 2812 default: 2813 subop = 0x80; 2814 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC); 2815 break; 2816 } 2817 2818 emitField(0x20, 8, subop); 2819 2820 // barrier id 2821 if (insn->src(0).getFile() == FILE_GPR) { 2822 emitGPR(0x08, insn->src(0)); 2823 } else { 2824 ImmediateValue *imm = insn->getSrc(0)->asImm(); 2825 assert(imm); 2826 emitField(0x08, 8, imm->reg.data.u32); 2827 emitField(0x2b, 1, 1); 2828 } 2829 2830 // thread count 2831 if (insn->src(1).getFile() == FILE_GPR) { 2832 emitGPR(0x14, insn->src(1)); 2833 } else { 2834 ImmediateValue *imm = insn->getSrc(0)->asImm(); 2835 assert(imm); 2836 emitField(0x14, 12, imm->reg.data.u32); 2837 emitField(0x2c, 1, 1); 2838 } 2839 2840 if (insn->srcExists(2) && (insn->predSrc != 2)) { 2841 emitPRED (0x27, insn->src(2)); 2842 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT)); 2843 } else { 2844 emitField(0x27, 3, 7); 2845 } 2846 } 2847 2848 void 2849 CodeEmitterGM107::emitMEMBAR() 2850 { 2851 emitInsn (0xef980000); 2852 emitField(0x08, 2, insn->subOp >> 2); 2853 } 2854 2855 void 2856 CodeEmitterGM107::emitVOTE() 2857 { 2858 assert(insn->src(0).getFile() == FILE_PREDICATE); 2859 2860 int r = -1, p = -1; 2861 for (int i = 0; insn->defExists(i); i++) { 2862 if (insn->def(i).getFile() == FILE_GPR) 2863 r = i; 2864 else if (insn->def(i).getFile() == FILE_PREDICATE) 2865 p = i; 2866 } 2867 2868 emitInsn (0x50d80000); 2869 emitField(0x30, 2, insn->subOp); 2870 if (r >= 0) 2871 emitGPR (0x00, insn->def(r)); 2872 else 2873 emitGPR (0x00); 2874 if (p >= 0) 2875 emitPRED (0x2d, insn->def(p)); 2876 else 2877 emitPRED (0x2d); 2878 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); 2879 emitPRED (0x27, insn->src(0)); 2880 } 2881 2882 void 2883 CodeEmitterGM107::emitSUTarget() 2884 { 2885 const TexInstruction *insn = this->insn->asTex(); 2886 int target = 0; 2887 2888 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); 2889 2890 if (insn->tex.target == TEX_TARGET_BUFFER) { 2891 target = 2; 2892 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { 2893 target = 4; 2894 } else if (insn->tex.target == TEX_TARGET_2D || 2895 insn->tex.target == TEX_TARGET_RECT) { 2896 target = 6; 2897 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY || 2898 insn->tex.target == TEX_TARGET_CUBE || 2899 insn->tex.target == TEX_TARGET_CUBE_ARRAY) { 2900 target = 8; 2901 } else if (insn->tex.target == TEX_TARGET_3D) { 2902 target = 10; 2903 } else { 2904 assert(insn->tex.target == TEX_TARGET_1D); 2905 } 2906 emitField(0x20, 4, target); 2907 } 2908 2909 void 2910 CodeEmitterGM107::emitSUHandle(const int s) 2911 { 2912 const TexInstruction *insn = this->insn->asTex(); 2913 2914 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); 2915 2916 if (insn->src(s).getFile() == FILE_GPR) { 2917 emitGPR(0x27, insn->src(s)); 2918 } else { 2919 ImmediateValue *imm = insn->getSrc(s)->asImm(); 2920 assert(imm); 2921 emitField(0x33, 1, 1); 2922 emitField(0x24, 13, imm->reg.data.u32); 2923 } 2924 } 2925 2926 void 2927 CodeEmitterGM107::emitSUSTx() 2928 { 2929 const TexInstruction *insn = this->insn->asTex(); 2930 2931 emitInsn(0xeb200000); 2932 if (insn->op == OP_SUSTB) 2933 emitField(0x34, 1, 1); 2934 emitSUTarget(); 2935 2936 emitLDSTc(0x18); 2937 emitField(0x14, 4, 0xf); // rgba 2938 emitGPR (0x08, insn->src(0)); 2939 emitGPR (0x00, insn->src(1)); 2940 2941 emitSUHandle(2); 2942 } 2943 2944 void 2945 CodeEmitterGM107::emitSULDx() 2946 { 2947 const TexInstruction *insn = this->insn->asTex(); 2948 int type = 0; 2949 2950 emitInsn(0xeb000000); 2951 if (insn->op == OP_SULDB) 2952 emitField(0x34, 1, 1); 2953 emitSUTarget(); 2954 2955 switch (insn->dType) { 2956 case TYPE_S8: type = 1; break; 2957 case TYPE_U16: type = 2; break; 2958 case TYPE_S16: type = 3; break; 2959 case TYPE_U32: type = 4; break; 2960 case TYPE_U64: type = 5; break; 2961 case TYPE_B128: type = 6; break; 2962 default: 2963 assert(insn->dType == TYPE_U8); 2964 break; 2965 } 2966 emitLDSTc(0x18); 2967 emitField(0x14, 3, type); 2968 emitGPR (0x00, insn->def(0)); 2969 emitGPR (0x08, insn->src(0)); 2970 2971 emitSUHandle(1); 2972 } 2973 2974 void 2975 CodeEmitterGM107::emitSUREDx() 2976 { 2977 const TexInstruction *insn = this->insn->asTex(); 2978 uint8_t type = 0, subOp; 2979 2980 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) 2981 emitInsn(0xeac00000); 2982 else 2983 emitInsn(0xea600000); 2984 2985 if (insn->op == OP_SUREDB) 2986 emitField(0x34, 1, 1); 2987 emitSUTarget(); 2988 2989 // destination type 2990 switch (insn->dType) { 2991 case TYPE_S32: type = 1; break; 2992 case TYPE_U64: type = 2; break; 2993 case TYPE_F32: type = 3; break; 2994 case TYPE_S64: type = 5; break; 2995 default: 2996 assert(insn->dType == TYPE_U32); 2997 break; 2998 } 2999 3000 // atomic operation 3001 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { 3002 subOp = 0; 3003 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 3004 subOp = 8; 3005 } else { 3006 subOp = insn->subOp; 3007 } 3008 3009 emitField(0x24, 3, type); 3010 emitField(0x1d, 4, subOp); 3011 emitGPR (0x14, insn->src(1)); 3012 emitGPR (0x08, insn->src(0)); 3013 emitGPR (0x00, insn->def(0)); 3014 3015 emitSUHandle(2); 3016 } 3017 3018 /******************************************************************************* 3019 * assembler front-end 3020 ******************************************************************************/ 3021 3022 bool 3023 CodeEmitterGM107::emitInstruction(Instruction *i) 3024 { 3025 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8; 3026 bool ret = true; 3027 3028 insn = i; 3029 3030 if (insn->encSize != 8) { 3031 ERROR("skipping undecodable instruction: "); insn->print(); 3032 return false; 3033 } else 3034 if (codeSize + size > codeSizeLimit) { 3035 ERROR("code emitter output buffer too small\n"); 3036 return false; 3037 } 3038 3039 if (writeIssueDelays) { 3040 int n = ((codeSize & 0x1f) / 8) - 1; 3041 if (n < 0) { 3042 data = code; 3043 data[0] = 0x00000000; 3044 data[1] = 0x00000000; 3045 code += 2; 3046 codeSize += 8; 3047 n++; 3048 } 3049 3050 emitField(data, n * 21, 21, insn->sched); 3051 } 3052 3053 switch (insn->op) { 3054 case OP_EXIT: 3055 emitEXIT(); 3056 break; 3057 case OP_BRA: 3058 emitBRA(); 3059 break; 3060 case OP_CALL: 3061 emitCAL(); 3062 break; 3063 case OP_PRECONT: 3064 emitPCNT(); 3065 break; 3066 case OP_CONT: 3067 emitCONT(); 3068 break; 3069 case OP_PREBREAK: 3070 emitPBK(); 3071 break; 3072 case OP_BREAK: 3073 emitBRK(); 3074 break; 3075 case OP_PRERET: 3076 emitPRET(); 3077 break; 3078 case OP_RET: 3079 emitRET(); 3080 break; 3081 case OP_JOINAT: 3082 emitSSY(); 3083 break; 3084 case OP_JOIN: 3085 emitSYNC(); 3086 break; 3087 case OP_QUADON: 3088 emitSAM(); 3089 break; 3090 case OP_QUADPOP: 3091 emitRAM(); 3092 break; 3093 case OP_MOV: 3094 emitMOV(); 3095 break; 3096 case OP_RDSV: 3097 emitS2R(); 3098 break; 3099 case OP_ABS: 3100 case OP_NEG: 3101 case OP_SAT: 3102 case OP_FLOOR: 3103 case OP_CEIL: 3104 case OP_TRUNC: 3105 case OP_CVT: 3106 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || 3107 insn->src(0).getFile() == FILE_PREDICATE)) { 3108 emitMOV(); 3109 } else if (isFloatType(insn->dType)) { 3110 if (isFloatType(insn->sType)) 3111 emitF2F(); 3112 else 3113 emitI2F(); 3114 } else { 3115 if (isFloatType(insn->sType)) 3116 emitF2I(); 3117 else 3118 emitI2I(); 3119 } 3120 break; 3121 case OP_SHFL: 3122 emitSHFL(); 3123 break; 3124 case OP_ADD: 3125 case OP_SUB: 3126 if (isFloatType(insn->dType)) { 3127 if (insn->dType == TYPE_F64) 3128 emitDADD(); 3129 else 3130 emitFADD(); 3131 } else { 3132 emitIADD(); 3133 } 3134 break; 3135 case OP_MUL: 3136 if (isFloatType(insn->dType)) { 3137 if (insn->dType == TYPE_F64) 3138 emitDMUL(); 3139 else 3140 emitFMUL(); 3141 } else { 3142 emitIMUL(); 3143 } 3144 break; 3145 case OP_MAD: 3146 case OP_FMA: 3147 if (isFloatType(insn->dType)) { 3148 if (insn->dType == TYPE_F64) 3149 emitDFMA(); 3150 else 3151 emitFFMA(); 3152 } else { 3153 emitIMAD(); 3154 } 3155 break; 3156 case OP_SHLADD: 3157 emitISCADD(); 3158 break; 3159 case OP_MIN: 3160 case OP_MAX: 3161 if (isFloatType(insn->dType)) { 3162 if (insn->dType == TYPE_F64) 3163 emitDMNMX(); 3164 else 3165 emitFMNMX(); 3166 } else { 3167 emitIMNMX(); 3168 } 3169 break; 3170 case OP_SHL: 3171 emitSHL(); 3172 break; 3173 case OP_SHR: 3174 emitSHR(); 3175 break; 3176 case OP_POPCNT: 3177 emitPOPC(); 3178 break; 3179 case OP_INSBF: 3180 emitBFI(); 3181 break; 3182 case OP_EXTBF: 3183 emitBFE(); 3184 break; 3185 case OP_BFIND: 3186 emitFLO(); 3187 break; 3188 case OP_SLCT: 3189 if (isFloatType(insn->dType)) 3190 emitFCMP(); 3191 else 3192 emitICMP(); 3193 break; 3194 case OP_SET: 3195 case OP_SET_AND: 3196 case OP_SET_OR: 3197 case OP_SET_XOR: 3198 if (insn->def(0).getFile() != FILE_PREDICATE) { 3199 if (isFloatType(insn->sType)) 3200 if (insn->sType == TYPE_F64) 3201 emitDSET(); 3202 else 3203 emitFSET(); 3204 else 3205 emitISET(); 3206 } else { 3207 if (isFloatType(insn->sType)) 3208 if (insn->sType == TYPE_F64) 3209 emitDSETP(); 3210 else 3211 emitFSETP(); 3212 else 3213 emitISETP(); 3214 } 3215 break; 3216 case OP_SELP: 3217 emitSEL(); 3218 break; 3219 case OP_PRESIN: 3220 case OP_PREEX2: 3221 emitRRO(); 3222 break; 3223 case OP_COS: 3224 case OP_SIN: 3225 case OP_EX2: 3226 case OP_LG2: 3227 case OP_RCP: 3228 case OP_RSQ: 3229 emitMUFU(); 3230 break; 3231 case OP_AND: 3232 case OP_OR: 3233 case OP_XOR: 3234 emitLOP(); 3235 break; 3236 case OP_NOT: 3237 emitNOT(); 3238 break; 3239 case OP_LOAD: 3240 switch (insn->src(0).getFile()) { 3241 case FILE_MEMORY_CONST : emitLDC(); break; 3242 case FILE_MEMORY_LOCAL : emitLDL(); break; 3243 case FILE_MEMORY_SHARED: emitLDS(); break; 3244 case FILE_MEMORY_GLOBAL: emitLD(); break; 3245 default: 3246 assert(!"invalid load"); 3247 emitNOP(); 3248 break; 3249 } 3250 break; 3251 case OP_STORE: 3252 switch (insn->src(0).getFile()) { 3253 case FILE_MEMORY_LOCAL : emitSTL(); break; 3254 case FILE_MEMORY_SHARED: emitSTS(); break; 3255 case FILE_MEMORY_GLOBAL: emitST(); break; 3256 default: 3257 assert(!"invalid store"); 3258 emitNOP(); 3259 break; 3260 } 3261 break; 3262 case OP_ATOM: 3263 if (insn->src(0).getFile() == FILE_MEMORY_SHARED) 3264 emitATOMS(); 3265 else 3266 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) 3267 emitRED(); 3268 else 3269 emitATOM(); 3270 break; 3271 case OP_CCTL: 3272 emitCCTL(); 3273 break; 3274 case OP_VFETCH: 3275 emitALD(); 3276 break; 3277 case OP_EXPORT: 3278 emitAST(); 3279 break; 3280 case OP_PFETCH: 3281 emitISBERD(); 3282 break; 3283 case OP_AFETCH: 3284 emitAL2P(); 3285 break; 3286 case OP_LINTERP: 3287 case OP_PINTERP: 3288 emitIPA(); 3289 break; 3290 case OP_PIXLD: 3291 emitPIXLD(); 3292 break; 3293 case OP_TEX: 3294 case OP_TXB: 3295 case OP_TXL: 3296 emitTEX(); 3297 break; 3298 case OP_TXF: 3299 emitTLD(); 3300 break; 3301 case OP_TXG: 3302 emitTLD4(); 3303 break; 3304 case OP_TXD: 3305 emitTXD(); 3306 break; 3307 case OP_TXQ: 3308 emitTXQ(); 3309 break; 3310 case OP_TXLQ: 3311 emitTMML(); 3312 break; 3313 case OP_TEXBAR: 3314 emitDEPBAR(); 3315 break; 3316 case OP_QUADOP: 3317 emitFSWZADD(); 3318 break; 3319 case OP_NOP: 3320 emitNOP(); 3321 break; 3322 case OP_DISCARD: 3323 emitKIL(); 3324 break; 3325 case OP_EMIT: 3326 case OP_RESTART: 3327 emitOUT(); 3328 break; 3329 case OP_BAR: 3330 emitBAR(); 3331 break; 3332 case OP_MEMBAR: 3333 emitMEMBAR(); 3334 break; 3335 case OP_VOTE: 3336 emitVOTE(); 3337 break; 3338 case OP_SUSTB: 3339 case OP_SUSTP: 3340 emitSUSTx(); 3341 break; 3342 case OP_SULDB: 3343 case OP_SULDP: 3344 emitSULDx(); 3345 break; 3346 case OP_SUREDB: 3347 case OP_SUREDP: 3348 emitSUREDx(); 3349 break; 3350 default: 3351 assert(!"invalid opcode"); 3352 emitNOP(); 3353 ret = false; 3354 break; 3355 } 3356 3357 if (insn->join) { 3358 /*XXX*/ 3359 } 3360 3361 code += 2; 3362 codeSize += 8; 3363 return ret; 3364 } 3365 3366 uint32_t 3367 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const 3368 { 3369 return 8; 3370 } 3371 3372 /******************************************************************************* 3373 * sched data calculator 3374 ******************************************************************************/ 3375 3376 class SchedDataCalculatorGM107 : public Pass 3377 { 3378 public: 3379 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {} 3380 3381 private: 3382 struct RegScores 3383 { 3384 struct ScoreData { 3385 int r[256]; 3386 int p[8]; 3387 int c; 3388 } rd, wr; 3389 int base; 3390 3391 void rebase(const int base) 3392 { 3393 const int delta = this->base - base; 3394 if (!delta) 3395 return; 3396 this->base = 0; 3397 3398 for (int i = 0; i < 256; ++i) { 3399 rd.r[i] += delta; 3400 wr.r[i] += delta; 3401 } 3402 for (int i = 0; i < 8; ++i) { 3403 rd.p[i] += delta; 3404 wr.p[i] += delta; 3405 } 3406 rd.c += delta; 3407 wr.c += delta; 3408 } 3409 void wipe() 3410 { 3411 memset(&rd, 0, sizeof(rd)); 3412 memset(&wr, 0, sizeof(wr)); 3413 } 3414 int getLatest(const ScoreData& d) const 3415 { 3416 int max = 0; 3417 for (int i = 0; i < 256; ++i) 3418 if (d.r[i] > max) 3419 max = d.r[i]; 3420 for (int i = 0; i < 8; ++i) 3421 if (d.p[i] > max) 3422 max = d.p[i]; 3423 if (d.c > max) 3424 max = d.c; 3425 return max; 3426 } 3427 inline int getLatestRd() const 3428 { 3429 return getLatest(rd); 3430 } 3431 inline int getLatestWr() const 3432 { 3433 return getLatest(wr); 3434 } 3435 inline int getLatest() const 3436 { 3437 return MAX2(getLatestRd(), getLatestWr()); 3438 } 3439 void setMax(const RegScores *that) 3440 { 3441 for (int i = 0; i < 256; ++i) { 3442 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); 3443 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); 3444 } 3445 for (int i = 0; i < 8; ++i) { 3446 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); 3447 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); 3448 } 3449 rd.c = MAX2(rd.c, that->rd.c); 3450 wr.c = MAX2(wr.c, that->wr.c); 3451 } 3452 void print(int cycle) 3453 { 3454 for (int i = 0; i < 256; ++i) { 3455 if (rd.r[i] > cycle) 3456 INFO("rd $r%i @ %i\n", i, rd.r[i]); 3457 if (wr.r[i] > cycle) 3458 INFO("wr $r%i @ %i\n", i, wr.r[i]); 3459 } 3460 for (int i = 0; i < 8; ++i) { 3461 if (rd.p[i] > cycle) 3462 INFO("rd $p%i @ %i\n", i, rd.p[i]); 3463 if (wr.p[i] > cycle) 3464 INFO("wr $p%i @ %i\n", i, wr.p[i]); 3465 } 3466 if (rd.c > cycle) 3467 INFO("rd $c @ %i\n", rd.c); 3468 if (wr.c > cycle) 3469 INFO("wr $c @ %i\n", wr.c); 3470 } 3471 }; 3472 3473 RegScores *score; // for current BB 3474 std::vector<RegScores> scoreBoards; 3475 3476 const TargetGM107 *targ; 3477 bool visit(Function *); 3478 bool visit(BasicBlock *); 3479 3480 void commitInsn(const Instruction *, int); 3481 int calcDelay(const Instruction *, int) const; 3482 void setDelay(Instruction *, int, const Instruction *); 3483 void recordWr(const Value *, int, int); 3484 void checkRd(const Value *, int, int&) const; 3485 3486 inline void emitYield(Instruction *); 3487 inline void emitStall(Instruction *, uint8_t); 3488 inline void emitReuse(Instruction *, uint8_t); 3489 inline void emitWrDepBar(Instruction *, uint8_t); 3490 inline void emitRdDepBar(Instruction *, uint8_t); 3491 inline void emitWtDepBar(Instruction *, uint8_t); 3492 3493 inline int getStall(const Instruction *) const; 3494 inline int getWrDepBar(const Instruction *) const; 3495 inline int getRdDepBar(const Instruction *) const; 3496 inline int getWtDepBar(const Instruction *) const; 3497 3498 void setReuseFlag(Instruction *); 3499 3500 inline void printSchedInfo(int, const Instruction *) const; 3501 3502 struct LiveBarUse { 3503 LiveBarUse(Instruction *insn, Instruction *usei) 3504 : insn(insn), usei(usei) { } 3505 Instruction *insn; 3506 Instruction *usei; 3507 }; 3508 3509 struct LiveBarDef { 3510 LiveBarDef(Instruction *insn, Instruction *defi) 3511 : insn(insn), defi(defi) { } 3512 Instruction *insn; 3513 Instruction *defi; 3514 }; 3515 3516 bool insertBarriers(BasicBlock *); 3517 3518 Instruction *findFirstUse(const Instruction *) const; 3519 Instruction *findFirstDef(const Instruction *) const; 3520 3521 bool needRdDepBar(const Instruction *) const; 3522 bool needWrDepBar(const Instruction *) const; 3523 }; 3524 3525 inline void 3526 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt) 3527 { 3528 assert(cnt < 16); 3529 insn->sched |= cnt; 3530 } 3531 3532 inline void 3533 SchedDataCalculatorGM107::emitYield(Instruction *insn) 3534 { 3535 insn->sched |= 1 << 4; 3536 } 3537 3538 inline void 3539 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id) 3540 { 3541 assert(id < 6); 3542 if ((insn->sched & 0xe0) == 0xe0) 3543 insn->sched ^= 0xe0; 3544 insn->sched |= id << 5; 3545 } 3546 3547 inline void 3548 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id) 3549 { 3550 assert(id < 6); 3551 if ((insn->sched & 0x700) == 0x700) 3552 insn->sched ^= 0x700; 3553 insn->sched |= id << 8; 3554 } 3555 3556 inline void 3557 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id) 3558 { 3559 assert(id < 6); 3560 insn->sched |= 1 << (11 + id); 3561 } 3562 3563 inline void 3564 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id) 3565 { 3566 assert(id < 4); 3567 insn->sched |= 1 << (17 + id); 3568 } 3569 3570 inline void 3571 SchedDataCalculatorGM107::printSchedInfo(int cycle, 3572 const Instruction *insn) const 3573 { 3574 uint8_t st, yl, wr, rd, wt, ru; 3575 3576 st = (insn->sched & 0x00000f) >> 0; 3577 yl = (insn->sched & 0x000010) >> 4; 3578 wr = (insn->sched & 0x0000e0) >> 5; 3579 rd = (insn->sched & 0x000700) >> 8; 3580 wt = (insn->sched & 0x01f800) >> 11; 3581 ru = (insn->sched & 0x1e0000) >> 17; 3582 3583 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n", 3584 cycle, st, yl, wr, rd, wt, ru); 3585 } 3586 3587 inline int 3588 SchedDataCalculatorGM107::getStall(const Instruction *insn) const 3589 { 3590 return insn->sched & 0xf; 3591 } 3592 3593 inline int 3594 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const 3595 { 3596 return (insn->sched & 0x0000e0) >> 5; 3597 } 3598 3599 inline int 3600 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const 3601 { 3602 return (insn->sched & 0x000700) >> 8; 3603 } 3604 3605 inline int 3606 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const 3607 { 3608 return (insn->sched & 0x01f800) >> 11; 3609 } 3610 3611 // Emit the reuse flag which allows to make use of the new memory hierarchy 3612 // introduced since Maxwell, the operand reuse cache. 3613 // 3614 // It allows to reduce bank conflicts by caching operands. Each time you issue 3615 // an instruction, that flag can tell the hw which operands are going to be 3616 // re-used by the next instruction. Note that the next instruction has to use 3617 // the same GPR id in the same operand slot. 3618 void 3619 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn) 3620 { 3621 Instruction *next = insn->next; 3622 BitSet defs(255, 1); 3623 3624 if (!targ->isReuseSupported(insn)) 3625 return; 3626 3627 for (int d = 0; insn->defExists(d); ++d) { 3628 const Value *def = insn->def(d).rep(); 3629 if (insn->def(d).getFile() != FILE_GPR) 3630 continue; 3631 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255) 3632 continue; 3633 defs.set(def->reg.data.id); 3634 } 3635 3636 for (int s = 0; insn->srcExists(s); s++) { 3637 const Value *src = insn->src(s).rep(); 3638 if (insn->src(s).getFile() != FILE_GPR) 3639 continue; 3640 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255) 3641 continue; 3642 if (defs.test(src->reg.data.id)) 3643 continue; 3644 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR) 3645 continue; 3646 if (src->reg.data.id != next->getSrc(s)->reg.data.id) 3647 continue; 3648 assert(s < 4); 3649 emitReuse(insn, s); 3650 } 3651 } 3652 3653 void 3654 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready) 3655 { 3656 int a = v->reg.data.id, b; 3657 3658 switch (v->reg.file) { 3659 case FILE_GPR: 3660 b = a + v->reg.size / 4; 3661 for (int r = a; r < b; ++r) 3662 score->rd.r[r] = ready; 3663 break; 3664 case FILE_PREDICATE: 3665 // To immediately use a predicate set by any instructions, the minimum 3666 // number of stall counts is 13. 3667 score->rd.p[a] = cycle + 13; 3668 break; 3669 case FILE_FLAGS: 3670 score->rd.c = ready; 3671 break; 3672 default: 3673 break; 3674 } 3675 } 3676 3677 void 3678 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const 3679 { 3680 int a = v->reg.data.id, b; 3681 int ready = cycle; 3682 3683 switch (v->reg.file) { 3684 case FILE_GPR: 3685 b = a + v->reg.size / 4; 3686 for (int r = a; r < b; ++r) 3687 ready = MAX2(ready, score->rd.r[r]); 3688 break; 3689 case FILE_PREDICATE: 3690 ready = MAX2(ready, score->rd.p[a]); 3691 break; 3692 case FILE_FLAGS: 3693 ready = MAX2(ready, score->rd.c); 3694 break; 3695 default: 3696 break; 3697 } 3698 if (cycle < ready) 3699 delay = MAX2(delay, ready - cycle); 3700 } 3701 3702 void 3703 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle) 3704 { 3705 const int ready = cycle + targ->getLatency(insn); 3706 3707 for (int d = 0; insn->defExists(d); ++d) 3708 recordWr(insn->getDef(d), cycle, ready); 3709 3710 #ifdef GM107_DEBUG_SCHED_DATA 3711 score->print(cycle); 3712 #endif 3713 } 3714 3715 #define GM107_MIN_ISSUE_DELAY 0x1 3716 #define GM107_MAX_ISSUE_DELAY 0xf 3717 3718 int 3719 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const 3720 { 3721 int delay = 0, ready = cycle; 3722 3723 for (int s = 0; insn->srcExists(s); ++s) 3724 checkRd(insn->getSrc(s), cycle, delay); 3725 3726 // TODO: make use of getReadLatency()! 3727 3728 return MAX2(delay, ready - cycle); 3729 } 3730 3731 void 3732 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay, 3733 const Instruction *next) 3734 { 3735 const OpClass cl = targ->getOpClass(insn->op); 3736 int wr, rd; 3737 3738 if (insn->op == OP_EXIT || 3739 insn->op == OP_BAR || 3740 insn->op == OP_MEMBAR) { 3741 delay = GM107_MAX_ISSUE_DELAY; 3742 } else 3743 if (insn->op == OP_QUADON || 3744 insn->op == OP_QUADPOP) { 3745 delay = 0xd; 3746 } else 3747 if (cl == OPCLASS_FLOW || insn->join) { 3748 delay = 0xd; 3749 } 3750 3751 if (!next || !targ->canDualIssue(insn, next)) { 3752 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY); 3753 } else { 3754 delay = 0x0; // dual-issue 3755 } 3756 3757 wr = getWrDepBar(insn); 3758 rd = getRdDepBar(insn); 3759 3760 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) { 3761 // Barriers take one additional clock cycle to become active on top of 3762 // the clock consumed by the instruction producing it. 3763 if (!next || insn->bb != next->bb) { 3764 delay = 0x2; 3765 } else { 3766 int wt = getWtDepBar(next); 3767 if ((wt & (1 << wr)) | (wt & (1 << rd))) 3768 delay = 0x2; 3769 } 3770 } 3771 3772 emitStall(insn, delay); 3773 } 3774 3775 3776 // Return true when the given instruction needs to emit a read dependency 3777 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and 3778 // setting the maximum number of stall counts is not enough. 3779 bool 3780 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const 3781 { 3782 BitSet srcs(255, 1), defs(255, 1); 3783 int a, b; 3784 3785 if (!targ->isBarrierRequired(insn)) 3786 return false; 3787 3788 // Do not emit a read dependency barrier when the instruction doesn't use 3789 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary. 3790 for (int s = 0; insn->srcExists(s); ++s) { 3791 const Value *src = insn->src(s).rep(); 3792 if (insn->src(s).getFile() != FILE_GPR) 3793 continue; 3794 if (src->reg.data.id == 255) 3795 continue; 3796 3797 a = src->reg.data.id; 3798 b = a + src->reg.size / 4; 3799 for (int r = a; r < b; ++r) 3800 srcs.set(r); 3801 } 3802 3803 if (!srcs.popCount()) 3804 return false; 3805 3806 // Do not emit a read dependency barrier when the output GPRs are equal to 3807 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will 3808 // be produced and WaR hazards are prevented. 3809 for (int d = 0; insn->defExists(d); ++d) { 3810 const Value *def = insn->def(d).rep(); 3811 if (insn->def(d).getFile() != FILE_GPR) 3812 continue; 3813 if (def->reg.data.id == 255) 3814 continue; 3815 3816 a = def->reg.data.id; 3817 b = a + def->reg.size / 4; 3818 for (int r = a; r < b; ++r) 3819 defs.set(r); 3820 } 3821 3822 srcs.andNot(defs); 3823 if (!srcs.popCount()) 3824 return false; 3825 3826 return true; 3827 } 3828 3829 // Return true when the given instruction needs to emit a write dependency 3830 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and 3831 // setting the maximum number of stall counts is not enough. This is only legal 3832 // if the instruction output something. 3833 bool 3834 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const 3835 { 3836 if (!targ->isBarrierRequired(insn)) 3837 return false; 3838 3839 for (int d = 0; insn->defExists(d); ++d) { 3840 if (insn->def(d).getFile() == FILE_GPR || 3841 insn->def(d).getFile() == FILE_PREDICATE) 3842 return true; 3843 } 3844 return false; 3845 } 3846 3847 // Find the next instruction inside the same basic block which uses the output 3848 // of the given instruction in order to avoid RaW hazards. 3849 Instruction * 3850 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const 3851 { 3852 Instruction *insn, *next; 3853 int minGPR, maxGPR; 3854 3855 if (!bari->defExists(0)) 3856 return NULL; 3857 3858 minGPR = bari->def(0).rep()->reg.data.id; 3859 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1; 3860 3861 for (insn = bari->next; insn != NULL; insn = next) { 3862 next = insn->next; 3863 3864 for (int s = 0; insn->srcExists(s); ++s) { 3865 const Value *src = insn->src(s).rep(); 3866 if (bari->def(0).getFile() == FILE_GPR) { 3867 if (insn->src(s).getFile() != FILE_GPR || 3868 src->reg.data.id + src->reg.size / 4 - 1 < minGPR || 3869 src->reg.data.id > maxGPR) 3870 continue; 3871 return insn; 3872 } else 3873 if (bari->def(0).getFile() == FILE_PREDICATE) { 3874 if (insn->src(s).getFile() != FILE_PREDICATE || 3875 src->reg.data.id != minGPR) 3876 continue; 3877 return insn; 3878 } 3879 } 3880 } 3881 return NULL; 3882 } 3883 3884 // Find the next instruction inside the same basic block which overwrites, at 3885 // least, one source of the given instruction in order to avoid WaR hazards. 3886 Instruction * 3887 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const 3888 { 3889 Instruction *insn, *next; 3890 int minGPR, maxGPR; 3891 3892 for (insn = bari->next; insn != NULL; insn = next) { 3893 next = insn->next; 3894 3895 for (int d = 0; insn->defExists(d); ++d) { 3896 const Value *def = insn->def(d).rep(); 3897 if (insn->def(d).getFile() != FILE_GPR) 3898 continue; 3899 3900 minGPR = def->reg.data.id; 3901 maxGPR = minGPR + def->reg.size / 4 - 1; 3902 3903 for (int s = 0; bari->srcExists(s); ++s) { 3904 const Value *src = bari->src(s).rep(); 3905 if (bari->src(s).getFile() != FILE_GPR || 3906 src->reg.data.id + src->reg.size / 4 - 1 < minGPR || 3907 src->reg.data.id > maxGPR) 3908 continue; 3909 return insn; 3910 } 3911 } 3912 } 3913 return NULL; 3914 } 3915 3916 // Dependency barriers: 3917 // This pass is a bit ugly and could probably be improved by performing a 3918 // better allocation. 3919 // 3920 // The main idea is to avoid WaR and RaW hazards by emitting read/write 3921 // dependency barriers using the control codes. 3922 bool 3923 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb) 3924 { 3925 std::list<LiveBarUse> live_uses; 3926 std::list<LiveBarDef> live_defs; 3927 Instruction *insn, *next; 3928 BitSet bars(6, 1); 3929 int bar_id; 3930 3931 for (insn = bb->getEntry(); insn != NULL; insn = next) { 3932 Instruction *usei = NULL, *defi = NULL; 3933 bool need_wr_bar, need_rd_bar; 3934 3935 next = insn->next; 3936 3937 // Expire old barrier uses. 3938 for (std::list<LiveBarUse>::iterator it = live_uses.begin(); 3939 it != live_uses.end();) { 3940 if (insn->serial >= it->usei->serial) { 3941 int wr = getWrDepBar(it->insn); 3942 emitWtDepBar(insn, wr); 3943 bars.clr(wr); // free barrier 3944 it = live_uses.erase(it); 3945 continue; 3946 } 3947 ++it; 3948 } 3949 3950 // Expire old barrier defs. 3951 for (std::list<LiveBarDef>::iterator it = live_defs.begin(); 3952 it != live_defs.end();) { 3953 if (insn->serial >= it->defi->serial) { 3954 int rd = getRdDepBar(it->insn); 3955 emitWtDepBar(insn, rd); 3956 bars.clr(rd); // free barrier 3957 it = live_defs.erase(it); 3958 continue; 3959 } 3960 ++it; 3961 } 3962 3963 need_wr_bar = needWrDepBar(insn); 3964 need_rd_bar = needRdDepBar(insn); 3965 3966 if (need_wr_bar) { 3967 // When the instruction requires to emit a write dependency barrier 3968 // (all which write something at a variable latency), find the next 3969 // instruction which reads the outputs. 3970 usei = findFirstUse(insn); 3971 3972 // Allocate and emit a new barrier. 3973 bar_id = bars.findFreeRange(1); 3974 if (bar_id == -1) 3975 bar_id = 5; 3976 bars.set(bar_id); 3977 emitWrDepBar(insn, bar_id); 3978 if (usei) 3979 live_uses.push_back(LiveBarUse(insn, usei)); 3980 } 3981 3982 if (need_rd_bar) { 3983 // When the instruction requires to emit a read dependency barrier 3984 // (all which read something at a variable latency), find the next 3985 // instruction which will write the inputs. 3986 defi = findFirstDef(insn); 3987 3988 if (usei && defi && usei->serial <= defi->serial) 3989 continue; 3990 3991 // Allocate and emit a new barrier. 3992 bar_id = bars.findFreeRange(1); 3993 if (bar_id == -1) 3994 bar_id = 5; 3995 bars.set(bar_id); 3996 emitRdDepBar(insn, bar_id); 3997 if (defi) 3998 live_defs.push_back(LiveBarDef(insn, defi)); 3999 } 4000 } 4001 4002 // Remove unnecessary barrier waits. 4003 BitSet alive_bars(6, 1); 4004 for (insn = bb->getEntry(); insn != NULL; insn = next) { 4005 int wr, rd, wt; 4006 4007 next = insn->next; 4008 4009 wr = getWrDepBar(insn); 4010 rd = getRdDepBar(insn); 4011 wt = getWtDepBar(insn); 4012 4013 for (int idx = 0; idx < 6; ++idx) { 4014 if (!(wt & (1 << idx))) 4015 continue; 4016 if (!alive_bars.test(idx)) { 4017 insn->sched &= ~(1 << (11 + idx)); 4018 } else { 4019 alive_bars.clr(idx); 4020 } 4021 } 4022 4023 if (wr < 6) 4024 alive_bars.set(wr); 4025 if (rd < 6) 4026 alive_bars.set(rd); 4027 } 4028 4029 return true; 4030 } 4031 4032 bool 4033 SchedDataCalculatorGM107::visit(Function *func) 4034 { 4035 ArrayList insns; 4036 4037 func->orderInstructions(insns); 4038 4039 scoreBoards.resize(func->cfg.getSize()); 4040 for (size_t i = 0; i < scoreBoards.size(); ++i) 4041 scoreBoards[i].wipe(); 4042 return true; 4043 } 4044 4045 bool 4046 SchedDataCalculatorGM107::visit(BasicBlock *bb) 4047 { 4048 Instruction *insn, *next = NULL; 4049 int cycle = 0; 4050 4051 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) { 4052 /*XXX*/ 4053 insn->sched = 0x7e0; 4054 } 4055 4056 if (!debug_get_bool_option("NV50_PROG_SCHED", true)) 4057 return true; 4058 4059 // Insert read/write dependency barriers for instructions which don't 4060 // operate at a fixed latency. 4061 insertBarriers(bb); 4062 4063 score = &scoreBoards.at(bb->getId()); 4064 4065 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 4066 // back branches will wait until all target dependencies are satisfied 4067 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized 4068 continue; 4069 BasicBlock *in = BasicBlock::get(ei.getNode()); 4070 score->setMax(&scoreBoards.at(in->getId())); 4071 } 4072 4073 #ifdef GM107_DEBUG_SCHED_DATA 4074 INFO("=== BB:%i initial scores\n", bb->getId()); 4075 score->print(cycle); 4076 #endif 4077 4078 // Because barriers are allocated locally (intra-BB), we have to make sure 4079 // that all produced barriers have been consumed before entering inside a 4080 // new basic block. The best way is to do a global allocation pre RA but 4081 // it's really more difficult, especially because of the phi nodes. Anyways, 4082 // it seems like that waiting on a barrier which has already been consumed 4083 // doesn't add any additional cost, it's just not elegant! 4084 Instruction *start = bb->getEntry(); 4085 if (start && bb->cfg.incidentCount() > 0) { 4086 for (int b = 0; b < 6; b++) 4087 emitWtDepBar(start, b); 4088 } 4089 4090 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) { 4091 next = insn->next; 4092 4093 commitInsn(insn, cycle); 4094 int delay = calcDelay(next, cycle); 4095 setDelay(insn, delay, next); 4096 cycle += getStall(insn); 4097 4098 setReuseFlag(insn); 4099 4100 // XXX: The yield flag seems to destroy a bunch of things when it is 4101 // set on every instruction, need investigation. 4102 //emitYield(insn); 4103 4104 #ifdef GM107_DEBUG_SCHED_DATA 4105 printSchedInfo(cycle, insn); 4106 insn->print(); 4107 next->print(); 4108 #endif 4109 } 4110 4111 if (!insn) 4112 return true; 4113 commitInsn(insn, cycle); 4114 4115 int bbDelay = -1; 4116 4117 #ifdef GM107_DEBUG_SCHED_DATA 4118 fprintf(stderr, "last instruction is : "); 4119 insn->print(); 4120 fprintf(stderr, "cycle=%d\n", cycle); 4121 #endif 4122 4123 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 4124 BasicBlock *out = BasicBlock::get(ei.getNode()); 4125 4126 if (ei.getType() != Graph::Edge::BACK) { 4127 // Only test the first instruction of the outgoing block. 4128 next = out->getEntry(); 4129 if (next) { 4130 bbDelay = MAX2(bbDelay, calcDelay(next, cycle)); 4131 } else { 4132 // When the outgoing BB is empty, make sure to set the number of 4133 // stall counts needed by the instruction because we don't know the 4134 // next instruction. 4135 bbDelay = MAX2(bbDelay, targ->getLatency(insn)); 4136 } 4137 } else { 4138 // Wait until all dependencies are satisfied. 4139 const int regsFree = score->getLatest(); 4140 next = out->getFirst(); 4141 for (int c = cycle; next && c < regsFree; next = next->next) { 4142 bbDelay = MAX2(bbDelay, calcDelay(next, c)); 4143 c += getStall(next); 4144 } 4145 next = NULL; 4146 } 4147 } 4148 if (bb->cfg.outgoingCount() != 1) 4149 next = NULL; 4150 setDelay(insn, bbDelay, next); 4151 cycle += getStall(insn); 4152 4153 score->rebase(cycle); // common base for initializing out blocks' scores 4154 return true; 4155 } 4156 4157 /******************************************************************************* 4158 * main 4159 ******************************************************************************/ 4160 4161 void 4162 CodeEmitterGM107::prepareEmission(Function *func) 4163 { 4164 SchedDataCalculatorGM107 sched(targGM107); 4165 CodeEmitter::prepareEmission(func); 4166 sched.run(func, true, true); 4167 } 4168 4169 static inline uint32_t sizeToBundlesGM107(uint32_t size) 4170 { 4171 return (size + 23) / 24; 4172 } 4173 4174 void 4175 CodeEmitterGM107::prepareEmission(Program *prog) 4176 { 4177 for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 4178 !fi.end(); fi.next()) { 4179 Function *func = reinterpret_cast<Function *>(fi.get()); 4180 func->binPos = prog->binSize; 4181 prepareEmission(func); 4182 4183 // adjust sizes & positions for schedulding info: 4184 if (prog->getTarget()->hasSWSched) { 4185 uint32_t adjPos = func->binPos; 4186 BasicBlock *bb = NULL; 4187 for (int i = 0; i < func->bbCount; ++i) { 4188 bb = func->bbArray[i]; 4189 int32_t adjSize = bb->binSize; 4190 if (adjPos % 32) { 4191 adjSize -= 32 - adjPos % 32; 4192 if (adjSize < 0) 4193 adjSize = 0; 4194 } 4195 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8; 4196 bb->binPos = adjPos; 4197 bb->binSize = adjSize; 4198 adjPos += adjSize; 4199 } 4200 if (bb) 4201 func->binSize = adjPos - func->binPos; 4202 } 4203 4204 prog->binSize += func->binSize; 4205 } 4206 } 4207 4208 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target) 4209 : CodeEmitter(target), 4210 targGM107(target), 4211 writeIssueDelays(target->hasSWSched) 4212 { 4213 code = NULL; 4214 codeSize = codeSizeLimit = 0; 4215 relocInfo = NULL; 4216 } 4217 4218 CodeEmitter * 4219 TargetGM107::createCodeEmitterGM107(Program::Type type) 4220 { 4221 CodeEmitterGM107 *emit = new CodeEmitterGM107(this); 4222 emit->setProgramType(type); 4223 return emit; 4224 } 4225 4226 } // namespace nv50_ir 4227