1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "codegen/nv50_ir.h" 24 #include "codegen/nv50_ir_target_nv50.h" 25 26 namespace nv50_ir { 27 28 #define NV50_OP_ENC_LONG 0 29 #define NV50_OP_ENC_SHORT 1 30 #define NV50_OP_ENC_IMM 2 31 #define NV50_OP_ENC_LONG_ALT 3 32 33 class CodeEmitterNV50 : public CodeEmitter 34 { 35 public: 36 CodeEmitterNV50(const TargetNV50 *); 37 38 virtual bool emitInstruction(Instruction *); 39 40 virtual uint32_t getMinEncodingSize(const Instruction *) const; 41 42 inline void setProgramType(Program::Type pType) { progType = pType; } 43 44 virtual void prepareEmission(Function *); 45 46 private: 47 Program::Type progType; 48 49 const TargetNV50 *targNV50; 50 51 private: 52 inline void defId(const ValueDef&, const int pos); 53 inline void srcId(const ValueRef&, const int pos); 54 inline void srcId(const ValueRef *, const int pos); 55 56 inline void srcAddr16(const ValueRef&, bool adj, const int pos); 57 inline void srcAddr8(const ValueRef&, const int pos); 58 59 void emitFlagsRd(const Instruction *); 60 void emitFlagsWr(const Instruction *); 61 62 void emitCondCode(CondCode cc, DataType ty, int pos); 63 64 inline void setARegBits(unsigned int); 65 66 void setAReg16(const Instruction *, int s); 67 void setImmediate(const Instruction *, int s); 68 69 void setDst(const Value *); 70 void setDst(const Instruction *, int d); 71 void setSrcFileBits(const Instruction *, int enc); 72 void setSrc(const Instruction *, unsigned int s, int slot); 73 74 void emitForm_MAD(const Instruction *); 75 void emitForm_ADD(const Instruction *); 76 void emitForm_MUL(const Instruction *); 77 void emitForm_IMM(const Instruction *); 78 79 void emitLoadStoreSizeLG(DataType ty, int pos); 80 void emitLoadStoreSizeCS(DataType ty); 81 82 void roundMode_MAD(const Instruction *); 83 void roundMode_CVT(RoundMode); 84 85 void emitMNeg12(const Instruction *); 86 87 void emitLOAD(const Instruction *); 88 void emitSTORE(const Instruction *); 89 void emitMOV(const Instruction *); 90 void emitRDSV(const Instruction *); 91 void emitNOP(); 92 void emitINTERP(const Instruction *); 93 void emitPFETCH(const Instruction *); 94 void emitOUT(const Instruction *); 95 96 void emitUADD(const Instruction *); 97 void emitAADD(const Instruction *); 98 void emitFADD(const Instruction *); 99 void emitDADD(const Instruction *); 100 void emitIMUL(const Instruction *); 101 void emitFMUL(const Instruction *); 102 void emitDMUL(const Instruction *); 103 void emitFMAD(const Instruction *); 104 void emitDMAD(const Instruction *); 105 void emitIMAD(const Instruction *); 106 void emitISAD(const Instruction *); 107 108 void emitMINMAX(const Instruction *); 109 110 void emitPreOp(const Instruction *); 111 void emitSFnOp(const Instruction *, uint8_t subOp); 112 113 void emitShift(const Instruction *); 114 void emitARL(const Instruction *, unsigned int shl); 115 void emitLogicOp(const Instruction *); 116 void emitNOT(const Instruction *); 117 118 void emitCVT(const Instruction *); 119 void emitSET(const Instruction *); 120 121 void emitTEX(const TexInstruction *); 122 void emitTXQ(const TexInstruction *); 123 void emitTEXPREP(const TexInstruction *); 124 125 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp); 126 127 void emitFlow(const Instruction *, uint8_t flowOp); 128 void emitPRERETEmu(const FlowInstruction *); 129 void emitBAR(const Instruction *); 130 131 void emitATOM(const Instruction *); 132 }; 133 134 #define SDATA(a) ((a).rep()->reg.data) 135 #define DDATA(a) ((a).rep()->reg.data) 136 137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos) 138 { 139 assert(src.get()); 140 code[pos / 32] |= SDATA(src).id << (pos % 32); 141 } 142 143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos) 144 { 145 assert(src->get()); 146 code[pos / 32] |= SDATA(*src).id << (pos % 32); 147 } 148 149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos) 150 { 151 assert(src.get()); 152 153 int32_t offset = SDATA(src).offset; 154 155 assert(!adj || src.get()->reg.size <= 4); 156 if (adj) 157 offset /= src.get()->reg.size; 158 159 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16); 160 161 if (offset < 0) 162 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff; 163 164 code[pos / 32] |= offset << (pos % 32); 165 } 166 167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos) 168 { 169 assert(src.get()); 170 171 uint32_t offset = SDATA(src).offset; 172 173 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3)); 174 175 code[pos / 32] |= (offset >> 2) << (pos % 32); 176 } 177 178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos) 179 { 180 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT); 181 182 code[pos / 32] |= DDATA(def).id << (pos % 32); 183 } 184 185 void 186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn) 187 { 188 switch (insn->rnd) { 189 case ROUND_M: code[1] |= 1 << 22; break; 190 case ROUND_P: code[1] |= 2 << 22; break; 191 case ROUND_Z: code[1] |= 3 << 22; break; 192 default: 193 assert(insn->rnd == ROUND_N); 194 break; 195 } 196 } 197 198 void 199 CodeEmitterNV50::emitMNeg12(const Instruction *i) 200 { 201 code[1] |= i->src(0).mod.neg() << 26; 202 code[1] |= i->src(1).mod.neg() << 27; 203 } 204 205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos) 206 { 207 uint8_t enc; 208 209 assert(pos >= 32 || pos <= 27); 210 211 switch (cc) { 212 case CC_LT: enc = 0x1; break; 213 case CC_LTU: enc = 0x9; break; 214 case CC_EQ: enc = 0x2; break; 215 case CC_EQU: enc = 0xa; break; 216 case CC_LE: enc = 0x3; break; 217 case CC_LEU: enc = 0xb; break; 218 case CC_GT: enc = 0x4; break; 219 case CC_GTU: enc = 0xc; break; 220 case CC_NE: enc = 0x5; break; 221 case CC_NEU: enc = 0xd; break; 222 case CC_GE: enc = 0x6; break; 223 case CC_GEU: enc = 0xe; break; 224 case CC_TR: enc = 0xf; break; 225 case CC_FL: enc = 0x0; break; 226 227 case CC_O: enc = 0x10; break; 228 case CC_C: enc = 0x11; break; 229 case CC_A: enc = 0x12; break; 230 case CC_S: enc = 0x13; break; 231 case CC_NS: enc = 0x1c; break; 232 case CC_NA: enc = 0x1d; break; 233 case CC_NC: enc = 0x1e; break; 234 case CC_NO: enc = 0x1f; break; 235 236 default: 237 enc = 0; 238 assert(!"invalid condition code"); 239 break; 240 } 241 if (ty != TYPE_NONE && !isFloatType(ty)) 242 enc &= ~0x8; // unordered only exists for float types 243 244 code[pos / 32] |= enc << (pos % 32); 245 } 246 247 void 248 CodeEmitterNV50::emitFlagsRd(const Instruction *i) 249 { 250 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc; 251 252 assert(!(code[1] & 0x00003f80)); 253 254 if (s >= 0) { 255 assert(i->getSrc(s)->reg.file == FILE_FLAGS); 256 emitCondCode(i->cc, TYPE_NONE, 32 + 7); 257 srcId(i->src(s), 32 + 12); 258 } else { 259 code[1] |= 0x0780; 260 } 261 } 262 263 void 264 CodeEmitterNV50::emitFlagsWr(const Instruction *i) 265 { 266 assert(!(code[1] & 0x70)); 267 268 int flagsDef = i->flagsDef; 269 270 // find flags definition and check that it is the last def 271 if (flagsDef < 0) { 272 for (int d = 0; i->defExists(d); ++d) 273 if (i->def(d).getFile() == FILE_FLAGS) 274 flagsDef = d; 275 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point 276 WARN("Instruction::flagsDef was not set properly\n"); 277 } 278 if (flagsDef == 0 && i->defExists(1)) 279 WARN("flags def should not be the primary definition\n"); 280 281 if (flagsDef >= 0) 282 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40; 283 284 } 285 286 void 287 CodeEmitterNV50::setARegBits(unsigned int u) 288 { 289 code[0] |= (u & 3) << 26; 290 code[1] |= (u & 4); 291 } 292 293 void 294 CodeEmitterNV50::setAReg16(const Instruction *i, int s) 295 { 296 if (i->srcExists(s)) { 297 s = i->src(s).indirect[0]; 298 if (s >= 0) 299 setARegBits(SDATA(i->src(s)).id + 1); 300 } 301 } 302 303 void 304 CodeEmitterNV50::setImmediate(const Instruction *i, int s) 305 { 306 const ImmediateValue *imm = i->src(s).get()->asImm(); 307 assert(imm); 308 309 uint32_t u = imm->reg.data.u32; 310 311 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) 312 u = ~u; 313 314 code[1] |= 3; 315 code[0] |= (u & 0x3f) << 16; 316 code[1] |= (u >> 6) << 2; 317 } 318 319 void 320 CodeEmitterNV50::setDst(const Value *dst) 321 { 322 const Storage *reg = &dst->join->reg; 323 324 assert(reg->file != FILE_ADDRESS); 325 326 if (reg->data.id < 0 || reg->file == FILE_FLAGS) { 327 code[0] |= (127 << 2) | 1; 328 code[1] |= 8; 329 } else { 330 int id; 331 if (reg->file == FILE_SHADER_OUTPUT) { 332 code[1] |= 8; 333 id = reg->data.offset / 4; 334 } else { 335 id = reg->data.id; 336 } 337 code[0] |= id << 2; 338 } 339 } 340 341 void 342 CodeEmitterNV50::setDst(const Instruction *i, int d) 343 { 344 if (i->defExists(d)) { 345 setDst(i->getDef(d)); 346 } else 347 if (!d) { 348 code[0] |= 0x01fc; // bit bucket 349 code[1] |= 0x0008; 350 } 351 } 352 353 // 3 * 2 bits: 354 // 0: r 355 // 1: a/s 356 // 2: c 357 // 3: i 358 void 359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) 360 { 361 uint8_t mode = 0; 362 363 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) { 364 switch (i->src(s).getFile()) { 365 case FILE_GPR: 366 break; 367 case FILE_MEMORY_SHARED: 368 case FILE_SHADER_INPUT: 369 mode |= 1 << (s * 2); 370 break; 371 case FILE_MEMORY_CONST: 372 mode |= 2 << (s * 2); 373 break; 374 case FILE_IMMEDIATE: 375 mode |= 3 << (s * 2); 376 break; 377 default: 378 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); 379 assert(0); 380 break; 381 } 382 } 383 switch (mode) { 384 case 0x00: // rrr 385 break; 386 case 0x01: // arr/grr 387 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { 388 code[0] |= 0x01800000; 389 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT) 390 code[1] |= 0x00200000; 391 } else { 392 if (enc == NV50_OP_ENC_SHORT) 393 code[0] |= 0x01000000; 394 else 395 code[1] |= 0x00200000; 396 } 397 break; 398 case 0x03: // irr 399 assert(i->op == OP_MOV); 400 return; 401 case 0x0c: // rir 402 break; 403 case 0x0d: // gir 404 assert(progType == Program::TYPE_GEOMETRY || 405 progType == Program::TYPE_COMPUTE); 406 code[0] |= 0x01000000; 407 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { 408 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id; 409 assert(reg < 3); 410 code[0] |= (reg + 1) << 26; 411 } 412 break; 413 case 0x08: // rcr 414 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; 415 code[1] |= (i->getSrc(1)->reg.fileIndex << 22); 416 break; 417 case 0x09: // acr/gcr 418 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { 419 code[0] |= 0x01800000; 420 } else { 421 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; 422 code[1] |= 0x00200000; 423 } 424 code[1] |= (i->getSrc(1)->reg.fileIndex << 22); 425 break; 426 case 0x20: // rrc 427 code[0] |= 0x01000000; 428 code[1] |= (i->getSrc(2)->reg.fileIndex << 22); 429 break; 430 case 0x21: // arc 431 code[0] |= 0x01000000; 432 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22); 433 assert(progType != Program::TYPE_GEOMETRY); 434 break; 435 default: 436 ERROR("not encodable: %x\n", mode); 437 assert(0); 438 break; 439 } 440 if (progType != Program::TYPE_COMPUTE) 441 return; 442 443 if ((mode & 3) == 1) { 444 const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14; 445 446 switch (i->sType) { 447 case TYPE_U8: 448 break; 449 case TYPE_U16: 450 code[0] |= 1 << pos; 451 break; 452 case TYPE_S16: 453 code[0] |= 2 << pos; 454 break; 455 default: 456 code[0] |= 3 << pos; 457 assert(i->getSrc(0)->reg.size == 4); 458 break; 459 } 460 } 461 } 462 463 void 464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot) 465 { 466 if (Target::operationSrcNr[i->op] <= s) 467 return; 468 const Storage *reg = &i->src(s).rep()->reg; 469 470 unsigned int id = (reg->file == FILE_GPR) ? 471 reg->data.id : 472 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here 473 474 switch (slot) { 475 case 0: code[0] |= id << 9; break; 476 case 1: code[0] |= id << 16; break; 477 case 2: code[1] |= id << 14; break; 478 default: 479 assert(0); 480 break; 481 } 482 } 483 484 // the default form: 485 // - long instruction 486 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr) 487 // - address & flags 488 void 489 CodeEmitterNV50::emitForm_MAD(const Instruction *i) 490 { 491 assert(i->encSize == 8); 492 code[0] |= 1; 493 494 emitFlagsRd(i); 495 emitFlagsWr(i); 496 497 setDst(i, 0); 498 499 setSrcFileBits(i, NV50_OP_ENC_LONG); 500 setSrc(i, 0, 0); 501 setSrc(i, 1, 1); 502 setSrc(i, 2, 2); 503 504 if (i->getIndirect(0, 0)) { 505 assert(!i->srcExists(1) || !i->getIndirect(1, 0)); 506 assert(!i->srcExists(2) || !i->getIndirect(2, 0)); 507 setAReg16(i, 0); 508 } else if (i->srcExists(1) && i->getIndirect(1, 0)) { 509 assert(!i->srcExists(2) || !i->getIndirect(2, 0)); 510 setAReg16(i, 1); 511 } else { 512 setAReg16(i, 2); 513 } 514 } 515 516 // like default form, but 2nd source in slot 2, and no 3rd source 517 void 518 CodeEmitterNV50::emitForm_ADD(const Instruction *i) 519 { 520 assert(i->encSize == 8); 521 code[0] |= 1; 522 523 emitFlagsRd(i); 524 emitFlagsWr(i); 525 526 setDst(i, 0); 527 528 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); 529 setSrc(i, 0, 0); 530 if (i->predSrc != 1) 531 setSrc(i, 1, 2); 532 533 if (i->getIndirect(0, 0)) { 534 assert(!i->getIndirect(1, 0)); 535 setAReg16(i, 0); 536 } else { 537 setAReg16(i, 1); 538 } 539 } 540 541 // default short form (rr, ar, rc, gr) 542 void 543 CodeEmitterNV50::emitForm_MUL(const Instruction *i) 544 { 545 assert(i->encSize == 4 && !(code[0] & 1)); 546 assert(i->defExists(0)); 547 assert(!i->getPredicate()); 548 549 setDst(i, 0); 550 551 setSrcFileBits(i, NV50_OP_ENC_SHORT); 552 setSrc(i, 0, 0); 553 setSrc(i, 1, 1); 554 } 555 556 // usual immediate form 557 // - 1 to 3 sources where second is immediate (rir, gir) 558 // - no address or predicate possible 559 void 560 CodeEmitterNV50::emitForm_IMM(const Instruction *i) 561 { 562 assert(i->encSize == 8); 563 code[0] |= 1; 564 565 assert(i->defExists(0) && i->srcExists(0)); 566 567 setDst(i, 0); 568 569 setSrcFileBits(i, NV50_OP_ENC_IMM); 570 if (Target::operationSrcNr[i->op] > 1) { 571 setSrc(i, 0, 0); 572 setImmediate(i, 1); 573 // If there is another source, it has to be the same as the dest reg. 574 } else { 575 setImmediate(i, 0); 576 } 577 } 578 579 void 580 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos) 581 { 582 uint8_t enc; 583 584 switch (ty) { 585 case TYPE_F32: // fall through 586 case TYPE_S32: // fall through 587 case TYPE_U32: enc = 0x6; break; 588 case TYPE_B128: enc = 0x5; break; 589 case TYPE_F64: // fall through 590 case TYPE_S64: // fall through 591 case TYPE_U64: enc = 0x4; break; 592 case TYPE_S16: enc = 0x3; break; 593 case TYPE_U16: enc = 0x2; break; 594 case TYPE_S8: enc = 0x1; break; 595 case TYPE_U8: enc = 0x0; break; 596 default: 597 enc = 0; 598 assert(!"invalid load/store type"); 599 break; 600 } 601 code[pos / 32] |= enc << (pos % 32); 602 } 603 604 void 605 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) 606 { 607 switch (ty) { 608 case TYPE_U8: break; 609 case TYPE_U16: code[1] |= 0x4000; break; 610 case TYPE_S16: code[1] |= 0x8000; break; 611 case TYPE_F32: 612 case TYPE_S32: 613 case TYPE_U32: code[1] |= 0xc000; break; 614 default: 615 assert(0); 616 break; 617 } 618 } 619 620 void 621 CodeEmitterNV50::emitLOAD(const Instruction *i) 622 { 623 DataFile sf = i->src(0).getFile(); 624 MAYBE_UNUSED int32_t offset = i->getSrc(0)->reg.data.offset; 625 626 switch (sf) { 627 case FILE_SHADER_INPUT: 628 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) 629 code[0] = 0x11800001; 630 else 631 // use 'mov' where we can 632 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001; 633 code[1] = 0x00200000 | (i->lanes << 14); 634 if (typeSizeof(i->dType) == 4) 635 code[1] |= 0x04000000; 636 break; 637 case FILE_MEMORY_SHARED: 638 if (targ->getChipset() >= 0x84) { 639 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); 640 code[0] = 0x10000001; 641 code[1] = 0x40000000; 642 643 if (typeSizeof(i->dType) == 4) 644 code[1] |= 0x04000000; 645 646 emitLoadStoreSizeCS(i->sType); 647 } else { 648 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType))); 649 code[0] = 0x10000001; 650 code[1] = 0x00200000 | (i->lanes << 14); 651 emitLoadStoreSizeCS(i->sType); 652 } 653 break; 654 case FILE_MEMORY_CONST: 655 code[0] = 0x10000001; 656 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22); 657 if (typeSizeof(i->dType) == 4) 658 code[1] |= 0x04000000; 659 emitLoadStoreSizeCS(i->sType); 660 break; 661 case FILE_MEMORY_LOCAL: 662 code[0] = 0xd0000001; 663 code[1] = 0x40000000; 664 break; 665 case FILE_MEMORY_GLOBAL: 666 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); 667 code[1] = 0x80000000; 668 break; 669 default: 670 assert(!"invalid load source file"); 671 break; 672 } 673 if (sf == FILE_MEMORY_LOCAL || 674 sf == FILE_MEMORY_GLOBAL) 675 emitLoadStoreSizeLG(i->sType, 21 + 32); 676 677 setDst(i, 0); 678 679 emitFlagsRd(i); 680 emitFlagsWr(i); 681 682 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { 683 srcId(*i->src(0).getIndirect(0), 9); 684 } else { 685 setAReg16(i, 0); 686 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9); 687 } 688 } 689 690 void 691 CodeEmitterNV50::emitSTORE(const Instruction *i) 692 { 693 DataFile f = i->getSrc(0)->reg.file; 694 int32_t offset = i->getSrc(0)->reg.data.offset; 695 696 switch (f) { 697 case FILE_SHADER_OUTPUT: 698 code[0] = 0x00000001 | ((offset >> 2) << 9); 699 code[1] = 0x80c00000; 700 srcId(i->src(1), 32 + 14); 701 break; 702 case FILE_MEMORY_GLOBAL: 703 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); 704 code[1] = 0xa0000000; 705 emitLoadStoreSizeLG(i->dType, 21 + 32); 706 srcId(i->src(1), 2); 707 break; 708 case FILE_MEMORY_LOCAL: 709 code[0] = 0xd0000001; 710 code[1] = 0x60000000; 711 emitLoadStoreSizeLG(i->dType, 21 + 32); 712 srcId(i->src(1), 2); 713 break; 714 case FILE_MEMORY_SHARED: 715 code[0] = 0x00000001; 716 code[1] = 0xe0000000; 717 switch (typeSizeof(i->dType)) { 718 case 1: 719 code[0] |= offset << 9; 720 code[1] |= 0x00400000; 721 break; 722 case 2: 723 code[0] |= (offset >> 1) << 9; 724 break; 725 case 4: 726 code[0] |= (offset >> 2) << 9; 727 code[1] |= 0x04200000; 728 break; 729 default: 730 assert(0); 731 break; 732 } 733 srcId(i->src(1), 32 + 14); 734 break; 735 default: 736 assert(!"invalid store destination file"); 737 break; 738 } 739 740 if (f == FILE_MEMORY_GLOBAL) 741 srcId(*i->src(0).getIndirect(0), 9); 742 else 743 setAReg16(i, 0); 744 745 if (f == FILE_MEMORY_LOCAL) 746 srcAddr16(i->src(0), false, 9); 747 748 emitFlagsRd(i); 749 } 750 751 void 752 CodeEmitterNV50::emitMOV(const Instruction *i) 753 { 754 DataFile sf = i->getSrc(0)->reg.file; 755 DataFile df = i->getDef(0)->reg.file; 756 757 assert(sf == FILE_GPR || df == FILE_GPR); 758 759 if (sf == FILE_FLAGS) { 760 assert(i->flagsSrc >= 0); 761 code[0] = 0x00000001; 762 code[1] = 0x20000000; 763 defId(i->def(0), 2); 764 emitFlagsRd(i); 765 } else 766 if (sf == FILE_ADDRESS) { 767 code[0] = 0x00000001; 768 code[1] = 0x40000000; 769 defId(i->def(0), 2); 770 setARegBits(SDATA(i->src(0)).id + 1); 771 emitFlagsRd(i); 772 } else 773 if (df == FILE_FLAGS) { 774 assert(i->flagsDef >= 0); 775 code[0] = 0x00000001; 776 code[1] = 0xa0000000; 777 srcId(i->src(0), 9); 778 emitFlagsRd(i); 779 emitFlagsWr(i); 780 } else 781 if (sf == FILE_IMMEDIATE) { 782 code[0] = 0x10008001; 783 code[1] = 0x00000003; 784 emitForm_IMM(i); 785 } else { 786 if (i->encSize == 4) { 787 code[0] = 0x10008000; 788 } else { 789 code[0] = 0x10000001; 790 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; 791 code[1] |= (i->lanes << 14); 792 emitFlagsRd(i); 793 } 794 defId(i->def(0), 2); 795 srcId(i->src(0), 9); 796 } 797 if (df == FILE_SHADER_OUTPUT) { 798 assert(i->encSize == 8); 799 code[1] |= 0x8; 800 } 801 } 802 803 static inline uint8_t getSRegEncoding(const ValueRef &ref) 804 { 805 switch (SDATA(ref).sv.sv) { 806 case SV_PHYSID: return 0; 807 case SV_CLOCK: return 1; 808 case SV_VERTEX_STRIDE: return 3; 809 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index; 810 case SV_SAMPLE_INDEX: return 8; 811 default: 812 assert(!"no sreg for system value"); 813 return 0; 814 } 815 } 816 817 void 818 CodeEmitterNV50::emitRDSV(const Instruction *i) 819 { 820 code[0] = 0x00000001; 821 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14); 822 defId(i->def(0), 2); 823 emitFlagsRd(i); 824 } 825 826 void 827 CodeEmitterNV50::emitNOP() 828 { 829 code[0] = 0xf0000001; 830 code[1] = 0xe0000000; 831 } 832 833 void 834 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) 835 { 836 code[0] = 0xc0000000 | (lane << 16); 837 code[1] = 0x80000000; 838 839 code[0] |= (quOp & 0x03) << 20; 840 code[1] |= (quOp & 0xfc) << 20; 841 842 emitForm_ADD(i); 843 844 if (!i->srcExists(1) || i->predSrc == 1) 845 srcId(i->src(0), 32 + 14); 846 } 847 848 /* NOTE: This returns the base address of a vertex inside the primitive. 849 * src0 is an immediate, the index (not offset) of the vertex 850 * inside the primitive. XXX: signed or unsigned ? 851 * src1 (may be NULL) should use whatever units the hardware requires 852 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value). 853 */ 854 void 855 CodeEmitterNV50::emitPFETCH(const Instruction *i) 856 { 857 const uint32_t prim = i->src(0).get()->reg.data.u32; 858 assert(prim <= 127); 859 860 if (i->def(0).getFile() == FILE_ADDRESS) { 861 // shl $aX a[] 0 862 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2); 863 code[1] = 0xc0200000; 864 code[0] |= prim << 9; 865 assert(!i->srcExists(1)); 866 } else 867 if (i->srcExists(1)) { 868 // ld b32 $rX a[$aX+base] 869 code[0] = 0x00000001; 870 code[1] = 0x04200000 | (0xf << 14); 871 defId(i->def(0), 2); 872 code[0] |= prim << 9; 873 setARegBits(SDATA(i->src(1)).id + 1); 874 } else { 875 // mov b32 $rX a[] 876 code[0] = 0x10000001; 877 code[1] = 0x04200000 | (0xf << 14); 878 defId(i->def(0), 2); 879 code[0] |= prim << 9; 880 } 881 emitFlagsRd(i); 882 } 883 884 static void 885 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) 886 { 887 int ipa = entry->ipa; 888 int encSize = entry->reg; 889 int loc = entry->loc; 890 891 if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && 892 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { 893 if (data.force_persample_interp) { 894 if (encSize == 8) 895 code[loc + 1] |= 1 << 16; 896 else 897 code[loc + 0] |= 1 << 24; 898 } else { 899 if (encSize == 8) 900 code[loc + 1] &= ~(1 << 16); 901 else 902 code[loc + 0] &= ~(1 << 24); 903 } 904 } 905 } 906 907 void 908 CodeEmitterNV50::emitINTERP(const Instruction *i) 909 { 910 code[0] = 0x80000000; 911 912 defId(i->def(0), 2); 913 srcAddr8(i->src(0), 16); 914 setAReg16(i, 0); 915 916 if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) { 917 code[0] |= 1 << 8; 918 } else { 919 if (i->op == OP_PINTERP) { 920 code[0] |= 1 << 25; 921 srcId(i->src(1), 9); 922 } 923 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID) 924 code[0] |= 1 << 24; 925 } 926 927 if (i->encSize == 8) { 928 if (i->getInterpMode() == NV50_IR_INTERP_FLAT) 929 code[1] = 4 << 16; 930 else 931 code[1] = (code[0] & (3 << 24)) >> (24 - 16); 932 code[0] &= ~0x03000000; 933 code[0] |= 1; 934 emitFlagsRd(i); 935 } 936 937 addInterp(i->ipa, i->encSize, interpApply); 938 } 939 940 void 941 CodeEmitterNV50::emitMINMAX(const Instruction *i) 942 { 943 if (i->dType == TYPE_F64) { 944 code[0] = 0xe0000000; 945 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000; 946 } else { 947 code[0] = 0x30000000; 948 code[1] = 0x80000000; 949 if (i->op == OP_MIN) 950 code[1] |= 0x20000000; 951 952 switch (i->dType) { 953 case TYPE_F32: code[0] |= 0x80000000; break; 954 case TYPE_S32: code[1] |= 0x8c000000; break; 955 case TYPE_U32: code[1] |= 0x84000000; break; 956 case TYPE_S16: code[1] |= 0x80000000; break; 957 case TYPE_U16: break; 958 default: 959 assert(0); 960 break; 961 } 962 } 963 964 code[1] |= i->src(0).mod.abs() << 20; 965 code[1] |= i->src(0).mod.neg() << 26; 966 code[1] |= i->src(1).mod.abs() << 19; 967 code[1] |= i->src(1).mod.neg() << 27; 968 969 emitForm_MAD(i); 970 } 971 972 void 973 CodeEmitterNV50::emitFMAD(const Instruction *i) 974 { 975 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); 976 const int neg_add = i->src(2).mod.neg(); 977 978 code[0] = 0xe0000000; 979 980 if (i->src(1).getFile() == FILE_IMMEDIATE) { 981 code[1] = 0; 982 emitForm_IMM(i); 983 code[0] |= neg_mul << 15; 984 code[0] |= neg_add << 22; 985 if (i->saturate) 986 code[0] |= 1 << 8; 987 } else 988 if (i->encSize == 4) { 989 emitForm_MUL(i); 990 code[0] |= neg_mul << 15; 991 code[0] |= neg_add << 22; 992 if (i->saturate) 993 code[0] |= 1 << 8; 994 } else { 995 code[1] = neg_mul << 26; 996 code[1] |= neg_add << 27; 997 if (i->saturate) 998 code[1] |= 1 << 29; 999 emitForm_MAD(i); 1000 } 1001 } 1002 1003 void 1004 CodeEmitterNV50::emitDMAD(const Instruction *i) 1005 { 1006 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); 1007 const int neg_add = i->src(2).mod.neg(); 1008 1009 assert(i->encSize == 8); 1010 assert(!i->saturate); 1011 1012 code[1] = 0x40000000; 1013 code[0] = 0xe0000000; 1014 1015 code[1] |= neg_mul << 26; 1016 code[1] |= neg_add << 27; 1017 1018 roundMode_MAD(i); 1019 1020 emitForm_MAD(i); 1021 } 1022 1023 void 1024 CodeEmitterNV50::emitFADD(const Instruction *i) 1025 { 1026 const int neg0 = i->src(0).mod.neg(); 1027 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); 1028 1029 code[0] = 0xb0000000; 1030 1031 assert(!(i->src(0).mod | i->src(1).mod).abs()); 1032 1033 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1034 code[1] = 0; 1035 emitForm_IMM(i); 1036 code[0] |= neg0 << 15; 1037 code[0] |= neg1 << 22; 1038 if (i->saturate) 1039 code[0] |= 1 << 8; 1040 } else 1041 if (i->encSize == 8) { 1042 code[1] = 0; 1043 emitForm_ADD(i); 1044 code[1] |= neg0 << 26; 1045 code[1] |= neg1 << 27; 1046 if (i->saturate) 1047 code[1] |= 1 << 29; 1048 } else { 1049 emitForm_MUL(i); 1050 code[0] |= neg0 << 15; 1051 code[0] |= neg1 << 22; 1052 if (i->saturate) 1053 code[0] |= 1 << 8; 1054 } 1055 } 1056 1057 void 1058 CodeEmitterNV50::emitDADD(const Instruction *i) 1059 { 1060 const int neg0 = i->src(0).mod.neg(); 1061 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); 1062 1063 assert(!(i->src(0).mod | i->src(1).mod).abs()); 1064 assert(!i->saturate); 1065 assert(i->encSize == 8); 1066 1067 code[1] = 0x60000000; 1068 code[0] = 0xe0000000; 1069 1070 emitForm_ADD(i); 1071 1072 code[1] |= neg0 << 26; 1073 code[1] |= neg1 << 27; 1074 } 1075 1076 void 1077 CodeEmitterNV50::emitUADD(const Instruction *i) 1078 { 1079 const int neg0 = i->src(0).mod.neg(); 1080 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); 1081 1082 code[0] = 0x20008000; 1083 1084 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1085 code[1] = 0; 1086 emitForm_IMM(i); 1087 } else 1088 if (i->encSize == 8) { 1089 code[0] = 0x20000000; 1090 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; 1091 emitForm_ADD(i); 1092 } else { 1093 emitForm_MUL(i); 1094 } 1095 assert(!(neg0 && neg1)); 1096 code[0] |= neg0 << 28; 1097 code[0] |= neg1 << 22; 1098 1099 if (i->flagsSrc >= 0) { 1100 // addc == sub | subr 1101 assert(!(code[0] & 0x10400000) && !i->getPredicate()); 1102 code[0] |= 0x10400000; 1103 srcId(i->src(i->flagsSrc), 32 + 12); 1104 } 1105 } 1106 1107 void 1108 CodeEmitterNV50::emitAADD(const Instruction *i) 1109 { 1110 const int s = (i->op == OP_MOV) ? 0 : 1; 1111 1112 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9); 1113 code[1] = 0x20000000; 1114 1115 code[0] |= (DDATA(i->def(0)).id + 1) << 2; 1116 1117 emitFlagsRd(i); 1118 1119 if (s && i->srcExists(0)) 1120 setARegBits(SDATA(i->src(0)).id + 1); 1121 } 1122 1123 void 1124 CodeEmitterNV50::emitIMUL(const Instruction *i) 1125 { 1126 code[0] = 0x40000000; 1127 1128 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1129 if (i->sType == TYPE_S16) 1130 code[0] |= 0x8100; 1131 code[1] = 0; 1132 emitForm_IMM(i); 1133 } else 1134 if (i->encSize == 8) { 1135 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; 1136 emitForm_MAD(i); 1137 } else { 1138 if (i->sType == TYPE_S16) 1139 code[0] |= 0x8100; 1140 emitForm_MUL(i); 1141 } 1142 } 1143 1144 void 1145 CodeEmitterNV50::emitFMUL(const Instruction *i) 1146 { 1147 const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); 1148 1149 code[0] = 0xc0000000; 1150 1151 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1152 code[1] = 0; 1153 emitForm_IMM(i); 1154 if (neg) 1155 code[0] |= 0x8000; 1156 if (i->saturate) 1157 code[0] |= 1 << 8; 1158 } else 1159 if (i->encSize == 8) { 1160 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0; 1161 if (neg) 1162 code[1] |= 0x08000000; 1163 if (i->saturate) 1164 code[1] |= 1 << 20; 1165 emitForm_MAD(i); 1166 } else { 1167 emitForm_MUL(i); 1168 if (neg) 1169 code[0] |= 0x8000; 1170 if (i->saturate) 1171 code[0] |= 1 << 8; 1172 } 1173 } 1174 1175 void 1176 CodeEmitterNV50::emitDMUL(const Instruction *i) 1177 { 1178 const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); 1179 1180 assert(!i->saturate); 1181 assert(i->encSize == 8); 1182 1183 code[1] = 0x80000000; 1184 code[0] = 0xe0000000; 1185 1186 if (neg) 1187 code[1] |= 0x08000000; 1188 1189 roundMode_CVT(i->rnd); 1190 1191 emitForm_MAD(i); 1192 } 1193 1194 void 1195 CodeEmitterNV50::emitIMAD(const Instruction *i) 1196 { 1197 int mode; 1198 code[0] = 0x60000000; 1199 1200 assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod); 1201 if (!isSignedType(i->sType)) 1202 mode = 0; 1203 else if (i->saturate) 1204 mode = 2; 1205 else 1206 mode = 1; 1207 1208 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1209 code[1] = 0; 1210 emitForm_IMM(i); 1211 code[0] |= (mode & 1) << 8 | (mode & 2) << 14; 1212 if (i->flagsSrc >= 0) { 1213 assert(!(code[0] & 0x10400000)); 1214 assert(SDATA(i->src(i->flagsSrc)).id == 0); 1215 code[0] |= 0x10400000; 1216 } 1217 } else 1218 if (i->encSize == 4) { 1219 emitForm_MUL(i); 1220 code[0] |= (mode & 1) << 8 | (mode & 2) << 14; 1221 if (i->flagsSrc >= 0) { 1222 assert(!(code[0] & 0x10400000)); 1223 assert(SDATA(i->src(i->flagsSrc)).id == 0); 1224 code[0] |= 0x10400000; 1225 } 1226 } else { 1227 code[1] = mode << 29; 1228 emitForm_MAD(i); 1229 1230 if (i->flagsSrc >= 0) { 1231 // add with carry from $cX 1232 assert(!(code[1] & 0x0c000000) && !i->getPredicate()); 1233 code[1] |= 0xc << 24; 1234 srcId(i->src(i->flagsSrc), 32 + 12); 1235 } 1236 } 1237 } 1238 1239 void 1240 CodeEmitterNV50::emitISAD(const Instruction *i) 1241 { 1242 if (i->encSize == 8) { 1243 code[0] = 0x50000000; 1244 switch (i->sType) { 1245 case TYPE_U32: code[1] = 0x04000000; break; 1246 case TYPE_S32: code[1] = 0x0c000000; break; 1247 case TYPE_U16: code[1] = 0x00000000; break; 1248 case TYPE_S16: code[1] = 0x08000000; break; 1249 default: 1250 assert(0); 1251 break; 1252 } 1253 emitForm_MAD(i); 1254 } else { 1255 switch (i->sType) { 1256 case TYPE_U32: code[0] = 0x50008000; break; 1257 case TYPE_S32: code[0] = 0x50008100; break; 1258 case TYPE_U16: code[0] = 0x50000000; break; 1259 case TYPE_S16: code[0] = 0x50000100; break; 1260 default: 1261 assert(0); 1262 break; 1263 } 1264 emitForm_MUL(i); 1265 } 1266 } 1267 1268 static void 1269 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data) 1270 { 1271 int loc = entry->loc; 1272 int enc; 1273 1274 switch (data.alphatest) { 1275 case PIPE_FUNC_NEVER: enc = 0x0; break; 1276 case PIPE_FUNC_LESS: enc = 0x1; break; 1277 case PIPE_FUNC_EQUAL: enc = 0x2; break; 1278 case PIPE_FUNC_LEQUAL: enc = 0x3; break; 1279 case PIPE_FUNC_GREATER: enc = 0x4; break; 1280 case PIPE_FUNC_NOTEQUAL: enc = 0x5; break; 1281 case PIPE_FUNC_GEQUAL: enc = 0x6; break; 1282 default: 1283 case PIPE_FUNC_ALWAYS: enc = 0xf; break; 1284 } 1285 1286 code[loc + 1] &= ~(0x1f << 14); 1287 code[loc + 1] |= enc << 14; 1288 } 1289 1290 void 1291 CodeEmitterNV50::emitSET(const Instruction *i) 1292 { 1293 code[0] = 0x30000000; 1294 code[1] = 0x60000000; 1295 1296 switch (i->sType) { 1297 case TYPE_F64: 1298 code[0] = 0xe0000000; 1299 code[1] = 0xe0000000; 1300 break; 1301 case TYPE_F32: code[0] |= 0x80000000; break; 1302 case TYPE_S32: code[1] |= 0x0c000000; break; 1303 case TYPE_U32: code[1] |= 0x04000000; break; 1304 case TYPE_S16: code[1] |= 0x08000000; break; 1305 case TYPE_U16: break; 1306 default: 1307 assert(0); 1308 break; 1309 } 1310 1311 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); 1312 1313 if (i->src(0).mod.neg()) code[1] |= 0x04000000; 1314 if (i->src(1).mod.neg()) code[1] |= 0x08000000; 1315 if (i->src(0).mod.abs()) code[1] |= 0x00100000; 1316 if (i->src(1).mod.abs()) code[1] |= 0x00080000; 1317 1318 emitForm_MAD(i); 1319 1320 if (i->subOp == 1) { 1321 addInterp(0, 0, alphatestSet); 1322 } 1323 } 1324 1325 void 1326 CodeEmitterNV50::roundMode_CVT(RoundMode rnd) 1327 { 1328 switch (rnd) { 1329 case ROUND_NI: code[1] |= 0x08000000; break; 1330 case ROUND_M: code[1] |= 0x00020000; break; 1331 case ROUND_MI: code[1] |= 0x08020000; break; 1332 case ROUND_P: code[1] |= 0x00040000; break; 1333 case ROUND_PI: code[1] |= 0x08040000; break; 1334 case ROUND_Z: code[1] |= 0x00060000; break; 1335 case ROUND_ZI: code[1] |= 0x08060000; break; 1336 default: 1337 assert(rnd == ROUND_N); 1338 break; 1339 } 1340 } 1341 1342 void 1343 CodeEmitterNV50::emitCVT(const Instruction *i) 1344 { 1345 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); 1346 RoundMode rnd; 1347 DataType dType; 1348 1349 switch (i->op) { 1350 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; 1351 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; 1352 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; 1353 default: 1354 rnd = i->rnd; 1355 break; 1356 } 1357 1358 if (i->op == OP_NEG && i->dType == TYPE_U32) 1359 dType = TYPE_S32; 1360 else 1361 dType = i->dType; 1362 1363 code[0] = 0xa0000000; 1364 1365 switch (dType) { 1366 case TYPE_F64: 1367 switch (i->sType) { 1368 case TYPE_F64: code[1] = 0xc4404000; break; 1369 case TYPE_S64: code[1] = 0x44414000; break; 1370 case TYPE_U64: code[1] = 0x44404000; break; 1371 case TYPE_F32: code[1] = 0xc4400000; break; 1372 case TYPE_S32: code[1] = 0x44410000; break; 1373 case TYPE_U32: code[1] = 0x44400000; break; 1374 default: 1375 assert(0); 1376 break; 1377 } 1378 break; 1379 case TYPE_S64: 1380 switch (i->sType) { 1381 case TYPE_F64: code[1] = 0x8c404000; break; 1382 case TYPE_F32: code[1] = 0x8c400000; break; 1383 default: 1384 assert(0); 1385 break; 1386 } 1387 break; 1388 case TYPE_U64: 1389 switch (i->sType) { 1390 case TYPE_F64: code[1] = 0x84404000; break; 1391 case TYPE_F32: code[1] = 0x84400000; break; 1392 default: 1393 assert(0); 1394 break; 1395 } 1396 break; 1397 case TYPE_F32: 1398 switch (i->sType) { 1399 case TYPE_F64: code[1] = 0xc0404000; break; 1400 case TYPE_S64: code[1] = 0x40414000; break; 1401 case TYPE_U64: code[1] = 0x40404000; break; 1402 case TYPE_F32: code[1] = 0xc4004000; break; 1403 case TYPE_S32: code[1] = 0x44014000; break; 1404 case TYPE_U32: code[1] = 0x44004000; break; 1405 case TYPE_F16: code[1] = 0xc4000000; break; 1406 case TYPE_U16: code[1] = 0x44000000; break; 1407 default: 1408 assert(0); 1409 break; 1410 } 1411 break; 1412 case TYPE_S32: 1413 switch (i->sType) { 1414 case TYPE_F64: code[1] = 0x88404000; break; 1415 case TYPE_F32: code[1] = 0x8c004000; break; 1416 case TYPE_S32: code[1] = 0x0c014000; break; 1417 case TYPE_U32: code[1] = 0x0c004000; break; 1418 case TYPE_F16: code[1] = 0x8c000000; break; 1419 case TYPE_S16: code[1] = 0x0c010000; break; 1420 case TYPE_U16: code[1] = 0x0c000000; break; 1421 case TYPE_S8: code[1] = 0x0c018000; break; 1422 case TYPE_U8: code[1] = 0x0c008000; break; 1423 default: 1424 assert(0); 1425 break; 1426 } 1427 break; 1428 case TYPE_U32: 1429 switch (i->sType) { 1430 case TYPE_F64: code[1] = 0x80404000; break; 1431 case TYPE_F32: code[1] = 0x84004000; break; 1432 case TYPE_S32: code[1] = 0x04014000; break; 1433 case TYPE_U32: code[1] = 0x04004000; break; 1434 case TYPE_F16: code[1] = 0x84000000; break; 1435 case TYPE_S16: code[1] = 0x04010000; break; 1436 case TYPE_U16: code[1] = 0x04000000; break; 1437 case TYPE_S8: code[1] = 0x04018000; break; 1438 case TYPE_U8: code[1] = 0x04008000; break; 1439 default: 1440 assert(0); 1441 break; 1442 } 1443 break; 1444 case TYPE_S16: 1445 case TYPE_U16: 1446 case TYPE_S8: 1447 case TYPE_U8: 1448 default: 1449 assert(0); 1450 break; 1451 } 1452 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4) 1453 code[1] |= 0x00004000; 1454 1455 roundMode_CVT(rnd); 1456 1457 switch (i->op) { 1458 case OP_ABS: code[1] |= 1 << 20; break; 1459 case OP_SAT: code[1] |= 1 << 19; break; 1460 case OP_NEG: code[1] |= 1 << 29; break; 1461 default: 1462 break; 1463 } 1464 code[1] ^= i->src(0).mod.neg() << 29; 1465 code[1] |= i->src(0).mod.abs() << 20; 1466 if (i->saturate) 1467 code[1] |= 1 << 19; 1468 1469 assert(i->op != OP_ABS || !i->src(0).mod.neg()); 1470 1471 emitForm_MAD(i); 1472 } 1473 1474 void 1475 CodeEmitterNV50::emitPreOp(const Instruction *i) 1476 { 1477 code[0] = 0xb0000000; 1478 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000; 1479 1480 code[1] |= i->src(0).mod.abs() << 20; 1481 code[1] |= i->src(0).mod.neg() << 26; 1482 1483 emitForm_MAD(i); 1484 } 1485 1486 void 1487 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp) 1488 { 1489 code[0] = 0x90000000; 1490 1491 if (i->encSize == 4) { 1492 assert(i->op == OP_RCP); 1493 assert(!i->saturate); 1494 code[0] |= i->src(0).mod.abs() << 15; 1495 code[0] |= i->src(0).mod.neg() << 22; 1496 emitForm_MUL(i); 1497 } else { 1498 code[1] = subOp << 29; 1499 code[1] |= i->src(0).mod.abs() << 20; 1500 code[1] |= i->src(0).mod.neg() << 26; 1501 if (i->saturate) { 1502 assert(subOp == 6 && i->op == OP_EX2); 1503 code[1] |= 1 << 27; 1504 } 1505 emitForm_MAD(i); 1506 } 1507 } 1508 1509 void 1510 CodeEmitterNV50::emitNOT(const Instruction *i) 1511 { 1512 code[0] = 0xd0000000; 1513 code[1] = 0x0002c000; 1514 1515 switch (i->sType) { 1516 case TYPE_U32: 1517 case TYPE_S32: 1518 code[1] |= 0x04000000; 1519 break; 1520 default: 1521 break; 1522 } 1523 emitForm_MAD(i); 1524 setSrc(i, 0, 1); 1525 } 1526 1527 void 1528 CodeEmitterNV50::emitLogicOp(const Instruction *i) 1529 { 1530 code[0] = 0xd0000000; 1531 code[1] = 0; 1532 1533 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1534 switch (i->op) { 1535 case OP_OR: code[0] |= 0x0100; break; 1536 case OP_XOR: code[0] |= 0x8000; break; 1537 default: 1538 assert(i->op == OP_AND); 1539 break; 1540 } 1541 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) 1542 code[0] |= 1 << 22; 1543 1544 emitForm_IMM(i); 1545 } else { 1546 switch (i->op) { 1547 case OP_AND: code[1] = 0x04000000; break; 1548 case OP_OR: code[1] = 0x04004000; break; 1549 case OP_XOR: code[1] = 0x04008000; break; 1550 default: 1551 assert(0); 1552 break; 1553 } 1554 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) 1555 code[1] |= 1 << 16; 1556 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) 1557 code[1] |= 1 << 17; 1558 1559 emitForm_MAD(i); 1560 } 1561 } 1562 1563 void 1564 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl) 1565 { 1566 code[0] = 0x00000001 | (shl << 16); 1567 code[1] = 0xc0000000; 1568 1569 code[0] |= (DDATA(i->def(0)).id + 1) << 2; 1570 1571 setSrcFileBits(i, NV50_OP_ENC_IMM); 1572 setSrc(i, 0, 0); 1573 emitFlagsRd(i); 1574 } 1575 1576 void 1577 CodeEmitterNV50::emitShift(const Instruction *i) 1578 { 1579 if (i->def(0).getFile() == FILE_ADDRESS) { 1580 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE); 1581 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f); 1582 } else { 1583 code[0] = 0x30000001; 1584 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000; 1585 if (i->op == OP_SHR && isSignedType(i->sType)) 1586 code[1] |= 1 << 27; 1587 1588 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1589 code[1] |= 1 << 20; 1590 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16; 1591 defId(i->def(0), 2); 1592 srcId(i->src(0), 9); 1593 emitFlagsRd(i); 1594 } else { 1595 emitForm_MAD(i); 1596 } 1597 } 1598 } 1599 1600 void 1601 CodeEmitterNV50::emitOUT(const Instruction *i) 1602 { 1603 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401; 1604 code[1] = 0xc0000000; 1605 1606 emitFlagsRd(i); 1607 } 1608 1609 void 1610 CodeEmitterNV50::emitTEX(const TexInstruction *i) 1611 { 1612 code[0] = 0xf0000001; 1613 code[1] = 0x00000000; 1614 1615 switch (i->op) { 1616 case OP_TXB: 1617 code[1] = 0x20000000; 1618 break; 1619 case OP_TXL: 1620 code[1] = 0x40000000; 1621 break; 1622 case OP_TXF: 1623 code[0] |= 0x01000000; 1624 break; 1625 case OP_TXG: 1626 code[0] |= 0x01000000; 1627 code[1] = 0x80000000; 1628 break; 1629 case OP_TXLQ: 1630 code[1] = 0x60020000; 1631 break; 1632 default: 1633 assert(i->op == OP_TEX); 1634 break; 1635 } 1636 1637 code[0] |= i->tex.r << 9; 1638 code[0] |= i->tex.s << 17; 1639 1640 int argc = i->tex.target.getArgCount(); 1641 1642 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF) 1643 argc += 1; 1644 if (i->tex.target.isShadow()) 1645 argc += 1; 1646 assert(argc <= 4); 1647 1648 code[0] |= (argc - 1) << 22; 1649 1650 if (i->tex.target.isCube()) { 1651 code[0] |= 0x08000000; 1652 } else 1653 if (i->tex.useOffsets) { 1654 code[1] |= (i->tex.offset[0] & 0xf) << 24; 1655 code[1] |= (i->tex.offset[1] & 0xf) << 20; 1656 code[1] |= (i->tex.offset[2] & 0xf) << 16; 1657 } 1658 1659 code[0] |= (i->tex.mask & 0x3) << 25; 1660 code[1] |= (i->tex.mask & 0xc) << 12; 1661 1662 if (i->tex.liveOnly) 1663 code[1] |= 1 << 2; 1664 if (i->tex.derivAll) 1665 code[1] |= 1 << 3; 1666 1667 defId(i->def(0), 2); 1668 1669 emitFlagsRd(i); 1670 } 1671 1672 void 1673 CodeEmitterNV50::emitTXQ(const TexInstruction *i) 1674 { 1675 assert(i->tex.query == TXQ_DIMS); 1676 1677 code[0] = 0xf0000001; 1678 code[1] = 0x60000000; 1679 1680 code[0] |= i->tex.r << 9; 1681 code[0] |= i->tex.s << 17; 1682 1683 code[0] |= (i->tex.mask & 0x3) << 25; 1684 code[1] |= (i->tex.mask & 0xc) << 12; 1685 1686 defId(i->def(0), 2); 1687 1688 emitFlagsRd(i); 1689 } 1690 1691 void 1692 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i) 1693 { 1694 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9); 1695 code[1] = 0x60010000; 1696 1697 code[0] |= (i->tex.mask & 0x3) << 25; 1698 code[1] |= (i->tex.mask & 0xc) << 12; 1699 defId(i->def(0), 2); 1700 1701 emitFlagsRd(i); 1702 } 1703 1704 void 1705 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i) 1706 { 1707 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */ 1708 1709 code[0] = 0x10000003; // bra 1710 code[1] = 0x00000780; // always 1711 1712 switch (i->subOp) { 1713 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call 1714 break; 1715 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call 1716 pos += 8; 1717 break; 1718 default: 1719 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2)); 1720 code[0] = 0x20000003; // call 1721 code[1] = 0x00000000; // no predicate 1722 break; 1723 } 1724 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9); 1725 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4); 1726 } 1727 1728 void 1729 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) 1730 { 1731 const FlowInstruction *f = i->asFlow(); 1732 bool hasPred = false; 1733 bool hasTarg = false; 1734 1735 code[0] = 0x00000003 | (flowOp << 28); 1736 code[1] = 0x00000000; 1737 1738 switch (i->op) { 1739 case OP_BRA: 1740 hasPred = true; 1741 hasTarg = true; 1742 break; 1743 case OP_BREAK: 1744 case OP_BRKPT: 1745 case OP_DISCARD: 1746 case OP_RET: 1747 hasPred = true; 1748 break; 1749 case OP_CALL: 1750 case OP_PREBREAK: 1751 case OP_JOINAT: 1752 hasTarg = true; 1753 break; 1754 case OP_PRERET: 1755 hasTarg = true; 1756 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) { 1757 emitPRERETEmu(f); 1758 return; 1759 } 1760 break; 1761 default: 1762 break; 1763 } 1764 1765 if (hasPred) 1766 emitFlagsRd(i); 1767 1768 if (hasTarg && f) { 1769 uint32_t pos; 1770 1771 if (f->op == OP_CALL) { 1772 if (f->builtin) { 1773 pos = targNV50->getBuiltinOffset(f->target.builtin); 1774 } else { 1775 pos = f->target.fn->binPos; 1776 } 1777 } else { 1778 pos = f->target.bb->binPos; 1779 } 1780 1781 code[0] |= ((pos >> 2) & 0xffff) << 11; 1782 code[1] |= ((pos >> 18) & 0x003f) << 14; 1783 1784 RelocEntry::Type relocTy; 1785 1786 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE; 1787 1788 addReloc(relocTy, 0, pos, 0x07fff800, 9); 1789 addReloc(relocTy, 1, pos, 0x000fc000, -4); 1790 } 1791 } 1792 1793 void 1794 CodeEmitterNV50::emitBAR(const Instruction *i) 1795 { 1796 ImmediateValue *barId = i->getSrc(0)->asImm(); 1797 assert(barId); 1798 1799 code[0] = 0x82000003 | (barId->reg.data.u32 << 21); 1800 code[1] = 0x00004000; 1801 1802 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC) 1803 code[0] |= 1 << 26; 1804 } 1805 1806 void 1807 CodeEmitterNV50::emitATOM(const Instruction *i) 1808 { 1809 uint8_t subOp; 1810 switch (i->subOp) { 1811 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break; 1812 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break; 1813 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break; 1814 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break; 1815 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break; 1816 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break; 1817 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break; 1818 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break; 1819 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break; 1820 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break; 1821 default: 1822 assert(!"invalid subop"); 1823 return; 1824 } 1825 code[0] = 0xd0000001; 1826 code[1] = 0xe0c00000 | (subOp << 2); 1827 if (isSignedType(i->dType)) 1828 code[1] |= 1 << 21; 1829 1830 // args 1831 emitFlagsRd(i); 1832 setDst(i, 0); 1833 setSrc(i, 1, 1); 1834 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) 1835 setSrc(i, 2, 2); 1836 1837 // g[] pointer 1838 code[0] |= i->getSrc(0)->reg.fileIndex << 23; 1839 srcId(i->getIndirect(0, 0), 9); 1840 } 1841 1842 bool 1843 CodeEmitterNV50::emitInstruction(Instruction *insn) 1844 { 1845 if (!insn->encSize) { 1846 ERROR("skipping unencodable instruction: "); insn->print(); 1847 return false; 1848 } else 1849 if (codeSize + insn->encSize > codeSizeLimit) { 1850 ERROR("code emitter output buffer too small\n"); 1851 return false; 1852 } 1853 1854 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) { 1855 INFO("EMIT: "); insn->print(); 1856 } 1857 1858 switch (insn->op) { 1859 case OP_MOV: 1860 emitMOV(insn); 1861 break; 1862 case OP_EXIT: 1863 case OP_NOP: 1864 case OP_JOIN: 1865 emitNOP(); 1866 break; 1867 case OP_VFETCH: 1868 case OP_LOAD: 1869 emitLOAD(insn); 1870 break; 1871 case OP_EXPORT: 1872 case OP_STORE: 1873 emitSTORE(insn); 1874 break; 1875 case OP_PFETCH: 1876 emitPFETCH(insn); 1877 break; 1878 case OP_RDSV: 1879 emitRDSV(insn); 1880 break; 1881 case OP_LINTERP: 1882 case OP_PINTERP: 1883 emitINTERP(insn); 1884 break; 1885 case OP_ADD: 1886 case OP_SUB: 1887 if (insn->dType == TYPE_F64) 1888 emitDADD(insn); 1889 else if (isFloatType(insn->dType)) 1890 emitFADD(insn); 1891 else if (insn->getDef(0)->reg.file == FILE_ADDRESS) 1892 emitAADD(insn); 1893 else 1894 emitUADD(insn); 1895 break; 1896 case OP_MUL: 1897 if (insn->dType == TYPE_F64) 1898 emitDMUL(insn); 1899 else if (isFloatType(insn->dType)) 1900 emitFMUL(insn); 1901 else 1902 emitIMUL(insn); 1903 break; 1904 case OP_MAD: 1905 case OP_FMA: 1906 if (insn->dType == TYPE_F64) 1907 emitDMAD(insn); 1908 else if (isFloatType(insn->dType)) 1909 emitFMAD(insn); 1910 else 1911 emitIMAD(insn); 1912 break; 1913 case OP_SAD: 1914 emitISAD(insn); 1915 break; 1916 case OP_NOT: 1917 emitNOT(insn); 1918 break; 1919 case OP_AND: 1920 case OP_OR: 1921 case OP_XOR: 1922 emitLogicOp(insn); 1923 break; 1924 case OP_SHL: 1925 case OP_SHR: 1926 emitShift(insn); 1927 break; 1928 case OP_SET: 1929 emitSET(insn); 1930 break; 1931 case OP_MIN: 1932 case OP_MAX: 1933 emitMINMAX(insn); 1934 break; 1935 case OP_CEIL: 1936 case OP_FLOOR: 1937 case OP_TRUNC: 1938 case OP_ABS: 1939 case OP_NEG: 1940 case OP_SAT: 1941 emitCVT(insn); 1942 break; 1943 case OP_CVT: 1944 if (insn->def(0).getFile() == FILE_ADDRESS) 1945 emitARL(insn, 0); 1946 else 1947 if (insn->def(0).getFile() == FILE_FLAGS || 1948 insn->src(0).getFile() == FILE_FLAGS || 1949 insn->src(0).getFile() == FILE_ADDRESS) 1950 emitMOV(insn); 1951 else 1952 emitCVT(insn); 1953 break; 1954 case OP_RCP: 1955 emitSFnOp(insn, 0); 1956 break; 1957 case OP_RSQ: 1958 emitSFnOp(insn, 2); 1959 break; 1960 case OP_LG2: 1961 emitSFnOp(insn, 3); 1962 break; 1963 case OP_SIN: 1964 emitSFnOp(insn, 4); 1965 break; 1966 case OP_COS: 1967 emitSFnOp(insn, 5); 1968 break; 1969 case OP_EX2: 1970 emitSFnOp(insn, 6); 1971 break; 1972 case OP_PRESIN: 1973 case OP_PREEX2: 1974 emitPreOp(insn); 1975 break; 1976 case OP_TEX: 1977 case OP_TXB: 1978 case OP_TXL: 1979 case OP_TXF: 1980 case OP_TXG: 1981 case OP_TXLQ: 1982 emitTEX(insn->asTex()); 1983 break; 1984 case OP_TXQ: 1985 emitTXQ(insn->asTex()); 1986 break; 1987 case OP_TEXPREP: 1988 emitTEXPREP(insn->asTex()); 1989 break; 1990 case OP_EMIT: 1991 case OP_RESTART: 1992 emitOUT(insn); 1993 break; 1994 case OP_DISCARD: 1995 emitFlow(insn, 0x0); 1996 break; 1997 case OP_BRA: 1998 emitFlow(insn, 0x1); 1999 break; 2000 case OP_CALL: 2001 emitFlow(insn, 0x2); 2002 break; 2003 case OP_RET: 2004 emitFlow(insn, 0x3); 2005 break; 2006 case OP_PREBREAK: 2007 emitFlow(insn, 0x4); 2008 break; 2009 case OP_BREAK: 2010 emitFlow(insn, 0x5); 2011 break; 2012 case OP_QUADON: 2013 emitFlow(insn, 0x6); 2014 break; 2015 case OP_QUADPOP: 2016 emitFlow(insn, 0x7); 2017 break; 2018 case OP_JOINAT: 2019 emitFlow(insn, 0xa); 2020 break; 2021 case OP_PRERET: 2022 emitFlow(insn, 0xd); 2023 break; 2024 case OP_QUADOP: 2025 emitQUADOP(insn, insn->lanes, insn->subOp); 2026 break; 2027 case OP_DFDX: 2028 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99); 2029 break; 2030 case OP_DFDY: 2031 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5); 2032 break; 2033 case OP_ATOM: 2034 emitATOM(insn); 2035 break; 2036 case OP_BAR: 2037 emitBAR(insn); 2038 break; 2039 case OP_PHI: 2040 case OP_UNION: 2041 case OP_CONSTRAINT: 2042 ERROR("operation should have been eliminated\n"); 2043 return false; 2044 case OP_EXP: 2045 case OP_LOG: 2046 case OP_SQRT: 2047 case OP_POW: 2048 case OP_SELP: 2049 case OP_SLCT: 2050 case OP_TXD: 2051 case OP_PRECONT: 2052 case OP_CONT: 2053 case OP_POPCNT: 2054 case OP_INSBF: 2055 case OP_EXTBF: 2056 ERROR("operation should have been lowered\n"); 2057 return false; 2058 default: 2059 ERROR("unknown op: %u\n", insn->op); 2060 return false; 2061 } 2062 if (insn->join || insn->op == OP_JOIN) 2063 code[1] |= 0x2; 2064 else 2065 if (insn->exit || insn->op == OP_EXIT) 2066 code[1] |= 0x1; 2067 2068 assert((insn->encSize == 8) == (code[0] & 1)); 2069 2070 code += insn->encSize / 4; 2071 codeSize += insn->encSize; 2072 return true; 2073 } 2074 2075 uint32_t 2076 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const 2077 { 2078 const Target::OpInfo &info = targ->getOpInfo(i); 2079 2080 if (info.minEncSize > 4 || i->dType == TYPE_F64) 2081 return 8; 2082 2083 // check constraints on dst and src operands 2084 for (int d = 0; i->defExists(d); ++d) { 2085 if (i->def(d).rep()->reg.data.id > 63 || 2086 i->def(d).rep()->reg.file != FILE_GPR) 2087 return 8; 2088 } 2089 2090 for (int s = 0; i->srcExists(s); ++s) { 2091 DataFile sf = i->src(s).getFile(); 2092 if (sf != FILE_GPR) 2093 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT) 2094 return 8; 2095 if (i->src(s).rep()->reg.data.id > 63) 2096 return 8; 2097 } 2098 2099 // check modifiers & rounding 2100 if (i->join || i->lanes != 0xf || i->exit) 2101 return 8; 2102 if (i->op == OP_MUL && i->rnd != ROUND_N) 2103 return 8; 2104 2105 if (i->asTex()) 2106 return 8; // TODO: short tex encoding 2107 2108 // check constraints on short MAD 2109 if (info.srcNr >= 2 && i->srcExists(2)) { 2110 if (!i->defExists(0) || 2111 (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) || 2112 DDATA(i->def(0)).id != SDATA(i->src(2)).id) 2113 return 8; 2114 } 2115 2116 return info.minEncSize; 2117 } 2118 2119 // Change the encoding size of an instruction after BBs have been scheduled. 2120 static void 2121 makeInstructionLong(Instruction *insn) 2122 { 2123 if (insn->encSize == 8) 2124 return; 2125 Function *fn = insn->bb->getFunction(); 2126 int n = 0; 2127 int adj = 4; 2128 2129 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next); 2130 2131 if (n & 1) { 2132 adj = 8; 2133 insn->next->encSize = 8; 2134 } else 2135 if (insn->prev && insn->prev->encSize == 4) { 2136 adj = 8; 2137 insn->prev->encSize = 8; 2138 } 2139 insn->encSize = 8; 2140 2141 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { 2142 fn->bbArray[i]->binPos += adj; 2143 } 2144 fn->binSize += adj; 2145 insn->bb->binSize += adj; 2146 } 2147 2148 static bool 2149 trySetExitModifier(Instruction *insn) 2150 { 2151 if (insn->op == OP_DISCARD || 2152 insn->op == OP_QUADON || 2153 insn->op == OP_QUADPOP) 2154 return false; 2155 for (int s = 0; insn->srcExists(s); ++s) 2156 if (insn->src(s).getFile() == FILE_IMMEDIATE) 2157 return false; 2158 if (insn->asFlow()) { 2159 if (insn->op == OP_CALL) // side effects ! 2160 return false; 2161 if (insn->getPredicate()) // cannot do conditional exit (or can we ?) 2162 return false; 2163 insn->op = OP_EXIT; 2164 } 2165 insn->exit = 1; 2166 makeInstructionLong(insn); 2167 return true; 2168 } 2169 2170 static void 2171 replaceExitWithModifier(Function *func) 2172 { 2173 BasicBlock *epilogue = BasicBlock::get(func->cfgExit); 2174 2175 if (!epilogue->getExit() || 2176 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT 2177 return; 2178 2179 if (epilogue->getEntry()->op != OP_EXIT) { 2180 Instruction *insn = epilogue->getExit()->prev; 2181 if (!insn || !trySetExitModifier(insn)) 2182 return; 2183 insn->exit = 1; 2184 } else { 2185 for (Graph::EdgeIterator ei = func->cfgExit->incident(); 2186 !ei.end(); ei.next()) { 2187 BasicBlock *bb = BasicBlock::get(ei.getNode()); 2188 Instruction *i = bb->getExit(); 2189 2190 if (!i || !trySetExitModifier(i)) 2191 return; 2192 } 2193 } 2194 2195 int adj = epilogue->getExit()->encSize; 2196 epilogue->binSize -= adj; 2197 func->binSize -= adj; 2198 delete_Instruction(func->getProgram(), epilogue->getExit()); 2199 2200 // There may be BB's that are laid out after the exit block 2201 for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) { 2202 func->bbArray[i]->binPos -= adj; 2203 } 2204 } 2205 2206 void 2207 CodeEmitterNV50::prepareEmission(Function *func) 2208 { 2209 CodeEmitter::prepareEmission(func); 2210 2211 replaceExitWithModifier(func); 2212 } 2213 2214 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : 2215 CodeEmitter(target), targNV50(target) 2216 { 2217 targ = target; // specialized 2218 code = NULL; 2219 codeSize = codeSizeLimit = 0; 2220 relocInfo = NULL; 2221 } 2222 2223 CodeEmitter * 2224 TargetNV50::getCodeEmitter(Program::Type type) 2225 { 2226 CodeEmitterNV50 *emit = new CodeEmitterNV50(this); 2227 emit->setProgramType(type); 2228 return emit; 2229 } 2230 2231 } // namespace nv50_ir 2232