1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 #include "nv50_ir.h" 24 #include "nv50_ir_target_nv50.h" 25 26 namespace nv50_ir { 27 28 #define NV50_OP_ENC_LONG 0 29 #define NV50_OP_ENC_SHORT 1 30 #define NV50_OP_ENC_IMM 2 31 #define NV50_OP_ENC_LONG_ALT 3 32 33 class CodeEmitterNV50 : public CodeEmitter 34 { 35 public: 36 CodeEmitterNV50(const TargetNV50 *); 37 38 virtual bool emitInstruction(Instruction *); 39 40 virtual uint32_t getMinEncodingSize(const Instruction *) const; 41 42 inline void setProgramType(Program::Type pType) { progType = pType; } 43 44 virtual void prepareEmission(Function *); 45 46 private: 47 Program::Type progType; 48 49 const TargetNV50 *targ; 50 51 private: 52 inline void defId(const ValueDef&, const int pos); 53 inline void srcId(const ValueRef&, const int pos); 54 inline void srcId(const ValueRef *, const int pos); 55 56 inline void srcAddr16(const ValueRef&, bool adj, const int pos); 57 inline void srcAddr8(const ValueRef&, const int pos); 58 59 void emitFlagsRd(const Instruction *); 60 void emitFlagsWr(const Instruction *); 61 62 void emitCondCode(CondCode cc, DataType ty, int pos); 63 64 inline void setARegBits(unsigned int); 65 66 void setAReg16(const Instruction *, int s); 67 void setImmediate(const Instruction *, int s); 68 69 void setDst(const Value *); 70 void setDst(const Instruction *, int d); 71 void setSrcFileBits(const Instruction *, int enc); 72 void setSrc(const Instruction *, unsigned int s, int slot); 73 74 void emitForm_MAD(const Instruction *); 75 void emitForm_ADD(const Instruction *); 76 void emitForm_MUL(const Instruction *); 77 void emitForm_IMM(const Instruction *); 78 79 void emitLoadStoreSizeLG(DataType ty, int pos); 80 void emitLoadStoreSizeCS(DataType ty); 81 82 void roundMode_MAD(const Instruction *); 83 void roundMode_CVT(RoundMode); 84 85 void emitMNeg12(const Instruction *); 86 87 void emitLOAD(const Instruction *); 88 void emitSTORE(const Instruction *); 89 void emitMOV(const Instruction *); 90 void emitNOP(); 91 void emitINTERP(const Instruction *); 92 void emitPFETCH(const Instruction *); 93 void emitOUT(const Instruction *); 94 95 void emitUADD(const Instruction *); 96 void emitAADD(const Instruction *); 97 void emitFADD(const Instruction *); 98 void emitIMUL(const Instruction *); 99 void emitFMUL(const Instruction *); 100 void emitFMAD(const Instruction *); 101 void emitIMAD(const Instruction *); 102 void emitISAD(const Instruction *); 103 104 void emitMINMAX(const Instruction *); 105 106 void emitPreOp(const Instruction *); 107 void emitSFnOp(const Instruction *, uint8_t subOp); 108 109 void emitShift(const Instruction *); 110 void emitARL(const Instruction *, unsigned int shl); 111 void emitLogicOp(const Instruction *); 112 void emitNOT(const Instruction *); 113 114 void emitCVT(const Instruction *); 115 void emitSET(const Instruction *); 116 117 void emitTEX(const TexInstruction *); 118 void emitTXQ(const TexInstruction *); 119 120 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp); 121 122 void emitFlow(const Instruction *, uint8_t flowOp); 123 void emitPRERETEmu(const FlowInstruction *); 124 }; 125 126 #define SDATA(a) ((a).rep()->reg.data) 127 #define DDATA(a) ((a).rep()->reg.data) 128 129 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos) 130 { 131 assert(src.get()); 132 code[pos / 32] |= SDATA(src).id << (pos % 32); 133 } 134 135 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos) 136 { 137 assert(src->get()); 138 code[pos / 32] |= SDATA(*src).id << (pos % 32); 139 } 140 141 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos) 142 { 143 assert(src.get()); 144 145 int32_t offset = SDATA(src).offset; 146 147 assert(!adj || src.get()->reg.size <= 4); 148 if (adj) 149 offset /= src.get()->reg.size; 150 151 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16); 152 153 if (offset < 0) 154 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff; 155 156 code[pos / 32] |= offset << (pos % 32); 157 } 158 159 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos) 160 { 161 assert(src.get()); 162 163 uint32_t offset = SDATA(src).offset; 164 165 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3)); 166 167 code[pos / 32] |= (offset >> 2) << (pos % 32); 168 } 169 170 void CodeEmitterNV50::defId(const ValueDef& def, const int pos) 171 { 172 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT); 173 174 code[pos / 32] |= DDATA(def).id << (pos % 32); 175 } 176 177 void 178 CodeEmitterNV50::roundMode_MAD(const Instruction *insn) 179 { 180 switch (insn->rnd) { 181 case ROUND_M: code[1] |= 1 << 22; break; 182 case ROUND_P: code[1] |= 2 << 22; break; 183 case ROUND_Z: code[1] |= 3 << 22; break; 184 default: 185 assert(insn->rnd == ROUND_N); 186 break; 187 } 188 } 189 190 void 191 CodeEmitterNV50::emitMNeg12(const Instruction *i) 192 { 193 code[1] |= i->src(0).mod.neg() << 26; 194 code[1] |= i->src(1).mod.neg() << 27; 195 } 196 197 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos) 198 { 199 uint8_t enc; 200 201 assert(pos >= 32 || pos <= 27); 202 203 switch (cc) { 204 case CC_LT: enc = 0x1; break; 205 case CC_LTU: enc = 0x9; break; 206 case CC_EQ: enc = 0x2; break; 207 case CC_EQU: enc = 0xa; break; 208 case CC_LE: enc = 0x3; break; 209 case CC_LEU: enc = 0xb; break; 210 case CC_GT: enc = 0x4; break; 211 case CC_GTU: enc = 0xc; break; 212 case CC_NE: enc = 0x5; break; 213 case CC_NEU: enc = 0xd; break; 214 case CC_GE: enc = 0x6; break; 215 case CC_GEU: enc = 0xe; break; 216 case CC_TR: enc = 0xf; break; 217 case CC_FL: enc = 0x0; break; 218 219 case CC_O: enc = 0x10; break; 220 case CC_C: enc = 0x11; break; 221 case CC_A: enc = 0x12; break; 222 case CC_S: enc = 0x13; break; 223 case CC_NS: enc = 0x1c; break; 224 case CC_NA: enc = 0x1d; break; 225 case CC_NC: enc = 0x1e; break; 226 case CC_NO: enc = 0x1f; break; 227 228 default: 229 enc = 0; 230 assert(!"invalid condition code"); 231 break; 232 } 233 if (ty != TYPE_NONE && !isFloatType(ty)) 234 enc &= ~0x8; // unordered only exists for float types 235 236 code[pos / 32] |= enc << (pos % 32); 237 } 238 239 void 240 CodeEmitterNV50::emitFlagsRd(const Instruction *i) 241 { 242 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc; 243 244 assert(!(code[1] & 0x00003f80)); 245 246 if (s >= 0) { 247 assert(i->getSrc(s)->reg.file == FILE_FLAGS); 248 emitCondCode(i->cc, TYPE_NONE, 32 + 7); 249 srcId(i->src(s), 32 + 12); 250 } else { 251 code[1] |= 0x0780; 252 } 253 } 254 255 void 256 CodeEmitterNV50::emitFlagsWr(const Instruction *i) 257 { 258 assert(!(code[1] & 0x70)); 259 260 int flagsDef = i->flagsDef; 261 262 // find flags definition and check that it is the last def 263 if (flagsDef < 0) { 264 for (int d = 0; i->defExists(d); ++d) 265 if (i->def(d).getFile() == FILE_FLAGS) 266 flagsDef = d; 267 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point 268 WARN("Instruction::flagsDef was not set properly\n"); 269 } 270 if (flagsDef == 0 && i->defExists(1)) 271 WARN("flags def should not be the primary definition\n"); 272 273 if (flagsDef >= 0) 274 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40; 275 276 } 277 278 void 279 CodeEmitterNV50::setARegBits(unsigned int u) 280 { 281 code[0] |= (u & 3) << 26; 282 code[1] |= (u & 4); 283 } 284 285 void 286 CodeEmitterNV50::setAReg16(const Instruction *i, int s) 287 { 288 if (i->srcExists(s)) { 289 s = i->src(s).indirect[0]; 290 if (s >= 0) 291 setARegBits(SDATA(i->src(s)).id + 1); 292 } 293 } 294 295 void 296 CodeEmitterNV50::setImmediate(const Instruction *i, int s) 297 { 298 const ImmediateValue *imm = i->src(s).get()->asImm(); 299 assert(imm); 300 301 uint32_t u = imm->reg.data.u32; 302 303 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) 304 u = ~u; 305 306 code[1] |= 3; 307 code[0] |= (u & 0x3f) << 16; 308 code[1] |= (u >> 6) << 2; 309 } 310 311 void 312 CodeEmitterNV50::setDst(const Value *dst) 313 { 314 const Storage *reg = &dst->join->reg; 315 316 assert(reg->file != FILE_ADDRESS); 317 318 if (reg->data.id < 0 || reg->file == FILE_FLAGS) { 319 code[0] |= (127 << 2) | 1; 320 code[1] |= 8; 321 } else { 322 int id; 323 if (reg->file == FILE_SHADER_OUTPUT) { 324 code[1] |= 8; 325 id = reg->data.offset / 4; 326 } else { 327 id = reg->data.id; 328 } 329 code[0] |= id << 2; 330 } 331 } 332 333 void 334 CodeEmitterNV50::setDst(const Instruction *i, int d) 335 { 336 if (i->defExists(d)) { 337 setDst(i->getDef(d)); 338 } else 339 if (!d) { 340 code[0] |= 0x01fc; // bit bucket 341 code[1] |= 0x0008; 342 } 343 } 344 345 // 3 * 2 bits: 346 // 0: r 347 // 1: a/s 348 // 2: c 349 // 3: i 350 void 351 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) 352 { 353 uint8_t mode = 0; 354 355 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) { 356 switch (i->src(s).getFile()) { 357 case FILE_GPR: 358 break; 359 case FILE_MEMORY_SHARED: 360 case FILE_SHADER_INPUT: 361 mode |= 1 << (s * 2); 362 break; 363 case FILE_MEMORY_CONST: 364 mode |= 2 << (s * 2); 365 break; 366 case FILE_IMMEDIATE: 367 mode |= 3 << (s * 2); 368 break; 369 default: 370 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); 371 assert(0); 372 break; 373 } 374 } 375 switch (mode) { 376 case 0x00: // rrr 377 break; 378 case 0x01: // arr/grr 379 if (progType == Program::TYPE_GEOMETRY) { 380 code[0] |= 0x01800000; 381 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT) 382 code[1] |= 0x00200000; 383 } else { 384 if (enc == NV50_OP_ENC_SHORT) 385 code[0] |= 0x01000000; 386 else 387 code[1] |= 0x00200000; 388 } 389 break; 390 case 0x03: // irr 391 assert(i->op == OP_MOV); 392 return; 393 case 0x0c: // rir 394 break; 395 case 0x0d: // gir 396 code[0] |= 0x01000000; 397 assert(progType == Program::TYPE_GEOMETRY || 398 progType == Program::TYPE_COMPUTE); 399 break; 400 case 0x08: // rcr 401 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; 402 code[1] |= (i->getSrc(1)->reg.fileIndex << 22); 403 break; 404 case 0x09: // acr/gcr 405 if (progType == Program::TYPE_GEOMETRY) { 406 code[0] |= 0x01800000; 407 } else { 408 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; 409 code[1] |= 0x00200000; 410 } 411 code[1] |= (i->getSrc(1)->reg.fileIndex << 22); 412 break; 413 case 0x20: // rrc 414 code[0] |= 0x01000000; 415 code[1] |= (i->getSrc(2)->reg.fileIndex << 22); 416 break; 417 case 0x21: // arc 418 code[0] |= 0x01000000; 419 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22); 420 assert(progType != Program::TYPE_GEOMETRY); 421 break; 422 default: 423 ERROR("not encodable: %x\n", mode); 424 assert(0); 425 break; 426 } 427 if (progType != Program::TYPE_COMPUTE) 428 return; 429 430 if ((mode & 3) == 1) { 431 const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14; 432 433 switch (i->getSrc(0)->reg.type) { 434 case TYPE_U8: 435 break; 436 case TYPE_U16: 437 code[0] |= 1 << pos; 438 break; 439 case TYPE_S16: 440 code[0] |= 2 << pos; 441 break; 442 default: 443 code[0] |= 3 << pos; 444 assert(i->getSrc(0)->reg.size == 4); 445 break; 446 } 447 } 448 } 449 450 void 451 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot) 452 { 453 if (Target::operationSrcNr[i->op] <= s) 454 return; 455 const Storage *reg = &i->src(s).rep()->reg; 456 457 unsigned int id = (reg->file == FILE_GPR) ? 458 reg->data.id : 459 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here 460 461 switch (slot) { 462 case 0: code[0] |= id << 9; break; 463 case 1: code[0] |= id << 16; break; 464 case 2: code[1] |= id << 14; break; 465 default: 466 assert(0); 467 break; 468 } 469 } 470 471 // the default form: 472 // - long instruction 473 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr) 474 // - address & flags 475 void 476 CodeEmitterNV50::emitForm_MAD(const Instruction *i) 477 { 478 assert(i->encSize == 8); 479 code[0] |= 1; 480 481 emitFlagsRd(i); 482 emitFlagsWr(i); 483 484 setDst(i, 0); 485 486 setSrcFileBits(i, NV50_OP_ENC_LONG); 487 setSrc(i, 0, 0); 488 setSrc(i, 1, 1); 489 setSrc(i, 2, 2); 490 491 setAReg16(i, 1); 492 } 493 494 // like default form, but 2nd source in slot 2, and no 3rd source 495 void 496 CodeEmitterNV50::emitForm_ADD(const Instruction *i) 497 { 498 assert(i->encSize == 8); 499 code[0] |= 1; 500 501 emitFlagsRd(i); 502 emitFlagsWr(i); 503 504 setDst(i, 0); 505 506 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); 507 setSrc(i, 0, 0); 508 setSrc(i, 1, 2); 509 510 setAReg16(i, 1); 511 } 512 513 // default short form (rr, ar, rc, gr) 514 void 515 CodeEmitterNV50::emitForm_MUL(const Instruction *i) 516 { 517 assert(i->encSize == 4 && !(code[0] & 1)); 518 assert(i->defExists(0)); 519 assert(!i->getPredicate()); 520 521 setDst(i, 0); 522 523 setSrcFileBits(i, NV50_OP_ENC_SHORT); 524 setSrc(i, 0, 0); 525 setSrc(i, 1, 1); 526 } 527 528 // usual immediate form 529 // - 1 to 3 sources where last is immediate (rir, gir) 530 // - no address or predicate possible 531 void 532 CodeEmitterNV50::emitForm_IMM(const Instruction *i) 533 { 534 assert(i->encSize == 8); 535 code[0] |= 1; 536 537 assert(i->defExists(0) && i->srcExists(0)); 538 539 setDst(i, 0); 540 541 setSrcFileBits(i, NV50_OP_ENC_IMM); 542 if (Target::operationSrcNr[i->op] > 1) { 543 setSrc(i, 0, 0); 544 setImmediate(i, 1); 545 setSrc(i, 2, 1); 546 } else { 547 setImmediate(i, 0); 548 } 549 } 550 551 void 552 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos) 553 { 554 uint8_t enc; 555 556 switch (ty) { 557 case TYPE_F32: // fall through 558 case TYPE_S32: // fall through 559 case TYPE_U32: enc = 0x6; break; 560 case TYPE_B128: enc = 0x5; break; 561 case TYPE_F64: // fall through 562 case TYPE_S64: // fall through 563 case TYPE_U64: enc = 0x4; break; 564 case TYPE_S16: enc = 0x3; break; 565 case TYPE_U16: enc = 0x2; break; 566 case TYPE_S8: enc = 0x1; break; 567 case TYPE_U8: enc = 0x0; break; 568 default: 569 enc = 0; 570 assert(!"invalid load/store type"); 571 break; 572 } 573 code[pos / 32] |= enc << (pos % 32); 574 } 575 576 void 577 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) 578 { 579 switch (ty) { 580 case TYPE_U8: break; 581 case TYPE_U16: code[1] |= 0x4000; break; 582 case TYPE_S16: code[1] |= 0x8000; break; 583 case TYPE_F32: 584 case TYPE_S32: 585 case TYPE_U32: code[1] |= 0xc000; break; 586 default: 587 assert(0); 588 break; 589 } 590 } 591 592 void 593 CodeEmitterNV50::emitLOAD(const Instruction *i) 594 { 595 DataFile sf = i->src(0).getFile(); 596 int32_t offset = i->getSrc(0)->reg.data.offset; 597 598 switch (sf) { 599 case FILE_SHADER_INPUT: 600 // use 'mov' where we can 601 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001; 602 code[1] = 0x00200000 | (i->lanes << 14); 603 if (typeSizeof(i->dType) == 4) 604 code[1] |= 0x04000000; 605 break; 606 case FILE_MEMORY_SHARED: 607 if (targ->getChipset() >= 0x84) { 608 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); 609 code[0] = 0x10000001; 610 code[1] = 0x40000000; 611 612 if (typeSizeof(i->dType) == 4) 613 code[1] |= 0x04000000; 614 615 emitLoadStoreSizeCS(i->sType); 616 } else { 617 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType))); 618 code[0] = 0x10000001; 619 code[1] = 0x00200000 | (i->lanes << 14); 620 emitLoadStoreSizeCS(i->sType); 621 } 622 break; 623 case FILE_MEMORY_CONST: 624 code[0] = 0x10000001; 625 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22); 626 if (typeSizeof(i->dType) == 4) 627 code[1] |= 0x04000000; 628 emitLoadStoreSizeCS(i->sType); 629 break; 630 case FILE_MEMORY_LOCAL: 631 code[0] = 0xd0000001; 632 code[1] = 0x40000000; 633 break; 634 case FILE_MEMORY_GLOBAL: 635 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); 636 code[1] = 0x80000000; 637 break; 638 default: 639 assert(!"invalid load source file"); 640 break; 641 } 642 if (sf == FILE_MEMORY_LOCAL || 643 sf == FILE_MEMORY_GLOBAL) 644 emitLoadStoreSizeLG(i->sType, 21 + 32); 645 646 setDst(i, 0); 647 648 emitFlagsRd(i); 649 emitFlagsWr(i); 650 651 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { 652 srcId(*i->src(0).getIndirect(0), 9); 653 } else { 654 setAReg16(i, 0); 655 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9); 656 } 657 } 658 659 void 660 CodeEmitterNV50::emitSTORE(const Instruction *i) 661 { 662 DataFile f = i->getSrc(0)->reg.file; 663 int32_t offset = i->getSrc(0)->reg.data.offset; 664 665 switch (f) { 666 case FILE_SHADER_OUTPUT: 667 code[0] = 0x00000001 | ((offset >> 2) << 9); 668 code[1] = 0x80c00000; 669 srcId(i->src(1), 32 + 14); 670 break; 671 case FILE_MEMORY_GLOBAL: 672 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); 673 code[1] = 0xa0000000; 674 emitLoadStoreSizeLG(i->dType, 21 + 32); 675 srcId(i->src(1), 2); 676 break; 677 case FILE_MEMORY_LOCAL: 678 code[0] = 0xd0000001; 679 code[1] = 0x60000000; 680 emitLoadStoreSizeLG(i->dType, 21 + 32); 681 srcId(i->src(1), 2); 682 break; 683 case FILE_MEMORY_SHARED: 684 code[0] = 0x00000001; 685 code[1] = 0xe0000000; 686 switch (typeSizeof(i->dType)) { 687 case 1: 688 code[0] |= offset << 9; 689 code[1] |= 0x00400000; 690 break; 691 case 2: 692 code[0] |= (offset >> 1) << 9; 693 break; 694 case 4: 695 code[0] |= (offset >> 2) << 9; 696 code[1] |= 0x04200000; 697 break; 698 default: 699 assert(0); 700 break; 701 } 702 srcId(i->src(1), 32 + 14); 703 break; 704 default: 705 assert(!"invalid store destination file"); 706 break; 707 } 708 709 if (f == FILE_MEMORY_GLOBAL) 710 srcId(*i->src(0).getIndirect(0), 9); 711 else 712 setAReg16(i, 0); 713 714 if (f == FILE_MEMORY_LOCAL) 715 srcAddr16(i->src(0), false, 9); 716 717 emitFlagsRd(i); 718 } 719 720 void 721 CodeEmitterNV50::emitMOV(const Instruction *i) 722 { 723 DataFile sf = i->getSrc(0)->reg.file; 724 DataFile df = i->getDef(0)->reg.file; 725 726 assert(sf == FILE_GPR || df == FILE_GPR); 727 728 if (sf == FILE_FLAGS) { 729 code[0] = 0x00000001; 730 code[1] = 0x20000000; 731 defId(i->def(0), 2); 732 srcId(i->src(0), 12); 733 emitFlagsRd(i); 734 } else 735 if (sf == FILE_ADDRESS) { 736 code[0] = 0x00000001; 737 code[1] = 0x40000000; 738 defId(i->def(0), 2); 739 setARegBits(SDATA(i->src(0)).id + 1); 740 emitFlagsRd(i); 741 } else 742 if (df == FILE_FLAGS) { 743 code[0] = 0x00000001; 744 code[1] = 0xa0000000; 745 defId(i->def(0), 4); 746 srcId(i->src(0), 9); 747 emitFlagsRd(i); 748 } else 749 if (sf == FILE_IMMEDIATE) { 750 code[0] = 0x10008001; 751 code[1] = 0x00000003; 752 emitForm_IMM(i); 753 } else { 754 if (i->encSize == 4) { 755 code[0] = 0x10008000; 756 } else { 757 code[0] = 0x10000001; 758 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; 759 code[1] |= (i->lanes << 14); 760 emitFlagsRd(i); 761 } 762 defId(i->def(0), 2); 763 srcId(i->src(0), 9); 764 } 765 if (df == FILE_SHADER_OUTPUT) { 766 assert(i->encSize == 8); 767 code[1] |= 0x8; 768 } 769 } 770 771 void 772 CodeEmitterNV50::emitNOP() 773 { 774 code[0] = 0xf0000001; 775 code[1] = 0xe0000000; 776 } 777 778 void 779 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) 780 { 781 code[0] = 0xc0000000 | (lane << 16); 782 code[1] = 0x80000000; 783 784 code[0] |= (quOp & 0x03) << 20; 785 code[1] |= (quOp & 0xfc) << 20; 786 787 emitForm_ADD(i); 788 789 if (!i->srcExists(1)) 790 srcId(i->src(0), 32 + 14); 791 } 792 793 void 794 CodeEmitterNV50::emitPFETCH(const Instruction *i) 795 { 796 code[0] = 0x11800001; 797 code[1] = 0x04200000 | (0xf << 14); 798 799 defId(i->def(0), 2); 800 srcAddr8(i->src(0), 9); 801 setAReg16(i, 0); 802 } 803 804 void 805 CodeEmitterNV50::emitINTERP(const Instruction *i) 806 { 807 code[0] = 0x80000000; 808 809 defId(i->def(0), 2); 810 srcAddr8(i->src(0), 16); 811 812 if (i->getInterpMode() == NV50_IR_INTERP_FLAT) { 813 code[0] |= 1 << 8; 814 } else { 815 if (i->op == OP_PINTERP) { 816 code[0] |= 1 << 25; 817 srcId(i->src(1), 9); 818 } 819 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID) 820 code[0] |= 1 << 24; 821 } 822 823 if (i->encSize == 8) { 824 code[1] = 825 (code[0] & (3 << 24)) >> (24 - 16) | 826 (code[0] & (1 << 8)) << (18 - 8); 827 code[0] &= ~0x03000100; 828 code[0] |= 1; 829 emitFlagsRd(i); 830 } 831 } 832 833 void 834 CodeEmitterNV50::emitMINMAX(const Instruction *i) 835 { 836 if (i->dType == TYPE_F64) { 837 code[0] = 0xe0000000; 838 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000; 839 } else { 840 code[0] = 0x30000000; 841 code[1] = 0x80000000; 842 if (i->op == OP_MIN) 843 code[1] |= 0x20000000; 844 845 switch (i->dType) { 846 case TYPE_F32: code[0] |= 0x80000000; break; 847 case TYPE_S32: code[1] |= 0x8c000000; break; 848 case TYPE_U32: code[1] |= 0x84000000; break; 849 case TYPE_S16: code[1] |= 0x80000000; break; 850 case TYPE_U16: break; 851 default: 852 assert(0); 853 break; 854 } 855 code[1] |= i->src(0).mod.abs() << 20; 856 code[1] |= i->src(1).mod.abs() << 19; 857 } 858 emitForm_MAD(i); 859 } 860 861 void 862 CodeEmitterNV50::emitFMAD(const Instruction *i) 863 { 864 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); 865 const int neg_add = i->src(2).mod.neg(); 866 867 code[0] = 0xe0000000; 868 869 if (i->encSize == 4) { 870 emitForm_MUL(i); 871 assert(!neg_mul && !neg_add); 872 } else { 873 code[1] = neg_mul << 26; 874 code[1] |= neg_add << 27; 875 if (i->saturate) 876 code[1] |= 1 << 29; 877 emitForm_MAD(i); 878 } 879 } 880 881 void 882 CodeEmitterNV50::emitFADD(const Instruction *i) 883 { 884 const int neg0 = i->src(0).mod.neg(); 885 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); 886 887 code[0] = 0xb0000000; 888 889 assert(!(i->src(0).mod | i->src(1).mod).abs()); 890 891 if (i->src(1).getFile() == FILE_IMMEDIATE) { 892 code[1] = 0; 893 emitForm_IMM(i); 894 code[0] |= neg0 << 15; 895 code[0] |= neg1 << 22; 896 if (i->saturate) 897 code[0] |= 1 << 8; 898 } else 899 if (i->encSize == 8) { 900 code[1] = 0; 901 emitForm_ADD(i); 902 code[1] |= neg0 << 26; 903 code[1] |= neg1 << 27; 904 if (i->saturate) 905 code[1] |= 1 << 29; 906 } else { 907 emitForm_MUL(i); 908 code[0] |= neg0 << 15; 909 code[0] |= neg1 << 22; 910 if (i->saturate) 911 code[0] |= 1 << 8; 912 } 913 } 914 915 void 916 CodeEmitterNV50::emitUADD(const Instruction *i) 917 { 918 const int neg0 = i->src(0).mod.neg(); 919 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); 920 921 code[0] = 0x20008000; 922 923 if (i->src(1).getFile() == FILE_IMMEDIATE) { 924 code[1] = 0; 925 emitForm_IMM(i); 926 } else 927 if (i->encSize == 8) { 928 code[0] = 0x20000000; 929 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; 930 emitForm_ADD(i); 931 } else { 932 emitForm_MUL(i); 933 } 934 assert(!(neg0 && neg1)); 935 code[0] |= neg0 << 28; 936 code[0] |= neg1 << 22; 937 938 if (i->flagsSrc >= 0) { 939 // addc == sub | subr 940 assert(!(code[0] & 0x10400000) && !i->getPredicate()); 941 code[0] |= 0x10400000; 942 srcId(i->src(i->flagsSrc), 32 + 12); 943 } 944 } 945 946 void 947 CodeEmitterNV50::emitAADD(const Instruction *i) 948 { 949 const int s = (i->op == OP_MOV) ? 0 : 1; 950 951 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9); 952 code[1] = 0x20000000; 953 954 code[0] |= (DDATA(i->def(0)).id + 1) << 2; 955 956 emitFlagsRd(i); 957 958 if (s && i->srcExists(0)) 959 setARegBits(SDATA(i->src(0)).id + 1); 960 } 961 962 void 963 CodeEmitterNV50::emitIMUL(const Instruction *i) 964 { 965 code[0] = 0x40000000; 966 967 if (i->encSize == 8) { 968 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; 969 emitForm_MAD(i); 970 } else { 971 if (i->sType == TYPE_S16) 972 code[0] |= 0x8100; 973 emitForm_MUL(i); 974 } 975 } 976 977 void 978 CodeEmitterNV50::emitFMUL(const Instruction *i) 979 { 980 const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); 981 982 code[0] = 0xc0000000; 983 984 if (i->src(1).getFile() == FILE_IMMEDIATE) { 985 code[1] = 0; 986 emitForm_IMM(i); 987 if (neg) 988 code[0] |= 0x8000; 989 } else 990 if (i->encSize == 8) { 991 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0; 992 if (neg) 993 code[1] |= 0x08000000; 994 emitForm_MAD(i); 995 } else { 996 emitForm_MUL(i); 997 if (neg) 998 code[0] |= 0x8000; 999 } 1000 } 1001 1002 void 1003 CodeEmitterNV50::emitIMAD(const Instruction *i) 1004 { 1005 code[0] = 0x60000000; 1006 if (isSignedType(i->sType)) 1007 code[1] = i->saturate ? 0x40000000 : 0x20000000; 1008 else 1009 code[1] = 0x00000000; 1010 1011 int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); 1012 int neg2 = i->src(2).mod.neg(); 1013 1014 assert(!(neg1 & neg2)); 1015 code[1] |= neg1 << 27; 1016 code[1] |= neg2 << 26; 1017 1018 emitForm_MAD(i); 1019 1020 if (i->flagsSrc >= 0) { 1021 // add with carry from $cX 1022 assert(!(code[1] & 0x0c000000) && !i->getPredicate()); 1023 code[1] |= 0xc << 24; 1024 srcId(i->src(i->flagsSrc), 32 + 12); 1025 } 1026 } 1027 1028 void 1029 CodeEmitterNV50::emitISAD(const Instruction *i) 1030 { 1031 if (i->encSize == 8) { 1032 code[0] = 0x50000000; 1033 switch (i->sType) { 1034 case TYPE_U32: code[1] = 0x04000000; break; 1035 case TYPE_S32: code[1] = 0x0c000000; break; 1036 case TYPE_U16: code[1] = 0x00000000; break; 1037 case TYPE_S16: code[1] = 0x08000000; break; 1038 default: 1039 assert(0); 1040 break; 1041 } 1042 emitForm_MAD(i); 1043 } else { 1044 switch (i->sType) { 1045 case TYPE_U32: code[0] = 0x50008000; break; 1046 case TYPE_S32: code[0] = 0x50008100; break; 1047 case TYPE_U16: code[0] = 0x50000000; break; 1048 case TYPE_S16: code[0] = 0x50000100; break; 1049 default: 1050 assert(0); 1051 break; 1052 } 1053 emitForm_MUL(i); 1054 } 1055 } 1056 1057 void 1058 CodeEmitterNV50::emitSET(const Instruction *i) 1059 { 1060 code[0] = 0x30000000; 1061 code[1] = 0x60000000; 1062 1063 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); 1064 1065 switch (i->sType) { 1066 case TYPE_F32: code[0] |= 0x80000000; break; 1067 case TYPE_S32: code[1] |= 0x0c000000; break; 1068 case TYPE_U32: code[1] |= 0x04000000; break; 1069 case TYPE_S16: code[1] |= 0x08000000; break; 1070 case TYPE_U16: break; 1071 default: 1072 assert(0); 1073 break; 1074 } 1075 if (i->src(0).mod.neg()) code[1] |= 0x04000000; 1076 if (i->src(1).mod.neg()) code[1] |= 0x08000000; 1077 if (i->src(0).mod.abs()) code[1] |= 0x00100000; 1078 if (i->src(1).mod.abs()) code[1] |= 0x00080000; 1079 1080 emitForm_MAD(i); 1081 } 1082 1083 void 1084 CodeEmitterNV50::roundMode_CVT(RoundMode rnd) 1085 { 1086 switch (rnd) { 1087 case ROUND_NI: code[1] |= 0x08000000; break; 1088 case ROUND_M: code[1] |= 0x00020000; break; 1089 case ROUND_MI: code[1] |= 0x08020000; break; 1090 case ROUND_P: code[1] |= 0x00040000; break; 1091 case ROUND_PI: code[1] |= 0x08040000; break; 1092 case ROUND_Z: code[1] |= 0x00060000; break; 1093 case ROUND_ZI: code[1] |= 0x08060000; break; 1094 default: 1095 assert(rnd == ROUND_N); 1096 break; 1097 } 1098 } 1099 1100 void 1101 CodeEmitterNV50::emitCVT(const Instruction *i) 1102 { 1103 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); 1104 RoundMode rnd; 1105 1106 switch (i->op) { 1107 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; 1108 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; 1109 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; 1110 default: 1111 rnd = i->rnd; 1112 break; 1113 } 1114 1115 code[0] = 0xa0000000; 1116 1117 switch (i->dType) { 1118 case TYPE_F64: 1119 switch (i->sType) { 1120 case TYPE_F64: code[1] = 0xc4404000; break; 1121 case TYPE_S64: code[1] = 0x44414000; break; 1122 case TYPE_U64: code[1] = 0x44404000; break; 1123 case TYPE_F32: code[1] = 0xc4400000; break; 1124 case TYPE_S32: code[1] = 0x44410000; break; 1125 case TYPE_U32: code[1] = 0x44400000; break; 1126 default: 1127 assert(0); 1128 break; 1129 } 1130 break; 1131 case TYPE_S64: 1132 switch (i->sType) { 1133 case TYPE_F64: code[1] = 0x8c404000; break; 1134 case TYPE_F32: code[1] = 0x8c400000; break; 1135 default: 1136 assert(0); 1137 break; 1138 } 1139 break; 1140 case TYPE_U64: 1141 switch (i->sType) { 1142 case TYPE_F64: code[1] = 0x84404000; break; 1143 case TYPE_F32: code[1] = 0x84400000; break; 1144 default: 1145 assert(0); 1146 break; 1147 } 1148 break; 1149 case TYPE_F32: 1150 switch (i->sType) { 1151 case TYPE_F64: code[1] = 0xc0404000; break; 1152 case TYPE_S64: code[1] = 0x40414000; break; 1153 case TYPE_U64: code[1] = 0x40404000; break; 1154 case TYPE_F32: code[1] = 0xc4004000; break; 1155 case TYPE_S32: code[1] = 0x44014000; break; 1156 case TYPE_U32: code[1] = 0x44004000; break; 1157 case TYPE_F16: code[1] = 0xc4000000; break; 1158 default: 1159 assert(0); 1160 break; 1161 } 1162 break; 1163 case TYPE_S32: 1164 switch (i->sType) { 1165 case TYPE_F64: code[1] = 0x88404000; break; 1166 case TYPE_F32: code[1] = 0x8c004000; break; 1167 case TYPE_S32: code[1] = 0x0c014000; break; 1168 case TYPE_U32: code[1] = 0x0c004000; break; 1169 case TYPE_F16: code[1] = 0x8c000000; break; 1170 case TYPE_S16: code[1] = 0x0c010000; break; 1171 case TYPE_U16: code[1] = 0x0c000000; break; 1172 case TYPE_S8: code[1] = 0x0c018000; break; 1173 case TYPE_U8: code[1] = 0x0c008000; break; 1174 default: 1175 assert(0); 1176 break; 1177 } 1178 break; 1179 case TYPE_U32: 1180 switch (i->sType) { 1181 case TYPE_F64: code[1] = 0x80404000; break; 1182 case TYPE_F32: code[1] = 0x84004000; break; 1183 case TYPE_S32: code[1] = 0x04014000; break; 1184 case TYPE_U32: code[1] = 0x04004000; break; 1185 case TYPE_F16: code[1] = 0x84000000; break; 1186 case TYPE_S16: code[1] = 0x04010000; break; 1187 case TYPE_U16: code[1] = 0x04000000; break; 1188 case TYPE_S8: code[1] = 0x04018000; break; 1189 case TYPE_U8: code[1] = 0x04008000; break; 1190 default: 1191 assert(0); 1192 break; 1193 } 1194 break; 1195 case TYPE_S16: 1196 case TYPE_U16: 1197 case TYPE_S8: 1198 case TYPE_U8: 1199 default: 1200 assert(0); 1201 break; 1202 } 1203 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4) 1204 code[1] |= 0x00004000; 1205 1206 roundMode_CVT(rnd); 1207 1208 switch (i->op) { 1209 case OP_ABS: code[1] |= 1 << 20; break; 1210 case OP_SAT: code[1] |= 1 << 19; break; 1211 case OP_NEG: code[1] |= 1 << 29; break; 1212 default: 1213 break; 1214 } 1215 code[1] ^= i->src(0).mod.neg() << 29; 1216 code[1] |= i->src(0).mod.abs() << 20; 1217 if (i->saturate) 1218 code[1] |= 1 << 19; 1219 1220 assert(i->op != OP_ABS || !i->src(0).mod.neg()); 1221 1222 emitForm_MAD(i); 1223 } 1224 1225 void 1226 CodeEmitterNV50::emitPreOp(const Instruction *i) 1227 { 1228 code[0] = 0xb0000000; 1229 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000; 1230 1231 code[1] |= i->src(0).mod.abs() << 20; 1232 code[1] |= i->src(0).mod.neg() << 26; 1233 1234 emitForm_MAD(i); 1235 } 1236 1237 void 1238 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp) 1239 { 1240 code[0] = 0x90000000; 1241 1242 if (i->encSize == 4) { 1243 assert(i->op == OP_RCP); 1244 code[0] |= i->src(0).mod.abs() << 15; 1245 code[0] |= i->src(0).mod.neg() << 22; 1246 emitForm_MUL(i); 1247 } else { 1248 code[1] = subOp << 29; 1249 code[1] |= i->src(0).mod.abs() << 20; 1250 code[1] |= i->src(0).mod.neg() << 26; 1251 emitForm_MAD(i); 1252 } 1253 } 1254 1255 void 1256 CodeEmitterNV50::emitNOT(const Instruction *i) 1257 { 1258 code[0] = 0xd0000000; 1259 code[1] = 0x0002c000; 1260 1261 switch (i->sType) { 1262 case TYPE_U32: 1263 case TYPE_S32: 1264 code[1] |= 0x04000000; 1265 break; 1266 default: 1267 break; 1268 } 1269 emitForm_MAD(i); 1270 setSrc(i, 0, 1); 1271 } 1272 1273 void 1274 CodeEmitterNV50::emitLogicOp(const Instruction *i) 1275 { 1276 code[0] = 0xd0000000; 1277 code[1] = 0; 1278 1279 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1280 switch (i->op) { 1281 case OP_OR: code[0] |= 0x0100; break; 1282 case OP_XOR: code[0] |= 0x8000; break; 1283 default: 1284 assert(i->op == OP_AND); 1285 break; 1286 } 1287 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) 1288 code[0] |= 1 << 22; 1289 1290 emitForm_IMM(i); 1291 } else { 1292 switch (i->op) { 1293 case OP_AND: code[1] = 0x04000000; break; 1294 case OP_OR: code[1] = 0x04004000; break; 1295 case OP_XOR: code[1] = 0x04008000; break; 1296 default: 1297 assert(0); 1298 break; 1299 } 1300 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) 1301 code[1] |= 1 << 16; 1302 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) 1303 code[1] |= 1 << 17; 1304 1305 emitForm_MAD(i); 1306 } 1307 } 1308 1309 void 1310 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl) 1311 { 1312 code[0] = 0x00000001 | (shl << 16); 1313 code[1] = 0xc0000000; 1314 1315 code[0] |= (DDATA(i->def(0)).id + 1) << 2; 1316 1317 setSrcFileBits(i, NV50_OP_ENC_IMM); 1318 setSrc(i, 0, 0); 1319 emitFlagsRd(i); 1320 } 1321 1322 void 1323 CodeEmitterNV50::emitShift(const Instruction *i) 1324 { 1325 if (i->def(0).getFile() == FILE_ADDRESS) { 1326 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE); 1327 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f); 1328 } else { 1329 code[0] = 0x30000001; 1330 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000; 1331 if (i->op == OP_SHR && isSignedType(i->sType)) 1332 code[1] |= 1 << 27; 1333 1334 if (i->src(1).getFile() == FILE_IMMEDIATE) { 1335 code[1] |= 1 << 20; 1336 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16; 1337 defId(i->def(0), 2); 1338 srcId(i->src(0), 9); 1339 emitFlagsRd(i); 1340 } else { 1341 emitForm_MAD(i); 1342 } 1343 } 1344 } 1345 1346 void 1347 CodeEmitterNV50::emitOUT(const Instruction *i) 1348 { 1349 code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400; 1350 code[1] = 0xc0000001; 1351 1352 emitFlagsRd(i); 1353 } 1354 1355 void 1356 CodeEmitterNV50::emitTEX(const TexInstruction *i) 1357 { 1358 code[0] = 0xf0000001; 1359 code[1] = 0x00000000; 1360 1361 switch (i->op) { 1362 case OP_TXB: 1363 code[1] = 0x20000000; 1364 break; 1365 case OP_TXL: 1366 code[1] = 0x40000000; 1367 break; 1368 case OP_TXF: 1369 code[0] |= 0x01000000; 1370 break; 1371 case OP_TXG: 1372 code[0] = 0x01000000; 1373 code[1] = 0x80000000; 1374 break; 1375 default: 1376 assert(i->op == OP_TEX); 1377 break; 1378 } 1379 1380 code[0] |= i->tex.r << 9; 1381 code[0] |= i->tex.s << 17; 1382 1383 int argc = i->tex.target.getArgCount(); 1384 1385 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF) 1386 argc += 1; 1387 if (i->tex.target.isShadow()) 1388 argc += 1; 1389 assert(argc <= 4); 1390 1391 code[0] |= (argc - 1) << 22; 1392 1393 if (i->tex.target.isCube()) { 1394 code[0] |= 0x08000000; 1395 } else 1396 if (i->tex.useOffsets) { 1397 code[1] |= (i->tex.offset[0][0] & 0xf) << 24; 1398 code[1] |= (i->tex.offset[0][1] & 0xf) << 20; 1399 code[1] |= (i->tex.offset[0][2] & 0xf) << 16; 1400 } 1401 1402 code[0] |= (i->tex.mask & 0x3) << 25; 1403 code[1] |= (i->tex.mask & 0xc) << 12; 1404 1405 if (i->tex.liveOnly) 1406 code[1] |= 4; 1407 1408 defId(i->def(0), 2); 1409 1410 emitFlagsRd(i); 1411 } 1412 1413 void 1414 CodeEmitterNV50::emitTXQ(const TexInstruction *i) 1415 { 1416 assert(i->tex.query == TXQ_DIMS); 1417 1418 code[0] = 0xf0000001; 1419 code[1] = 0x60000000; 1420 1421 code[0] |= i->tex.r << 9; 1422 code[0] |= i->tex.s << 17; 1423 1424 code[0] |= (i->tex.mask & 0x3) << 25; 1425 code[1] |= (i->tex.mask & 0xc) << 12; 1426 1427 defId(i->def(0), 2); 1428 1429 emitFlagsRd(i); 1430 } 1431 1432 void 1433 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i) 1434 { 1435 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */ 1436 1437 code[0] = 0x10000003; // bra 1438 code[1] = 0x00000780; // always 1439 1440 switch (i->subOp) { 1441 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call 1442 break; 1443 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call 1444 pos += 8; 1445 break; 1446 default: 1447 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2)); 1448 code[0] = 0x20000003; // call 1449 code[1] = 0x00000000; // no predicate 1450 break; 1451 } 1452 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9); 1453 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4); 1454 } 1455 1456 void 1457 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) 1458 { 1459 const FlowInstruction *f = i->asFlow(); 1460 bool hasPred = false; 1461 bool hasTarg = false; 1462 1463 code[0] = 0x00000003 | (flowOp << 28); 1464 code[1] = 0x00000000; 1465 1466 switch (i->op) { 1467 case OP_BRA: 1468 hasPred = true; 1469 hasTarg = true; 1470 break; 1471 case OP_BREAK: 1472 case OP_BRKPT: 1473 case OP_DISCARD: 1474 case OP_RET: 1475 hasPred = true; 1476 break; 1477 case OP_CALL: 1478 case OP_PREBREAK: 1479 case OP_JOINAT: 1480 hasTarg = true; 1481 break; 1482 case OP_PRERET: 1483 hasTarg = true; 1484 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) { 1485 emitPRERETEmu(f); 1486 return; 1487 } 1488 break; 1489 default: 1490 break; 1491 } 1492 1493 if (hasPred) 1494 emitFlagsRd(i); 1495 1496 if (hasTarg && f) { 1497 uint32_t pos; 1498 1499 if (f->op == OP_CALL) { 1500 if (f->builtin) { 1501 pos = targ->getBuiltinOffset(f->target.builtin); 1502 } else { 1503 pos = f->target.fn->binPos; 1504 } 1505 } else { 1506 pos = f->target.bb->binPos; 1507 } 1508 1509 code[0] |= ((pos >> 2) & 0xffff) << 11; 1510 code[1] |= ((pos >> 18) & 0x003f) << 14; 1511 1512 RelocEntry::Type relocTy; 1513 1514 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE; 1515 1516 addReloc(relocTy, 0, pos, 0x07fff800, 9); 1517 addReloc(relocTy, 1, pos, 0x000fc000, -4); 1518 } 1519 } 1520 1521 bool 1522 CodeEmitterNV50::emitInstruction(Instruction *insn) 1523 { 1524 if (!insn->encSize) { 1525 ERROR("skipping unencodable instruction: "); insn->print(); 1526 return false; 1527 } else 1528 if (codeSize + insn->encSize > codeSizeLimit) { 1529 ERROR("code emitter output buffer too small\n"); 1530 return false; 1531 } 1532 1533 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) { 1534 INFO("EMIT: "); insn->print(); 1535 } 1536 1537 switch (insn->op) { 1538 case OP_MOV: 1539 emitMOV(insn); 1540 break; 1541 case OP_EXIT: 1542 case OP_NOP: 1543 case OP_JOIN: 1544 emitNOP(); 1545 break; 1546 case OP_VFETCH: 1547 case OP_LOAD: 1548 emitLOAD(insn); 1549 break; 1550 case OP_EXPORT: 1551 case OP_STORE: 1552 emitSTORE(insn); 1553 break; 1554 case OP_PFETCH: 1555 emitPFETCH(insn); 1556 break; 1557 case OP_LINTERP: 1558 case OP_PINTERP: 1559 emitINTERP(insn); 1560 break; 1561 case OP_ADD: 1562 case OP_SUB: 1563 if (isFloatType(insn->dType)) 1564 emitFADD(insn); 1565 else if (insn->getDef(0)->reg.file == FILE_ADDRESS) 1566 emitAADD(insn); 1567 else 1568 emitUADD(insn); 1569 break; 1570 case OP_MUL: 1571 if (isFloatType(insn->dType)) 1572 emitFMUL(insn); 1573 else 1574 emitIMUL(insn); 1575 break; 1576 case OP_MAD: 1577 case OP_FMA: 1578 if (isFloatType(insn->dType)) 1579 emitFMAD(insn); 1580 else 1581 emitIMAD(insn); 1582 break; 1583 case OP_SAD: 1584 emitISAD(insn); 1585 break; 1586 case OP_NOT: 1587 emitNOT(insn); 1588 break; 1589 case OP_AND: 1590 case OP_OR: 1591 case OP_XOR: 1592 emitLogicOp(insn); 1593 break; 1594 case OP_SHL: 1595 case OP_SHR: 1596 emitShift(insn); 1597 break; 1598 case OP_SET: 1599 emitSET(insn); 1600 break; 1601 case OP_MIN: 1602 case OP_MAX: 1603 emitMINMAX(insn); 1604 break; 1605 case OP_CEIL: 1606 case OP_FLOOR: 1607 case OP_TRUNC: 1608 case OP_ABS: 1609 case OP_NEG: 1610 case OP_SAT: 1611 emitCVT(insn); 1612 break; 1613 case OP_CVT: 1614 if (insn->def(0).getFile() == FILE_ADDRESS) 1615 emitARL(insn, 0); 1616 else 1617 if (insn->def(0).getFile() == FILE_FLAGS || 1618 insn->src(0).getFile() == FILE_FLAGS || 1619 insn->src(0).getFile() == FILE_ADDRESS) 1620 emitMOV(insn); 1621 else 1622 emitCVT(insn); 1623 break; 1624 case OP_RCP: 1625 emitSFnOp(insn, 0); 1626 break; 1627 case OP_RSQ: 1628 emitSFnOp(insn, 2); 1629 break; 1630 case OP_LG2: 1631 emitSFnOp(insn, 3); 1632 break; 1633 case OP_SIN: 1634 emitSFnOp(insn, 4); 1635 break; 1636 case OP_COS: 1637 emitSFnOp(insn, 5); 1638 break; 1639 case OP_EX2: 1640 emitSFnOp(insn, 6); 1641 break; 1642 case OP_PRESIN: 1643 case OP_PREEX2: 1644 emitPreOp(insn); 1645 break; 1646 case OP_TEX: 1647 case OP_TXB: 1648 case OP_TXL: 1649 case OP_TXF: 1650 emitTEX(insn->asTex()); 1651 break; 1652 case OP_TXQ: 1653 emitTXQ(insn->asTex()); 1654 break; 1655 case OP_EMIT: 1656 case OP_RESTART: 1657 emitOUT(insn); 1658 break; 1659 case OP_DISCARD: 1660 emitFlow(insn, 0x0); 1661 break; 1662 case OP_BRA: 1663 emitFlow(insn, 0x1); 1664 break; 1665 case OP_CALL: 1666 emitFlow(insn, 0x2); 1667 break; 1668 case OP_RET: 1669 emitFlow(insn, 0x3); 1670 break; 1671 case OP_PREBREAK: 1672 emitFlow(insn, 0x4); 1673 break; 1674 case OP_BREAK: 1675 emitFlow(insn, 0x5); 1676 break; 1677 case OP_QUADON: 1678 emitFlow(insn, 0x6); 1679 break; 1680 case OP_QUADPOP: 1681 emitFlow(insn, 0x7); 1682 break; 1683 case OP_JOINAT: 1684 emitFlow(insn, 0xa); 1685 break; 1686 case OP_PRERET: 1687 emitFlow(insn, 0xd); 1688 break; 1689 case OP_QUADOP: 1690 emitQUADOP(insn, insn->lanes, insn->subOp); 1691 break; 1692 case OP_DFDX: 1693 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99); 1694 break; 1695 case OP_DFDY: 1696 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5); 1697 break; 1698 case OP_PHI: 1699 case OP_UNION: 1700 case OP_CONSTRAINT: 1701 ERROR("operation should have been eliminated\n"); 1702 return false; 1703 case OP_EXP: 1704 case OP_LOG: 1705 case OP_SQRT: 1706 case OP_POW: 1707 case OP_SELP: 1708 case OP_SLCT: 1709 case OP_TXD: 1710 case OP_PRECONT: 1711 case OP_CONT: 1712 case OP_POPCNT: 1713 case OP_INSBF: 1714 case OP_EXTBF: 1715 ERROR("operation should have been lowered\n"); 1716 return false; 1717 default: 1718 ERROR("unknown op: %u\n", insn->op); 1719 return false; 1720 } 1721 if (insn->join || insn->op == OP_JOIN) 1722 code[1] |= 0x2; 1723 else 1724 if (insn->exit || insn->op == OP_EXIT) 1725 code[1] |= 0x1; 1726 1727 assert((insn->encSize == 8) == (code[0] & 1)); 1728 1729 code += insn->encSize / 4; 1730 codeSize += insn->encSize; 1731 return true; 1732 } 1733 1734 uint32_t 1735 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const 1736 { 1737 const Target::OpInfo &info = targ->getOpInfo(i); 1738 1739 if (info.minEncSize > 4) 1740 return 8; 1741 1742 // check constraints on dst and src operands 1743 for (int d = 0; i->defExists(d); ++d) { 1744 if (i->def(d).rep()->reg.data.id > 63 || 1745 i->def(d).rep()->reg.file != FILE_GPR) 1746 return 8; 1747 } 1748 1749 for (int s = 0; i->srcExists(s); ++s) { 1750 DataFile sf = i->src(s).getFile(); 1751 if (sf != FILE_GPR) 1752 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT) 1753 return 8; 1754 if (i->src(s).rep()->reg.data.id > 63) 1755 return 8; 1756 } 1757 1758 // check modifiers & rounding 1759 if (i->join || i->lanes != 0xf || i->exit) 1760 return 8; 1761 if (i->op == OP_MUL && i->rnd != ROUND_N) 1762 return 8; 1763 1764 if (i->asTex()) 1765 return 8; // TODO: short tex encoding 1766 1767 // check constraints on short MAD 1768 if (info.srcNr >= 2 && i->srcExists(2)) { 1769 if (i->saturate || i->src(2).mod) 1770 return 8; 1771 if ((i->src(0).mod ^ i->src(1).mod) || 1772 (i->src(0).mod | i->src(1).mod).abs()) 1773 return 8; 1774 if (!i->defExists(0) || 1775 i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) 1776 return 8; 1777 } 1778 1779 return info.minEncSize; 1780 } 1781 1782 // Change the encoding size of an instruction after BBs have been scheduled. 1783 static void 1784 makeInstructionLong(Instruction *insn) 1785 { 1786 if (insn->encSize == 8) 1787 return; 1788 Function *fn = insn->bb->getFunction(); 1789 int n = 0; 1790 int adj = 4; 1791 1792 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next); 1793 1794 if (n & 1) { 1795 adj = 8; 1796 insn->next->encSize = 8; 1797 } else 1798 if (insn->prev && insn->prev->encSize == 4) { 1799 adj = 8; 1800 insn->prev->encSize = 8; 1801 } 1802 insn->encSize = 8; 1803 1804 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { 1805 fn->bbArray[i]->binPos += 4; 1806 } 1807 fn->binSize += adj; 1808 insn->bb->binSize += adj; 1809 } 1810 1811 static bool 1812 trySetExitModifier(Instruction *insn) 1813 { 1814 if (insn->op == OP_DISCARD || 1815 insn->op == OP_QUADON || 1816 insn->op == OP_QUADPOP) 1817 return false; 1818 for (int s = 0; insn->srcExists(s); ++s) 1819 if (insn->src(s).getFile() == FILE_IMMEDIATE) 1820 return false; 1821 if (insn->asFlow()) { 1822 if (insn->op == OP_CALL) // side effects ! 1823 return false; 1824 if (insn->getPredicate()) // cannot do conditional exit (or can we ?) 1825 return false; 1826 insn->op = OP_EXIT; 1827 } 1828 insn->exit = 1; 1829 makeInstructionLong(insn); 1830 return true; 1831 } 1832 1833 static void 1834 replaceExitWithModifier(Function *func) 1835 { 1836 BasicBlock *epilogue = BasicBlock::get(func->cfgExit); 1837 1838 if (!epilogue->getExit() || 1839 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT 1840 return; 1841 1842 if (epilogue->getEntry()->op != OP_EXIT) { 1843 Instruction *insn = epilogue->getExit()->prev; 1844 if (!insn || !trySetExitModifier(insn)) 1845 return; 1846 insn->exit = 1; 1847 } else { 1848 for (Graph::EdgeIterator ei = func->cfgExit->incident(); 1849 !ei.end(); ei.next()) { 1850 BasicBlock *bb = BasicBlock::get(ei.getNode()); 1851 Instruction *i = bb->getExit(); 1852 1853 if (!i || !trySetExitModifier(i)) 1854 return; 1855 } 1856 } 1857 epilogue->binSize -= 8; 1858 func->binSize -= 8; 1859 delete_Instruction(func->getProgram(), epilogue->getExit()); 1860 } 1861 1862 void 1863 CodeEmitterNV50::prepareEmission(Function *func) 1864 { 1865 CodeEmitter::prepareEmission(func); 1866 1867 replaceExitWithModifier(func); 1868 } 1869 1870 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target) 1871 { 1872 targ = target; // specialized 1873 code = NULL; 1874 codeSize = codeSizeLimit = 0; 1875 relocInfo = NULL; 1876 } 1877 1878 CodeEmitter * 1879 TargetNV50::getCodeEmitter(Program::Type type) 1880 { 1881 CodeEmitterNV50 *emit = new CodeEmitterNV50(this); 1882 emit->setProgramType(type); 1883 return emit; 1884 } 1885 1886 } // namespace nv50_ir 1887