1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 #include "nv50_ir.h" 24 #include "nv50_ir_target.h" 25 #include "nv50_ir_driver.h" 26 27 extern "C" { 28 #include "nv50/nv50_program.h" 29 #include "nv50/nv50_debug.h" 30 } 31 32 namespace nv50_ir { 33 34 Modifier::Modifier(operation op) 35 { 36 switch (op) { 37 case OP_NEG: bits = NV50_IR_MOD_NEG; break; 38 case OP_ABS: bits = NV50_IR_MOD_ABS; break; 39 case OP_SAT: bits = NV50_IR_MOD_SAT; break; 40 case OP_NOT: bits = NV50_IR_MOD_NOT; break; 41 default: 42 bits = 0; 43 break; 44 } 45 } 46 47 Modifier Modifier::operator*(const Modifier m) const 48 { 49 unsigned int a, b, c; 50 51 b = m.bits; 52 if (this->bits & NV50_IR_MOD_ABS) 53 b &= ~NV50_IR_MOD_NEG; 54 55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); 56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); 57 58 return Modifier(a | c); 59 } 60 61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) 62 { 63 indirect[0] = -1; 64 indirect[1] = -1; 65 usedAsPtr = false; 66 set(v); 67 } 68 69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) 70 { 71 set(ref); 72 usedAsPtr = ref.usedAsPtr; 73 } 74 75 ValueRef::~ValueRef() 76 { 77 this->set(NULL); 78 } 79 80 bool ValueRef::getImmediate(ImmediateValue &imm) const 81 { 82 const ValueRef *src = this; 83 Modifier m; 84 DataType type = src->insn->sType; 85 86 while (src) { 87 if (src->mod) { 88 if (src->insn->sType != type) 89 break; 90 m *= src->mod; 91 } 92 if (src->getFile() == FILE_IMMEDIATE) { 93 imm = *(src->value->asImm()); 94 // The immediate's type isn't required to match its use, it's 95 // more of a hint; applying a modifier makes use of that hint. 96 imm.reg.type = type; 97 m.applyTo(imm); 98 return true; 99 } 100 101 Instruction *insn = src->value->getUniqueInsn(); 102 103 if (insn && insn->op == OP_MOV) { 104 src = &insn->src(0); 105 if (src->mod) 106 WARN("OP_MOV with modifier encountered !\n"); 107 } else { 108 src = NULL; 109 } 110 } 111 return false; 112 } 113 114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL) 115 { 116 set(v); 117 } 118 119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL) 120 { 121 set(def.get()); 122 } 123 124 ValueDef::~ValueDef() 125 { 126 this->set(NULL); 127 } 128 129 void 130 ValueRef::set(const ValueRef &ref) 131 { 132 this->set(ref.get()); 133 mod = ref.mod; 134 indirect[0] = ref.indirect[0]; 135 indirect[1] = ref.indirect[1]; 136 } 137 138 void 139 ValueRef::set(Value *refVal) 140 { 141 if (value == refVal) 142 return; 143 if (value) 144 value->uses.remove(this); 145 if (refVal) 146 refVal->uses.push_back(this); 147 148 value = refVal; 149 } 150 151 void 152 ValueDef::set(Value *defVal) 153 { 154 if (value == defVal) 155 return; 156 if (value) 157 value->defs.remove(this); 158 if (defVal) 159 defVal->defs.push_back(this); 160 161 value = defVal; 162 } 163 164 // Check if we can replace this definition's value by the value in @rep, 165 // including the source modifiers, i.e. make sure that all uses support 166 // @rep.mod. 167 bool 168 ValueDef::mayReplace(const ValueRef &rep) 169 { 170 if (!rep.mod) 171 return true; 172 173 if (!insn || !insn->bb) // Unbound instruction ? 174 return false; 175 176 const Target *target = insn->bb->getProgram()->getTarget(); 177 178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); 179 ++it) { 180 Instruction *insn = (*it)->getInsn(); 181 int s = -1; 182 183 for (int i = 0; insn->srcExists(i); ++i) { 184 if (insn->src(i).get() == value) { 185 // If there are multiple references to us we'd have to check if the 186 // combination of mods is still supported, but just bail for now. 187 if (&insn->src(i) != (*it)) 188 return false; 189 s = i; 190 } 191 } 192 assert(s >= 0); // integrity of uses list 193 194 if (!target->isModSupported(insn, s, rep.mod)) 195 return false; 196 } 197 return true; 198 } 199 200 void 201 ValueDef::replace(const ValueRef &repVal, bool doSet) 202 { 203 assert(mayReplace(repVal)); 204 205 if (value == repVal.get()) 206 return; 207 208 while (!value->uses.empty()) { 209 ValueRef *ref = value->uses.front(); 210 ref->set(repVal.get()); 211 ref->mod *= repVal.mod; 212 } 213 214 if (doSet) 215 set(repVal.get()); 216 } 217 218 Value::Value() 219 { 220 join = this; 221 memset(®, 0, sizeof(reg)); 222 reg.size = 4; 223 } 224 225 LValue::LValue(Function *fn, DataFile file) 226 { 227 reg.file = file; 228 reg.size = (file != FILE_PREDICATE) ? 4 : 1; 229 reg.data.id = -1; 230 231 compMask = 0; 232 compound = 0; 233 ssa = 0; 234 fixedReg = 0; 235 noSpill = 0; 236 237 fn->add(this, this->id); 238 } 239 240 LValue::LValue(Function *fn, LValue *lval) 241 { 242 assert(lval); 243 244 reg.file = lval->reg.file; 245 reg.size = lval->reg.size; 246 reg.data.id = -1; 247 248 compMask = 0; 249 compound = 0; 250 ssa = 0; 251 fixedReg = 0; 252 noSpill = 0; 253 254 fn->add(this, this->id); 255 } 256 257 LValue * 258 LValue::clone(ClonePolicy<Function>& pol) const 259 { 260 LValue *that = new_LValue(pol.context(), reg.file); 261 262 pol.set<Value>(this, that); 263 264 that->reg.size = this->reg.size; 265 that->reg.type = this->reg.type; 266 that->reg.data = this->reg.data; 267 268 return that; 269 } 270 271 bool 272 LValue::isUniform() const 273 { 274 if (defs.size() > 1) 275 return false; 276 Instruction *insn = getInsn(); 277 // let's not try too hard here for now ... 278 return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); 279 } 280 281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx) 282 { 283 baseSym = NULL; 284 285 reg.file = f; 286 reg.fileIndex = fidx; 287 reg.data.offset = 0; 288 289 prog->add(this, this->id); 290 } 291 292 Symbol * 293 Symbol::clone(ClonePolicy<Function>& pol) const 294 { 295 Program *prog = pol.context()->getProgram(); 296 297 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); 298 299 pol.set<Value>(this, that); 300 301 that->reg.size = this->reg.size; 302 that->reg.type = this->reg.type; 303 that->reg.data = this->reg.data; 304 305 that->baseSym = this->baseSym; 306 307 return that; 308 } 309 310 bool 311 Symbol::isUniform() const 312 { 313 return 314 reg.file != FILE_SYSTEM_VALUE && 315 reg.file != FILE_MEMORY_LOCAL && 316 reg.file != FILE_SHADER_INPUT; 317 } 318 319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) 320 { 321 memset(®, 0, sizeof(reg)); 322 323 reg.file = FILE_IMMEDIATE; 324 reg.size = 4; 325 reg.type = TYPE_U32; 326 327 reg.data.u32 = uval; 328 329 prog->add(this, this->id); 330 } 331 332 ImmediateValue::ImmediateValue(Program *prog, float fval) 333 { 334 memset(®, 0, sizeof(reg)); 335 336 reg.file = FILE_IMMEDIATE; 337 reg.size = 4; 338 reg.type = TYPE_F32; 339 340 reg.data.f32 = fval; 341 342 prog->add(this, this->id); 343 } 344 345 ImmediateValue::ImmediateValue(Program *prog, double dval) 346 { 347 memset(®, 0, sizeof(reg)); 348 349 reg.file = FILE_IMMEDIATE; 350 reg.size = 8; 351 reg.type = TYPE_F64; 352 353 reg.data.f64 = dval; 354 355 prog->add(this, this->id); 356 } 357 358 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) 359 { 360 reg = proto->reg; 361 362 reg.type = ty; 363 reg.size = typeSizeof(ty); 364 } 365 366 ImmediateValue * 367 ImmediateValue::clone(ClonePolicy<Function>& pol) const 368 { 369 Program *prog = pol.context()->getProgram(); 370 ImmediateValue *that = new_ImmediateValue(prog, 0u); 371 372 pol.set<Value>(this, that); 373 374 that->reg.size = this->reg.size; 375 that->reg.type = this->reg.type; 376 that->reg.data = this->reg.data; 377 378 return that; 379 } 380 381 bool 382 ImmediateValue::isInteger(const int i) const 383 { 384 switch (reg.type) { 385 case TYPE_S8: 386 return reg.data.s8 == i; 387 case TYPE_U8: 388 return reg.data.u8 == i; 389 case TYPE_S16: 390 return reg.data.s16 == i; 391 case TYPE_U16: 392 return reg.data.u16 == i; 393 case TYPE_S32: 394 case TYPE_U32: 395 return reg.data.s32 == i; // as if ... 396 case TYPE_F32: 397 return reg.data.f32 == static_cast<float>(i); 398 case TYPE_F64: 399 return reg.data.f64 == static_cast<double>(i); 400 default: 401 return false; 402 } 403 } 404 405 bool 406 ImmediateValue::isNegative() const 407 { 408 switch (reg.type) { 409 case TYPE_S8: return reg.data.s8 < 0; 410 case TYPE_S16: return reg.data.s16 < 0; 411 case TYPE_S32: 412 case TYPE_U32: return reg.data.s32 < 0; 413 case TYPE_F32: return reg.data.u32 & (1 << 31); 414 case TYPE_F64: return reg.data.u64 & (1ULL << 63); 415 default: 416 return false; 417 } 418 } 419 420 bool 421 ImmediateValue::isPow2() const 422 { 423 switch (reg.type) { 424 case TYPE_U8: 425 case TYPE_U16: 426 case TYPE_U32: return util_is_power_of_two(reg.data.u32); 427 default: 428 return false; 429 } 430 } 431 432 void 433 ImmediateValue::applyLog2() 434 { 435 switch (reg.type) { 436 case TYPE_S8: 437 case TYPE_S16: 438 case TYPE_S32: 439 assert(!this->isNegative()); 440 // fall through 441 case TYPE_U8: 442 case TYPE_U16: 443 case TYPE_U32: 444 reg.data.u32 = util_logbase2(reg.data.u32); 445 break; 446 case TYPE_F32: 447 reg.data.f32 = log2f(reg.data.f32); 448 break; 449 case TYPE_F64: 450 reg.data.f64 = log2(reg.data.f64); 451 break; 452 default: 453 assert(0); 454 break; 455 } 456 } 457 458 bool 459 ImmediateValue::compare(CondCode cc, float fval) const 460 { 461 if (reg.type != TYPE_F32) 462 ERROR("immediate value is not of type f32"); 463 464 switch (static_cast<CondCode>(cc & 7)) { 465 case CC_TR: return true; 466 case CC_FL: return false; 467 case CC_LT: return reg.data.f32 < fval; 468 case CC_LE: return reg.data.f32 <= fval; 469 case CC_GT: return reg.data.f32 > fval; 470 case CC_GE: return reg.data.f32 >= fval; 471 case CC_EQ: return reg.data.f32 == fval; 472 case CC_NE: return reg.data.f32 != fval; 473 default: 474 assert(0); 475 return false; 476 } 477 } 478 479 ImmediateValue& 480 ImmediateValue::operator=(const ImmediateValue &that) 481 { 482 this->reg = that.reg; 483 return (*this); 484 } 485 486 bool 487 Value::interfers(const Value *that) const 488 { 489 uint32_t idA, idB; 490 491 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 492 return false; 493 if (this->asImm()) 494 return false; 495 496 if (this->asSym()) { 497 idA = this->join->reg.data.offset; 498 idB = that->join->reg.data.offset; 499 } else { 500 idA = this->join->reg.data.id * MIN2(this->reg.size, 4); 501 idB = that->join->reg.data.id * MIN2(that->reg.size, 4); 502 } 503 504 if (idA < idB) 505 return (idA + this->reg.size > idB); 506 else 507 if (idA > idB) 508 return (idB + that->reg.size > idA); 509 else 510 return (idA == idB); 511 } 512 513 bool 514 Value::equals(const Value *that, bool strict) const 515 { 516 if (strict) 517 return this == that; 518 519 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 520 return false; 521 if (that->reg.size != this->reg.size) 522 return false; 523 524 if (that->reg.data.id != this->reg.data.id) 525 return false; 526 527 return true; 528 } 529 530 bool 531 ImmediateValue::equals(const Value *that, bool strict) const 532 { 533 const ImmediateValue *imm = that->asImm(); 534 if (!imm) 535 return false; 536 return reg.data.u64 == imm->reg.data.u64; 537 } 538 539 bool 540 Symbol::equals(const Value *that, bool strict) const 541 { 542 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) 543 return false; 544 assert(that->asSym()); 545 546 if (this->baseSym != that->asSym()->baseSym) 547 return false; 548 549 return this->reg.data.offset == that->reg.data.offset; 550 } 551 552 void Instruction::init() 553 { 554 next = prev = 0; 555 556 cc = CC_ALWAYS; 557 rnd = ROUND_N; 558 cache = CACHE_CA; 559 subOp = 0; 560 561 saturate = 0; 562 join = 0; 563 exit = 0; 564 terminator = 0; 565 ftz = 0; 566 dnz = 0; 567 atomic = 0; 568 perPatch = 0; 569 fixed = 0; 570 encSize = 0; 571 ipa = 0; 572 573 lanes = 0xf; 574 575 postFactor = 0; 576 577 predSrc = -1; 578 flagsDef = -1; 579 flagsSrc = -1; 580 } 581 582 Instruction::Instruction() 583 { 584 init(); 585 586 op = OP_NOP; 587 dType = sType = TYPE_F32; 588 589 id = -1; 590 bb = 0; 591 } 592 593 Instruction::Instruction(Function *fn, operation opr, DataType ty) 594 { 595 init(); 596 597 op = opr; 598 dType = sType = ty; 599 600 fn->add(this, id); 601 } 602 603 Instruction::~Instruction() 604 { 605 if (bb) { 606 Function *fn = bb->getFunction(); 607 bb->remove(this); 608 fn->allInsns.remove(id); 609 } 610 611 for (int s = 0; srcExists(s); ++s) 612 setSrc(s, NULL); 613 // must unlink defs too since the list pointers will get deallocated 614 for (int d = 0; defExists(d); ++d) 615 setDef(d, NULL); 616 } 617 618 void 619 Instruction::setDef(int i, Value *val) 620 { 621 int size = defs.size(); 622 if (i >= size) { 623 defs.resize(i + 1); 624 while (size <= i) 625 defs[size++].setInsn(this); 626 } 627 defs[i].set(val); 628 } 629 630 void 631 Instruction::setSrc(int s, Value *val) 632 { 633 int size = srcs.size(); 634 if (s >= size) { 635 srcs.resize(s + 1); 636 while (size <= s) 637 srcs[size++].setInsn(this); 638 } 639 srcs[s].set(val); 640 } 641 642 void 643 Instruction::setSrc(int s, const ValueRef& ref) 644 { 645 setSrc(s, ref.get()); 646 srcs[s].mod = ref.mod; 647 } 648 649 void 650 Instruction::swapSources(int a, int b) 651 { 652 Value *value = srcs[a].get(); 653 Modifier m = srcs[a].mod; 654 655 setSrc(a, srcs[b]); 656 657 srcs[b].set(value); 658 srcs[b].mod = m; 659 } 660 661 // TODO: extend for delta < 0 662 void 663 Instruction::moveSources(int s, int delta) 664 { 665 if (delta == 0) 666 return; 667 assert(delta > 0); 668 669 int k; 670 for (k = 0; srcExists(k); ++k) { 671 for (int i = 0; i < 2; ++i) { 672 if (src(k).indirect[i] >= s) 673 src(k).indirect[i] += delta; 674 } 675 } 676 if (predSrc >= s) 677 predSrc += delta; 678 if (flagsSrc >= s) 679 flagsSrc += delta; 680 681 --k; 682 for (int p = k + delta; k >= s; --k, --p) 683 setSrc(p, src(k)); 684 } 685 686 void 687 Instruction::takeExtraSources(int s, Value *values[3]) 688 { 689 values[0] = getIndirect(s, 0); 690 if (values[0]) 691 setIndirect(s, 0, NULL); 692 693 values[1] = getIndirect(s, 1); 694 if (values[1]) 695 setIndirect(s, 1, NULL); 696 697 values[2] = getPredicate(); 698 if (values[2]) 699 setPredicate(cc, NULL); 700 } 701 702 void 703 Instruction::putExtraSources(int s, Value *values[3]) 704 { 705 if (values[0]) 706 setIndirect(s, 0, values[0]); 707 if (values[1]) 708 setIndirect(s, 1, values[1]); 709 if (values[2]) 710 setPredicate(cc, values[2]); 711 } 712 713 Instruction * 714 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 715 { 716 if (!i) 717 i = new_Instruction(pol.context(), op, dType); 718 #ifndef NDEBUG // non-conformant assert, so this is required 719 assert(typeid(*i) == typeid(*this)); 720 #endif 721 722 pol.set<Instruction>(this, i); 723 724 i->sType = sType; 725 726 i->rnd = rnd; 727 i->cache = cache; 728 i->subOp = subOp; 729 730 i->saturate = saturate; 731 i->join = join; 732 i->exit = exit; 733 i->atomic = atomic; 734 i->ftz = ftz; 735 i->dnz = dnz; 736 i->ipa = ipa; 737 i->lanes = lanes; 738 i->perPatch = perPatch; 739 740 i->postFactor = postFactor; 741 742 for (int d = 0; defExists(d); ++d) 743 i->setDef(d, pol.get(getDef(d))); 744 745 for (int s = 0; srcExists(s); ++s) { 746 i->setSrc(s, pol.get(getSrc(s))); 747 i->src(s).mod = src(s).mod; 748 } 749 750 i->cc = cc; 751 i->predSrc = predSrc; 752 i->flagsDef = flagsDef; 753 i->flagsSrc = flagsSrc; 754 755 return i; 756 } 757 758 unsigned int 759 Instruction::defCount(unsigned int mask, bool singleFile) const 760 { 761 unsigned int i, n; 762 763 if (singleFile) { 764 unsigned int d = ffs(mask); 765 if (!d) 766 return 0; 767 for (i = d--; defExists(i); ++i) 768 if (getDef(i)->reg.file != getDef(d)->reg.file) 769 mask &= ~(1 << i); 770 } 771 772 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) 773 n += mask & 1; 774 return n; 775 } 776 777 unsigned int 778 Instruction::srcCount(unsigned int mask, bool singleFile) const 779 { 780 unsigned int i, n; 781 782 if (singleFile) { 783 unsigned int s = ffs(mask); 784 if (!s) 785 return 0; 786 for (i = s--; srcExists(i); ++i) 787 if (getSrc(i)->reg.file != getSrc(s)->reg.file) 788 mask &= ~(1 << i); 789 } 790 791 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) 792 n += mask & 1; 793 return n; 794 } 795 796 bool 797 Instruction::setIndirect(int s, int dim, Value *value) 798 { 799 assert(this->srcExists(s)); 800 801 int p = srcs[s].indirect[dim]; 802 if (p < 0) { 803 if (!value) 804 return true; 805 p = srcs.size(); 806 while (p > 0 && !srcExists(p - 1)) 807 --p; 808 } 809 setSrc(p, value); 810 srcs[p].usedAsPtr = (value != 0); 811 srcs[s].indirect[dim] = value ? p : -1; 812 return true; 813 } 814 815 bool 816 Instruction::setPredicate(CondCode ccode, Value *value) 817 { 818 cc = ccode; 819 820 if (!value) { 821 if (predSrc >= 0) { 822 srcs[predSrc].set(NULL); 823 predSrc = -1; 824 } 825 return true; 826 } 827 828 if (predSrc < 0) { 829 predSrc = srcs.size(); 830 while (predSrc > 0 && !srcExists(predSrc - 1)) 831 --predSrc; 832 } 833 834 setSrc(predSrc, value); 835 return true; 836 } 837 838 bool 839 Instruction::writesPredicate() const 840 { 841 for (int d = 0; defExists(d); ++d) 842 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) 843 return true; 844 return false; 845 } 846 847 static bool 848 insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b) 849 { 850 for (int d = 0; a->defExists(d); ++d) 851 for (int s = 0; b->srcExists(s); ++s) 852 if (a->getDef(d)->interfers(b->getSrc(s))) 853 return false; 854 return true; 855 } 856 857 static bool 858 insnCheckCommutationDefDef(const Instruction *a, const Instruction *b) 859 { 860 for (int d = 0; a->defExists(d); ++d) 861 for (int c = 0; b->defExists(c); ++c) 862 if (a->getDef(d)->interfers(b->getDef(c))) 863 return false; 864 return true; 865 } 866 867 bool 868 Instruction::isCommutationLegal(const Instruction *i) const 869 { 870 bool ret = insnCheckCommutationDefDef(this, i); 871 ret = ret && insnCheckCommutationDefSrc(this, i); 872 ret = ret && insnCheckCommutationDefSrc(i, this); 873 return ret; 874 } 875 876 TexInstruction::TexInstruction(Function *fn, operation op) 877 : Instruction(fn, op, TYPE_F32) 878 { 879 memset(&tex, 0, sizeof(tex)); 880 881 tex.rIndirectSrc = -1; 882 tex.sIndirectSrc = -1; 883 } 884 885 TexInstruction::~TexInstruction() 886 { 887 for (int c = 0; c < 3; ++c) { 888 dPdx[c].set(NULL); 889 dPdy[c].set(NULL); 890 } 891 } 892 893 TexInstruction * 894 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 895 { 896 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) : 897 new_TexInstruction(pol.context(), op)); 898 899 Instruction::clone(pol, tex); 900 901 tex->tex = this->tex; 902 903 if (op == OP_TXD) { 904 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { 905 tex->dPdx[c].set(dPdx[c]); 906 tex->dPdy[c].set(dPdy[c]); 907 } 908 } 909 910 return tex; 911 } 912 913 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = 914 { 915 { "1D", 1, 1, false, false, false }, 916 { "2D", 2, 2, false, false, false }, 917 { "2D_MS", 2, 2, false, false, false }, 918 { "3D", 3, 3, false, false, false }, 919 { "CUBE", 2, 3, false, true, false }, 920 { "1D_SHADOW", 1, 1, false, false, true }, 921 { "2D_SHADOW", 2, 2, false, false, true }, 922 { "CUBE_SHADOW", 2, 3, false, true, true }, 923 { "1D_ARRAY", 1, 2, true, false, false }, 924 { "2D_ARRAY", 2, 3, true, false, false }, 925 { "2D_MS_ARRAY", 2, 3, true, false, false }, 926 { "CUBE_ARRAY", 2, 4, true, true, false }, 927 { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, 928 { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, 929 { "RECT", 2, 2, false, false, false }, 930 { "RECT_SHADOW", 2, 2, false, false, true }, 931 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, 932 { "BUFFER", 1, 1, false, false, false }, 933 }; 934 935 CmpInstruction::CmpInstruction(Function *fn, operation op) 936 : Instruction(fn, op, TYPE_F32) 937 { 938 setCond = CC_ALWAYS; 939 } 940 941 CmpInstruction * 942 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 943 { 944 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) : 945 new_CmpInstruction(pol.context(), op)); 946 cmp->dType = dType; 947 Instruction::clone(pol, cmp); 948 cmp->setCond = setCond; 949 return cmp; 950 } 951 952 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) 953 : Instruction(fn, op, TYPE_NONE) 954 { 955 if (op == OP_CALL) 956 target.fn = reinterpret_cast<Function *>(targ); 957 else 958 target.bb = reinterpret_cast<BasicBlock *>(targ); 959 960 if (op == OP_BRA || 961 op == OP_CONT || op == OP_BREAK || 962 op == OP_RET || op == OP_EXIT) 963 terminator = 1; 964 else 965 if (op == OP_JOIN) 966 terminator = targ ? 1 : 0; 967 968 allWarp = absolute = limit = builtin = 0; 969 } 970 971 FlowInstruction * 972 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 973 { 974 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : 975 new_FlowInstruction(pol.context(), op, NULL)); 976 977 Instruction::clone(pol, flow); 978 flow->allWarp = allWarp; 979 flow->absolute = absolute; 980 flow->limit = limit; 981 flow->builtin = builtin; 982 983 if (builtin) 984 flow->target.builtin = target.builtin; 985 else 986 if (op == OP_CALL) 987 flow->target.fn = target.fn; 988 else 989 if (target.bb) 990 flow->target.bb = pol.get<BasicBlock>(target.bb); 991 992 return flow; 993 } 994 995 Program::Program(Type type, Target *arch) 996 : progType(type), 997 target(arch), 998 mem_Instruction(sizeof(Instruction), 6), 999 mem_CmpInstruction(sizeof(CmpInstruction), 4), 1000 mem_TexInstruction(sizeof(TexInstruction), 4), 1001 mem_FlowInstruction(sizeof(FlowInstruction), 4), 1002 mem_LValue(sizeof(LValue), 8), 1003 mem_Symbol(sizeof(Symbol), 7), 1004 mem_ImmediateValue(sizeof(ImmediateValue), 7) 1005 { 1006 code = NULL; 1007 binSize = 0; 1008 1009 maxGPR = -1; 1010 1011 main = new Function(this, "MAIN", ~0); 1012 calls.insert(&main->call); 1013 1014 dbgFlags = 0; 1015 optLevel = 0; 1016 1017 targetPriv = NULL; 1018 } 1019 1020 Program::~Program() 1021 { 1022 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) 1023 delete reinterpret_cast<Function *>(it.get()); 1024 1025 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next()) 1026 releaseValue(reinterpret_cast<Value *>(it.get())); 1027 } 1028 1029 void Program::releaseInstruction(Instruction *insn) 1030 { 1031 // TODO: make this not suck so much 1032 1033 insn->~Instruction(); 1034 1035 if (insn->asCmp()) 1036 mem_CmpInstruction.release(insn); 1037 else 1038 if (insn->asTex()) 1039 mem_TexInstruction.release(insn); 1040 else 1041 if (insn->asFlow()) 1042 mem_FlowInstruction.release(insn); 1043 else 1044 mem_Instruction.release(insn); 1045 } 1046 1047 void Program::releaseValue(Value *value) 1048 { 1049 value->~Value(); 1050 1051 if (value->asLValue()) 1052 mem_LValue.release(value); 1053 else 1054 if (value->asImm()) 1055 mem_ImmediateValue.release(value); 1056 else 1057 if (value->asSym()) 1058 mem_Symbol.release(value); 1059 } 1060 1061 1062 } // namespace nv50_ir 1063 1064 extern "C" { 1065 1066 static void 1067 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) 1068 { 1069 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN) 1070 if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) { 1071 info->prop.tp.domain = PIPE_PRIM_MAX; 1072 info->prop.tp.outputPrim = PIPE_PRIM_MAX; 1073 } 1074 #endif 1075 if (info->type == PIPE_SHADER_GEOMETRY) { 1076 info->prop.gp.instanceCount = 1; 1077 info->prop.gp.maxVertices = 1; 1078 } 1079 info->io.clipDistance = 0xff; 1080 info->io.pointSize = 0xff; 1081 info->io.instanceId = 0xff; 1082 info->io.vertexId = 0xff; 1083 info->io.edgeFlagIn = 0xff; 1084 info->io.edgeFlagOut = 0xff; 1085 info->io.fragDepth = 0xff; 1086 info->io.sampleMask = 0xff; 1087 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; 1088 } 1089 1090 int 1091 nv50_ir_generate_code(struct nv50_ir_prog_info *info) 1092 { 1093 int ret = 0; 1094 1095 nv50_ir::Program::Type type; 1096 1097 nv50_ir_init_prog_info(info); 1098 1099 #define PROG_TYPE_CASE(a, b) \ 1100 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break 1101 1102 switch (info->type) { 1103 PROG_TYPE_CASE(VERTEX, VERTEX); 1104 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL); 1105 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL); 1106 PROG_TYPE_CASE(GEOMETRY, GEOMETRY); 1107 PROG_TYPE_CASE(FRAGMENT, FRAGMENT); 1108 default: 1109 type = nv50_ir::Program::TYPE_COMPUTE; 1110 break; 1111 } 1112 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); 1113 1114 nv50_ir::Target *targ = nv50_ir::Target::create(info->target); 1115 if (!targ) 1116 return -1; 1117 1118 nv50_ir::Program *prog = new nv50_ir::Program(type, targ); 1119 if (!prog) 1120 return -1; 1121 prog->dbgFlags = info->dbgFlags; 1122 prog->optLevel = info->optLevel; 1123 1124 switch (info->bin.sourceRep) { 1125 #if 0 1126 case PIPE_IR_LLVM: 1127 case PIPE_IR_GLSL: 1128 return -1; 1129 case PIPE_IR_SM4: 1130 ret = prog->makeFromSM4(info) ? 0 : -2; 1131 break; 1132 case PIPE_IR_TGSI: 1133 #endif 1134 default: 1135 ret = prog->makeFromTGSI(info) ? 0 : -2; 1136 break; 1137 } 1138 if (ret < 0) 1139 goto out; 1140 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1141 prog->print(); 1142 1143 targ->parseDriverInfo(info); 1144 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); 1145 1146 prog->convertToSSA(); 1147 1148 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1149 prog->print(); 1150 1151 prog->optimizeSSA(info->optLevel); 1152 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); 1153 1154 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1155 prog->print(); 1156 1157 if (!prog->registerAllocation()) { 1158 ret = -4; 1159 goto out; 1160 } 1161 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); 1162 1163 prog->optimizePostRA(info->optLevel); 1164 1165 if (!prog->emitBinary(info)) { 1166 ret = -5; 1167 goto out; 1168 } 1169 1170 out: 1171 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); 1172 1173 info->bin.maxGPR = prog->maxGPR; 1174 info->bin.code = prog->code; 1175 info->bin.codeSize = prog->binSize; 1176 info->bin.tlsSpace = prog->tlsSize; 1177 1178 delete prog; 1179 nv50_ir::Target::destroy(targ); 1180 1181 return ret; 1182 } 1183 1184 } // extern "C" 1185