1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "codegen/nv50_ir.h" 24 #include "codegen/nv50_ir_target.h" 25 #include "codegen/nv50_ir_driver.h" 26 27 extern "C" { 28 #include "nouveau_debug.h" 29 #include "nv50/nv50_program.h" 30 } 31 32 namespace nv50_ir { 33 34 Modifier::Modifier(operation op) 35 { 36 switch (op) { 37 case OP_NEG: bits = NV50_IR_MOD_NEG; break; 38 case OP_ABS: bits = NV50_IR_MOD_ABS; break; 39 case OP_SAT: bits = NV50_IR_MOD_SAT; break; 40 case OP_NOT: bits = NV50_IR_MOD_NOT; break; 41 default: 42 bits = 0; 43 break; 44 } 45 } 46 47 Modifier Modifier::operator*(const Modifier m) const 48 { 49 unsigned int a, b, c; 50 51 b = m.bits; 52 if (this->bits & NV50_IR_MOD_ABS) 53 b &= ~NV50_IR_MOD_NEG; 54 55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); 56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); 57 58 return Modifier(a | c); 59 } 60 61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) 62 { 63 indirect[0] = -1; 64 indirect[1] = -1; 65 usedAsPtr = false; 66 set(v); 67 } 68 69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) 70 { 71 set(ref); 72 usedAsPtr = ref.usedAsPtr; 73 } 74 75 ValueRef::~ValueRef() 76 { 77 this->set(NULL); 78 } 79 80 bool ValueRef::getImmediate(ImmediateValue &imm) const 81 { 82 const ValueRef *src = this; 83 Modifier m; 84 DataType type = src->insn->sType; 85 86 while (src) { 87 if (src->mod) { 88 if (src->insn->sType != type) 89 break; 90 m *= src->mod; 91 } 92 if (src->getFile() == FILE_IMMEDIATE) { 93 imm = *(src->value->asImm()); 94 // The immediate's type isn't required to match its use, it's 95 // more of a hint; applying a modifier makes use of that hint. 96 imm.reg.type = type; 97 m.applyTo(imm); 98 return true; 99 } 100 101 Instruction *insn = src->value->getUniqueInsn(); 102 103 if (insn && insn->op == OP_MOV) { 104 src = &insn->src(0); 105 if (src->mod) 106 WARN("OP_MOV with modifier encountered !\n"); 107 } else { 108 src = NULL; 109 } 110 } 111 return false; 112 } 113 114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL) 115 { 116 set(v); 117 } 118 119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL) 120 { 121 set(def.get()); 122 } 123 124 ValueDef::~ValueDef() 125 { 126 this->set(NULL); 127 } 128 129 void 130 ValueRef::set(const ValueRef &ref) 131 { 132 this->set(ref.get()); 133 mod = ref.mod; 134 indirect[0] = ref.indirect[0]; 135 indirect[1] = ref.indirect[1]; 136 } 137 138 void 139 ValueRef::set(Value *refVal) 140 { 141 if (value == refVal) 142 return; 143 if (value) 144 value->uses.erase(this); 145 if (refVal) 146 refVal->uses.insert(this); 147 148 value = refVal; 149 } 150 151 void 152 ValueDef::set(Value *defVal) 153 { 154 if (value == defVal) 155 return; 156 if (value) 157 value->defs.remove(this); 158 if (defVal) 159 defVal->defs.push_back(this); 160 161 value = defVal; 162 } 163 164 // Check if we can replace this definition's value by the value in @rep, 165 // including the source modifiers, i.e. make sure that all uses support 166 // @rep.mod. 167 bool 168 ValueDef::mayReplace(const ValueRef &rep) 169 { 170 if (!rep.mod) 171 return true; 172 173 if (!insn || !insn->bb) // Unbound instruction ? 174 return false; 175 176 const Target *target = insn->bb->getProgram()->getTarget(); 177 178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); 179 ++it) { 180 Instruction *insn = (*it)->getInsn(); 181 int s = -1; 182 183 for (int i = 0; insn->srcExists(i); ++i) { 184 if (insn->src(i).get() == value) { 185 // If there are multiple references to us we'd have to check if the 186 // combination of mods is still supported, but just bail for now. 187 if (&insn->src(i) != (*it)) 188 return false; 189 s = i; 190 } 191 } 192 assert(s >= 0); // integrity of uses list 193 194 if (!target->isModSupported(insn, s, rep.mod)) 195 return false; 196 } 197 return true; 198 } 199 200 void 201 ValueDef::replace(const ValueRef &repVal, bool doSet) 202 { 203 assert(mayReplace(repVal)); 204 205 if (value == repVal.get()) 206 return; 207 208 while (!value->uses.empty()) { 209 ValueRef *ref = *value->uses.begin(); 210 ref->set(repVal.get()); 211 ref->mod *= repVal.mod; 212 } 213 214 if (doSet) 215 set(repVal.get()); 216 } 217 218 Value::Value() 219 { 220 join = this; 221 memset(®, 0, sizeof(reg)); 222 reg.size = 4; 223 } 224 225 LValue::LValue(Function *fn, DataFile file) 226 { 227 reg.file = file; 228 reg.size = (file != FILE_PREDICATE) ? 4 : 1; 229 reg.data.id = -1; 230 231 compMask = 0; 232 compound = 0; 233 ssa = 0; 234 fixedReg = 0; 235 noSpill = 0; 236 237 fn->add(this, this->id); 238 } 239 240 LValue::LValue(Function *fn, LValue *lval) 241 { 242 assert(lval); 243 244 reg.file = lval->reg.file; 245 reg.size = lval->reg.size; 246 reg.data.id = -1; 247 248 compMask = 0; 249 compound = 0; 250 ssa = 0; 251 fixedReg = 0; 252 noSpill = 0; 253 254 fn->add(this, this->id); 255 } 256 257 LValue * 258 LValue::clone(ClonePolicy<Function>& pol) const 259 { 260 LValue *that = new_LValue(pol.context(), reg.file); 261 262 pol.set<Value>(this, that); 263 264 that->reg.size = this->reg.size; 265 that->reg.type = this->reg.type; 266 that->reg.data = this->reg.data; 267 268 return that; 269 } 270 271 bool 272 LValue::isUniform() const 273 { 274 if (defs.size() > 1) 275 return false; 276 Instruction *insn = getInsn(); 277 // let's not try too hard here for now ... 278 return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); 279 } 280 281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx) 282 { 283 baseSym = NULL; 284 285 reg.file = f; 286 reg.fileIndex = fidx; 287 reg.data.offset = 0; 288 289 prog->add(this, this->id); 290 } 291 292 Symbol * 293 Symbol::clone(ClonePolicy<Function>& pol) const 294 { 295 Program *prog = pol.context()->getProgram(); 296 297 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); 298 299 pol.set<Value>(this, that); 300 301 that->reg.size = this->reg.size; 302 that->reg.type = this->reg.type; 303 that->reg.data = this->reg.data; 304 305 that->baseSym = this->baseSym; 306 307 return that; 308 } 309 310 bool 311 Symbol::isUniform() const 312 { 313 return 314 reg.file != FILE_SYSTEM_VALUE && 315 reg.file != FILE_MEMORY_LOCAL && 316 reg.file != FILE_SHADER_INPUT; 317 } 318 319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) 320 { 321 memset(®, 0, sizeof(reg)); 322 323 reg.file = FILE_IMMEDIATE; 324 reg.size = 4; 325 reg.type = TYPE_U32; 326 327 reg.data.u32 = uval; 328 329 prog->add(this, this->id); 330 } 331 332 ImmediateValue::ImmediateValue(Program *prog, float fval) 333 { 334 memset(®, 0, sizeof(reg)); 335 336 reg.file = FILE_IMMEDIATE; 337 reg.size = 4; 338 reg.type = TYPE_F32; 339 340 reg.data.f32 = fval; 341 342 prog->add(this, this->id); 343 } 344 345 ImmediateValue::ImmediateValue(Program *prog, double dval) 346 { 347 memset(®, 0, sizeof(reg)); 348 349 reg.file = FILE_IMMEDIATE; 350 reg.size = 8; 351 reg.type = TYPE_F64; 352 353 reg.data.f64 = dval; 354 355 prog->add(this, this->id); 356 } 357 358 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) 359 { 360 reg = proto->reg; 361 362 reg.type = ty; 363 reg.size = typeSizeof(ty); 364 } 365 366 ImmediateValue * 367 ImmediateValue::clone(ClonePolicy<Function>& pol) const 368 { 369 Program *prog = pol.context()->getProgram(); 370 ImmediateValue *that = new_ImmediateValue(prog, 0u); 371 372 pol.set<Value>(this, that); 373 374 that->reg.size = this->reg.size; 375 that->reg.type = this->reg.type; 376 that->reg.data = this->reg.data; 377 378 return that; 379 } 380 381 bool 382 ImmediateValue::isInteger(const int i) const 383 { 384 switch (reg.type) { 385 case TYPE_S8: 386 return reg.data.s8 == i; 387 case TYPE_U8: 388 return reg.data.u8 == i; 389 case TYPE_S16: 390 return reg.data.s16 == i; 391 case TYPE_U16: 392 return reg.data.u16 == i; 393 case TYPE_S32: 394 case TYPE_U32: 395 return reg.data.s32 == i; // as if ... 396 case TYPE_S64: 397 case TYPE_U64: 398 return reg.data.s64 == i; // as if ... 399 case TYPE_F32: 400 return reg.data.f32 == static_cast<float>(i); 401 case TYPE_F64: 402 return reg.data.f64 == static_cast<double>(i); 403 default: 404 return false; 405 } 406 } 407 408 bool 409 ImmediateValue::isNegative() const 410 { 411 switch (reg.type) { 412 case TYPE_S8: return reg.data.s8 < 0; 413 case TYPE_S16: return reg.data.s16 < 0; 414 case TYPE_S32: 415 case TYPE_U32: return reg.data.s32 < 0; 416 case TYPE_F32: return reg.data.u32 & (1 << 31); 417 case TYPE_F64: return reg.data.u64 & (1ULL << 63); 418 default: 419 return false; 420 } 421 } 422 423 bool 424 ImmediateValue::isPow2() const 425 { 426 switch (reg.type) { 427 case TYPE_U8: 428 case TYPE_U16: 429 case TYPE_U32: return util_is_power_of_two(reg.data.u32); 430 default: 431 return false; 432 } 433 } 434 435 void 436 ImmediateValue::applyLog2() 437 { 438 switch (reg.type) { 439 case TYPE_S8: 440 case TYPE_S16: 441 case TYPE_S32: 442 assert(!this->isNegative()); 443 // fall through 444 case TYPE_U8: 445 case TYPE_U16: 446 case TYPE_U32: 447 reg.data.u32 = util_logbase2(reg.data.u32); 448 break; 449 case TYPE_F32: 450 reg.data.f32 = log2f(reg.data.f32); 451 break; 452 case TYPE_F64: 453 reg.data.f64 = log2(reg.data.f64); 454 break; 455 default: 456 assert(0); 457 break; 458 } 459 } 460 461 bool 462 ImmediateValue::compare(CondCode cc, float fval) const 463 { 464 if (reg.type != TYPE_F32) 465 ERROR("immediate value is not of type f32"); 466 467 switch (static_cast<CondCode>(cc & 7)) { 468 case CC_TR: return true; 469 case CC_FL: return false; 470 case CC_LT: return reg.data.f32 < fval; 471 case CC_LE: return reg.data.f32 <= fval; 472 case CC_GT: return reg.data.f32 > fval; 473 case CC_GE: return reg.data.f32 >= fval; 474 case CC_EQ: return reg.data.f32 == fval; 475 case CC_NE: return reg.data.f32 != fval; 476 default: 477 assert(0); 478 return false; 479 } 480 } 481 482 ImmediateValue& 483 ImmediateValue::operator=(const ImmediateValue &that) 484 { 485 this->reg = that.reg; 486 return (*this); 487 } 488 489 bool 490 Value::interfers(const Value *that) const 491 { 492 uint32_t idA, idB; 493 494 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 495 return false; 496 if (this->asImm()) 497 return false; 498 499 if (this->asSym()) { 500 idA = this->join->reg.data.offset; 501 idB = that->join->reg.data.offset; 502 } else { 503 idA = this->join->reg.data.id * MIN2(this->reg.size, 4); 504 idB = that->join->reg.data.id * MIN2(that->reg.size, 4); 505 } 506 507 if (idA < idB) 508 return (idA + this->reg.size > idB); 509 else 510 if (idA > idB) 511 return (idB + that->reg.size > idA); 512 else 513 return (idA == idB); 514 } 515 516 bool 517 Value::equals(const Value *that, bool strict) const 518 { 519 if (strict) 520 return this == that; 521 522 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 523 return false; 524 if (that->reg.size != this->reg.size) 525 return false; 526 527 if (that->reg.data.id != this->reg.data.id) 528 return false; 529 530 return true; 531 } 532 533 bool 534 ImmediateValue::equals(const Value *that, bool strict) const 535 { 536 const ImmediateValue *imm = that->asImm(); 537 if (!imm) 538 return false; 539 return reg.data.u64 == imm->reg.data.u64; 540 } 541 542 bool 543 Symbol::equals(const Value *that, bool strict) const 544 { 545 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) 546 return false; 547 assert(that->asSym()); 548 549 if (this->baseSym != that->asSym()->baseSym) 550 return false; 551 552 if (reg.file == FILE_SYSTEM_VALUE) 553 return (this->reg.data.sv.sv == that->reg.data.sv.sv && 554 this->reg.data.sv.index == that->reg.data.sv.index); 555 return this->reg.data.offset == that->reg.data.offset; 556 } 557 558 void Instruction::init() 559 { 560 next = prev = 0; 561 562 cc = CC_ALWAYS; 563 rnd = ROUND_N; 564 cache = CACHE_CA; 565 subOp = 0; 566 567 saturate = 0; 568 join = 0; 569 exit = 0; 570 terminator = 0; 571 ftz = 0; 572 dnz = 0; 573 perPatch = 0; 574 fixed = 0; 575 encSize = 0; 576 ipa = 0; 577 mask = 0; 578 579 lanes = 0xf; 580 581 postFactor = 0; 582 583 predSrc = -1; 584 flagsDef = -1; 585 flagsSrc = -1; 586 } 587 588 Instruction::Instruction() 589 { 590 init(); 591 592 op = OP_NOP; 593 dType = sType = TYPE_F32; 594 595 id = -1; 596 bb = 0; 597 } 598 599 Instruction::Instruction(Function *fn, operation opr, DataType ty) 600 { 601 init(); 602 603 op = opr; 604 dType = sType = ty; 605 606 fn->add(this, id); 607 } 608 609 Instruction::~Instruction() 610 { 611 if (bb) { 612 Function *fn = bb->getFunction(); 613 bb->remove(this); 614 fn->allInsns.remove(id); 615 } 616 617 for (int s = 0; srcExists(s); ++s) 618 setSrc(s, NULL); 619 // must unlink defs too since the list pointers will get deallocated 620 for (int d = 0; defExists(d); ++d) 621 setDef(d, NULL); 622 } 623 624 void 625 Instruction::setDef(int i, Value *val) 626 { 627 int size = defs.size(); 628 if (i >= size) { 629 defs.resize(i + 1); 630 while (size <= i) 631 defs[size++].setInsn(this); 632 } 633 defs[i].set(val); 634 } 635 636 void 637 Instruction::setSrc(int s, Value *val) 638 { 639 int size = srcs.size(); 640 if (s >= size) { 641 srcs.resize(s + 1); 642 while (size <= s) 643 srcs[size++].setInsn(this); 644 } 645 srcs[s].set(val); 646 } 647 648 void 649 Instruction::setSrc(int s, const ValueRef& ref) 650 { 651 setSrc(s, ref.get()); 652 srcs[s].mod = ref.mod; 653 } 654 655 void 656 Instruction::swapSources(int a, int b) 657 { 658 Value *value = srcs[a].get(); 659 Modifier m = srcs[a].mod; 660 661 setSrc(a, srcs[b]); 662 663 srcs[b].set(value); 664 srcs[b].mod = m; 665 } 666 667 static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta) 668 { 669 if (index >= s) 670 index += delta; 671 else 672 if ((delta < 0) && (index >= (s + delta))) 673 index = -1; 674 } 675 676 // Moves sources [@s,last_source] by @delta. 677 // If @delta < 0, sources [@s - abs(@delta), @s) are erased. 678 void 679 Instruction::moveSources(const int s, const int delta) 680 { 681 if (delta == 0) 682 return; 683 assert(s + delta >= 0); 684 685 int k; 686 687 for (k = 0; srcExists(k); ++k) { 688 for (int i = 0; i < 2; ++i) 689 moveSourcesAdjustIndex(src(k).indirect[i], s, delta); 690 } 691 moveSourcesAdjustIndex(predSrc, s, delta); 692 moveSourcesAdjustIndex(flagsSrc, s, delta); 693 if (asTex()) { 694 TexInstruction *tex = asTex(); 695 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); 696 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); 697 } 698 699 if (delta > 0) { 700 --k; 701 for (int p = k + delta; k >= s; --k, --p) 702 setSrc(p, src(k)); 703 } else { 704 int p; 705 for (p = s; p < k; ++p) 706 setSrc(p + delta, src(p)); 707 for (; (p + delta) < k; ++p) 708 setSrc(p + delta, NULL); 709 } 710 } 711 712 void 713 Instruction::takeExtraSources(int s, Value *values[3]) 714 { 715 values[0] = getIndirect(s, 0); 716 if (values[0]) 717 setIndirect(s, 0, NULL); 718 719 values[1] = getIndirect(s, 1); 720 if (values[1]) 721 setIndirect(s, 1, NULL); 722 723 values[2] = getPredicate(); 724 if (values[2]) 725 setPredicate(cc, NULL); 726 } 727 728 void 729 Instruction::putExtraSources(int s, Value *values[3]) 730 { 731 if (values[0]) 732 setIndirect(s, 0, values[0]); 733 if (values[1]) 734 setIndirect(s, 1, values[1]); 735 if (values[2]) 736 setPredicate(cc, values[2]); 737 } 738 739 Instruction * 740 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 741 { 742 if (!i) 743 i = new_Instruction(pol.context(), op, dType); 744 #ifndef NDEBUG // non-conformant assert, so this is required 745 assert(typeid(*i) == typeid(*this)); 746 #endif 747 748 pol.set<Instruction>(this, i); 749 750 i->sType = sType; 751 752 i->rnd = rnd; 753 i->cache = cache; 754 i->subOp = subOp; 755 756 i->saturate = saturate; 757 i->join = join; 758 i->exit = exit; 759 i->mask = mask; 760 i->ftz = ftz; 761 i->dnz = dnz; 762 i->ipa = ipa; 763 i->lanes = lanes; 764 i->perPatch = perPatch; 765 766 i->postFactor = postFactor; 767 768 for (int d = 0; defExists(d); ++d) 769 i->setDef(d, pol.get(getDef(d))); 770 771 for (int s = 0; srcExists(s); ++s) { 772 i->setSrc(s, pol.get(getSrc(s))); 773 i->src(s).mod = src(s).mod; 774 } 775 776 i->cc = cc; 777 i->predSrc = predSrc; 778 i->flagsDef = flagsDef; 779 i->flagsSrc = flagsSrc; 780 781 return i; 782 } 783 784 unsigned int 785 Instruction::defCount(unsigned int mask, bool singleFile) const 786 { 787 unsigned int i, n; 788 789 if (singleFile) { 790 unsigned int d = ffs(mask); 791 if (!d) 792 return 0; 793 for (i = d--; defExists(i); ++i) 794 if (getDef(i)->reg.file != getDef(d)->reg.file) 795 mask &= ~(1 << i); 796 } 797 798 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) 799 n += mask & 1; 800 return n; 801 } 802 803 unsigned int 804 Instruction::srcCount(unsigned int mask, bool singleFile) const 805 { 806 unsigned int i, n; 807 808 if (singleFile) { 809 unsigned int s = ffs(mask); 810 if (!s) 811 return 0; 812 for (i = s--; srcExists(i); ++i) 813 if (getSrc(i)->reg.file != getSrc(s)->reg.file) 814 mask &= ~(1 << i); 815 } 816 817 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) 818 n += mask & 1; 819 return n; 820 } 821 822 bool 823 Instruction::setIndirect(int s, int dim, Value *value) 824 { 825 assert(this->srcExists(s)); 826 827 int p = srcs[s].indirect[dim]; 828 if (p < 0) { 829 if (!value) 830 return true; 831 p = srcs.size(); 832 while (p > 0 && !srcExists(p - 1)) 833 --p; 834 } 835 setSrc(p, value); 836 srcs[p].usedAsPtr = (value != 0); 837 srcs[s].indirect[dim] = value ? p : -1; 838 return true; 839 } 840 841 bool 842 Instruction::setPredicate(CondCode ccode, Value *value) 843 { 844 cc = ccode; 845 846 if (!value) { 847 if (predSrc >= 0) { 848 srcs[predSrc].set(NULL); 849 predSrc = -1; 850 } 851 return true; 852 } 853 854 if (predSrc < 0) { 855 predSrc = srcs.size(); 856 while (predSrc > 0 && !srcExists(predSrc - 1)) 857 --predSrc; 858 } 859 860 setSrc(predSrc, value); 861 return true; 862 } 863 864 bool 865 Instruction::writesPredicate() const 866 { 867 for (int d = 0; defExists(d); ++d) 868 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) 869 return true; 870 return false; 871 } 872 873 bool 874 Instruction::canCommuteDefSrc(const Instruction *i) const 875 { 876 for (int d = 0; defExists(d); ++d) 877 for (int s = 0; i->srcExists(s); ++s) 878 if (getDef(d)->interfers(i->getSrc(s))) 879 return false; 880 return true; 881 } 882 883 bool 884 Instruction::canCommuteDefDef(const Instruction *i) const 885 { 886 for (int d = 0; defExists(d); ++d) 887 for (int c = 0; i->defExists(c); ++c) 888 if (getDef(d)->interfers(i->getDef(c))) 889 return false; 890 return true; 891 } 892 893 bool 894 Instruction::isCommutationLegal(const Instruction *i) const 895 { 896 return canCommuteDefDef(i) && 897 canCommuteDefSrc(i) && 898 i->canCommuteDefSrc(this); 899 } 900 901 TexInstruction::TexInstruction(Function *fn, operation op) 902 : Instruction(fn, op, TYPE_F32) 903 { 904 memset(&tex, 0, sizeof(tex)); 905 906 tex.rIndirectSrc = -1; 907 tex.sIndirectSrc = -1; 908 } 909 910 TexInstruction::~TexInstruction() 911 { 912 for (int c = 0; c < 3; ++c) { 913 dPdx[c].set(NULL); 914 dPdy[c].set(NULL); 915 } 916 for (int n = 0; n < 4; ++n) 917 for (int c = 0; c < 3; ++c) 918 offset[n][c].set(NULL); 919 } 920 921 TexInstruction * 922 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 923 { 924 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) : 925 new_TexInstruction(pol.context(), op)); 926 927 Instruction::clone(pol, tex); 928 929 tex->tex = this->tex; 930 931 if (op == OP_TXD) { 932 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { 933 tex->dPdx[c].set(dPdx[c]); 934 tex->dPdy[c].set(dPdy[c]); 935 } 936 } 937 938 for (int n = 0; n < tex->tex.useOffsets; ++n) 939 for (int c = 0; c < 3; ++c) 940 tex->offset[n][c].set(offset[n][c]); 941 942 return tex; 943 } 944 945 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = 946 { 947 { "1D", 1, 1, false, false, false }, 948 { "2D", 2, 2, false, false, false }, 949 { "2D_MS", 2, 3, false, false, false }, 950 { "3D", 3, 3, false, false, false }, 951 { "CUBE", 2, 3, false, true, false }, 952 { "1D_SHADOW", 1, 1, false, false, true }, 953 { "2D_SHADOW", 2, 2, false, false, true }, 954 { "CUBE_SHADOW", 2, 3, false, true, true }, 955 { "1D_ARRAY", 1, 2, true, false, false }, 956 { "2D_ARRAY", 2, 3, true, false, false }, 957 { "2D_MS_ARRAY", 2, 4, true, false, false }, 958 { "CUBE_ARRAY", 2, 4, true, true, false }, 959 { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, 960 { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, 961 { "RECT", 2, 2, false, false, false }, 962 { "RECT_SHADOW", 2, 2, false, false, true }, 963 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, 964 { "BUFFER", 1, 1, false, false, false }, 965 }; 966 967 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = 968 { 969 { "NONE", 0, { 0, 0, 0, 0 }, UINT }, 970 971 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT }, 972 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT }, 973 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT }, 974 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT }, 975 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT }, 976 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT }, 977 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT }, 978 979 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT }, 980 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT }, 981 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT }, 982 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT }, 983 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT }, 984 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT }, 985 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT }, 986 { "R32UI", 1, { 32, 0, 0, 0 }, UINT }, 987 { "R16UI", 1, { 16, 0, 0, 0 }, UINT }, 988 { "R8UI", 1, { 8, 0, 0, 0 }, UINT }, 989 990 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT }, 991 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT }, 992 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT }, 993 { "RG32I", 2, { 32, 32, 0, 0 }, SINT }, 994 { "RG16I", 2, { 16, 16, 0, 0 }, SINT }, 995 { "RG8I", 2, { 8, 8, 0, 0 }, SINT }, 996 { "R32I", 1, { 32, 0, 0, 0 }, SINT }, 997 { "R16I", 1, { 16, 0, 0, 0 }, SINT }, 998 { "R8I", 1, { 8, 0, 0, 0 }, SINT }, 999 1000 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM }, 1001 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM }, 1002 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM }, 1003 { "RG16", 2, { 16, 16, 0, 0 }, UNORM }, 1004 { "RG8", 2, { 8, 8, 0, 0 }, UNORM }, 1005 { "R16", 1, { 16, 0, 0, 0 }, UNORM }, 1006 { "R8", 1, { 8, 0, 0, 0 }, UNORM }, 1007 1008 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM }, 1009 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM }, 1010 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM }, 1011 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM }, 1012 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM }, 1013 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, 1014 1015 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true }, 1016 }; 1017 1018 void 1019 TexInstruction::setIndirectR(Value *v) 1020 { 1021 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; 1022 if (p >= 0) { 1023 tex.rIndirectSrc = p; 1024 setSrc(p, v); 1025 srcs[p].usedAsPtr = !!v; 1026 } 1027 } 1028 1029 void 1030 TexInstruction::setIndirectS(Value *v) 1031 { 1032 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; 1033 if (p >= 0) { 1034 tex.sIndirectSrc = p; 1035 setSrc(p, v); 1036 srcs[p].usedAsPtr = !!v; 1037 } 1038 } 1039 1040 CmpInstruction::CmpInstruction(Function *fn, operation op) 1041 : Instruction(fn, op, TYPE_F32) 1042 { 1043 setCond = CC_ALWAYS; 1044 } 1045 1046 CmpInstruction * 1047 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1048 { 1049 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) : 1050 new_CmpInstruction(pol.context(), op)); 1051 cmp->dType = dType; 1052 Instruction::clone(pol, cmp); 1053 cmp->setCond = setCond; 1054 return cmp; 1055 } 1056 1057 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) 1058 : Instruction(fn, op, TYPE_NONE) 1059 { 1060 if (op == OP_CALL) 1061 target.fn = reinterpret_cast<Function *>(targ); 1062 else 1063 target.bb = reinterpret_cast<BasicBlock *>(targ); 1064 1065 if (op == OP_BRA || 1066 op == OP_CONT || op == OP_BREAK || 1067 op == OP_RET || op == OP_EXIT) 1068 terminator = 1; 1069 else 1070 if (op == OP_JOIN) 1071 terminator = targ ? 1 : 0; 1072 1073 allWarp = absolute = limit = builtin = indirect = 0; 1074 } 1075 1076 FlowInstruction * 1077 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1078 { 1079 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : 1080 new_FlowInstruction(pol.context(), op, NULL)); 1081 1082 Instruction::clone(pol, flow); 1083 flow->allWarp = allWarp; 1084 flow->absolute = absolute; 1085 flow->limit = limit; 1086 flow->builtin = builtin; 1087 1088 if (builtin) 1089 flow->target.builtin = target.builtin; 1090 else 1091 if (op == OP_CALL) 1092 flow->target.fn = target.fn; 1093 else 1094 if (target.bb) 1095 flow->target.bb = pol.get<BasicBlock>(target.bb); 1096 1097 return flow; 1098 } 1099 1100 Program::Program(Type type, Target *arch) 1101 : progType(type), 1102 target(arch), 1103 mem_Instruction(sizeof(Instruction), 6), 1104 mem_CmpInstruction(sizeof(CmpInstruction), 4), 1105 mem_TexInstruction(sizeof(TexInstruction), 4), 1106 mem_FlowInstruction(sizeof(FlowInstruction), 4), 1107 mem_LValue(sizeof(LValue), 8), 1108 mem_Symbol(sizeof(Symbol), 7), 1109 mem_ImmediateValue(sizeof(ImmediateValue), 7) 1110 { 1111 code = NULL; 1112 binSize = 0; 1113 1114 maxGPR = -1; 1115 1116 main = new Function(this, "MAIN", ~0); 1117 calls.insert(&main->call); 1118 1119 dbgFlags = 0; 1120 optLevel = 0; 1121 1122 targetPriv = NULL; 1123 } 1124 1125 Program::~Program() 1126 { 1127 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) 1128 delete reinterpret_cast<Function *>(it.get()); 1129 1130 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next()) 1131 releaseValue(reinterpret_cast<Value *>(it.get())); 1132 } 1133 1134 void Program::releaseInstruction(Instruction *insn) 1135 { 1136 // TODO: make this not suck so much 1137 1138 insn->~Instruction(); 1139 1140 if (insn->asCmp()) 1141 mem_CmpInstruction.release(insn); 1142 else 1143 if (insn->asTex()) 1144 mem_TexInstruction.release(insn); 1145 else 1146 if (insn->asFlow()) 1147 mem_FlowInstruction.release(insn); 1148 else 1149 mem_Instruction.release(insn); 1150 } 1151 1152 void Program::releaseValue(Value *value) 1153 { 1154 value->~Value(); 1155 1156 if (value->asLValue()) 1157 mem_LValue.release(value); 1158 else 1159 if (value->asImm()) 1160 mem_ImmediateValue.release(value); 1161 else 1162 if (value->asSym()) 1163 mem_Symbol.release(value); 1164 } 1165 1166 1167 } // namespace nv50_ir 1168 1169 extern "C" { 1170 1171 static void 1172 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) 1173 { 1174 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { 1175 info->prop.tp.domain = PIPE_PRIM_MAX; 1176 info->prop.tp.outputPrim = PIPE_PRIM_MAX; 1177 } 1178 if (info->type == PIPE_SHADER_GEOMETRY) { 1179 info->prop.gp.instanceCount = 1; 1180 info->prop.gp.maxVertices = 1; 1181 } 1182 info->prop.cp.numThreads = 1; 1183 info->io.pointSize = 0xff; 1184 info->io.instanceId = 0xff; 1185 info->io.vertexId = 0xff; 1186 info->io.edgeFlagIn = 0xff; 1187 info->io.edgeFlagOut = 0xff; 1188 info->io.fragDepth = 0xff; 1189 info->io.sampleMask = 0xff; 1190 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; 1191 } 1192 1193 int 1194 nv50_ir_generate_code(struct nv50_ir_prog_info *info) 1195 { 1196 int ret = 0; 1197 1198 nv50_ir::Program::Type type; 1199 1200 nv50_ir_init_prog_info(info); 1201 1202 #define PROG_TYPE_CASE(a, b) \ 1203 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break 1204 1205 switch (info->type) { 1206 PROG_TYPE_CASE(VERTEX, VERTEX); 1207 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL); 1208 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL); 1209 PROG_TYPE_CASE(GEOMETRY, GEOMETRY); 1210 PROG_TYPE_CASE(FRAGMENT, FRAGMENT); 1211 PROG_TYPE_CASE(COMPUTE, COMPUTE); 1212 default: 1213 type = nv50_ir::Program::TYPE_COMPUTE; 1214 break; 1215 } 1216 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); 1217 1218 nv50_ir::Target *targ = nv50_ir::Target::create(info->target); 1219 if (!targ) 1220 return -1; 1221 1222 nv50_ir::Program *prog = new nv50_ir::Program(type, targ); 1223 if (!prog) 1224 return -1; 1225 prog->driver = info; 1226 prog->dbgFlags = info->dbgFlags; 1227 prog->optLevel = info->optLevel; 1228 1229 switch (info->bin.sourceRep) { 1230 #if 0 1231 case PIPE_IR_LLVM: 1232 case PIPE_IR_GLSL: 1233 return -1; 1234 case PIPE_IR_SM4: 1235 ret = prog->makeFromSM4(info) ? 0 : -2; 1236 break; 1237 case PIPE_IR_TGSI: 1238 #endif 1239 default: 1240 ret = prog->makeFromTGSI(info) ? 0 : -2; 1241 break; 1242 } 1243 if (ret < 0) 1244 goto out; 1245 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1246 prog->print(); 1247 1248 targ->parseDriverInfo(info); 1249 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); 1250 1251 prog->convertToSSA(); 1252 1253 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1254 prog->print(); 1255 1256 prog->optimizeSSA(info->optLevel); 1257 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); 1258 1259 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1260 prog->print(); 1261 1262 if (!prog->registerAllocation()) { 1263 ret = -4; 1264 goto out; 1265 } 1266 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); 1267 1268 prog->optimizePostRA(info->optLevel); 1269 1270 if (!prog->emitBinary(info)) { 1271 ret = -5; 1272 goto out; 1273 } 1274 1275 out: 1276 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); 1277 1278 info->bin.maxGPR = prog->maxGPR; 1279 info->bin.code = prog->code; 1280 info->bin.codeSize = prog->binSize; 1281 info->bin.tlsSpace = prog->tlsSize; 1282 1283 delete prog; 1284 nv50_ir::Target::destroy(targ); 1285 1286 return ret; 1287 } 1288 1289 } // extern "C" 1290