Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50_ir.h"
     24 #include "nv50_ir_target.h"
     25 #include "nv50_ir_driver.h"
     26 
     27 extern "C" {
     28 #include "nv50/nv50_program.h"
     29 #include "nv50/nv50_debug.h"
     30 }
     31 
     32 namespace nv50_ir {
     33 
     34 Modifier::Modifier(operation op)
     35 {
     36    switch (op) {
     37    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
     38    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
     39    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
     40    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
     41    default:
     42       bits = 0;
     43       break;
     44    }
     45 }
     46 
     47 Modifier Modifier::operator*(const Modifier m) const
     48 {
     49    unsigned int a, b, c;
     50 
     51    b = m.bits;
     52    if (this->bits & NV50_IR_MOD_ABS)
     53       b &= ~NV50_IR_MOD_NEG;
     54 
     55    a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
     56    c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
     57 
     58    return Modifier(a | c);
     59 }
     60 
     61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
     62 {
     63    indirect[0] = -1;
     64    indirect[1] = -1;
     65    usedAsPtr = false;
     66    set(v);
     67 }
     68 
     69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
     70 {
     71    set(ref);
     72    usedAsPtr = ref.usedAsPtr;
     73 }
     74 
     75 ValueRef::~ValueRef()
     76 {
     77    this->set(NULL);
     78 }
     79 
     80 bool ValueRef::getImmediate(ImmediateValue &imm) const
     81 {
     82    const ValueRef *src = this;
     83    Modifier m;
     84    DataType type = src->insn->sType;
     85 
     86    while (src) {
     87       if (src->mod) {
     88          if (src->insn->sType != type)
     89             break;
     90          m *= src->mod;
     91       }
     92       if (src->getFile() == FILE_IMMEDIATE) {
     93          imm = *(src->value->asImm());
     94          // The immediate's type isn't required to match its use, it's
     95          // more of a hint; applying a modifier makes use of that hint.
     96          imm.reg.type = type;
     97          m.applyTo(imm);
     98          return true;
     99       }
    100 
    101       Instruction *insn = src->value->getUniqueInsn();
    102 
    103       if (insn && insn->op == OP_MOV) {
    104          src = &insn->src(0);
    105          if (src->mod)
    106             WARN("OP_MOV with modifier encountered !\n");
    107       } else {
    108          src = NULL;
    109       }
    110    }
    111    return false;
    112 }
    113 
    114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
    115 {
    116    set(v);
    117 }
    118 
    119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
    120 {
    121    set(def.get());
    122 }
    123 
    124 ValueDef::~ValueDef()
    125 {
    126    this->set(NULL);
    127 }
    128 
    129 void
    130 ValueRef::set(const ValueRef &ref)
    131 {
    132    this->set(ref.get());
    133    mod = ref.mod;
    134    indirect[0] = ref.indirect[0];
    135    indirect[1] = ref.indirect[1];
    136 }
    137 
    138 void
    139 ValueRef::set(Value *refVal)
    140 {
    141    if (value == refVal)
    142       return;
    143    if (value)
    144       value->uses.remove(this);
    145    if (refVal)
    146       refVal->uses.push_back(this);
    147 
    148    value = refVal;
    149 }
    150 
    151 void
    152 ValueDef::set(Value *defVal)
    153 {
    154    if (value == defVal)
    155       return;
    156    if (value)
    157       value->defs.remove(this);
    158    if (defVal)
    159       defVal->defs.push_back(this);
    160 
    161    value = defVal;
    162 }
    163 
    164 // Check if we can replace this definition's value by the value in @rep,
    165 // including the source modifiers, i.e. make sure that all uses support
    166 // @rep.mod.
    167 bool
    168 ValueDef::mayReplace(const ValueRef &rep)
    169 {
    170    if (!rep.mod)
    171       return true;
    172 
    173    if (!insn || !insn->bb) // Unbound instruction ?
    174       return false;
    175 
    176    const Target *target = insn->bb->getProgram()->getTarget();
    177 
    178    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
    179         ++it) {
    180       Instruction *insn = (*it)->getInsn();
    181       int s = -1;
    182 
    183       for (int i = 0; insn->srcExists(i); ++i) {
    184          if (insn->src(i).get() == value) {
    185             // If there are multiple references to us we'd have to check if the
    186             // combination of mods is still supported, but just bail for now.
    187             if (&insn->src(i) != (*it))
    188                return false;
    189             s = i;
    190          }
    191       }
    192       assert(s >= 0); // integrity of uses list
    193 
    194       if (!target->isModSupported(insn, s, rep.mod))
    195          return false;
    196    }
    197    return true;
    198 }
    199 
    200 void
    201 ValueDef::replace(const ValueRef &repVal, bool doSet)
    202 {
    203    assert(mayReplace(repVal));
    204 
    205    if (value == repVal.get())
    206       return;
    207 
    208    while (!value->uses.empty()) {
    209       ValueRef *ref = value->uses.front();
    210       ref->set(repVal.get());
    211       ref->mod *= repVal.mod;
    212    }
    213 
    214    if (doSet)
    215       set(repVal.get());
    216 }
    217 
    218 Value::Value()
    219 {
    220   join = this;
    221   memset(&reg, 0, sizeof(reg));
    222   reg.size = 4;
    223 }
    224 
    225 LValue::LValue(Function *fn, DataFile file)
    226 {
    227    reg.file = file;
    228    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
    229    reg.data.id = -1;
    230 
    231    compMask = 0;
    232    compound = 0;
    233    ssa = 0;
    234    fixedReg = 0;
    235    noSpill = 0;
    236 
    237    fn->add(this, this->id);
    238 }
    239 
    240 LValue::LValue(Function *fn, LValue *lval)
    241 {
    242    assert(lval);
    243 
    244    reg.file = lval->reg.file;
    245    reg.size = lval->reg.size;
    246    reg.data.id = -1;
    247 
    248    compMask = 0;
    249    compound = 0;
    250    ssa = 0;
    251    fixedReg = 0;
    252    noSpill = 0;
    253 
    254    fn->add(this, this->id);
    255 }
    256 
    257 LValue *
    258 LValue::clone(ClonePolicy<Function>& pol) const
    259 {
    260    LValue *that = new_LValue(pol.context(), reg.file);
    261 
    262    pol.set<Value>(this, that);
    263 
    264    that->reg.size = this->reg.size;
    265    that->reg.type = this->reg.type;
    266    that->reg.data = this->reg.data;
    267 
    268    return that;
    269 }
    270 
    271 bool
    272 LValue::isUniform() const
    273 {
    274    if (defs.size() > 1)
    275       return false;
    276    Instruction *insn = getInsn();
    277    // let's not try too hard here for now ...
    278    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
    279 }
    280 
    281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
    282 {
    283    baseSym = NULL;
    284 
    285    reg.file = f;
    286    reg.fileIndex = fidx;
    287    reg.data.offset = 0;
    288 
    289    prog->add(this, this->id);
    290 }
    291 
    292 Symbol *
    293 Symbol::clone(ClonePolicy<Function>& pol) const
    294 {
    295    Program *prog = pol.context()->getProgram();
    296 
    297    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
    298 
    299    pol.set<Value>(this, that);
    300 
    301    that->reg.size = this->reg.size;
    302    that->reg.type = this->reg.type;
    303    that->reg.data = this->reg.data;
    304 
    305    that->baseSym = this->baseSym;
    306 
    307    return that;
    308 }
    309 
    310 bool
    311 Symbol::isUniform() const
    312 {
    313    return
    314       reg.file != FILE_SYSTEM_VALUE &&
    315       reg.file != FILE_MEMORY_LOCAL &&
    316       reg.file != FILE_SHADER_INPUT;
    317 }
    318 
    319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
    320 {
    321    memset(&reg, 0, sizeof(reg));
    322 
    323    reg.file = FILE_IMMEDIATE;
    324    reg.size = 4;
    325    reg.type = TYPE_U32;
    326 
    327    reg.data.u32 = uval;
    328 
    329    prog->add(this, this->id);
    330 }
    331 
    332 ImmediateValue::ImmediateValue(Program *prog, float fval)
    333 {
    334    memset(&reg, 0, sizeof(reg));
    335 
    336    reg.file = FILE_IMMEDIATE;
    337    reg.size = 4;
    338    reg.type = TYPE_F32;
    339 
    340    reg.data.f32 = fval;
    341 
    342    prog->add(this, this->id);
    343 }
    344 
    345 ImmediateValue::ImmediateValue(Program *prog, double dval)
    346 {
    347    memset(&reg, 0, sizeof(reg));
    348 
    349    reg.file = FILE_IMMEDIATE;
    350    reg.size = 8;
    351    reg.type = TYPE_F64;
    352 
    353    reg.data.f64 = dval;
    354 
    355    prog->add(this, this->id);
    356 }
    357 
    358 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
    359 {
    360    reg = proto->reg;
    361 
    362    reg.type = ty;
    363    reg.size = typeSizeof(ty);
    364 }
    365 
    366 ImmediateValue *
    367 ImmediateValue::clone(ClonePolicy<Function>& pol) const
    368 {
    369    Program *prog = pol.context()->getProgram();
    370    ImmediateValue *that = new_ImmediateValue(prog, 0u);
    371 
    372    pol.set<Value>(this, that);
    373 
    374    that->reg.size = this->reg.size;
    375    that->reg.type = this->reg.type;
    376    that->reg.data = this->reg.data;
    377 
    378    return that;
    379 }
    380 
    381 bool
    382 ImmediateValue::isInteger(const int i) const
    383 {
    384    switch (reg.type) {
    385    case TYPE_S8:
    386       return reg.data.s8 == i;
    387    case TYPE_U8:
    388       return reg.data.u8 == i;
    389    case TYPE_S16:
    390       return reg.data.s16 == i;
    391    case TYPE_U16:
    392       return reg.data.u16 == i;
    393    case TYPE_S32:
    394    case TYPE_U32:
    395       return reg.data.s32 == i; // as if ...
    396    case TYPE_F32:
    397       return reg.data.f32 == static_cast<float>(i);
    398    case TYPE_F64:
    399       return reg.data.f64 == static_cast<double>(i);
    400    default:
    401       return false;
    402    }
    403 }
    404 
    405 bool
    406 ImmediateValue::isNegative() const
    407 {
    408    switch (reg.type) {
    409    case TYPE_S8:  return reg.data.s8 < 0;
    410    case TYPE_S16: return reg.data.s16 < 0;
    411    case TYPE_S32:
    412    case TYPE_U32: return reg.data.s32 < 0;
    413    case TYPE_F32: return reg.data.u32 & (1 << 31);
    414    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
    415    default:
    416       return false;
    417    }
    418 }
    419 
    420 bool
    421 ImmediateValue::isPow2() const
    422 {
    423    switch (reg.type) {
    424    case TYPE_U8:
    425    case TYPE_U16:
    426    case TYPE_U32: return util_is_power_of_two(reg.data.u32);
    427    default:
    428       return false;
    429    }
    430 }
    431 
    432 void
    433 ImmediateValue::applyLog2()
    434 {
    435    switch (reg.type) {
    436    case TYPE_S8:
    437    case TYPE_S16:
    438    case TYPE_S32:
    439       assert(!this->isNegative());
    440       // fall through
    441    case TYPE_U8:
    442    case TYPE_U16:
    443    case TYPE_U32:
    444       reg.data.u32 = util_logbase2(reg.data.u32);
    445       break;
    446    case TYPE_F32:
    447       reg.data.f32 = log2f(reg.data.f32);
    448       break;
    449    case TYPE_F64:
    450       reg.data.f64 = log2(reg.data.f64);
    451       break;
    452    default:
    453       assert(0);
    454       break;
    455    }
    456 }
    457 
    458 bool
    459 ImmediateValue::compare(CondCode cc, float fval) const
    460 {
    461    if (reg.type != TYPE_F32)
    462       ERROR("immediate value is not of type f32");
    463 
    464    switch (static_cast<CondCode>(cc & 7)) {
    465    case CC_TR: return true;
    466    case CC_FL: return false;
    467    case CC_LT: return reg.data.f32 <  fval;
    468    case CC_LE: return reg.data.f32 <= fval;
    469    case CC_GT: return reg.data.f32 >  fval;
    470    case CC_GE: return reg.data.f32 >= fval;
    471    case CC_EQ: return reg.data.f32 == fval;
    472    case CC_NE: return reg.data.f32 != fval;
    473    default:
    474       assert(0);
    475       return false;
    476    }
    477 }
    478 
    479 ImmediateValue&
    480 ImmediateValue::operator=(const ImmediateValue &that)
    481 {
    482    this->reg = that.reg;
    483    return (*this);
    484 }
    485 
    486 bool
    487 Value::interfers(const Value *that) const
    488 {
    489    uint32_t idA, idB;
    490 
    491    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
    492       return false;
    493    if (this->asImm())
    494       return false;
    495 
    496    if (this->asSym()) {
    497       idA = this->join->reg.data.offset;
    498       idB = that->join->reg.data.offset;
    499    } else {
    500       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
    501       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
    502    }
    503 
    504    if (idA < idB)
    505       return (idA + this->reg.size > idB);
    506    else
    507    if (idA > idB)
    508       return (idB + that->reg.size > idA);
    509    else
    510       return (idA == idB);
    511 }
    512 
    513 bool
    514 Value::equals(const Value *that, bool strict) const
    515 {
    516    if (strict)
    517       return this == that;
    518 
    519    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
    520       return false;
    521    if (that->reg.size != this->reg.size)
    522       return false;
    523 
    524    if (that->reg.data.id != this->reg.data.id)
    525       return false;
    526 
    527    return true;
    528 }
    529 
    530 bool
    531 ImmediateValue::equals(const Value *that, bool strict) const
    532 {
    533    const ImmediateValue *imm = that->asImm();
    534    if (!imm)
    535       return false;
    536    return reg.data.u64 == imm->reg.data.u64;
    537 }
    538 
    539 bool
    540 Symbol::equals(const Value *that, bool strict) const
    541 {
    542    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
    543       return false;
    544    assert(that->asSym());
    545 
    546    if (this->baseSym != that->asSym()->baseSym)
    547       return false;
    548 
    549    return this->reg.data.offset == that->reg.data.offset;
    550 }
    551 
    552 void Instruction::init()
    553 {
    554    next = prev = 0;
    555 
    556    cc = CC_ALWAYS;
    557    rnd = ROUND_N;
    558    cache = CACHE_CA;
    559    subOp = 0;
    560 
    561    saturate = 0;
    562    join = 0;
    563    exit = 0;
    564    terminator = 0;
    565    ftz = 0;
    566    dnz = 0;
    567    atomic = 0;
    568    perPatch = 0;
    569    fixed = 0;
    570    encSize = 0;
    571    ipa = 0;
    572 
    573    lanes = 0xf;
    574 
    575    postFactor = 0;
    576 
    577    predSrc = -1;
    578    flagsDef = -1;
    579    flagsSrc = -1;
    580 }
    581 
    582 Instruction::Instruction()
    583 {
    584    init();
    585 
    586    op = OP_NOP;
    587    dType = sType = TYPE_F32;
    588 
    589    id = -1;
    590    bb = 0;
    591 }
    592 
    593 Instruction::Instruction(Function *fn, operation opr, DataType ty)
    594 {
    595    init();
    596 
    597    op = opr;
    598    dType = sType = ty;
    599 
    600    fn->add(this, id);
    601 }
    602 
    603 Instruction::~Instruction()
    604 {
    605    if (bb) {
    606       Function *fn = bb->getFunction();
    607       bb->remove(this);
    608       fn->allInsns.remove(id);
    609    }
    610 
    611    for (int s = 0; srcExists(s); ++s)
    612       setSrc(s, NULL);
    613    // must unlink defs too since the list pointers will get deallocated
    614    for (int d = 0; defExists(d); ++d)
    615       setDef(d, NULL);
    616 }
    617 
    618 void
    619 Instruction::setDef(int i, Value *val)
    620 {
    621    int size = defs.size();
    622    if (i >= size) {
    623       defs.resize(i + 1);
    624       while (size <= i)
    625          defs[size++].setInsn(this);
    626    }
    627    defs[i].set(val);
    628 }
    629 
    630 void
    631 Instruction::setSrc(int s, Value *val)
    632 {
    633    int size = srcs.size();
    634    if (s >= size) {
    635       srcs.resize(s + 1);
    636       while (size <= s)
    637          srcs[size++].setInsn(this);
    638    }
    639    srcs[s].set(val);
    640 }
    641 
    642 void
    643 Instruction::setSrc(int s, const ValueRef& ref)
    644 {
    645    setSrc(s, ref.get());
    646    srcs[s].mod = ref.mod;
    647 }
    648 
    649 void
    650 Instruction::swapSources(int a, int b)
    651 {
    652    Value *value = srcs[a].get();
    653    Modifier m = srcs[a].mod;
    654 
    655    setSrc(a, srcs[b]);
    656 
    657    srcs[b].set(value);
    658    srcs[b].mod = m;
    659 }
    660 
    661 // TODO: extend for delta < 0
    662 void
    663 Instruction::moveSources(int s, int delta)
    664 {
    665    if (delta == 0)
    666       return;
    667    assert(delta > 0);
    668 
    669    int k;
    670    for (k = 0; srcExists(k); ++k) {
    671       for (int i = 0; i < 2; ++i) {
    672          if (src(k).indirect[i] >= s)
    673             src(k).indirect[i] += delta;
    674       }
    675    }
    676    if (predSrc >= s)
    677       predSrc += delta;
    678    if (flagsSrc >= s)
    679       flagsSrc += delta;
    680 
    681    --k;
    682    for (int p = k + delta; k >= s; --k, --p)
    683       setSrc(p, src(k));
    684 }
    685 
    686 void
    687 Instruction::takeExtraSources(int s, Value *values[3])
    688 {
    689    values[0] = getIndirect(s, 0);
    690    if (values[0])
    691       setIndirect(s, 0, NULL);
    692 
    693    values[1] = getIndirect(s, 1);
    694    if (values[1])
    695       setIndirect(s, 1, NULL);
    696 
    697    values[2] = getPredicate();
    698    if (values[2])
    699       setPredicate(cc, NULL);
    700 }
    701 
    702 void
    703 Instruction::putExtraSources(int s, Value *values[3])
    704 {
    705    if (values[0])
    706       setIndirect(s, 0, values[0]);
    707    if (values[1])
    708       setIndirect(s, 1, values[1]);
    709    if (values[2])
    710       setPredicate(cc, values[2]);
    711 }
    712 
    713 Instruction *
    714 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    715 {
    716    if (!i)
    717       i = new_Instruction(pol.context(), op, dType);
    718 #ifndef NDEBUG // non-conformant assert, so this is required
    719    assert(typeid(*i) == typeid(*this));
    720 #endif
    721 
    722    pol.set<Instruction>(this, i);
    723 
    724    i->sType = sType;
    725 
    726    i->rnd = rnd;
    727    i->cache = cache;
    728    i->subOp = subOp;
    729 
    730    i->saturate = saturate;
    731    i->join = join;
    732    i->exit = exit;
    733    i->atomic = atomic;
    734    i->ftz = ftz;
    735    i->dnz = dnz;
    736    i->ipa = ipa;
    737    i->lanes = lanes;
    738    i->perPatch = perPatch;
    739 
    740    i->postFactor = postFactor;
    741 
    742    for (int d = 0; defExists(d); ++d)
    743       i->setDef(d, pol.get(getDef(d)));
    744 
    745    for (int s = 0; srcExists(s); ++s) {
    746       i->setSrc(s, pol.get(getSrc(s)));
    747       i->src(s).mod = src(s).mod;
    748    }
    749 
    750    i->cc = cc;
    751    i->predSrc = predSrc;
    752    i->flagsDef = flagsDef;
    753    i->flagsSrc = flagsSrc;
    754 
    755    return i;
    756 }
    757 
    758 unsigned int
    759 Instruction::defCount(unsigned int mask, bool singleFile) const
    760 {
    761    unsigned int i, n;
    762 
    763    if (singleFile) {
    764       unsigned int d = ffs(mask);
    765       if (!d)
    766          return 0;
    767       for (i = d--; defExists(i); ++i)
    768          if (getDef(i)->reg.file != getDef(d)->reg.file)
    769             mask &= ~(1 << i);
    770    }
    771 
    772    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
    773       n += mask & 1;
    774    return n;
    775 }
    776 
    777 unsigned int
    778 Instruction::srcCount(unsigned int mask, bool singleFile) const
    779 {
    780    unsigned int i, n;
    781 
    782    if (singleFile) {
    783       unsigned int s = ffs(mask);
    784       if (!s)
    785          return 0;
    786       for (i = s--; srcExists(i); ++i)
    787          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
    788             mask &= ~(1 << i);
    789    }
    790 
    791    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
    792       n += mask & 1;
    793    return n;
    794 }
    795 
    796 bool
    797 Instruction::setIndirect(int s, int dim, Value *value)
    798 {
    799    assert(this->srcExists(s));
    800 
    801    int p = srcs[s].indirect[dim];
    802    if (p < 0) {
    803       if (!value)
    804          return true;
    805       p = srcs.size();
    806       while (p > 0 && !srcExists(p - 1))
    807          --p;
    808    }
    809    setSrc(p, value);
    810    srcs[p].usedAsPtr = (value != 0);
    811    srcs[s].indirect[dim] = value ? p : -1;
    812    return true;
    813 }
    814 
    815 bool
    816 Instruction::setPredicate(CondCode ccode, Value *value)
    817 {
    818    cc = ccode;
    819 
    820    if (!value) {
    821       if (predSrc >= 0) {
    822          srcs[predSrc].set(NULL);
    823          predSrc = -1;
    824       }
    825       return true;
    826    }
    827 
    828    if (predSrc < 0) {
    829       predSrc = srcs.size();
    830       while (predSrc > 0 && !srcExists(predSrc - 1))
    831          --predSrc;
    832    }
    833 
    834    setSrc(predSrc, value);
    835    return true;
    836 }
    837 
    838 bool
    839 Instruction::writesPredicate() const
    840 {
    841    for (int d = 0; defExists(d); ++d)
    842       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
    843          return true;
    844    return false;
    845 }
    846 
    847 static bool
    848 insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b)
    849 {
    850    for (int d = 0; a->defExists(d); ++d)
    851       for (int s = 0; b->srcExists(s); ++s)
    852          if (a->getDef(d)->interfers(b->getSrc(s)))
    853             return false;
    854    return true;
    855 }
    856 
    857 static bool
    858 insnCheckCommutationDefDef(const Instruction *a, const Instruction *b)
    859 {
    860    for (int d = 0; a->defExists(d); ++d)
    861       for (int c = 0; b->defExists(c); ++c)
    862          if (a->getDef(d)->interfers(b->getDef(c)))
    863             return false;
    864    return true;
    865 }
    866 
    867 bool
    868 Instruction::isCommutationLegal(const Instruction *i) const
    869 {
    870    bool ret = insnCheckCommutationDefDef(this, i);
    871    ret = ret && insnCheckCommutationDefSrc(this, i);
    872    ret = ret && insnCheckCommutationDefSrc(i, this);
    873    return ret;
    874 }
    875 
    876 TexInstruction::TexInstruction(Function *fn, operation op)
    877    : Instruction(fn, op, TYPE_F32)
    878 {
    879    memset(&tex, 0, sizeof(tex));
    880 
    881    tex.rIndirectSrc = -1;
    882    tex.sIndirectSrc = -1;
    883 }
    884 
    885 TexInstruction::~TexInstruction()
    886 {
    887    for (int c = 0; c < 3; ++c) {
    888       dPdx[c].set(NULL);
    889       dPdy[c].set(NULL);
    890    }
    891 }
    892 
    893 TexInstruction *
    894 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    895 {
    896    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
    897                           new_TexInstruction(pol.context(), op));
    898 
    899    Instruction::clone(pol, tex);
    900 
    901    tex->tex = this->tex;
    902 
    903    if (op == OP_TXD) {
    904       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
    905          tex->dPdx[c].set(dPdx[c]);
    906          tex->dPdy[c].set(dPdy[c]);
    907       }
    908    }
    909 
    910    return tex;
    911 }
    912 
    913 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
    914 {
    915    { "1D",                1, 1, false, false, false },
    916    { "2D",                2, 2, false, false, false },
    917    { "2D_MS",             2, 2, false, false, false },
    918    { "3D",                3, 3, false, false, false },
    919    { "CUBE",              2, 3, false, true,  false },
    920    { "1D_SHADOW",         1, 1, false, false, true  },
    921    { "2D_SHADOW",         2, 2, false, false, true  },
    922    { "CUBE_SHADOW",       2, 3, false, true,  true  },
    923    { "1D_ARRAY",          1, 2, true,  false, false },
    924    { "2D_ARRAY",          2, 3, true,  false, false },
    925    { "2D_MS_ARRAY",       2, 3, true,  false, false },
    926    { "CUBE_ARRAY",        2, 4, true,  true,  false },
    927    { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
    928    { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
    929    { "RECT",              2, 2, false, false, false },
    930    { "RECT_SHADOW",       2, 2, false, false, true  },
    931    { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
    932    { "BUFFER",            1, 1, false, false, false },
    933 };
    934 
    935 CmpInstruction::CmpInstruction(Function *fn, operation op)
    936    : Instruction(fn, op, TYPE_F32)
    937 {
    938    setCond = CC_ALWAYS;
    939 }
    940 
    941 CmpInstruction *
    942 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    943 {
    944    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
    945                           new_CmpInstruction(pol.context(), op));
    946    cmp->dType = dType;
    947    Instruction::clone(pol, cmp);
    948    cmp->setCond = setCond;
    949    return cmp;
    950 }
    951 
    952 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
    953    : Instruction(fn, op, TYPE_NONE)
    954 {
    955    if (op == OP_CALL)
    956       target.fn = reinterpret_cast<Function *>(targ);
    957    else
    958       target.bb = reinterpret_cast<BasicBlock *>(targ);
    959 
    960    if (op == OP_BRA ||
    961        op == OP_CONT || op == OP_BREAK ||
    962        op == OP_RET || op == OP_EXIT)
    963       terminator = 1;
    964    else
    965    if (op == OP_JOIN)
    966       terminator = targ ? 1 : 0;
    967 
    968    allWarp = absolute = limit = builtin = 0;
    969 }
    970 
    971 FlowInstruction *
    972 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    973 {
    974    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
    975                             new_FlowInstruction(pol.context(), op, NULL));
    976 
    977    Instruction::clone(pol, flow);
    978    flow->allWarp = allWarp;
    979    flow->absolute = absolute;
    980    flow->limit = limit;
    981    flow->builtin = builtin;
    982 
    983    if (builtin)
    984       flow->target.builtin = target.builtin;
    985    else
    986    if (op == OP_CALL)
    987       flow->target.fn = target.fn;
    988    else
    989    if (target.bb)
    990       flow->target.bb = pol.get<BasicBlock>(target.bb);
    991 
    992    return flow;
    993 }
    994 
    995 Program::Program(Type type, Target *arch)
    996    : progType(type),
    997      target(arch),
    998      mem_Instruction(sizeof(Instruction), 6),
    999      mem_CmpInstruction(sizeof(CmpInstruction), 4),
   1000      mem_TexInstruction(sizeof(TexInstruction), 4),
   1001      mem_FlowInstruction(sizeof(FlowInstruction), 4),
   1002      mem_LValue(sizeof(LValue), 8),
   1003      mem_Symbol(sizeof(Symbol), 7),
   1004      mem_ImmediateValue(sizeof(ImmediateValue), 7)
   1005 {
   1006    code = NULL;
   1007    binSize = 0;
   1008 
   1009    maxGPR = -1;
   1010 
   1011    main = new Function(this, "MAIN", ~0);
   1012    calls.insert(&main->call);
   1013 
   1014    dbgFlags = 0;
   1015    optLevel = 0;
   1016 
   1017    targetPriv = NULL;
   1018 }
   1019 
   1020 Program::~Program()
   1021 {
   1022    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
   1023       delete reinterpret_cast<Function *>(it.get());
   1024 
   1025    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
   1026       releaseValue(reinterpret_cast<Value *>(it.get()));
   1027 }
   1028 
   1029 void Program::releaseInstruction(Instruction *insn)
   1030 {
   1031    // TODO: make this not suck so much
   1032 
   1033    insn->~Instruction();
   1034 
   1035    if (insn->asCmp())
   1036       mem_CmpInstruction.release(insn);
   1037    else
   1038    if (insn->asTex())
   1039       mem_TexInstruction.release(insn);
   1040    else
   1041    if (insn->asFlow())
   1042       mem_FlowInstruction.release(insn);
   1043    else
   1044       mem_Instruction.release(insn);
   1045 }
   1046 
   1047 void Program::releaseValue(Value *value)
   1048 {
   1049    value->~Value();
   1050 
   1051    if (value->asLValue())
   1052       mem_LValue.release(value);
   1053    else
   1054    if (value->asImm())
   1055       mem_ImmediateValue.release(value);
   1056    else
   1057    if (value->asSym())
   1058       mem_Symbol.release(value);
   1059 }
   1060 
   1061 
   1062 } // namespace nv50_ir
   1063 
   1064 extern "C" {
   1065 
   1066 static void
   1067 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   1068 {
   1069 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
   1070    if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) {
   1071       info->prop.tp.domain = PIPE_PRIM_MAX;
   1072       info->prop.tp.outputPrim = PIPE_PRIM_MAX;
   1073    }
   1074 #endif
   1075    if (info->type == PIPE_SHADER_GEOMETRY) {
   1076       info->prop.gp.instanceCount = 1;
   1077       info->prop.gp.maxVertices = 1;
   1078    }
   1079    info->io.clipDistance = 0xff;
   1080    info->io.pointSize = 0xff;
   1081    info->io.instanceId = 0xff;
   1082    info->io.vertexId = 0xff;
   1083    info->io.edgeFlagIn = 0xff;
   1084    info->io.edgeFlagOut = 0xff;
   1085    info->io.fragDepth = 0xff;
   1086    info->io.sampleMask = 0xff;
   1087    info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
   1088 }
   1089 
   1090 int
   1091 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   1092 {
   1093    int ret = 0;
   1094 
   1095    nv50_ir::Program::Type type;
   1096 
   1097    nv50_ir_init_prog_info(info);
   1098 
   1099 #define PROG_TYPE_CASE(a, b)                                      \
   1100    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
   1101 
   1102    switch (info->type) {
   1103    PROG_TYPE_CASE(VERTEX, VERTEX);
   1104 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
   1105 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
   1106    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
   1107    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
   1108    default:
   1109       type = nv50_ir::Program::TYPE_COMPUTE;
   1110       break;
   1111    }
   1112    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
   1113 
   1114    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
   1115    if (!targ)
   1116       return -1;
   1117 
   1118    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
   1119    if (!prog)
   1120       return -1;
   1121    prog->dbgFlags = info->dbgFlags;
   1122    prog->optLevel = info->optLevel;
   1123 
   1124    switch (info->bin.sourceRep) {
   1125 #if 0
   1126    case PIPE_IR_LLVM:
   1127    case PIPE_IR_GLSL:
   1128       return -1;
   1129    case PIPE_IR_SM4:
   1130       ret = prog->makeFromSM4(info) ? 0 : -2;
   1131       break;
   1132    case PIPE_IR_TGSI:
   1133 #endif
   1134    default:
   1135       ret = prog->makeFromTGSI(info) ? 0 : -2;
   1136       break;
   1137    }
   1138    if (ret < 0)
   1139       goto out;
   1140    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   1141       prog->print();
   1142 
   1143    targ->parseDriverInfo(info);
   1144    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
   1145 
   1146    prog->convertToSSA();
   1147 
   1148    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   1149       prog->print();
   1150 
   1151    prog->optimizeSSA(info->optLevel);
   1152    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
   1153 
   1154    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
   1155       prog->print();
   1156 
   1157    if (!prog->registerAllocation()) {
   1158       ret = -4;
   1159       goto out;
   1160    }
   1161    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
   1162 
   1163    prog->optimizePostRA(info->optLevel);
   1164 
   1165    if (!prog->emitBinary(info)) {
   1166       ret = -5;
   1167       goto out;
   1168    }
   1169 
   1170 out:
   1171    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
   1172 
   1173    info->bin.maxGPR = prog->maxGPR;
   1174    info->bin.code = prog->code;
   1175    info->bin.codeSize = prog->binSize;
   1176    info->bin.tlsSpace = prog->tlsSize;
   1177 
   1178    delete prog;
   1179    nv50_ir::Target::destroy(targ);
   1180 
   1181    return ret;
   1182 }
   1183 
   1184 } // extern "C"
   1185