Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "codegen/nv50_ir.h"
     24 #include "codegen/nv50_ir_target.h"
     25 #include "codegen/nv50_ir_driver.h"
     26 
     27 extern "C" {
     28 #include "nouveau_debug.h"
     29 #include "nv50/nv50_program.h"
     30 }
     31 
     32 namespace nv50_ir {
     33 
     34 Modifier::Modifier(operation op)
     35 {
     36    switch (op) {
     37    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
     38    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
     39    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
     40    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
     41    default:
     42       bits = 0;
     43       break;
     44    }
     45 }
     46 
     47 Modifier Modifier::operator*(const Modifier m) const
     48 {
     49    unsigned int a, b, c;
     50 
     51    b = m.bits;
     52    if (this->bits & NV50_IR_MOD_ABS)
     53       b &= ~NV50_IR_MOD_NEG;
     54 
     55    a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
     56    c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
     57 
     58    return Modifier(a | c);
     59 }
     60 
     61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
     62 {
     63    indirect[0] = -1;
     64    indirect[1] = -1;
     65    usedAsPtr = false;
     66    set(v);
     67 }
     68 
     69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
     70 {
     71    set(ref);
     72    usedAsPtr = ref.usedAsPtr;
     73 }
     74 
     75 ValueRef::~ValueRef()
     76 {
     77    this->set(NULL);
     78 }
     79 
     80 bool ValueRef::getImmediate(ImmediateValue &imm) const
     81 {
     82    const ValueRef *src = this;
     83    Modifier m;
     84    DataType type = src->insn->sType;
     85 
     86    while (src) {
     87       if (src->mod) {
     88          if (src->insn->sType != type)
     89             break;
     90          m *= src->mod;
     91       }
     92       if (src->getFile() == FILE_IMMEDIATE) {
     93          imm = *(src->value->asImm());
     94          // The immediate's type isn't required to match its use, it's
     95          // more of a hint; applying a modifier makes use of that hint.
     96          imm.reg.type = type;
     97          m.applyTo(imm);
     98          return true;
     99       }
    100 
    101       Instruction *insn = src->value->getUniqueInsn();
    102 
    103       if (insn && insn->op == OP_MOV) {
    104          src = &insn->src(0);
    105          if (src->mod)
    106             WARN("OP_MOV with modifier encountered !\n");
    107       } else {
    108          src = NULL;
    109       }
    110    }
    111    return false;
    112 }
    113 
    114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
    115 {
    116    set(v);
    117 }
    118 
    119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
    120 {
    121    set(def.get());
    122 }
    123 
    124 ValueDef::~ValueDef()
    125 {
    126    this->set(NULL);
    127 }
    128 
    129 void
    130 ValueRef::set(const ValueRef &ref)
    131 {
    132    this->set(ref.get());
    133    mod = ref.mod;
    134    indirect[0] = ref.indirect[0];
    135    indirect[1] = ref.indirect[1];
    136 }
    137 
    138 void
    139 ValueRef::set(Value *refVal)
    140 {
    141    if (value == refVal)
    142       return;
    143    if (value)
    144       value->uses.erase(this);
    145    if (refVal)
    146       refVal->uses.insert(this);
    147 
    148    value = refVal;
    149 }
    150 
    151 void
    152 ValueDef::set(Value *defVal)
    153 {
    154    if (value == defVal)
    155       return;
    156    if (value)
    157       value->defs.remove(this);
    158    if (defVal)
    159       defVal->defs.push_back(this);
    160 
    161    value = defVal;
    162 }
    163 
    164 // Check if we can replace this definition's value by the value in @rep,
    165 // including the source modifiers, i.e. make sure that all uses support
    166 // @rep.mod.
    167 bool
    168 ValueDef::mayReplace(const ValueRef &rep)
    169 {
    170    if (!rep.mod)
    171       return true;
    172 
    173    if (!insn || !insn->bb) // Unbound instruction ?
    174       return false;
    175 
    176    const Target *target = insn->bb->getProgram()->getTarget();
    177 
    178    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
    179         ++it) {
    180       Instruction *insn = (*it)->getInsn();
    181       int s = -1;
    182 
    183       for (int i = 0; insn->srcExists(i); ++i) {
    184          if (insn->src(i).get() == value) {
    185             // If there are multiple references to us we'd have to check if the
    186             // combination of mods is still supported, but just bail for now.
    187             if (&insn->src(i) != (*it))
    188                return false;
    189             s = i;
    190          }
    191       }
    192       assert(s >= 0); // integrity of uses list
    193 
    194       if (!target->isModSupported(insn, s, rep.mod))
    195          return false;
    196    }
    197    return true;
    198 }
    199 
    200 void
    201 ValueDef::replace(const ValueRef &repVal, bool doSet)
    202 {
    203    assert(mayReplace(repVal));
    204 
    205    if (value == repVal.get())
    206       return;
    207 
    208    while (!value->uses.empty()) {
    209       ValueRef *ref = *value->uses.begin();
    210       ref->set(repVal.get());
    211       ref->mod *= repVal.mod;
    212    }
    213 
    214    if (doSet)
    215       set(repVal.get());
    216 }
    217 
    218 Value::Value()
    219 {
    220   join = this;
    221   memset(&reg, 0, sizeof(reg));
    222   reg.size = 4;
    223 }
    224 
    225 LValue::LValue(Function *fn, DataFile file)
    226 {
    227    reg.file = file;
    228    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
    229    reg.data.id = -1;
    230 
    231    compMask = 0;
    232    compound = 0;
    233    ssa = 0;
    234    fixedReg = 0;
    235    noSpill = 0;
    236 
    237    fn->add(this, this->id);
    238 }
    239 
    240 LValue::LValue(Function *fn, LValue *lval)
    241 {
    242    assert(lval);
    243 
    244    reg.file = lval->reg.file;
    245    reg.size = lval->reg.size;
    246    reg.data.id = -1;
    247 
    248    compMask = 0;
    249    compound = 0;
    250    ssa = 0;
    251    fixedReg = 0;
    252    noSpill = 0;
    253 
    254    fn->add(this, this->id);
    255 }
    256 
    257 LValue *
    258 LValue::clone(ClonePolicy<Function>& pol) const
    259 {
    260    LValue *that = new_LValue(pol.context(), reg.file);
    261 
    262    pol.set<Value>(this, that);
    263 
    264    that->reg.size = this->reg.size;
    265    that->reg.type = this->reg.type;
    266    that->reg.data = this->reg.data;
    267 
    268    return that;
    269 }
    270 
    271 bool
    272 LValue::isUniform() const
    273 {
    274    if (defs.size() > 1)
    275       return false;
    276    Instruction *insn = getInsn();
    277    // let's not try too hard here for now ...
    278    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
    279 }
    280 
    281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
    282 {
    283    baseSym = NULL;
    284 
    285    reg.file = f;
    286    reg.fileIndex = fidx;
    287    reg.data.offset = 0;
    288 
    289    prog->add(this, this->id);
    290 }
    291 
    292 Symbol *
    293 Symbol::clone(ClonePolicy<Function>& pol) const
    294 {
    295    Program *prog = pol.context()->getProgram();
    296 
    297    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
    298 
    299    pol.set<Value>(this, that);
    300 
    301    that->reg.size = this->reg.size;
    302    that->reg.type = this->reg.type;
    303    that->reg.data = this->reg.data;
    304 
    305    that->baseSym = this->baseSym;
    306 
    307    return that;
    308 }
    309 
    310 bool
    311 Symbol::isUniform() const
    312 {
    313    return
    314       reg.file != FILE_SYSTEM_VALUE &&
    315       reg.file != FILE_MEMORY_LOCAL &&
    316       reg.file != FILE_SHADER_INPUT;
    317 }
    318 
    319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
    320 {
    321    memset(&reg, 0, sizeof(reg));
    322 
    323    reg.file = FILE_IMMEDIATE;
    324    reg.size = 4;
    325    reg.type = TYPE_U32;
    326 
    327    reg.data.u32 = uval;
    328 
    329    prog->add(this, this->id);
    330 }
    331 
    332 ImmediateValue::ImmediateValue(Program *prog, float fval)
    333 {
    334    memset(&reg, 0, sizeof(reg));
    335 
    336    reg.file = FILE_IMMEDIATE;
    337    reg.size = 4;
    338    reg.type = TYPE_F32;
    339 
    340    reg.data.f32 = fval;
    341 
    342    prog->add(this, this->id);
    343 }
    344 
    345 ImmediateValue::ImmediateValue(Program *prog, double dval)
    346 {
    347    memset(&reg, 0, sizeof(reg));
    348 
    349    reg.file = FILE_IMMEDIATE;
    350    reg.size = 8;
    351    reg.type = TYPE_F64;
    352 
    353    reg.data.f64 = dval;
    354 
    355    prog->add(this, this->id);
    356 }
    357 
    358 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
    359 {
    360    reg = proto->reg;
    361 
    362    reg.type = ty;
    363    reg.size = typeSizeof(ty);
    364 }
    365 
    366 ImmediateValue *
    367 ImmediateValue::clone(ClonePolicy<Function>& pol) const
    368 {
    369    Program *prog = pol.context()->getProgram();
    370    ImmediateValue *that = new_ImmediateValue(prog, 0u);
    371 
    372    pol.set<Value>(this, that);
    373 
    374    that->reg.size = this->reg.size;
    375    that->reg.type = this->reg.type;
    376    that->reg.data = this->reg.data;
    377 
    378    return that;
    379 }
    380 
    381 bool
    382 ImmediateValue::isInteger(const int i) const
    383 {
    384    switch (reg.type) {
    385    case TYPE_S8:
    386       return reg.data.s8 == i;
    387    case TYPE_U8:
    388       return reg.data.u8 == i;
    389    case TYPE_S16:
    390       return reg.data.s16 == i;
    391    case TYPE_U16:
    392       return reg.data.u16 == i;
    393    case TYPE_S32:
    394    case TYPE_U32:
    395       return reg.data.s32 == i; // as if ...
    396    case TYPE_S64:
    397    case TYPE_U64:
    398       return reg.data.s64 == i; // as if ...
    399    case TYPE_F32:
    400       return reg.data.f32 == static_cast<float>(i);
    401    case TYPE_F64:
    402       return reg.data.f64 == static_cast<double>(i);
    403    default:
    404       return false;
    405    }
    406 }
    407 
    408 bool
    409 ImmediateValue::isNegative() const
    410 {
    411    switch (reg.type) {
    412    case TYPE_S8:  return reg.data.s8 < 0;
    413    case TYPE_S16: return reg.data.s16 < 0;
    414    case TYPE_S32:
    415    case TYPE_U32: return reg.data.s32 < 0;
    416    case TYPE_F32: return reg.data.u32 & (1 << 31);
    417    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
    418    default:
    419       return false;
    420    }
    421 }
    422 
    423 bool
    424 ImmediateValue::isPow2() const
    425 {
    426    return util_is_power_of_two(reg.data.u32);
    427 }
    428 
    429 void
    430 ImmediateValue::applyLog2()
    431 {
    432    switch (reg.type) {
    433    case TYPE_S8:
    434    case TYPE_S16:
    435    case TYPE_S32:
    436       assert(!this->isNegative());
    437       // fall through
    438    case TYPE_U8:
    439    case TYPE_U16:
    440    case TYPE_U32:
    441       reg.data.u32 = util_logbase2(reg.data.u32);
    442       break;
    443    case TYPE_F32:
    444       reg.data.f32 = log2f(reg.data.f32);
    445       break;
    446    case TYPE_F64:
    447       reg.data.f64 = log2(reg.data.f64);
    448       break;
    449    default:
    450       assert(0);
    451       break;
    452    }
    453 }
    454 
    455 bool
    456 ImmediateValue::compare(CondCode cc, float fval) const
    457 {
    458    if (reg.type != TYPE_F32)
    459       ERROR("immediate value is not of type f32");
    460 
    461    switch (static_cast<CondCode>(cc & 7)) {
    462    case CC_TR: return true;
    463    case CC_FL: return false;
    464    case CC_LT: return reg.data.f32 <  fval;
    465    case CC_LE: return reg.data.f32 <= fval;
    466    case CC_GT: return reg.data.f32 >  fval;
    467    case CC_GE: return reg.data.f32 >= fval;
    468    case CC_EQ: return reg.data.f32 == fval;
    469    case CC_NE: return reg.data.f32 != fval;
    470    default:
    471       assert(0);
    472       return false;
    473    }
    474 }
    475 
    476 ImmediateValue&
    477 ImmediateValue::operator=(const ImmediateValue &that)
    478 {
    479    this->reg = that.reg;
    480    return (*this);
    481 }
    482 
    483 bool
    484 Value::interfers(const Value *that) const
    485 {
    486    uint32_t idA, idB;
    487 
    488    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
    489       return false;
    490    if (this->asImm())
    491       return false;
    492 
    493    if (this->asSym()) {
    494       idA = this->join->reg.data.offset;
    495       idB = that->join->reg.data.offset;
    496    } else {
    497       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
    498       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
    499    }
    500 
    501    if (idA < idB)
    502       return (idA + this->reg.size > idB);
    503    else
    504    if (idA > idB)
    505       return (idB + that->reg.size > idA);
    506    else
    507       return (idA == idB);
    508 }
    509 
    510 bool
    511 Value::equals(const Value *that, bool strict) const
    512 {
    513    if (strict)
    514       return this == that;
    515 
    516    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
    517       return false;
    518    if (that->reg.size != this->reg.size)
    519       return false;
    520 
    521    if (that->reg.data.id != this->reg.data.id)
    522       return false;
    523 
    524    return true;
    525 }
    526 
    527 bool
    528 ImmediateValue::equals(const Value *that, bool strict) const
    529 {
    530    const ImmediateValue *imm = that->asImm();
    531    if (!imm)
    532       return false;
    533    return reg.data.u64 == imm->reg.data.u64;
    534 }
    535 
    536 bool
    537 Symbol::equals(const Value *that, bool strict) const
    538 {
    539    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
    540       return false;
    541    assert(that->asSym());
    542 
    543    if (this->baseSym != that->asSym()->baseSym)
    544       return false;
    545 
    546    if (reg.file == FILE_SYSTEM_VALUE)
    547       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
    548               this->reg.data.sv.index == that->reg.data.sv.index);
    549    return this->reg.data.offset == that->reg.data.offset;
    550 }
    551 
    552 void Instruction::init()
    553 {
    554    next = prev = 0;
    555 
    556    cc = CC_ALWAYS;
    557    rnd = ROUND_N;
    558    cache = CACHE_CA;
    559    subOp = 0;
    560 
    561    saturate = 0;
    562    join = 0;
    563    exit = 0;
    564    terminator = 0;
    565    ftz = 0;
    566    dnz = 0;
    567    perPatch = 0;
    568    fixed = 0;
    569    encSize = 0;
    570    ipa = 0;
    571    mask = 0;
    572    precise = 0;
    573 
    574    lanes = 0xf;
    575 
    576    postFactor = 0;
    577 
    578    predSrc = -1;
    579    flagsDef = -1;
    580    flagsSrc = -1;
    581 }
    582 
    583 Instruction::Instruction()
    584 {
    585    init();
    586 
    587    op = OP_NOP;
    588    dType = sType = TYPE_F32;
    589 
    590    id = -1;
    591    bb = 0;
    592 }
    593 
    594 Instruction::Instruction(Function *fn, operation opr, DataType ty)
    595 {
    596    init();
    597 
    598    op = opr;
    599    dType = sType = ty;
    600 
    601    fn->add(this, id);
    602 }
    603 
    604 Instruction::~Instruction()
    605 {
    606    if (bb) {
    607       Function *fn = bb->getFunction();
    608       bb->remove(this);
    609       fn->allInsns.remove(id);
    610    }
    611 
    612    for (int s = 0; srcExists(s); ++s)
    613       setSrc(s, NULL);
    614    // must unlink defs too since the list pointers will get deallocated
    615    for (int d = 0; defExists(d); ++d)
    616       setDef(d, NULL);
    617 }
    618 
    619 void
    620 Instruction::setDef(int i, Value *val)
    621 {
    622    int size = defs.size();
    623    if (i >= size) {
    624       defs.resize(i + 1);
    625       while (size <= i)
    626          defs[size++].setInsn(this);
    627    }
    628    defs[i].set(val);
    629 }
    630 
    631 void
    632 Instruction::setSrc(int s, Value *val)
    633 {
    634    int size = srcs.size();
    635    if (s >= size) {
    636       srcs.resize(s + 1);
    637       while (size <= s)
    638          srcs[size++].setInsn(this);
    639    }
    640    srcs[s].set(val);
    641 }
    642 
    643 void
    644 Instruction::setSrc(int s, const ValueRef& ref)
    645 {
    646    setSrc(s, ref.get());
    647    srcs[s].mod = ref.mod;
    648 }
    649 
    650 void
    651 Instruction::swapSources(int a, int b)
    652 {
    653    Value *value = srcs[a].get();
    654    Modifier m = srcs[a].mod;
    655 
    656    setSrc(a, srcs[b]);
    657 
    658    srcs[b].set(value);
    659    srcs[b].mod = m;
    660 }
    661 
    662 static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
    663 {
    664    if (index >= s)
    665       index += delta;
    666    else
    667    if ((delta < 0) && (index >= (s + delta)))
    668       index = -1;
    669 }
    670 
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// Every stored source index (per-source indirect indices, predSrc, flagsSrc
// and, for texture instructions, the r/s indirect indices) is adjusted to
// match. @s + @delta must not be negative.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Fix up the indirect indices of all existing sources; when the loop ends
   // k is the index of the first non-existing source.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Shifting up: copy from the last source downwards so that no source
      // is overwritten before it has been moved.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Shifting down: copy from @s upwards, then clear the slots at the
      // tail that are no longer occupied.
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
    706 
    707 void
    708 Instruction::takeExtraSources(int s, Value *values[3])
    709 {
    710    values[0] = getIndirect(s, 0);
    711    if (values[0])
    712       setIndirect(s, 0, NULL);
    713 
    714    values[1] = getIndirect(s, 1);
    715    if (values[1])
    716       setIndirect(s, 1, NULL);
    717 
    718    values[2] = getPredicate();
    719    if (values[2])
    720       setPredicate(cc, NULL);
    721 }
    722 
    723 void
    724 Instruction::putExtraSources(int s, Value *values[3])
    725 {
    726    if (values[0])
    727       setIndirect(s, 0, values[0]);
    728    if (values[1])
    729       setIndirect(s, 1, values[1]);
    730    if (values[2])
    731       setPredicate(cc, values[2]);
    732 }
    733 
    734 Instruction *
    735 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    736 {
    737    if (!i)
    738       i = new_Instruction(pol.context(), op, dType);
    739 #ifndef NDEBUG // non-conformant assert, so this is required
    740    assert(typeid(*i) == typeid(*this));
    741 #endif
    742 
    743    pol.set<Instruction>(this, i);
    744 
    745    i->sType = sType;
    746 
    747    i->rnd = rnd;
    748    i->cache = cache;
    749    i->subOp = subOp;
    750 
    751    i->saturate = saturate;
    752    i->join = join;
    753    i->exit = exit;
    754    i->mask = mask;
    755    i->ftz = ftz;
    756    i->dnz = dnz;
    757    i->ipa = ipa;
    758    i->lanes = lanes;
    759    i->perPatch = perPatch;
    760 
    761    i->postFactor = postFactor;
    762 
    763    for (int d = 0; defExists(d); ++d)
    764       i->setDef(d, pol.get(getDef(d)));
    765 
    766    for (int s = 0; srcExists(s); ++s) {
    767       i->setSrc(s, pol.get(getSrc(s)));
    768       i->src(s).mod = src(s).mod;
    769    }
    770 
    771    i->cc = cc;
    772    i->predSrc = predSrc;
    773    i->flagsDef = flagsDef;
    774    i->flagsSrc = flagsSrc;
    775 
    776    return i;
    777 }
    778 
    779 unsigned int
    780 Instruction::defCount(unsigned int mask, bool singleFile) const
    781 {
    782    unsigned int i, n;
    783 
    784    if (singleFile) {
    785       unsigned int d = ffs(mask);
    786       if (!d)
    787          return 0;
    788       for (i = d--; defExists(i); ++i)
    789          if (getDef(i)->reg.file != getDef(d)->reg.file)
    790             mask &= ~(1 << i);
    791    }
    792 
    793    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
    794       n += mask & 1;
    795    return n;
    796 }
    797 
    798 unsigned int
    799 Instruction::srcCount(unsigned int mask, bool singleFile) const
    800 {
    801    unsigned int i, n;
    802 
    803    if (singleFile) {
    804       unsigned int s = ffs(mask);
    805       if (!s)
    806          return 0;
    807       for (i = s--; srcExists(i); ++i)
    808          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
    809             mask &= ~(1 << i);
    810    }
    811 
    812    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
    813       n += mask & 1;
    814    return n;
    815 }
    816 
    817 bool
    818 Instruction::setIndirect(int s, int dim, Value *value)
    819 {
    820    assert(this->srcExists(s));
    821 
    822    int p = srcs[s].indirect[dim];
    823    if (p < 0) {
    824       if (!value)
    825          return true;
    826       p = srcs.size();
    827       while (p > 0 && !srcExists(p - 1))
    828          --p;
    829    }
    830    setSrc(p, value);
    831    srcs[p].usedAsPtr = (value != 0);
    832    srcs[s].indirect[dim] = value ? p : -1;
    833    return true;
    834 }
    835 
    836 bool
    837 Instruction::setPredicate(CondCode ccode, Value *value)
    838 {
    839    cc = ccode;
    840 
    841    if (!value) {
    842       if (predSrc >= 0) {
    843          srcs[predSrc].set(NULL);
    844          predSrc = -1;
    845       }
    846       return true;
    847    }
    848 
    849    if (predSrc < 0) {
    850       predSrc = srcs.size();
    851       while (predSrc > 0 && !srcExists(predSrc - 1))
    852          --predSrc;
    853    }
    854 
    855    setSrc(predSrc, value);
    856    return true;
    857 }
    858 
    859 bool
    860 Instruction::writesPredicate() const
    861 {
    862    for (int d = 0; defExists(d); ++d)
    863       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
    864          return true;
    865    return false;
    866 }
    867 
    868 bool
    869 Instruction::canCommuteDefSrc(const Instruction *i) const
    870 {
    871    for (int d = 0; defExists(d); ++d)
    872       for (int s = 0; i->srcExists(s); ++s)
    873          if (getDef(d)->interfers(i->getSrc(s)))
    874             return false;
    875    return true;
    876 }
    877 
    878 bool
    879 Instruction::canCommuteDefDef(const Instruction *i) const
    880 {
    881    for (int d = 0; defExists(d); ++d)
    882       for (int c = 0; i->defExists(c); ++c)
    883          if (getDef(d)->interfers(i->getDef(c)))
    884             return false;
    885    return true;
    886 }
    887 
    888 bool
    889 Instruction::isCommutationLegal(const Instruction *i) const
    890 {
    891    return canCommuteDefDef(i) &&
    892       canCommuteDefSrc(i) &&
    893       i->canCommuteDefSrc(this);
    894 }
    895 
    896 TexInstruction::TexInstruction(Function *fn, operation op)
    897    : Instruction(fn, op, TYPE_F32)
    898 {
    899    memset(&tex, 0, sizeof(tex));
    900 
    901    tex.rIndirectSrc = -1;
    902    tex.sIndirectSrc = -1;
    903 
    904    if (op == OP_TXF)
    905       sType = TYPE_U32;
    906 }
    907 
    908 TexInstruction::~TexInstruction()
    909 {
    910    for (int c = 0; c < 3; ++c) {
    911       dPdx[c].set(NULL);
    912       dPdy[c].set(NULL);
    913    }
    914    for (int n = 0; n < 4; ++n)
    915       for (int c = 0; c < 3; ++c)
    916          offset[n][c].set(NULL);
    917 }
    918 
    919 TexInstruction *
    920 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
    921 {
    922    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
    923                           new_TexInstruction(pol.context(), op));
    924 
    925    Instruction::clone(pol, tex);
    926 
    927    tex->tex = this->tex;
    928 
    929    if (op == OP_TXD) {
    930       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
    931          tex->dPdx[c].set(dPdx[c]);
    932          tex->dPdy[c].set(dPdy[c]);
    933       }
    934    }
    935 
    936    for (int n = 0; n < tex->tex.useOffsets; ++n)
    937       for (int c = 0; c < 3; ++c)
    938          tex->offset[n][c].set(offset[n][c]);
    939 
    940    return tex;
    941 }
    942 
// Static description of each texture target, one row per target.
// NOTE(review): the Desc struct is declared elsewhere; judging by the row
// contents the columns are presumably { name, dimensions, argument count,
// is-array, is-cube, is-shadow } -- confirm against the declaration. The row
// order presumably has to match the texture target enumeration.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
    964 
// Static description of each image format, one row per format, grouped by
// component type (float, unsigned int, signed int, unorm, snorm).
// NOTE(review): ImgFormatDesc is declared elsewhere; the columns are
// presumably { name, component count, bits per component, component type }
// plus an optional trailing flag that only BGRA8 sets (presumably a
// BGRA-ordering flag) -- confirm against the declaration.
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
   1015 
   1016 void
   1017 TexInstruction::setIndirectR(Value *v)
   1018 {
   1019    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
   1020    if (p >= 0) {
   1021       tex.rIndirectSrc = p;
   1022       setSrc(p, v);
   1023       srcs[p].usedAsPtr = !!v;
   1024    }
   1025 }
   1026 
   1027 void
   1028 TexInstruction::setIndirectS(Value *v)
   1029 {
   1030    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
   1031    if (p >= 0) {
   1032       tex.sIndirectSrc = p;
   1033       setSrc(p, v);
   1034       srcs[p].usedAsPtr = !!v;
   1035    }
   1036 }
   1037 
   1038 CmpInstruction::CmpInstruction(Function *fn, operation op)
   1039    : Instruction(fn, op, TYPE_F32)
   1040 {
   1041    setCond = CC_ALWAYS;
   1042 }
   1043 
   1044 CmpInstruction *
   1045 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
   1046 {
   1047    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
   1048                           new_CmpInstruction(pol.context(), op));
   1049    cmp->dType = dType;
   1050    Instruction::clone(pol, cmp);
   1051    cmp->setCond = setCond;
   1052    return cmp;
   1053 }
   1054 
   1055 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
   1056    : Instruction(fn, op, TYPE_NONE)
   1057 {
   1058    if (op == OP_CALL)
   1059       target.fn = reinterpret_cast<Function *>(targ);
   1060    else
   1061       target.bb = reinterpret_cast<BasicBlock *>(targ);
   1062 
   1063    if (op == OP_BRA ||
   1064        op == OP_CONT || op == OP_BREAK ||
   1065        op == OP_RET || op == OP_EXIT)
   1066       terminator = 1;
   1067    else
   1068    if (op == OP_JOIN)
   1069       terminator = targ ? 1 : 0;
   1070 
   1071    allWarp = absolute = limit = builtin = indirect = 0;
   1072 }
   1073 
   1074 FlowInstruction *
   1075 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
   1076 {
   1077    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
   1078                             new_FlowInstruction(pol.context(), op, NULL));
   1079 
   1080    Instruction::clone(pol, flow);
   1081    flow->allWarp = allWarp;
   1082    flow->absolute = absolute;
   1083    flow->limit = limit;
   1084    flow->builtin = builtin;
   1085 
   1086    if (builtin)
   1087       flow->target.builtin = target.builtin;
   1088    else
   1089    if (op == OP_CALL)
   1090       flow->target.fn = target.fn;
   1091    else
   1092    if (target.bb)
   1093       flow->target.bb = pol.get<BasicBlock>(target.bb);
   1094 
   1095    return flow;
   1096 }
   1097 
   1098 Program::Program(Type type, Target *arch)
   1099    : progType(type),
   1100      target(arch),
   1101      mem_Instruction(sizeof(Instruction), 6),
   1102      mem_CmpInstruction(sizeof(CmpInstruction), 4),
   1103      mem_TexInstruction(sizeof(TexInstruction), 4),
   1104      mem_FlowInstruction(sizeof(FlowInstruction), 4),
   1105      mem_LValue(sizeof(LValue), 8),
   1106      mem_Symbol(sizeof(Symbol), 7),
   1107      mem_ImmediateValue(sizeof(ImmediateValue), 7)
   1108 {
   1109    code = NULL;
   1110    binSize = 0;
   1111 
   1112    maxGPR = -1;
   1113 
   1114    main = new Function(this, "MAIN", ~0);
   1115    calls.insert(&main->call);
   1116 
   1117    dbgFlags = 0;
   1118    optLevel = 0;
   1119 
   1120    targetPriv = NULL;
   1121 }
   1122 
   1123 Program::~Program()
   1124 {
   1125    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
   1126       delete reinterpret_cast<Function *>(it.get());
   1127 
   1128    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
   1129       releaseValue(reinterpret_cast<Value *>(it.get()));
   1130 }
   1131 
   1132 void Program::releaseInstruction(Instruction *insn)
   1133 {
   1134    // TODO: make this not suck so much
   1135 
   1136    insn->~Instruction();
   1137 
   1138    if (insn->asCmp())
   1139       mem_CmpInstruction.release(insn);
   1140    else
   1141    if (insn->asTex())
   1142       mem_TexInstruction.release(insn);
   1143    else
   1144    if (insn->asFlow())
   1145       mem_FlowInstruction.release(insn);
   1146    else
   1147       mem_Instruction.release(insn);
   1148 }
   1149 
   1150 void Program::releaseValue(Value *value)
   1151 {
   1152    value->~Value();
   1153 
   1154    if (value->asLValue())
   1155       mem_LValue.release(value);
   1156    else
   1157    if (value->asImm())
   1158       mem_ImmediateValue.release(value);
   1159    else
   1160    if (value->asSym())
   1161       mem_Symbol.release(value);
   1162 }
   1163 
   1164 
   1165 } // namespace nv50_ir
   1166 
   1167 extern "C" {
   1168 
   1169 static void
   1170 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
   1171 {
   1172    if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
   1173       info->prop.tp.domain = PIPE_PRIM_MAX;
   1174       info->prop.tp.outputPrim = PIPE_PRIM_MAX;
   1175    }
   1176    if (info->type == PIPE_SHADER_GEOMETRY) {
   1177       info->prop.gp.instanceCount = 1;
   1178       info->prop.gp.maxVertices = 1;
   1179    }
   1180    if (info->type == PIPE_SHADER_COMPUTE) {
   1181       info->prop.cp.numThreads[0] =
   1182       info->prop.cp.numThreads[1] =
   1183       info->prop.cp.numThreads[2] = 1;
   1184    }
   1185    info->io.pointSize = 0xff;
   1186    info->io.instanceId = 0xff;
   1187    info->io.vertexId = 0xff;
   1188    info->io.edgeFlagIn = 0xff;
   1189    info->io.edgeFlagOut = 0xff;
   1190    info->io.fragDepth = 0xff;
   1191    info->io.sampleMask = 0xff;
   1192    info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
   1193 }
   1194 
   1195 int
   1196 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   1197 {
   1198    int ret = 0;
   1199 
   1200    nv50_ir::Program::Type type;
   1201 
   1202    nv50_ir_init_prog_info(info);
   1203 
   1204 #define PROG_TYPE_CASE(a, b)                                      \
   1205    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
   1206 
   1207    switch (info->type) {
   1208    PROG_TYPE_CASE(VERTEX, VERTEX);
   1209    PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
   1210    PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
   1211    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
   1212    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
   1213    PROG_TYPE_CASE(COMPUTE, COMPUTE);
   1214    default:
   1215       INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
   1216       return -1;
   1217    }
   1218    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
   1219 
   1220    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
   1221    if (!targ)
   1222       return -1;
   1223 
   1224    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
   1225    if (!prog) {
   1226       nv50_ir::Target::destroy(targ);
   1227       return -1;
   1228    }
   1229    prog->driver = info;
   1230    prog->dbgFlags = info->dbgFlags;
   1231    prog->optLevel = info->optLevel;
   1232 
   1233    switch (info->bin.sourceRep) {
   1234    case PIPE_SHADER_IR_TGSI:
   1235       ret = prog->makeFromTGSI(info) ? 0 : -2;
   1236       break;
   1237    default:
   1238       ret = -1;
   1239       break;
   1240    }
   1241    if (ret < 0)
   1242       goto out;
   1243    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   1244       prog->print();
   1245 
   1246    targ->parseDriverInfo(info);
   1247    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
   1248 
   1249    prog->convertToSSA();
   1250 
   1251    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
   1252       prog->print();
   1253 
   1254    prog->optimizeSSA(info->optLevel);
   1255    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
   1256 
   1257    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
   1258       prog->print();
   1259 
   1260    if (!prog->registerAllocation()) {
   1261       ret = -4;
   1262       goto out;
   1263    }
   1264    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
   1265 
   1266    prog->optimizePostRA(info->optLevel);
   1267 
   1268    if (!prog->emitBinary(info)) {
   1269       ret = -5;
   1270       goto out;
   1271    }
   1272 
   1273 out:
   1274    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
   1275 
   1276    info->bin.maxGPR = prog->maxGPR;
   1277    info->bin.code = prog->code;
   1278    info->bin.codeSize = prog->binSize;
   1279    info->bin.tlsSpace = prog->tlsSize;
   1280 
   1281    delete prog;
   1282    nv50_ir::Target::destroy(targ);
   1283 
   1284    return ret;
   1285 }
   1286 
   1287 } // extern "C"
   1288