Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "codegen/nv50_ir_target_nvc0.h"
     24 
     25 namespace nv50_ir {
     26 
     27 // Argh, all these assertions ...
     28 
     29 class CodeEmitterNVC0 : public CodeEmitter
     30 {
     31 public:
     32    CodeEmitterNVC0(const TargetNVC0 *);
     33 
     34    virtual bool emitInstruction(Instruction *);
     35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
     36    virtual void prepareEmission(Function *);
     37 
     38    inline void setProgramType(Program::Type pType) { progType = pType; }
     39 
     40 private:
     41    const TargetNVC0 *targNVC0;
     42 
     43    Program::Type progType;
     44 
     45    const bool writeIssueDelays;
     46 
     47 private:
     48    void emitForm_A(const Instruction *, uint64_t);
     49    void emitForm_B(const Instruction *, uint64_t);
     50    void emitForm_S(const Instruction *, uint32_t, bool pred);
     51 
     52    void emitPredicate(const Instruction *);
     53 
     54    void setAddress16(const ValueRef&);
     55    void setAddress24(const ValueRef&);
     56    void setAddressByFile(const ValueRef&);
     57    void setImmediate(const Instruction *, const int s); // needs op already set
     58    void setImmediateS8(const ValueRef&);
     59    void setSUConst16(const Instruction *, const int s);
     60    void setSUPred(const Instruction *, const int s);
     61 
     62    void emitCondCode(CondCode cc, int pos);
     63    void emitInterpMode(const Instruction *);
     64    void emitLoadStoreType(DataType ty);
     65    void emitSUGType(DataType);
     66    void emitSUAddr(const TexInstruction *);
     67    void emitSUDim(const TexInstruction *);
     68    void emitCachingMode(CacheMode c);
     69 
     70    void emitShortSrc2(const ValueRef&);
     71 
     72    inline uint8_t getSRegEncoding(const ValueRef&);
     73 
     74    void roundMode_A(const Instruction *);
     75    void roundMode_C(const Instruction *);
     76    void roundMode_CS(const Instruction *);
     77 
     78    void emitNegAbs12(const Instruction *);
     79 
     80    void emitNOP(const Instruction *);
     81 
     82    void emitLOAD(const Instruction *);
     83    void emitSTORE(const Instruction *);
     84    void emitMOV(const Instruction *);
     85    void emitATOM(const Instruction *);
     86    void emitMEMBAR(const Instruction *);
     87    void emitCCTL(const Instruction *);
     88 
     89    void emitINTERP(const Instruction *);
     90    void emitAFETCH(const Instruction *);
     91    void emitPFETCH(const Instruction *);
     92    void emitVFETCH(const Instruction *);
     93    void emitEXPORT(const Instruction *);
     94    void emitOUT(const Instruction *);
     95 
     96    void emitUADD(const Instruction *);
     97    void emitFADD(const Instruction *);
     98    void emitDADD(const Instruction *);
     99    void emitUMUL(const Instruction *);
    100    void emitFMUL(const Instruction *);
    101    void emitDMUL(const Instruction *);
    102    void emitIMAD(const Instruction *);
    103    void emitISAD(const Instruction *);
    104    void emitSHLADD(const Instruction *a);
    105    void emitFMAD(const Instruction *);
    106    void emitDMAD(const Instruction *);
    107    void emitMADSP(const Instruction *);
    108 
    109    void emitNOT(Instruction *);
    110    void emitLogicOp(const Instruction *, uint8_t subOp);
    111    void emitPOPC(const Instruction *);
    112    void emitINSBF(const Instruction *);
    113    void emitEXTBF(const Instruction *);
    114    void emitBFIND(const Instruction *);
    115    void emitPERMT(const Instruction *);
    116    void emitShift(const Instruction *);
    117 
    118    void emitSFnOp(const Instruction *, uint8_t subOp);
    119 
    120    void emitCVT(Instruction *);
    121    void emitMINMAX(const Instruction *);
    122    void emitPreOp(const Instruction *);
    123 
    124    void emitSET(const CmpInstruction *);
    125    void emitSLCT(const CmpInstruction *);
    126    void emitSELP(const Instruction *);
    127 
    128    void emitTEXBAR(const Instruction *);
    129    void emitTEX(const TexInstruction *);
    130    void emitTEXCSAA(const TexInstruction *);
    131    void emitTXQ(const TexInstruction *);
    132 
    133    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
    134 
    135    void emitFlow(const Instruction *);
    136    void emitBAR(const Instruction *);
    137 
    138    void emitSUCLAMPMode(uint16_t);
    139    void emitSUCalc(Instruction *);
    140    void emitSULDGB(const TexInstruction *);
    141    void emitSUSTGx(const TexInstruction *);
    142 
    143    void emitSULDB(const TexInstruction *);
    144    void emitSUSTx(const TexInstruction *);
    145    void emitSULEA(const TexInstruction *);
    146 
    147    void emitVSHL(const Instruction *);
    148    void emitVectorSubOp(const Instruction *);
    149 
    150    void emitPIXLD(const Instruction *);
    151 
    152    void emitVOTE(const Instruction *);
    153 
    154    inline void defId(const ValueDef&, const int pos);
    155    inline void defId(const Instruction *, int d, const int pos);
    156    inline void srcId(const ValueRef&, const int pos);
    157    inline void srcId(const ValueRef *, const int pos);
    158    inline void srcId(const Instruction *, int s, const int pos);
    159    inline void srcAddr32(const ValueRef&, int pos, int shr);
    160 
    161    inline bool isLIMM(const ValueRef&, DataType ty);
    162 };
    163 
    164 // for better visibility
    165 #define HEX64(h, l) 0x##h##l##ULL
    166 
    167 #define SDATA(a) ((a).rep()->reg.data)
    168 #define DDATA(a) ((a).rep()->reg.data)
    169 
    170 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
    171 {
    172    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
    173 }
    174 
    175 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
    176 {
    177    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
    178 }
    179 
    180 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
    181 {
    182    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
    183    code[pos / 32] |= r << (pos % 32);
    184 }
    185 
    186 void
    187 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
    188 {
    189    const uint32_t offset = SDATA(src).offset >> shr;
    190 
    191    code[pos / 32] |= offset << (pos % 32);
    192    if (pos && (pos < 32))
    193       code[1] |= offset >> (32 - pos);
    194 }
    195 
    196 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
    197 {
    198    code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
    199 }
    200 
    201 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
    202 {
    203    int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
    204    code[pos / 32] |= r << (pos % 32);
    205 }
    206 
    207 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
    208 {
    209    const ImmediateValue *imm = ref.get()->asImm();
    210 
    211    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
    212 }
    213 
    214 void
    215 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
    216 {
    217    switch (insn->rnd) {
    218    case ROUND_M: code[1] |= 1 << 23; break;
    219    case ROUND_P: code[1] |= 2 << 23; break;
    220    case ROUND_Z: code[1] |= 3 << 23; break;
    221    default:
    222       assert(insn->rnd == ROUND_N);
    223       break;
    224    }
    225 }
    226 
    227 void
    228 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
    229 {
    230    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
    231    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
    232    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
    233    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
    234 }
    235 
    236 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
    237 {
    238    uint8_t val;
    239 
    240    switch (cc) {
    241    case CC_LT:  val = 0x1; break;
    242    case CC_LTU: val = 0x9; break;
    243    case CC_EQ:  val = 0x2; break;
    244    case CC_EQU: val = 0xa; break;
    245    case CC_LE:  val = 0x3; break;
    246    case CC_LEU: val = 0xb; break;
    247    case CC_GT:  val = 0x4; break;
    248    case CC_GTU: val = 0xc; break;
    249    case CC_NE:  val = 0x5; break;
    250    case CC_NEU: val = 0xd; break;
    251    case CC_GE:  val = 0x6; break;
    252    case CC_GEU: val = 0xe; break;
    253    case CC_TR:  val = 0xf; break;
    254    case CC_FL:  val = 0x0; break;
    255 
    256    case CC_A:  val = 0x14; break;
    257    case CC_NA: val = 0x13; break;
    258    case CC_S:  val = 0x15; break;
    259    case CC_NS: val = 0x12; break;
    260    case CC_C:  val = 0x16; break;
    261    case CC_NC: val = 0x11; break;
    262    case CC_O:  val = 0x17; break;
    263    case CC_NO: val = 0x10; break;
    264 
    265    default:
    266       val = 0;
    267       assert(!"invalid condition code");
    268       break;
    269    }
    270    code[pos / 32] |= val << (pos % 32);
    271 }
    272 
    273 void
    274 CodeEmitterNVC0::emitPredicate(const Instruction *i)
    275 {
    276    if (i->predSrc >= 0) {
    277       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
    278       srcId(i->src(i->predSrc), 10);
    279       if (i->cc == CC_NOT_P)
    280          code[0] |= 0x2000; // negate
    281    } else {
    282       code[0] |= 0x1c00;
    283    }
    284 }
    285 
    286 void
    287 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
    288 {
    289    switch (src.getFile()) {
    290    case FILE_MEMORY_GLOBAL:
    291       srcAddr32(src, 26, 0);
    292       break;
    293    case FILE_MEMORY_LOCAL:
    294    case FILE_MEMORY_SHARED:
    295       setAddress24(src);
    296       break;
    297    default:
    298       assert(src.getFile() == FILE_MEMORY_CONST);
    299       setAddress16(src);
    300       break;
    301    }
    302 }
    303 
    304 void
    305 CodeEmitterNVC0::setAddress16(const ValueRef& src)
    306 {
    307    Symbol *sym = src.get()->asSym();
    308 
    309    assert(sym);
    310 
    311    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
    312    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
    313 }
    314 
    315 void
    316 CodeEmitterNVC0::setAddress24(const ValueRef& src)
    317 {
    318    Symbol *sym = src.get()->asSym();
    319 
    320    assert(sym);
    321 
    322    code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
    323    code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
    324 }
    325 
    326 void
    327 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
    328 {
    329    const ImmediateValue *imm = i->src(s).get()->asImm();
    330    uint32_t u32;
    331 
    332    assert(imm);
    333    u32 = imm->reg.data.u32;
    334 
    335    if ((code[0] & 0xf) == 0x1) {
    336       // double immediate
    337       uint64_t u64 = imm->reg.data.u64;
    338       assert(!(u64 & 0x00000fffffffffffULL));
    339       assert(!(code[1] & 0xc000));
    340       code[0] |= ((u64 >> 44) & 0x3f) << 26;
    341       code[1] |= 0xc000 | (u64 >> 50);
    342    } else
    343    if ((code[0] & 0xf) == 0x2) {
    344       // LIMM
    345       code[0] |= (u32 & 0x3f) << 26;
    346       code[1] |= u32 >> 6;
    347    } else
    348    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
    349       // integer immediate
    350       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
    351       assert(!(code[1] & 0xc000));
    352       u32 &= 0xfffff;
    353       code[0] |= (u32 & 0x3f) << 26;
    354       code[1] |= 0xc000 | (u32 >> 6);
    355    } else {
    356       // float immediate
    357       assert(!(u32 & 0x00000fff));
    358       assert(!(code[1] & 0xc000));
    359       code[0] |= ((u32 >> 12) & 0x3f) << 26;
    360       code[1] |= 0xc000 | (u32 >> 18);
    361    }
    362 }
    363 
    364 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
    365 {
    366    const ImmediateValue *imm = ref.get()->asImm();
    367 
    368    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
    369 
    370    assert(s8 == imm->reg.data.s32);
    371 
    372    code[0] |= (s8 & 0x3f) << 26;
    373    code[0] |= (s8 >> 6) << 8;
    374 }
    375 
    376 void
    377 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
    378 {
    379    code[0] = opc;
    380    code[1] = opc >> 32;
    381 
    382    emitPredicate(i);
    383 
    384    defId(i->def(0), 14);
    385 
    386    int s1 = 26;
    387    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
    388       s1 = 49;
    389 
    390    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
    391       switch (i->getSrc(s)->reg.file) {
    392       case FILE_MEMORY_CONST:
    393          assert(!(code[1] & 0xc000));
    394          code[1] |= (s == 2) ? 0x8000 : 0x4000;
    395          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
    396          setAddress16(i->src(s));
    397          break;
    398       case FILE_IMMEDIATE:
    399          assert(s == 1 ||
    400                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
    401          assert(!(code[1] & 0xc000));
    402          setImmediate(i, s);
    403          break;
    404       case FILE_GPR:
    405          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
    406             break;
    407          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
    408          break;
    409       default:
    410          if (i->op == OP_SELP) {
    411             // OP_SELP is used to implement shared+atomics on Fermi.
    412             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
    413             srcId(i->src(s), 49);
    414          }
    415          // ignore here, can be predicate or flags, but must not be address
    416          break;
    417       }
    418    }
    419 }
    420 
    421 void
    422 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
    423 {
    424    code[0] = opc;
    425    code[1] = opc >> 32;
    426 
    427    emitPredicate(i);
    428 
    429    defId(i->def(0), 14);
    430 
    431    switch (i->src(0).getFile()) {
    432    case FILE_MEMORY_CONST:
    433       assert(!(code[1] & 0xc000));
    434       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
    435       setAddress16(i->src(0));
    436       break;
    437    case FILE_IMMEDIATE:
    438       assert(!(code[1] & 0xc000));
    439       setImmediate(i, 0);
    440       break;
    441    case FILE_GPR:
    442       srcId(i->src(0), 26);
    443       break;
    444    default:
    445       // ignore here, can be predicate or flags, but must not be address
    446       break;
    447    }
    448 }
    449 
    450 void
    451 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
    452 {
    453    code[0] = opc;
    454 
    455    int ss2a = 0;
    456    if (opc == 0x0d || opc == 0x0e)
    457       ss2a = 2;
    458 
    459    defId(i->def(0), 14);
    460    srcId(i->src(0), 20);
    461 
    462    assert(pred || (i->predSrc < 0));
    463    if (pred)
    464       emitPredicate(i);
    465 
    466    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
    467       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
    468          assert(!(code[0] & (0x300 >> ss2a)));
    469          switch (i->src(s).get()->reg.fileIndex) {
    470          case 0:  code[0] |= 0x100 >> ss2a; break;
    471          case 1:  code[0] |= 0x200 >> ss2a; break;
    472          case 16: code[0] |= 0x300 >> ss2a; break;
    473          default:
    474             ERROR("invalid c[] space for short form\n");
    475             break;
    476          }
    477          if (s == 1)
    478             code[0] |= i->getSrc(s)->reg.data.offset << 24;
    479          else
    480             code[0] |= i->getSrc(s)->reg.data.offset << 6;
    481       } else
    482       if (i->src(s).getFile() == FILE_IMMEDIATE) {
    483          assert(s == 1);
    484          setImmediateS8(i->src(s));
    485       } else
    486       if (i->src(s).getFile() == FILE_GPR) {
    487          srcId(i->src(s), (s == 1) ? 26 : 8);
    488       }
    489    }
    490 }
    491 
    492 void
    493 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
    494 {
    495    if (src.getFile() == FILE_MEMORY_CONST) {
    496       switch (src.get()->reg.fileIndex) {
    497       case 0:  code[0] |= 0x100; break;
    498       case 1:  code[0] |= 0x200; break;
    499       case 16: code[0] |= 0x300; break;
    500       default:
    501          assert(!"unsupported file index for short op");
    502          break;
    503       }
    504       srcAddr32(src, 20, 2);
    505    } else {
    506       srcId(src, 20);
    507       assert(src.getFile() == FILE_GPR);
    508    }
    509 }
    510 
    511 void
    512 CodeEmitterNVC0::emitNOP(const Instruction *i)
    513 {
    514    code[0] = 0x000001e4;
    515    code[1] = 0x40000000;
    516    emitPredicate(i);
    517 }
    518 
    519 void
    520 CodeEmitterNVC0::emitFMAD(const Instruction *i)
    521 {
    522    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
    523 
    524    if (i->encSize == 8) {
    525       if (isLIMM(i->src(1), TYPE_F32)) {
    526          emitForm_A(i, HEX64(20000000, 00000002));
    527       } else {
    528          emitForm_A(i, HEX64(30000000, 00000000));
    529 
    530          if (i->src(2).mod.neg())
    531             code[0] |= 1 << 8;
    532       }
    533       roundMode_A(i);
    534 
    535       if (neg1)
    536          code[0] |= 1 << 9;
    537 
    538       if (i->saturate)
    539          code[0] |= 1 << 5;
    540 
    541       if (i->dnz)
    542          code[0] |= 1 << 7;
    543       else
    544       if (i->ftz)
    545          code[0] |= 1 << 6;
    546    } else {
    547       assert(!i->saturate && !i->src(2).mod.neg());
    548       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
    549                  false);
    550       if (neg1)
    551          code[0] |= 1 << 4;
    552    }
    553 }
    554 
    555 void
    556 CodeEmitterNVC0::emitDMAD(const Instruction *i)
    557 {
    558    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
    559 
    560    emitForm_A(i, HEX64(20000000, 00000001));
    561 
    562    if (i->src(2).mod.neg())
    563       code[0] |= 1 << 8;
    564 
    565    roundMode_A(i);
    566 
    567    if (neg1)
    568       code[0] |= 1 << 9;
    569 
    570    assert(!i->saturate);
    571    assert(!i->ftz);
    572 }
    573 
    574 void
    575 CodeEmitterNVC0::emitFMUL(const Instruction *i)
    576 {
    577    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
    578 
    579    assert(i->postFactor >= -3 && i->postFactor <= 3);
    580 
    581    if (i->encSize == 8) {
    582       if (isLIMM(i->src(1), TYPE_F32)) {
    583          assert(i->postFactor == 0); // constant folded, hopefully
    584          emitForm_A(i, HEX64(30000000, 00000002));
    585       } else {
    586          emitForm_A(i, HEX64(58000000, 00000000));
    587          roundMode_A(i);
    588          code[1] |= ((i->postFactor > 0) ?
    589                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
    590       }
    591       if (neg)
    592          code[1] ^= 1 << 25; // aliases with LIMM sign bit
    593 
    594       if (i->saturate)
    595          code[0] |= 1 << 5;
    596 
    597       if (i->dnz)
    598          code[0] |= 1 << 7;
    599       else
    600       if (i->ftz)
    601          code[0] |= 1 << 6;
    602    } else {
    603       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
    604       emitForm_S(i, 0xa8, true);
    605    }
    606 }
    607 
    608 void
    609 CodeEmitterNVC0::emitDMUL(const Instruction *i)
    610 {
    611    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
    612 
    613    emitForm_A(i, HEX64(50000000, 00000001));
    614    roundMode_A(i);
    615 
    616    if (neg)
    617       code[0] |= 1 << 9;
    618 
    619    assert(!i->saturate);
    620    assert(!i->ftz);
    621    assert(!i->dnz);
    622    assert(!i->postFactor);
    623 }
    624 
    625 void
    626 CodeEmitterNVC0::emitUMUL(const Instruction *i)
    627 {
    628    if (i->encSize == 8) {
    629       if (i->src(1).getFile() == FILE_IMMEDIATE) {
    630          emitForm_A(i, HEX64(10000000, 00000002));
    631       } else {
    632          emitForm_A(i, HEX64(50000000, 00000003));
    633       }
    634       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
    635          code[0] |= 1 << 6;
    636       if (i->sType == TYPE_S32)
    637          code[0] |= 1 << 5;
    638       if (i->dType == TYPE_S32)
    639          code[0] |= 1 << 7;
    640    } else {
    641       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
    642 
    643       if (i->sType == TYPE_S32)
    644          code[0] |= 1 << 6;
    645    }
    646 }
    647 
    648 void
    649 CodeEmitterNVC0::emitFADD(const Instruction *i)
    650 {
    651    if (i->encSize == 8) {
    652       if (isLIMM(i->src(1), TYPE_F32)) {
    653          assert(!i->saturate);
    654          emitForm_A(i, HEX64(28000000, 00000002));
    655 
    656          code[0] |= i->src(0).mod.abs() << 7;
    657          code[0] |= i->src(0).mod.neg() << 9;
    658 
    659          if (i->src(1).mod.abs())
    660             code[1] &= 0xfdffffff;
    661          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
    662             code[1] ^= 0x02000000;
    663       } else {
    664          emitForm_A(i, HEX64(50000000, 00000000));
    665 
    666          roundMode_A(i);
    667          if (i->saturate)
    668             code[1] |= 1 << 17;
    669 
    670          emitNegAbs12(i);
    671          if (i->op == OP_SUB) code[0] ^= 1 << 8;
    672       }
    673       if (i->ftz)
    674          code[0] |= 1 << 5;
    675    } else {
    676       assert(!i->saturate && i->op != OP_SUB &&
    677              !i->src(0).mod.abs() &&
    678              !i->src(1).mod.neg() && !i->src(1).mod.abs());
    679 
    680       emitForm_S(i, 0x49, true);
    681 
    682       if (i->src(0).mod.neg())
    683          code[0] |= 1 << 7;
    684    }
    685 }
    686 
    687 void
    688 CodeEmitterNVC0::emitDADD(const Instruction *i)
    689 {
    690    assert(i->encSize == 8);
    691    emitForm_A(i, HEX64(48000000, 00000001));
    692    roundMode_A(i);
    693    assert(!i->saturate);
    694    assert(!i->ftz);
    695    emitNegAbs12(i);
    696    if (i->op == OP_SUB)
    697       code[0] ^= 1 << 8;
    698 }
    699 
    700 void
    701 CodeEmitterNVC0::emitUADD(const Instruction *i)
    702 {
    703    uint32_t addOp = 0;
    704 
    705    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
    706 
    707    if (i->src(0).mod.neg())
    708       addOp |= 0x200;
    709    if (i->src(1).mod.neg())
    710       addOp |= 0x100;
    711    if (i->op == OP_SUB)
    712       addOp ^= 0x100;
    713 
    714    assert(addOp != 0x300); // would be add-plus-one
    715 
    716    if (i->encSize == 8) {
    717       if (isLIMM(i->src(1), TYPE_U32)) {
    718          emitForm_A(i, HEX64(08000000, 00000002));
    719          if (i->defExists(1))
    720             code[1] |= 1 << 26; // write carry
    721       } else {
    722          emitForm_A(i, HEX64(48000000, 00000003));
    723          if (i->defExists(1))
    724             code[1] |= 1 << 16; // write carry
    725       }
    726       code[0] |= addOp;
    727 
    728       if (i->saturate)
    729          code[0] |= 1 << 5;
    730       if (i->flagsSrc >= 0) // add carry
    731          code[0] |= 1 << 6;
    732    } else {
    733       assert(!(addOp & 0x100));
    734       emitForm_S(i, (addOp >> 3) |
    735                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
    736    }
    737 }
    738 
    739 void
    740 CodeEmitterNVC0::emitIMAD(const Instruction *i)
    741 {
    742    uint8_t addOp =
    743       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
    744 
    745    assert(i->encSize == 8);
    746    emitForm_A(i, HEX64(20000000, 00000003));
    747 
    748    assert(addOp != 3);
    749    code[0] |= addOp << 8;
    750 
    751    if (isSignedType(i->dType))
    752       code[0] |= 1 << 7;
    753    if (isSignedType(i->sType))
    754       code[0] |= 1 << 5;
    755 
    756    code[1] |= i->saturate << 24;
    757 
    758    if (i->flagsDef >= 0) code[1] |= 1 << 16;
    759    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
    760 
    761    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
    762       code[0] |= 1 << 6;
    763 }
    764 
    765 void
    766 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
    767 {
    768    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
    769    const ImmediateValue *imm = i->src(1).get()->asImm();
    770    assert(imm);
    771 
    772    code[0] = 0x00000003;
    773    code[1] = 0x40000000 | addOp << 23;
    774 
    775    emitPredicate(i);
    776 
    777    defId(i->def(0), 14);
    778    srcId(i->src(0), 20);
    779 
    780    if (i->flagsDef >= 0)
    781       code[1] |= 1 << 16;
    782 
    783    assert(!(imm->reg.data.u32 & 0xffffffe0));
    784    code[0] |= imm->reg.data.u32 << 5;
    785 
    786    switch (i->src(2).getFile()) {
    787    case FILE_GPR:
    788       srcId(i->src(2), 26);
    789       break;
    790    case FILE_MEMORY_CONST:
    791       code[1] |= 0x4000;
    792       code[1] |= i->getSrc(2)->reg.fileIndex << 10;
    793       setAddress16(i->src(2));
    794       break;
    795    case FILE_IMMEDIATE:
    796       setImmediate(i, 2);
    797       break;
    798    default:
    799       assert(!"bad src2 file");
    800       break;
    801    }
    802 }
    803 
    804 void
    805 CodeEmitterNVC0::emitMADSP(const Instruction *i)
    806 {
    807    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
    808 
    809    emitForm_A(i, HEX64(00000000, 00000003));
    810 
    811    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
    812       code[1] |= 0x01800000;
    813    } else {
    814       code[0] |= (i->subOp & 0x00f) << 7;
    815       code[0] |= (i->subOp & 0x0f0) << 1;
    816       code[0] |= (i->subOp & 0x100) >> 3;
    817       code[0] |= (i->subOp & 0x200) >> 2;
    818       code[1] |= (i->subOp & 0xc00) << 13;
    819    }
    820 
    821    if (i->flagsDef >= 0)
    822       code[1] |= 1 << 16;
    823 }
    824 
    825 void
    826 CodeEmitterNVC0::emitISAD(const Instruction *i)
    827 {
    828    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
    829    assert(i->encSize == 8);
    830 
    831    emitForm_A(i, HEX64(38000000, 00000003));
    832 
    833    if (i->dType == TYPE_S32)
    834       code[0] |= 1 << 5;
    835 }
    836 
    837 void
    838 CodeEmitterNVC0::emitNOT(Instruction *i)
    839 {
    840    assert(i->encSize == 8);
    841    i->setSrc(1, i->src(0));
    842    emitForm_A(i, HEX64(68000000, 000001c3));
    843 }
    844 
    845 void
    846 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
    847 {
    848    if (i->def(0).getFile() == FILE_PREDICATE) {
    849       code[0] = 0x00000004 | (subOp << 30);
    850       code[1] = 0x0c000000;
    851 
    852       emitPredicate(i);
    853 
    854       defId(i->def(0), 17);
    855       srcId(i->src(0), 20);
    856       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
    857       srcId(i->src(1), 26);
    858       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
    859 
    860       if (i->defExists(1)) {
    861          defId(i->def(1), 14);
    862       } else {
    863          code[0] |= 7 << 14;
    864       }
    865       // (a OP b) OP c
    866       if (i->predSrc != 2 && i->srcExists(2)) {
    867          code[1] |= subOp << 21;
    868          srcId(i->src(2), 49);
    869          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
    870       } else {
    871          code[1] |= 0x000e0000;
    872       }
    873    } else
    874    if (i->encSize == 8) {
    875       if (isLIMM(i->src(1), TYPE_U32)) {
    876          emitForm_A(i, HEX64(38000000, 00000002));
    877 
    878          if (i->flagsDef >= 0)
    879             code[1] |= 1 << 26;
    880       } else {
    881          emitForm_A(i, HEX64(68000000, 00000003));
    882 
    883          if (i->flagsDef >= 0)
    884             code[1] |= 1 << 16;
    885       }
    886       code[0] |= subOp << 6;
    887 
    888       if (i->flagsSrc >= 0) // carry
    889          code[0] |= 1 << 5;
    890 
    891       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
    892       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
    893    } else {
    894       emitForm_S(i, (subOp << 5) |
    895                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
    896    }
    897 }
    898 
    899 void
    900 CodeEmitterNVC0::emitPOPC(const Instruction *i)
    901 {
    902    emitForm_A(i, HEX64(54000000, 00000004));
    903 
    904    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
    905    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
    906 }
    907 
    908 void
    909 CodeEmitterNVC0::emitINSBF(const Instruction *i)
    910 {
    911    emitForm_A(i, HEX64(28000000, 00000003));
    912 }
    913 
    914 void
    915 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
    916 {
    917    emitForm_A(i, HEX64(70000000, 00000003));
    918 
    919    if (i->dType == TYPE_S32)
    920       code[0] |= 1 << 5;
    921    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
    922       code[0] |= 1 << 8;
    923 }
    924 
    925 void
    926 CodeEmitterNVC0::emitBFIND(const Instruction *i)
    927 {
    928    emitForm_B(i, HEX64(78000000, 00000003));
    929 
    930    if (i->dType == TYPE_S32)
    931       code[0] |= 1 << 5;
    932    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
    933       code[0] |= 1 << 8;
    934    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
    935       code[0] |= 1 << 6;
    936 }
    937 
    938 void
    939 CodeEmitterNVC0::emitPERMT(const Instruction *i)
    940 {
    941    emitForm_A(i, HEX64(24000000, 00000004));
    942 
    943    code[0] |= i->subOp << 5;
    944 }
    945 
    946 void
    947 CodeEmitterNVC0::emitShift(const Instruction *i)
    948 {
    949    if (i->op == OP_SHR) {
    950       emitForm_A(i, HEX64(58000000, 00000003)
    951                  | (isSignedType(i->dType) ? 0x20 : 0x00));
    952    } else {
    953       emitForm_A(i, HEX64(60000000, 00000003));
    954    }
    955 
    956    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
    957       code[0] |= 1 << 9;
    958 }
    959 
    960 void
    961 CodeEmitterNVC0::emitPreOp(const Instruction *i)
    962 {
    963    if (i->encSize == 8) {
    964       emitForm_B(i, HEX64(60000000, 00000000));
    965 
    966       if (i->op == OP_PREEX2)
    967          code[0] |= 0x20;
    968 
    969       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
    970       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
    971    } else {
    972       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
    973    }
    974 }
    975 
    976 void
    977 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
    978 {
    979    if (i->encSize == 8) {
    980       code[0] = 0x00000000 | (subOp << 26);
    981       code[1] = 0xc8000000;
    982 
    983       emitPredicate(i);
    984 
    985       defId(i->def(0), 14);
    986       srcId(i->src(0), 20);
    987 
    988       assert(i->src(0).getFile() == FILE_GPR);
    989 
    990       if (i->saturate) code[0] |= 1 << 5;
    991 
    992       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
    993       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
    994    } else {
    995       emitForm_S(i, 0x80000008 | (subOp << 26), true);
    996 
    997       assert(!i->src(0).mod.neg());
    998       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
    999    }
   1000 }
   1001 
   1002 void
   1003 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
   1004 {
   1005    uint64_t op;
   1006 
   1007    assert(i->encSize == 8);
   1008 
   1009    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
   1010 
   1011    if (i->ftz)
   1012       op |= 1 << 5;
   1013    else
   1014    if (!isFloatType(i->dType))
   1015       op |= isSignedType(i->dType) ? 0x23 : 0x03;
   1016    if (i->dType == TYPE_F64)
   1017       op |= 0x01;
   1018 
   1019    emitForm_A(i, op);
   1020    emitNegAbs12(i);
   1021 }
   1022 
   1023 void
   1024 CodeEmitterNVC0::roundMode_C(const Instruction *i)
   1025 {
   1026    switch (i->rnd) {
   1027    case ROUND_M:  code[1] |= 1 << 17; break;
   1028    case ROUND_P:  code[1] |= 2 << 17; break;
   1029    case ROUND_Z:  code[1] |= 3 << 17; break;
   1030    case ROUND_NI: code[0] |= 1 << 7; break;
   1031    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
   1032    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
   1033    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
   1034    case ROUND_N: break;
   1035    default:
   1036       assert(!"invalid round mode");
   1037       break;
   1038    }
   1039 }
   1040 
   1041 void
   1042 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
   1043 {
   1044    switch (i->rnd) {
   1045    case ROUND_M:
   1046    case ROUND_MI: code[0] |= 1 << 16; break;
   1047    case ROUND_P:
   1048    case ROUND_PI: code[0] |= 2 << 16; break;
   1049    case ROUND_Z:
   1050    case ROUND_ZI: code[0] |= 3 << 16; break;
   1051    default:
   1052       break;
   1053    }
   1054 }
   1055 
   1056 void
   1057 CodeEmitterNVC0::emitCVT(Instruction *i)
   1058 {
           // Emits a conversion-style instruction. Besides plain conversions the
           // code below special-cases OP_CEIL/OP_FLOOR/OP_TRUNC (forced rounding
           // mode), OP_SAT, OP_ABS and OP_NEG. `i` is non-const because i->rnd is
           // overwritten for the rounding ops.
   1059    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   1060    DataType dType;
   1061 
           // Rounding ops force the rounding mode; the *I variant is chosen when
           // both source and destination are float types.
   1062    switch (i->op) {
   1063    case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
   1064    case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
   1065    case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   1066    default:
   1067       break;
   1068    }
   1069 
           // Each flag is requested either by the operation itself or by the
           // saturate flag / source 0 modifier.
   1070    const bool sat = (i->op == OP_SAT) || i->saturate;
   1071    const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
   1072    const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
   1073 
           // A NEG with a U32 destination is encoded with an S32 destination type.
   1074    if (i->op == OP_NEG && i->dType == TYPE_U32)
   1075       dType = TYPE_S32;
   1076    else
   1077       dType = i->dType;
   1078 
   1079    if (i->encSize == 8) {
   1080       emitForm_B(i, HEX64(10000000, 00000004));
   1081 
   1082       roundMode_C(i);
   1083 
   1084       // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
              // log2 of the destination / source type size goes to bits 20 / 23
   1085       code[0] |= util_logbase2(typeSizeof(dType)) << 20;
   1086       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
   1087 
   1088       // for 8/16 source types, the byte/word is in subOp. word 1 is
   1089       // represented as 2.
   1090       if (!isFloatType(i->sType))
   1091          code[1] |= i->subOp << 0x17;
   1092       else
   1093          code[1] |= i->subOp << 0x18;
   1094 
   1095       if (sat)
   1096          code[0] |= 0x20;
   1097       if (abs)
   1098          code[0] |= 1 << 6;
              // the negate bit is suppressed when the operation itself is OP_ABS
   1099       if (neg && i->op != OP_ABS)
   1100          code[0] |= 1 << 8;
   1101 
   1102       if (i->ftz)
   1103          code[1] |= 1 << 23;
   1104 
              // signedness of integer destination / source types
   1105       if (isSignedIntType(dType))
   1106          code[0] |= 0x080;
   1107       if (isSignedIntType(i->sType))
   1108          code[0] |= 0x200;
   1109 
              // conversion class in code[1]: float<-float adds nothing,
              // float<-int 0x08000000, int<-float 0x04000000, int<-int 0x0c000000
   1110       if (isFloatType(dType)) {
   1111          if (!isFloatType(i->sType))
   1112             code[1] |= 0x08000000;
   1113       } else {
   1114          if (isFloatType(i->sType))
   1115             code[1] |= 0x04000000;
   1116          else
   1117             code[1] |= 0x0c000000;
   1118       }
   1119    } else {
              // Short encoding: only code[0] is written here, starting from an
              // opcode picked by op / type class.
              // NOTE(review): no def/src/predicate fields are emitted in this
              // branch, and neg is applied even for OP_ABS (unlike the 8-byte
              // path above) - confirm both are intended.
   1120       if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
   1121          code[0] = 0x298;
   1122       } else
   1123       if (isFloatType(dType)) {
   1124          if (isFloatType(i->sType))
   1125             code[0] = 0x098;
   1126          else
   1127             code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
   1128       } else {
   1129          assert(isFloatType(i->sType));
   1130 
   1131          code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
   1132       }
   1133 
   1134       if (neg) code[0] |= 1 << 16;
   1135       if (sat) code[0] |= 1 << 18;
   1136       if (abs) code[0] |= 1 << 19;
   1137 
   1138       roundMode_CS(i);
   1139    }
   1140 }
   1141 
   1142 void
   1143 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
   1144 {
           // Emits a comparison (OP_SET and its AND/OR/XOR variants). The opcode
           // is assembled from a low word describing the source/destination types
           // and a high word selected by the op, then passed through form A.
   1145    uint32_t hi;
   1146    uint32_t lo = 0;
   1147 
           // low opcode bits from the source type: 0x1 for F64, 0x3 for any
           // non-float type, 0 otherwise
   1148    if (i->sType == TYPE_F64)
   1149       lo = 0x1;
   1150    else
   1151    if (!isFloatType(i->sType))
   1152       lo = 0x3;
   1153 
   1154    if (isSignedIntType(i->sType))
   1155       lo |= 0x20;
           // float destination: 0x20 with a float source, 0x80 with a non-float one
   1156    if (isFloatType(i->dType)) {
   1157       if (isFloatType(i->sType))
   1158          lo |= 0x20;
   1159       else
   1160          lo |= 0x80;
   1161    }
   1162 
   1163    switch (i->op) {
   1164    case OP_SET_AND: hi = 0x10000000; break;
   1165    case OP_SET_OR:  hi = 0x10200000; break;
   1166    case OP_SET_XOR: hi = 0x10400000; break;
   1167    default:
   1168       hi = 0x100e0000;
   1169       break;
   1170    }
   1171    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
   1172 
           // every op other than plain OP_SET takes a third source, encoded at
           // bit 17 of code[1]
   1173    if (i->op != OP_SET)
   1174       srcId(i->src(2), 32 + 17);
   1175 
           // Predicate destination: the opcode in code[1] is adjusted by addition,
           // bits 14..19 of code[0] are cleared, def 0 is re-encoded at bit 17 and
           // bits 14..16 receive either def 1 or the filler 0x1c000.
   1176    if (i->def(0).getFile() == FILE_PREDICATE) {
   1177       if (i->sType == TYPE_F32)
   1178          code[1] += 0x10000000;
   1179       else
   1180          code[1] += 0x08000000;
   1181 
   1182       code[0] &= ~0xfc000;
   1183       defId(i->def(0), 17);
   1184       if (i->defExists(1))
   1185          defId(i->def(1), 14);
   1186       else
   1187          code[0] |= 0x1c000;
   1188    }
   1189 
   1190    if (i->ftz)
   1191       code[1] |= 1 << 27;
   1192 
           // condition code goes to bit 23 of code[1]
   1193    emitCondCode(i->setCond, 32 + 23);
   1194    emitNegAbs12(i);
   1195 }
   1196 
   1197 void
   1198 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
   1199 {
   1200    uint64_t op;
   1201 
   1202    switch (i->dType) {
   1203    case TYPE_S32:
   1204       op = HEX64(30000000, 00000023);
   1205       break;
   1206    case TYPE_U32:
   1207       op = HEX64(30000000, 00000003);
   1208       break;
   1209    case TYPE_F32:
   1210       op = HEX64(38000000, 00000000);
   1211       break;
   1212    default:
   1213       assert(!"invalid type for SLCT");
   1214       op = 0;
   1215       break;
   1216    }
   1217    emitForm_A(i, op);
   1218 
   1219    CondCode cc = i->setCond;
   1220 
   1221    if (i->src(2).mod.neg())
   1222       cc = reverseCondCode(cc);
   1223 
   1224    emitCondCode(cc, 32 + 23);
   1225 
   1226    if (i->ftz)
   1227       code[0] |= 1 << 5;
   1228 }
   1229 
   1230 static void
   1231 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
   1232 {
   1233    int loc = entry->loc;
   1234    if (data.force_persample_interp)
   1235       code[loc + 1] |= 1 << 20;
   1236    else
   1237       code[loc + 1] &= ~(1 << 20);
   1238 }
   1239 
   1240 void CodeEmitterNVC0::emitSELP(const Instruction *i)
   1241 {
   1242    emitForm_A(i, HEX64(20000000, 00000004));
   1243 
   1244    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
   1245       code[1] |= 1 << 20;
   1246 
   1247    if (i->subOp == 1) {
   1248       addInterp(0, 0, selpFlip);
   1249    }
   1250 }
   1251 
   1252 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
   1253 {
   1254    code[0] = 0x00000006 | (i->subOp << 26);
   1255    code[1] = 0xf0000000;
   1256    emitPredicate(i);
   1257    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
   1258 }
   1259 
   1260 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
   1261 {
   1262    code[0] = 0x00000086;
   1263    code[1] = 0xd0000000;
   1264 
   1265    code[1] |= i->tex.r;
   1266    code[1] |= i->tex.s << 8;
   1267 
   1268    if (i->tex.liveOnly)
   1269       code[0] |= 1 << 9;
   1270 
   1271    defId(i->def(0), 14);
   1272    srcId(i->src(0), 20);
   1273 }
   1274 
   1275 static inline bool
   1276 isNextIndependentTex(const TexInstruction *i)
   1277 {
   1278    if (!i->next || !isTextureOp(i->next->op))
   1279       return false;
   1280    if (i->getDef(0)->interfers(i->next->getSrc(0)))
   1281       return false;
   1282    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
   1283 }
   1284 
   1285 void
   1286 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
   1287 {
           // Emits the texture ops listed in the switch below (TEX, TXB, TXL, TXF,
           // TXG, TXLQ, TXD). Both instruction words are built by hand.
   1288    code[0] = 0x00000006;
   1289 
           // mode bit depends on whether the next instruction is a texture op
           // that does not read this one's result (see isNextIndependentTex)
   1290    if (isNextIndependentTex(i))
   1291       code[0] |= 0x080; // t mode
   1292    else
   1293       code[0] |= 0x100; // p mode
   1294 
   1295    if (i->tex.liveOnly)
   1296       code[0] |= 1 << 9;
   1297 
   1298    switch (i->op) {
   1299    case OP_TEX: code[1] = 0x80000000; break;
   1300    case OP_TXB: code[1] = 0x84000000; break;
   1301    case OP_TXL: code[1] = 0x86000000; break;
   1302    case OP_TXF: code[1] = 0x90000000; break;
   1303    case OP_TXG: code[1] = 0xa0000000; break;
   1304    case OP_TXLQ: code[1] = 0xb0000000; break;
   1305    case OP_TXD: code[1] = 0xe0000000; break;
   1306    default:
   1307       assert(!"invalid texture op");
   1308       break;
   1309    }
           // bit 25 of code[1] has inverted meaning for TXF: it is set when
           // levelZero is NOT requested, whereas the other ops set it for levelZero
   1310    if (i->op == OP_TXF) {
   1311       if (!i->tex.levelZero)
   1312          code[1] |= 0x02000000;
   1313    } else
   1314    if (i->tex.levelZero) {
   1315       code[1] |= 0x02000000;
   1316    }
   1317 
   1318    if (i->op != OP_TXD && i->tex.derivAll)
   1319       code[1] |= 1 << 13;
   1320 
   1321    defId(i->def(0), 14);
   1322    srcId(i->src(0), 20);
   1323 
   1324    emitPredicate(i);
   1325 
   1326    if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
   1327 
   1328    code[1] |= i->tex.mask << 14;
   1329 
   1330    code[1] |= i->tex.r;
   1331    code[1] |= i->tex.s << 8;
   1332    if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
   1333       code[1] |= 1 << 18; // in 1st source (with array index)
   1334 
   1335    // texture target:
           // (dim - 1) at bit 20; cube targets add 2 to that field (note the +=),
           // array sets bit 19, shadow sets bit 24
   1336    code[1] |= (i->tex.target.getDim() - 1) << 20;
   1337    if (i->tex.target.isCube())
   1338       code[1] += 2 << 20;
   1339    if (i->tex.target.isArray())
   1340       code[1] |= 1 << 19;
   1341    if (i->tex.target.isShadow())
   1342       code[1] |= 1 << 24;
   1343 
   1344    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
   1345 
           // an immediate second source clears an opcode bit for TXL (bit 26)
           // and TXF (bit 25)
   1346    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
   1347       // lzero
   1348       if (i->op == OP_TXL)
   1349          code[1] &= ~(1 << 26);
   1350       else
   1351       if (i->op == OP_TXF)
   1352          code[1] &= ~(1 << 25);
   1353    }
   1354    if (i->tex.target == TEX_TARGET_2D_MS ||
   1355        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
   1356       code[1] |= 1 << 23;
   1357 
           // NOTE(review): useOffsets == 4 sets the same bit 23 as the
           // multisample targets above - confirm the two cannot collide.
   1358    if (i->tex.useOffsets == 1)
   1359       code[1] |= 1 << 22;
   1360    if (i->tex.useOffsets == 4)
   1361       code[1] |= 1 << 23;
   1362 
   1363    srcId(i, src1, 26);
   1364 }
   1365 
   1366 void
   1367 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
   1368 {
   1369    code[0] = 0x00000086;
   1370    code[1] = 0xc0000000;
   1371 
   1372    switch (i->tex.query) {
   1373    case TXQ_DIMS:            code[1] |= 0 << 22; break;
   1374    case TXQ_TYPE:            code[1] |= 1 << 22; break;
   1375    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
   1376    case TXQ_FILTER:          code[1] |= 3 << 22; break;
   1377    case TXQ_LOD:             code[1] |= 4 << 22; break;
   1378    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
   1379    default:
   1380       assert(!"invalid texture query");
   1381       break;
   1382    }
   1383 
   1384    code[1] |= i->tex.mask << 14;
   1385 
   1386    code[1] |= i->tex.r;
   1387    code[1] |= i->tex.s << 8;
   1388    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
   1389       code[1] |= 1 << 18;
   1390 
   1391    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
   1392 
   1393    defId(i->def(0), 14);
   1394    srcId(i->src(0), 20);
   1395    srcId(i, src1, 26);
   1396 
   1397    emitPredicate(i);
   1398 }
   1399 
   1400 void
   1401 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
   1402 {
   1403    code[0] = 0x00000200 | (laneMask << 6); // dall
   1404    code[1] = 0x48000000 | qOp;
   1405 
   1406    defId(i->def(0), 14);
   1407    srcId(i->src(0), 20);
   1408    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
   1409 
   1410    emitPredicate(i);
   1411 }
   1412 
   1413 void
   1414 CodeEmitterNVC0::emitFlow(const Instruction *i)
   1415 {
           // Emits control-flow instructions (branches, calls, exits, the
           // JOINAT/PRE* ops and quad/breakpoint ops). The switch picks the opcode
           // and a mask saying whether a predicate and/or a target must be encoded.
   1416    const FlowInstruction *f = i->asFlow();
   1417 
   1418    unsigned mask; // bit 0: predicate, bit 1: target
   1419 
   1420    code[0] = 0x00000007;
   1421 
           // NOTE(review): the OP_BRA / OP_CALL cases dereference f before the
           // `if (!f)` check further down; presumably these ops are always
           // FlowInstructions - confirm.
   1422    switch (i->op) {
   1423    case OP_BRA:
   1424       code[1] = f->absolute ? 0x00000000 : 0x40000000;
   1425       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
   1426          code[0] |= 0x4000;
   1427       mask = 3;
   1428       break;
   1429    case OP_CALL:
   1430       code[1] = f->absolute ? 0x10000000 : 0x50000000;
   1431       if (f->indirect)
   1432          code[0] |= 0x4000; // indirect calls always use c[] source
   1433       mask = 2;
   1434       break;
   1435 
   1436    case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
   1437    case OP_RET:     code[1] = 0x90000000; mask = 1; break;
   1438    case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
   1439    case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
   1440    case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
   1441 
   1442    case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
   1443    case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
   1444    case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
   1445    case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
   1446 
   1447    case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
   1448    case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
   1449    case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
   1450    default:
   1451       assert(!"invalid flow operation");
   1452       return;
   1453    }
   1454 
           // predicated ops: without a flags source, bits 5..8 of code[0] are all
           // set (presumably the "always" condition code - confirm)
   1455    if (mask & 1) {
   1456       emitPredicate(i);
   1457       if (i->flagsSrc < 0)
   1458          code[0] |= 0x1e0;
   1459    }
   1460 
           // everything below needs the FlowInstruction view
   1461    if (!f)
   1462       return;
   1463 
   1464    if (f->allWarp)
   1465       code[0] |= 1 << 15;
   1466    if (f->limit)
   1467       code[0] |= 1 << 16;
   1468 
           // indirect target: either a constant-buffer address (flag 0x4000 set
           // above) with the file index at bit 10 of code[1], or source 0 encoded
           // directly at bit 20
   1469    if (f->indirect) {
   1470       if (code[0] & 0x4000) {
   1471          assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
   1472          setAddress16(i->src(0));
   1473          code[1] |= i->getSrc(0)->reg.fileIndex << 10;
   1474          if (f->op == OP_BRA)
   1475             srcId(f->src(0).getIndirect(0), 20);
   1476       } else {
   1477          srcId(f, 0, 20);
   1478       }
   1479    }
   1480 
   1481    if (f->op == OP_CALL) {
   1482       if (f->indirect) {
   1483          // nothing
   1484       } else
   1485       if (f->builtin) {
                 // builtin calls are absolute; the address is filled in through
                 // two relocations, one per instruction word
   1486          assert(f->absolute);
   1487          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
   1488          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
   1489          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
   1490       } else {
                 // relative call: offset measured from the end of this 8-byte
                 // instruction; low 6 bits go to code[0] bits 26..31, the next
                 // 18 bits to the low bits of code[1]
   1491          assert(!f->absolute);
   1492          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
   1493          code[0] |= (pcRel & 0x3f) << 26;
   1494          code[1] |= (pcRel >> 6) & 0x3ffff;
   1495       }
   1496    } else
   1497    if (mask & 2) {
   1498       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
              // with issue delays enabled, a target whose position is a multiple
              // of 0x40 is bumped by 8 bytes (presumably to skip a scheduling
              // word at that position - confirm)
   1499       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
   1500          pcRel += 8;
   1501       // currently we don't want absolute branches
   1502       assert(!f->absolute);
   1503       code[0] |= (pcRel & 0x3f) << 26;
   1504       code[1] |= (pcRel >> 6) & 0x3ffff;
   1505    }
   1506 }
   1507 
   1508 void
   1509 CodeEmitterNVC0::emitBAR(const Instruction *i)
   1510 {
           // Emits a barrier instruction. subOp selects the low opcode byte;
           // source 0 is the barrier id, source 1 the thread count, an optional
           // source 2 a predicate input, and up to two defs (one GPR, one
           // non-GPR) receive results.
   1511    Value *rDef = NULL, *pDef = NULL;
   1512 
   1513    switch (i->subOp) {
   1514    case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
   1515    case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
   1516    case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
   1517    case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
   1518    default:
              // any other subOp must be BAR_SYNC; it shares 0x04 with RED_POPC
   1519       code[0] = 0x04;
   1520       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
   1521       break;
   1522    }
   1523    code[1] = 0x50000000;
   1524 
           // pre-fill the GPR def field (code[0] bits 14..19) and the predicate def
           // field (code[1] bits 21..23) with all ones; they are cleared and
           // replaced at the end if the corresponding def exists
   1525    code[0] |= 63 << 14;
   1526    code[1] |= 7 << 21;
   1527 
   1528    emitPredicate(i);
   1529 
   1530    // barrier id
           // NOTE(review): the immediate is shifted in without a range check; a
           // value >= 64 would spill into the thread-count field at bit 26.
   1531    if (i->src(0).getFile() == FILE_GPR) {
   1532       srcId(i->src(0), 20);
   1533    } else {
   1534       ImmediateValue *imm = i->getSrc(0)->asImm();
   1535       assert(imm);
   1536       code[0] |= imm->reg.data.u32 << 20;
   1537       code[1] |= 0x8000;
   1538    }
   1539 
   1540    // thread count
           // immediate form: at most 12 bits, split between code[0] bits 26..31
           // (low 6 bits) and the low bits of code[1]
   1541    if (i->src(1).getFile() == FILE_GPR) {
   1542       srcId(i->src(1), 26);
   1543    } else {
   1544       ImmediateValue *imm = i->getSrc(1)->asImm();
   1545       assert(imm);
   1546       assert(imm->reg.data.u32 <= 0xfff);
   1547       code[0] |= imm->reg.data.u32 << 26;
   1548       code[1] |= imm->reg.data.u32 >> 6;
   1549       code[1] |= 0x4000;
   1550    }
   1551 
           // optional source 2 (when it is not the instruction's own predicate)
           // goes to bit 17 of code[1], with bit 20 set for a NOT modifier;
           // without it the field is filled with 7
   1552    if (i->srcExists(2) && (i->predSrc != 2)) {
   1553       srcId(i->src(2), 32 + 17);
   1554       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
   1555          code[1] |= 1 << 20;
   1556    } else {
   1557       code[1] |= 7 << 17;
   1558    }
   1559 
           // sort the (up to two) defs into a GPR def and a non-GPR def,
           // regardless of their order
   1560    if (i->defExists(0)) {
   1561       if (i->def(0).getFile() == FILE_GPR)
   1562          rDef = i->getDef(0);
   1563       else
   1564          pDef = i->getDef(0);
   1565 
   1566       if (i->defExists(1)) {
   1567          if (i->def(1).getFile() == FILE_GPR)
   1568             rDef = i->getDef(1);
   1569          else
   1570             pDef = i->getDef(1);
   1571       }
   1572    }
   1573    if (rDef) {
   1574       code[0] &= ~(63 << 14);
   1575       defId(rDef, 14);
   1576    }
   1577    if (pDef) {
   1578       code[1] &= ~(7 << 21);
   1579       defId(pDef, 32 + 21);
   1580    }
   1581 }
   1582 
   1583 void
   1584 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
   1585 {
   1586    code[0] = 0x00000006;
   1587    code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
   1588 
   1589    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
   1590       code[0] |= 0x200;
   1591 
   1592    emitPredicate(i);
   1593 
   1594    defId(i->def(0), 14);
   1595    srcId(i->src(0).getIndirect(0), 20);
   1596 }
   1597 
   1598 void
   1599 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
   1600 {
   1601    uint32_t prim = i->src(0).get()->reg.data.u32;
   1602 
   1603    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
   1604    code[1] = 0x00000000 | (prim >> 6);
   1605 
   1606    emitPredicate(i);
   1607 
   1608    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
   1609 
   1610    defId(i->def(0), 14);
   1611    srcId(i, src1, 20);
   1612 }
   1613 
   1614 void
   1615 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
   1616 {
   1617    code[0] = 0x00000006;
   1618    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
   1619 
   1620    if (i->perPatch)
   1621       code[0] |= 0x100;
   1622    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
   1623       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
   1624 
   1625    emitPredicate(i);
   1626 
   1627    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
   1628 
   1629    defId(i->def(0), 14);
   1630    srcId(i->src(0).getIndirect(0), 20);
   1631    srcId(i->src(0).getIndirect(1), 26); // vertex address
   1632 }
   1633 
   1634 void
   1635 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
   1636 {
   1637    unsigned int size = typeSizeof(i->dType);
   1638 
   1639    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
   1640    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
   1641 
   1642    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
   1643 
   1644    if (i->perPatch)
   1645       code[0] |= 0x100;
   1646 
   1647    emitPredicate(i);
   1648 
   1649    assert(i->src(1).getFile() == FILE_GPR);
   1650 
   1651    srcId(i->src(0).getIndirect(0), 20);
   1652    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
   1653    srcId(i->src(1), 26);
   1654 }
   1655 
   1656 void
   1657 CodeEmitterNVC0::emitOUT(const Instruction *i)
   1658 {
   1659    code[0] = 0x00000006;
   1660    code[1] = 0x1c000000;
   1661 
   1662    emitPredicate(i);
   1663 
   1664    defId(i->def(0), 14); // new secret address
   1665    srcId(i->src(0), 20); // old secret address, should be 0 initially
   1666 
   1667    assert(i->src(0).getFile() == FILE_GPR);
   1668 
   1669    if (i->op == OP_EMIT)
   1670       code[0] |= 1 << 5;
   1671    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
   1672       code[0] |= 1 << 6;
   1673 
   1674    // vertex stream
   1675    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1676       unsigned int stream = SDATA(i->src(1)).u32;
   1677       assert(stream < 4);
   1678       if (stream) {
   1679          code[1] |= 0xc000;
   1680          code[0] |= stream << 26;
   1681       } else {
   1682          srcId(NULL, 26);
   1683       }
   1684    } else {
   1685       srcId(i->src(1), 26);
   1686    }
   1687 }
   1688 
   1689 void
   1690 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
   1691 {
   1692    if (i->encSize == 8) {
   1693       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
   1694    } else {
   1695       if (i->getInterpMode() == NV50_IR_INTERP_SC)
   1696          code[0] |= 0x80;
   1697       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
   1698    }
   1699 }
   1700 
   1701 static void
   1702 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
   1703 {
   1704    int ipa = entry->ipa;
   1705    int reg = entry->reg;
   1706    int loc = entry->loc;
   1707 
   1708    if (data.flatshade &&
   1709        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
   1710       ipa = NV50_IR_INTERP_FLAT;
   1711       reg = 0x3f;
   1712    } else if (data.force_persample_interp &&
   1713               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
   1714               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
   1715       ipa |= NV50_IR_INTERP_CENTROID;
   1716    }
   1717    code[loc + 0] &= ~(0xf << 6);
   1718    code[loc + 0] |= ipa << 6;
   1719    code[loc + 0] &= ~(0x3f << 26);
   1720    code[loc + 0] |= reg << 26;
   1721 }
   1722 
   1723 void
   1724 CodeEmitterNVC0::emitINTERP(const Instruction *i)
   1725 {
   1726    const uint32_t base = i->getSrc(0)->reg.data.offset;
   1727 
   1728    if (i->encSize == 8) {
   1729       code[0] = 0x00000000;
   1730       code[1] = 0xc0000000 | (base & 0xffff);
   1731 
   1732       if (i->saturate)
   1733          code[0] |= 1 << 5;
   1734 
   1735       if (i->op == OP_PINTERP) {
   1736          srcId(i->src(1), 26);
   1737          addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
   1738       } else {
   1739          code[0] |= 0x3f << 26;
   1740          addInterp(i->ipa, 0x3f, interpApply);
   1741       }
   1742 
   1743       srcId(i->src(0).getIndirect(0), 20);
   1744    } else {
   1745       assert(i->op == OP_PINTERP);
   1746       code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
   1747       srcId(i->src(1), 20);
   1748    }
   1749    emitInterpMode(i);
   1750 
   1751    emitPredicate(i);
   1752    defId(i->def(0), 14);
   1753 
   1754    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
   1755       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
   1756    else
   1757       code[1] |= 0x3f << 17;
   1758 }
   1759 
   1760 void
   1761 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
   1762 {
   1763    uint8_t val;
   1764 
   1765    switch (ty) {
   1766    case TYPE_U8:
   1767       val = 0x00;
   1768       break;
   1769    case TYPE_S8:
   1770       val = 0x20;
   1771       break;
   1772    case TYPE_F16:
   1773    case TYPE_U16:
   1774       val = 0x40;
   1775       break;
   1776    case TYPE_S16:
   1777       val = 0x60;
   1778       break;
   1779    case TYPE_F32:
   1780    case TYPE_U32:
   1781    case TYPE_S32:
   1782       val = 0x80;
   1783       break;
   1784    case TYPE_F64:
   1785    case TYPE_U64:
   1786    case TYPE_S64:
   1787       val = 0xa0;
   1788       break;
   1789    case TYPE_B128:
   1790       val = 0xc0;
   1791       break;
   1792    default:
   1793       val = 0x80;
   1794       assert(!"invalid type");
   1795       break;
   1796    }
   1797    code[0] |= val;
   1798 }
   1799 
   1800 void
   1801 CodeEmitterNVC0::emitCachingMode(CacheMode c)
   1802 {
   1803    uint32_t val;
   1804 
   1805    switch (c) {
   1806    case CACHE_CA:
   1807 // case CACHE_WB:
   1808       val = 0x000;
   1809       break;
   1810    case CACHE_CG:
   1811       val = 0x100;
   1812       break;
   1813    case CACHE_CS:
   1814       val = 0x200;
   1815       break;
   1816    case CACHE_CV:
   1817 // case CACHE_WT:
   1818       val = 0x300;
   1819       break;
   1820    default:
   1821       val = 0;
   1822       assert(!"invalid caching mode");
   1823       break;
   1824    }
   1825    code[0] |= val;
   1826 }
   1827 
   1828 static inline bool
   1829 uses64bitAddress(const Instruction *ldst)
   1830 {
   1831    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
   1832       ldst->src(0).isIndirect(0) &&
   1833       ldst->getIndirect(0, 0)->reg.size == 8;
   1834 }
   1835 
   1836 void
   1837 CodeEmitterNVC0::emitSTORE(const Instruction *i)
   1838 {
   1839    uint32_t opc;
   1840 
   1841    switch (i->src(0).getFile()) {
   1842    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
   1843    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
   1844    case FILE_MEMORY_SHARED:
   1845       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
   1846          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
   1847             opc = 0xb8000000;
   1848          else
   1849             opc = 0xcc000000;
   1850       } else {
   1851          opc = 0xc9000000;
   1852       }
   1853       break;
   1854    default:
   1855       assert(!"invalid memory file");
   1856       opc = 0;
   1857       break;
   1858    }
   1859    code[0] = 0x00000005;
   1860    code[1] = opc;
   1861 
   1862    if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
   1863       // Unlocked store on shared memory can fail.
   1864       if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
   1865           i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
   1866          assert(i->defExists(0));
   1867          defId(i->def(0), 8);
   1868       }
   1869    }
   1870 
   1871    setAddressByFile(i->src(0));
   1872    srcId(i->src(1), 14);
   1873    srcId(i->src(0).getIndirect(0), 20);
   1874    if (uses64bitAddress(i))
   1875       code[1] |= 1 << 26;
   1876 
   1877    emitPredicate(i);
   1878 
   1879    emitLoadStoreType(i->dType);
   1880    emitCachingMode(i->cache);
   1881 }
   1882 
   1883 void
   1884 CodeEmitterNVC0::emitLOAD(const Instruction *i)
   1885 {
           // Emits a load from global, local, shared or constant memory. The
           // opcode depends on the memory file of source 0 and, for shared memory,
           // on the LOAD_LOCKED sub-operation and the chipset.
   1886    uint32_t opc;
   1887 
   1888    code[0] = 0x00000005;
   1889 
   1890    switch (i->src(0).getFile()) {
   1891    case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
   1892    case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
   1893    case FILE_MEMORY_SHARED:
   1894       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
   1895          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
   1896             opc = 0xa8000000;
   1897          else
   1898             opc = 0xc4000000;
   1899       } else {
   1900          opc = 0xc1000000;
   1901       }
   1902       break;
   1903    case FILE_MEMORY_CONST:
              // a direct (non-indirect) 4-byte constant load is handed to
              // emitMOV() instead; nothing else in this function runs for it
   1904       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
   1905          emitMOV(i); // not sure if this is any better
   1906          return;
   1907       }
              // other constant loads: file index at bit 10 of code[1], subOp at
              // bit 8 of code[0], and a different low opcode (0x6 instead of 0x5)
   1908       opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
   1909       code[0] = 0x00000006 | (i->subOp << 8);
   1910       break;
   1911    default:
   1912       assert(!"invalid memory file");
   1913       opc = 0;
   1914       break;
   1915    }
   1916    code[1] = opc;
   1917 
           // r / p: index of the def that receives the loaded value / the
           // predicate result (-1 = none). Only locked shared loads produce a
           // predicate; it is either the sole def or the second one.
   1918    int r = 0, p = -1;
   1919    if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
   1920       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
   1921          if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
   1922             r = -1;
   1923             p = 0;
   1924          } else if (i->defExists(1)) { // r, p
   1925             p = 1;
   1926          } else {
   1927             assert(!"Expected predicate dest for load locked");
   1928          }
   1929       }
   1930    }
   1931 
           // without a value def the field at bit 14 is filled with 63
   1932    if (r >= 0)
   1933       defId(i->def(r), 14);
   1934    else
   1935       code[0] |= 63 << 14;
   1936 
           // the predicate def sits at bit 8 of code[0] on GK104 and later, at
           // bit 18 of code[1] on earlier chipsets
   1937    if (p >= 0) {
   1938       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
   1939          defId(i->def(p), 8);
   1940       else
   1941          defId(i->def(p), 32 + 18);
   1942    }
   1943 
   1944    setAddressByFile(i->src(0));
   1945    srcId(i->src(0).getIndirect(0), 20);
   1946    if (uses64bitAddress(i))
   1947       code[1] |= 1 << 26;
   1948 
   1949    emitPredicate(i);
   1950 
   1951    emitLoadStoreType(i->dType);
   1952    emitCachingMode(i->cache);
   1953 }
   1954 
   1955 uint8_t
   1956 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
   1957 {
   1958    switch (SDATA(ref).sv.sv) {
   1959    case SV_LANEID:        return 0x00;
   1960    case SV_PHYSID:        return 0x03;
   1961    case SV_VERTEX_COUNT:  return 0x10;
   1962    case SV_INVOCATION_ID: return 0x11;
   1963    case SV_YDIR:          return 0x12;
   1964    case SV_THREAD_KILL:   return 0x13;
   1965    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
   1966    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
   1967    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
   1968    case SV_GRIDID:        return 0x2c;
   1969    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
   1970    case SV_LBASE:         return 0x34;
   1971    case SV_SBASE:         return 0x30;
   1972    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
   1973    default:
   1974       assert(!"no sreg for system value");
   1975       return 0;
   1976    }
   1977 }
   1978 
   1979 void
   1980 CodeEmitterNVC0::emitMOV(const Instruction *i)
   1981 {
   1982    if (i->def(0).getFile() == FILE_PREDICATE) {
   1983       if (i->src(0).getFile() == FILE_GPR) {
   1984          code[0] = 0xfc01c003;
   1985          code[1] = 0x1a8e0000;
   1986          srcId(i->src(0), 20);
   1987       } else {
   1988          code[0] = 0x0001c004;
   1989          code[1] = 0x0c0e0000;
   1990          if (i->src(0).getFile() == FILE_IMMEDIATE) {
   1991             code[0] |= 7 << 20;
   1992             if (!i->getSrc(0)->reg.data.u32)
   1993                code[0] |= 1 << 23;
   1994          } else {
   1995             srcId(i->src(0), 20);
   1996          }
   1997       }
   1998       defId(i->def(0), 17);
   1999       emitPredicate(i);
   2000    } else
   2001    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
   2002       uint8_t sr = getSRegEncoding(i->src(0));
   2003 
   2004       if (i->encSize == 8) {
   2005          code[0] = 0x00000004 | (sr << 26);
   2006          code[1] = 0x2c000000;
   2007       } else {
   2008          code[0] = 0x40000008 | (sr << 20);
   2009       }
   2010       defId(i->def(0), 14);
   2011 
   2012       emitPredicate(i);
   2013    } else
   2014    if (i->encSize == 8) {
   2015       uint64_t opc;
   2016 
   2017       if (i->src(0).getFile() == FILE_IMMEDIATE)
   2018          opc = HEX64(18000000, 000001e2);
   2019       else
   2020       if (i->src(0).getFile() == FILE_PREDICATE)
   2021          opc = HEX64(080e0000, 1c000004);
   2022       else
   2023          opc = HEX64(28000000, 00000004);
   2024 
   2025       if (i->src(0).getFile() != FILE_PREDICATE)
   2026          opc |= i->lanes << 5;
   2027 
   2028       emitForm_B(i, opc);
   2029 
   2030       // Explicitly emit the predicate source as emitForm_B skips it.
   2031       if (i->src(0).getFile() == FILE_PREDICATE)
   2032          srcId(i->src(0), 20);
   2033    } else {
   2034       uint32_t imm;
   2035 
   2036       if (i->src(0).getFile() == FILE_IMMEDIATE) {
   2037          imm = SDATA(i->src(0)).u32;
   2038          if (imm & 0xfff00000) {
   2039             assert(!(imm & 0x000fffff));
   2040             code[0] = 0x00000318 | imm;
   2041          } else {
   2042             assert(imm < 0x800 || ((int32_t)imm >= -0x800));
   2043             code[0] = 0x00000118 | (imm << 20);
   2044          }
   2045       } else {
   2046          code[0] = 0x0028;
   2047          emitShortSrc2(i->src(0));
   2048       }
   2049       defId(i->def(0), 14);
   2050 
   2051       emitPredicate(i);
   2052    }
   2053 }
   2054 
   2055 void
   2056 CodeEmitterNVC0::emitATOM(const Instruction *i)
   2057 {
   2058    const bool hasDst = i->defExists(0);
   2059    const bool casOrExch =
   2060       i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
   2061       i->subOp == NV50_IR_SUBOP_ATOM_CAS;
   2062 
   2063    if (i->dType == TYPE_U64) {
   2064       switch (i->subOp) {
   2065       case NV50_IR_SUBOP_ATOM_ADD:
   2066          code[0] = 0x205;
   2067          if (hasDst)
   2068             code[1] = 0x507e0000;
   2069          else
   2070             code[1] = 0x10000000;
   2071          break;
   2072       case NV50_IR_SUBOP_ATOM_EXCH:
   2073          code[0] = 0x305;
   2074          code[1] = 0x507e0000;
   2075          break;
   2076       case NV50_IR_SUBOP_ATOM_CAS:
   2077          code[0] = 0x325;
   2078          code[1] = 0x50000000;
   2079          break;
   2080       default:
   2081          assert(!"invalid u64 red op");
   2082          break;
   2083       }
   2084    } else
   2085    if (i->dType == TYPE_U32) {
   2086       switch (i->subOp) {
   2087       case NV50_IR_SUBOP_ATOM_EXCH:
   2088          code[0] = 0x105;
   2089          code[1] = 0x507e0000;
   2090          break;
   2091       case NV50_IR_SUBOP_ATOM_CAS:
   2092          code[0] = 0x125;
   2093          code[1] = 0x50000000;
   2094          break;
   2095       default:
   2096          code[0] = 0x5 | (i->subOp << 5);
   2097          if (hasDst)
   2098             code[1] = 0x507e0000;
   2099          else
   2100             code[1] = 0x10000000;
   2101          break;
   2102       }
   2103    } else
   2104    if (i->dType == TYPE_S32) {
   2105       assert(i->subOp <= 2);
   2106       code[0] = 0x205 | (i->subOp << 5);
   2107       if (hasDst)
   2108          code[1] = 0x587e0000;
   2109       else
   2110          code[1] = 0x18000000;
   2111    } else
   2112    if (i->dType == TYPE_F32) {
   2113       assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
   2114       code[0] = 0x205;
   2115       if (hasDst)
   2116          code[1] = 0x687e0000;
   2117       else
   2118          code[1] = 0x28000000;
   2119    }
   2120 
   2121    emitPredicate(i);
   2122 
   2123    srcId(i->src(1), 14);
   2124 
   2125    if (hasDst)
   2126       defId(i->def(0), 32 + 11);
   2127    else
   2128    if (casOrExch)
   2129       code[1] |= 63 << 11;
   2130 
   2131    if (hasDst || casOrExch) {
   2132       const int32_t offset = SDATA(i->src(0)).offset;
   2133       assert(offset < 0x80000 && offset >= -0x80000);
   2134       code[0] |= offset << 26;
   2135       code[1] |= (offset & 0x1ffc0) >> 6;
   2136       code[1] |= (offset & 0xe0000) << 6;
   2137    } else {
   2138       srcAddr32(i->src(0), 26, 0);
   2139    }
   2140    if (i->getIndirect(0, 0)) {
   2141       srcId(i->getIndirect(0, 0), 20);
   2142       if (i->getIndirect(0, 0)->reg.size == 8)
   2143          code[1] |= 1 << 26;
   2144    } else {
   2145       code[0] |= 63 << 20;
   2146    }
   2147 
   2148    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
   2149       assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
   2150       code[1] |= (SDATA(i->src(1)).id + 1) << 17;
   2151    }
   2152 }
   2153 
   2154 void
   2155 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
   2156 {
   2157    switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
   2158    case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
   2159    case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
   2160    default:
   2161       code[0] = 0x45;
   2162       assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
   2163       break;
   2164    }
   2165    code[1] = 0xe0000000;
   2166 
   2167    emitPredicate(i);
   2168 }
   2169 
   2170 void
   2171 CodeEmitterNVC0::emitCCTL(const Instruction *i)
   2172 {
   2173    code[0] = 0x00000005 | (i->subOp << 5);
   2174 
   2175    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
   2176       code[1] = 0x98000000;
   2177       srcAddr32(i->src(0), 28, 2);
   2178    } else {
   2179       code[1] = 0xd0000000;
   2180       setAddress24(i->src(0));
   2181    }
   2182    if (uses64bitAddress(i))
   2183       code[1] |= 1 << 26;
   2184    srcId(i->src(0).getIndirect(0), 20);
   2185 
   2186    emitPredicate(i);
   2187 
   2188    defId(i, 0, 14);
   2189 }
   2190 
   2191 void
   2192 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
   2193 {
   2194    uint8_t m;
   2195    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
   2196    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
   2197    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
   2198    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
   2199    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
   2200    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
   2201    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
   2202    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
   2203    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
   2204    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
   2205    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
   2206    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
   2207    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
   2208    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
   2209    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
   2210    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
   2211    default:
   2212       return;
   2213    }
   2214    code[0] |= m << 5;
   2215    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
   2216       code[1] |= 1 << 16;
   2217 }
   2218 
   2219 void
   2220 CodeEmitterNVC0::emitSUCalc(Instruction *i)
   2221 {
   2222    ImmediateValue *imm = NULL;
   2223    uint64_t opc;
   2224 
   2225    if (i->srcExists(2)) {
   2226       imm = i->getSrc(2)->asImm();
   2227       if (imm)
   2228          i->setSrc(2, NULL); // special case, make emitForm_A not assert
   2229    }
   2230 
   2231    switch (i->op) {
   2232    case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
   2233    case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
   2234    case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
   2235    default:
   2236       assert(0);
   2237       return;
   2238    }
   2239    emitForm_A(i, opc);
   2240 
   2241    if (i->op == OP_SUCLAMP) {
   2242       if (i->dType == TYPE_S32)
   2243          code[0] |= 1 << 9;
   2244       emitSUCLAMPMode(i->subOp);
   2245    }
   2246 
   2247    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
   2248          code[1] |= 1 << 16;
   2249 
   2250    if (i->op != OP_SUEAU) {
   2251       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
   2252          code[0] |= 63 << 14;
   2253          code[1] |= i->getDef(0)->reg.data.id << 23;
   2254       } else
   2255       if (i->defExists(1)) { // r, p
   2256          assert(i->def(1).getFile() == FILE_PREDICATE);
   2257          code[1] |= i->getDef(1)->reg.data.id << 23;
   2258       } else { // r, #
   2259          code[1] |= 7 << 23;
   2260       }
   2261    }
   2262    if (imm) {
   2263       assert(i->op == OP_SUCLAMP);
   2264       i->setSrc(2, imm);
   2265       code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
   2266    }
   2267 }
   2268 
   2269 void
   2270 CodeEmitterNVC0::emitSUGType(DataType ty)
   2271 {
   2272    switch (ty) {
   2273    case TYPE_S32: code[1] |= 1 << 13; break;
   2274    case TYPE_U8:  code[1] |= 2 << 13; break;
   2275    case TYPE_S8:  code[1] |= 3 << 13; break;
   2276    default:
   2277       assert(ty == TYPE_U32);
   2278       break;
   2279    }
   2280 }
   2281 
   2282 void
   2283 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
   2284 {
   2285    const uint32_t offset = i->getSrc(s)->reg.data.offset;
   2286 
   2287    assert(i->src(s).getFile() == FILE_MEMORY_CONST);
   2288    assert(offset == (offset & 0xfffc));
   2289 
   2290    code[1] |= 1 << 21;
   2291    code[0] |= offset << 24;
   2292    code[1] |= offset >> 8;
   2293    code[1] |= i->getSrc(s)->reg.fileIndex << 8;
   2294 }
   2295 
   2296 void
   2297 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
   2298 {
   2299    if (!i->srcExists(s) || (i->predSrc == s)) {
   2300       code[1] |= 0x7 << 17;
   2301    } else {
   2302       if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
   2303          code[1] |= 1 << 20;
   2304       srcId(i->src(s), 32 + 17);
   2305    }
   2306 }
   2307 
   2308 void
   2309 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
   2310 {
   2311    code[0] = 0x5;
   2312    code[1] = 0xd4000000 | (i->subOp << 15);
   2313 
   2314    emitLoadStoreType(i->dType);
   2315    emitSUGType(i->sType);
   2316    emitCachingMode(i->cache);
   2317 
   2318    emitPredicate(i);
   2319    defId(i->def(0), 14); // destination
   2320    srcId(i->src(0), 20); // address
   2321    // format
   2322    if (i->src(1).getFile() == FILE_GPR)
   2323       srcId(i->src(1), 26);
   2324    else
   2325       setSUConst16(i, 1);
   2326    setSUPred(i, 2);
   2327 }
   2328 
   2329 void
   2330 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
   2331 {
   2332    code[0] = 0x5;
   2333    code[1] = 0xdc000000 | (i->subOp << 15);
   2334 
   2335    if (i->op == OP_SUSTP)
   2336       code[1] |= i->tex.mask << 22;
   2337    else
   2338       emitLoadStoreType(i->dType);
   2339    emitSUGType(i->sType);
   2340    emitCachingMode(i->cache);
   2341 
   2342    emitPredicate(i);
   2343    srcId(i->src(0), 20); // address
   2344    // format
   2345    if (i->src(1).getFile() == FILE_GPR)
   2346       srcId(i->src(1), 26);
   2347    else
   2348       setSUConst16(i, 1);
   2349    srcId(i->src(3), 14); // values
   2350    setSUPred(i, 2);
   2351 }
   2352 
   2353 void
   2354 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
   2355 {
   2356    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
   2357 
   2358    if (i->tex.rIndirectSrc < 0) {
   2359       code[1] |= 0x00004000;
   2360       code[0] |= i->tex.r << 26;
   2361    } else {
   2362       srcId(i, i->tex.rIndirectSrc, 26);
   2363    }
   2364 }
   2365 
   2366 void
   2367 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
   2368 {
   2369    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
   2370 
   2371    code[1] |= (i->tex.target.getDim() - 1) << 12;
   2372    if (i->tex.target.isArray() || i->tex.target.isCube() ||
   2373        i->tex.target.getDim() == 3) {
   2374       // use e2d mode for 3-dim images, arrays and cubes.
   2375       code[1] |= 3 << 12;
   2376    }
   2377 
   2378    srcId(i->src(0), 20);
   2379 }
   2380 
   2381 void
   2382 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
   2383 {
   2384    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
   2385 
   2386    code[0] = 0x5;
   2387    code[1] = 0xf0000000;
   2388 
   2389    emitPredicate(i);
   2390    emitLoadStoreType(i->sType);
   2391 
   2392    defId(i->def(0), 14);
   2393 
   2394    if (i->defExists(1)) {
   2395       defId(i->def(1), 32 + 22);
   2396    } else {
   2397       code[1] |= 7 << 22;
   2398    }
   2399 
   2400    emitSUAddr(i);
   2401    emitSUDim(i);
   2402 }
   2403 
   2404 void
   2405 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
   2406 {
   2407    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
   2408 
   2409    code[0] = 0x5;
   2410    code[1] = 0xd4000000 | (i->subOp << 15);
   2411 
   2412    emitPredicate(i);
   2413    emitLoadStoreType(i->dType);
   2414 
   2415    defId(i->def(0), 14);
   2416 
   2417    emitCachingMode(i->cache);
   2418    emitSUAddr(i);
   2419    emitSUDim(i);
   2420 }
   2421 
   2422 void
   2423 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
   2424 {
   2425    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
   2426 
   2427    code[0] = 0x5;
   2428    code[1] = 0xdc000000 | (i->subOp << 15);
   2429 
   2430    if (i->op == OP_SUSTP)
   2431       code[1] |= i->tex.mask << 17;
   2432    else
   2433       emitLoadStoreType(i->dType);
   2434 
   2435    emitPredicate(i);
   2436 
   2437    srcId(i->src(1), 14);
   2438 
   2439    emitCachingMode(i->cache);
   2440    emitSUAddr(i);
   2441    emitSUDim(i);
   2442 }
   2443 
   2444 void
   2445 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
   2446 {
   2447    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
   2448    case 0:
   2449       code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
   2450       code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
   2451       code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
   2452       code[1] |= (i->subOp & 0x3c00) << 13; // vdst
   2453       break;
   2454    case 1:
   2455       code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
   2456       code[1] |= (i->subOp & 0x0010) << 11; // v2src1
   2457       code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
   2458       code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
   2459       code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
   2460       code[1] |= (i->mask & 0x3) << 2;
   2461       break;
   2462    case 2:
   2463       code[1] |= (i->subOp & 0x000f) << 8; // v4src1
   2464       code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
   2465       code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
   2466       code[1] |= (i->mask & 0x3) << 2;
   2467       code[1] |= (i->mask & 0xc) << 21;
   2468       break;
   2469    default:
   2470       assert(0);
   2471       break;
   2472    }
   2473 }
   2474 
   2475 void
   2476 CodeEmitterNVC0::emitVSHL(const Instruction *i)
   2477 {
   2478    uint64_t opc = 0x4;
   2479 
   2480    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
   2481    case 0: opc |= 0xe8ULL << 56; break;
   2482    case 1: opc |= 0xb4ULL << 56; break;
   2483    case 2: opc |= 0x94ULL << 56; break;
   2484    default:
   2485       assert(0);
   2486       break;
   2487    }
   2488    if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
   2489       if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
   2490       if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
   2491    } else {
   2492       if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
   2493       if (isSignedType(i->sType)) opc |= 1 << 6;
   2494    }
   2495    emitForm_A(i, opc);
   2496    emitVectorSubOp(i);
   2497 
   2498    if (i->saturate)
   2499       code[0] |= 1 << 9;
   2500    if (i->flagsDef >= 0)
   2501       code[1] |= 1 << 16;
   2502 }
   2503 
   2504 void
   2505 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
   2506 {
   2507    assert(i->encSize == 8);
   2508    emitForm_A(i, HEX64(10000000, 00000006));
   2509    code[0] |= i->subOp << 5;
   2510    code[1] |= 0x00e00000;
   2511 }
   2512 
   2513 void
   2514 CodeEmitterNVC0::emitVOTE(const Instruction *i)
   2515 {
   2516    assert(i->src(0).getFile() == FILE_PREDICATE);
   2517 
   2518    code[0] = 0x00000004 | (i->subOp << 5);
   2519    code[1] = 0x48000000;
   2520 
   2521    emitPredicate(i);
   2522 
   2523    unsigned rp = 0;
   2524    for (int d = 0; i->defExists(d); d++) {
   2525       if (i->def(d).getFile() == FILE_PREDICATE) {
   2526          assert(!(rp & 2));
   2527          rp |= 2;
   2528          defId(i->def(d), 32 + 22);
   2529       } else if (i->def(d).getFile() == FILE_GPR) {
   2530          assert(!(rp & 1));
   2531          rp |= 1;
   2532          defId(i->def(d), 14);
   2533       } else {
   2534          assert(!"Unhandled def");
   2535       }
   2536    }
   2537    if (!(rp & 1))
   2538       code[0] |= 63 << 14;
   2539    if (!(rp & 2))
   2540       code[1] |= 7 << 22;
   2541    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
   2542       code[0] |= 1 << 23;
   2543    srcId(i->src(0), 20);
   2544 }
   2545 
   2546 bool
   2547 CodeEmitterNVC0::emitInstruction(Instruction *insn)
   2548 {
   2549    unsigned int size = insn->encSize;
   2550 
   2551    if (writeIssueDelays && !(codeSize & 0x3f))
   2552       size += 8;
   2553 
   2554    if (!insn->encSize) {
   2555       ERROR("skipping unencodable instruction: "); insn->print();
   2556       return false;
   2557    } else
   2558    if (codeSize + size > codeSizeLimit) {
   2559       ERROR("code emitter output buffer too small\n");
   2560       return false;
   2561    }
   2562 
   2563    if (writeIssueDelays) {
   2564       if (!(codeSize & 0x3f)) {
   2565          code[0] = 0x00000007; // cf issue delay "instruction"
   2566          code[1] = 0x20000000;
   2567          code += 2;
   2568          codeSize += 8;
   2569       }
   2570       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
   2571       uint32_t *data = code - (id * 2 + 2);
   2572       if (id <= 2) {
   2573          data[0] |= insn->sched << (id * 8 + 4);
   2574       } else
   2575       if (id == 3) {
   2576          data[0] |= insn->sched << 28;
   2577          data[1] |= insn->sched >> 4;
   2578       } else {
   2579          data[1] |= insn->sched << ((id - 4) * 8 + 4);
   2580       }
   2581    }
   2582 
   2583    // assert that instructions with multiple defs don't corrupt registers
   2584    for (int d = 0; insn->defExists(d); ++d)
   2585       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
   2586 
   2587    switch (insn->op) {
   2588    case OP_MOV:
   2589    case OP_RDSV:
   2590       emitMOV(insn);
   2591       break;
   2592    case OP_NOP:
   2593       break;
   2594    case OP_LOAD:
   2595       emitLOAD(insn);
   2596       break;
   2597    case OP_STORE:
   2598       emitSTORE(insn);
   2599       break;
   2600    case OP_LINTERP:
   2601    case OP_PINTERP:
   2602       emitINTERP(insn);
   2603       break;
   2604    case OP_VFETCH:
   2605       emitVFETCH(insn);
   2606       break;
   2607    case OP_EXPORT:
   2608       emitEXPORT(insn);
   2609       break;
   2610    case OP_PFETCH:
   2611       emitPFETCH(insn);
   2612       break;
   2613    case OP_AFETCH:
   2614       emitAFETCH(insn);
   2615       break;
   2616    case OP_EMIT:
   2617    case OP_RESTART:
   2618       emitOUT(insn);
   2619       break;
   2620    case OP_ADD:
   2621    case OP_SUB:
   2622       if (insn->dType == TYPE_F64)
   2623          emitDADD(insn);
   2624       else if (isFloatType(insn->dType))
   2625          emitFADD(insn);
   2626       else
   2627          emitUADD(insn);
   2628       break;
   2629    case OP_MUL:
   2630       if (insn->dType == TYPE_F64)
   2631          emitDMUL(insn);
   2632       else if (isFloatType(insn->dType))
   2633          emitFMUL(insn);
   2634       else
   2635          emitUMUL(insn);
   2636       break;
   2637    case OP_MAD:
   2638    case OP_FMA:
   2639       if (insn->dType == TYPE_F64)
   2640          emitDMAD(insn);
   2641       else if (isFloatType(insn->dType))
   2642          emitFMAD(insn);
   2643       else
   2644          emitIMAD(insn);
   2645       break;
   2646    case OP_SAD:
   2647       emitISAD(insn);
   2648       break;
   2649    case OP_SHLADD:
   2650       emitSHLADD(insn);
   2651       break;
   2652    case OP_NOT:
   2653       emitNOT(insn);
   2654       break;
   2655    case OP_AND:
   2656       emitLogicOp(insn, 0);
   2657       break;
   2658    case OP_OR:
   2659       emitLogicOp(insn, 1);
   2660       break;
   2661    case OP_XOR:
   2662       emitLogicOp(insn, 2);
   2663       break;
   2664    case OP_SHL:
   2665    case OP_SHR:
   2666       emitShift(insn);
   2667       break;
   2668    case OP_SET:
   2669    case OP_SET_AND:
   2670    case OP_SET_OR:
   2671    case OP_SET_XOR:
   2672       emitSET(insn->asCmp());
   2673       break;
   2674    case OP_SELP:
   2675       emitSELP(insn);
   2676       break;
   2677    case OP_SLCT:
   2678       emitSLCT(insn->asCmp());
   2679       break;
   2680    case OP_MIN:
   2681    case OP_MAX:
   2682       emitMINMAX(insn);
   2683       break;
   2684    case OP_ABS:
   2685    case OP_NEG:
   2686    case OP_CEIL:
   2687    case OP_FLOOR:
   2688    case OP_TRUNC:
   2689    case OP_SAT:
   2690       emitCVT(insn);
   2691       break;
   2692    case OP_CVT:
   2693       if (insn->def(0).getFile() == FILE_PREDICATE ||
   2694           insn->src(0).getFile() == FILE_PREDICATE)
   2695          emitMOV(insn);
   2696       else
   2697          emitCVT(insn);
   2698       break;
   2699    case OP_RSQ:
   2700       emitSFnOp(insn, 5 + 2 * insn->subOp);
   2701       break;
   2702    case OP_RCP:
   2703       emitSFnOp(insn, 4 + 2 * insn->subOp);
   2704       break;
   2705    case OP_LG2:
   2706       emitSFnOp(insn, 3);
   2707       break;
   2708    case OP_EX2:
   2709       emitSFnOp(insn, 2);
   2710       break;
   2711    case OP_SIN:
   2712       emitSFnOp(insn, 1);
   2713       break;
   2714    case OP_COS:
   2715       emitSFnOp(insn, 0);
   2716       break;
   2717    case OP_PRESIN:
   2718    case OP_PREEX2:
   2719       emitPreOp(insn);
   2720       break;
   2721    case OP_TEX:
   2722    case OP_TXB:
   2723    case OP_TXL:
   2724    case OP_TXD:
   2725    case OP_TXF:
   2726    case OP_TXG:
   2727    case OP_TXLQ:
   2728       emitTEX(insn->asTex());
   2729       break;
   2730    case OP_TXQ:
   2731       emitTXQ(insn->asTex());
   2732       break;
   2733    case OP_TEXBAR:
   2734       emitTEXBAR(insn);
   2735       break;
   2736    case OP_SUBFM:
   2737    case OP_SUCLAMP:
   2738    case OP_SUEAU:
   2739       emitSUCalc(insn);
   2740       break;
   2741    case OP_MADSP:
   2742       emitMADSP(insn);
   2743       break;
   2744    case OP_SULDB:
   2745       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
   2746          emitSULDGB(insn->asTex());
   2747       else
   2748          emitSULDB(insn->asTex());
   2749       break;
   2750    case OP_SUSTB:
   2751    case OP_SUSTP:
   2752       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
   2753          emitSUSTGx(insn->asTex());
   2754       else
   2755          emitSUSTx(insn->asTex());
   2756       break;
   2757    case OP_SULEA:
   2758       emitSULEA(insn->asTex());
   2759       break;
   2760    case OP_ATOM:
   2761       emitATOM(insn);
   2762       break;
   2763    case OP_BRA:
   2764    case OP_CALL:
   2765    case OP_PRERET:
   2766    case OP_RET:
   2767    case OP_DISCARD:
   2768    case OP_EXIT:
   2769    case OP_PRECONT:
   2770    case OP_CONT:
   2771    case OP_PREBREAK:
   2772    case OP_BREAK:
   2773    case OP_JOINAT:
   2774    case OP_BRKPT:
   2775    case OP_QUADON:
   2776    case OP_QUADPOP:
   2777       emitFlow(insn);
   2778       break;
   2779    case OP_QUADOP:
   2780       emitQUADOP(insn, insn->subOp, insn->lanes);
   2781       break;
   2782    case OP_DFDX:
   2783       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
   2784       break;
   2785    case OP_DFDY:
   2786       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
   2787       break;
   2788    case OP_POPCNT:
   2789       emitPOPC(insn);
   2790       break;
   2791    case OP_INSBF:
   2792       emitINSBF(insn);
   2793       break;
   2794    case OP_EXTBF:
   2795       emitEXTBF(insn);
   2796       break;
   2797    case OP_BFIND:
   2798       emitBFIND(insn);
   2799       break;
   2800    case OP_PERMT:
   2801       emitPERMT(insn);
   2802       break;
   2803    case OP_JOIN:
   2804       emitNOP(insn);
   2805       insn->join = 1;
   2806       break;
   2807    case OP_BAR:
   2808       emitBAR(insn);
   2809       break;
   2810    case OP_MEMBAR:
   2811       emitMEMBAR(insn);
   2812       break;
   2813    case OP_CCTL:
   2814       emitCCTL(insn);
   2815       break;
   2816    case OP_VSHL:
   2817       emitVSHL(insn);
   2818       break;
   2819    case OP_PIXLD:
   2820       emitPIXLD(insn);
   2821       break;
   2822    case OP_VOTE:
   2823       emitVOTE(insn);
   2824       break;
   2825    case OP_PHI:
   2826    case OP_UNION:
   2827    case OP_CONSTRAINT:
   2828       ERROR("operation should have been eliminated");
   2829       return false;
   2830    case OP_EXP:
   2831    case OP_LOG:
   2832    case OP_SQRT:
   2833    case OP_POW:
   2834       ERROR("operation should have been lowered\n");
   2835       return false;
   2836    default:
   2837       ERROR("unknown op: %u\n", insn->op);
   2838       return false;
   2839    }
   2840 
   2841    if (insn->join) {
   2842       code[0] |= 0x10;
   2843       assert(insn->encSize == 8);
   2844    }
   2845 
   2846    code += insn->encSize / 4;
   2847    codeSize += insn->encSize;
   2848    return true;
   2849 }
   2850 
   2851 uint32_t
   2852 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
   2853 {
   2854    const Target::OpInfo &info = targ->getOpInfo(i);
   2855 
   2856    if (writeIssueDelays || info.minEncSize == 8 || 1)
   2857       return 8;
   2858 
   2859    if (i->ftz || i->saturate || i->join)
   2860       return 8;
   2861    if (i->rnd != ROUND_N)
   2862       return 8;
   2863    if (i->predSrc >= 0 && i->op == OP_MAD)
   2864       return 8;
   2865 
   2866    if (i->op == OP_PINTERP) {
   2867       if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
   2868          return 8;
   2869    } else
   2870    if (i->op == OP_MOV && i->lanes != 0xf) {
   2871       return 8;
   2872    }
   2873 
   2874    for (int s = 0; i->srcExists(s); ++s) {
   2875       if (i->src(s).isIndirect(0))
   2876          return 8;
   2877 
   2878       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
   2879          if (SDATA(i->src(s)).offset >= 0x100)
   2880             return 8;
   2881          if (i->getSrc(s)->reg.fileIndex > 1 &&
   2882              i->getSrc(s)->reg.fileIndex != 16)
   2883              return 8;
   2884       } else
   2885       if (i->src(s).getFile() == FILE_IMMEDIATE) {
   2886          if (i->dType == TYPE_F32) {
   2887             if (SDATA(i->src(s)).u32 >= 0x100)
   2888                return 8;
   2889          } else {
   2890             if (SDATA(i->src(s)).u32 > 0xff)
   2891                return 8;
   2892          }
   2893       }
   2894 
   2895       if (i->op == OP_CVT)
   2896          continue;
   2897       if (i->src(s).mod != Modifier(0)) {
   2898          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
   2899             if (i->op != OP_RSQ)
   2900                return 8;
   2901          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
   2902             if (i->op != OP_ADD || s != 0)
   2903                return 8;
   2904       }
   2905    }
   2906 
   2907    return 4;
   2908 }
   2909 
   2910 // Simplified, erring on safe side.
   2911 class SchedDataCalculator : public Pass
   2912 {
   2913 public:
   2914    SchedDataCalculator(const Target *targ) : targ(targ) { }
   2915 
   2916 private:
   2917    struct RegScores
   2918    {
   2919       struct Resource {
   2920          int st[DATA_FILE_COUNT]; // LD to LD delay 3
   2921          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
   2922          int tex; // TEX to non-TEX delay 17 (0x11)
   2923          int sfu; // SFU to SFU delay 3 (except PRE-ops)
   2924          int imul; // integer MUL to MUL delay 3
   2925       } res;
   2926       struct ScoreData {
   2927          int r[256];
   2928          int p[8];
   2929          int c;
   2930       } rd, wr;
   2931       int base;
   2932       int regs;
   2933 
   2934       void rebase(const int base)
   2935       {
   2936          const int delta = this->base - base;
   2937          if (!delta)
   2938             return;
   2939          this->base = 0;
   2940 
   2941          for (int i = 0; i < regs; ++i) {
   2942             rd.r[i] += delta;
   2943             wr.r[i] += delta;
   2944          }
   2945          for (int i = 0; i < 8; ++i) {
   2946             rd.p[i] += delta;
   2947             wr.p[i] += delta;
   2948          }
   2949          rd.c += delta;
   2950          wr.c += delta;
   2951 
   2952          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   2953             res.ld[f] += delta;
   2954             res.st[f] += delta;
   2955          }
   2956          res.sfu += delta;
   2957          res.imul += delta;
   2958          res.tex += delta;
   2959       }
   2960       void wipe(int regs)
   2961       {
   2962          memset(&rd, 0, sizeof(rd));
   2963          memset(&wr, 0, sizeof(wr));
   2964          memset(&res, 0, sizeof(res));
   2965          this->regs = regs;
   2966       }
   2967       int getLatest(const ScoreData& d) const
   2968       {
   2969          int max = 0;
   2970          for (int i = 0; i < regs; ++i)
   2971             if (d.r[i] > max)
   2972                max = d.r[i];
   2973          for (int i = 0; i < 8; ++i)
   2974             if (d.p[i] > max)
   2975                max = d.p[i];
   2976          if (d.c > max)
   2977             max = d.c;
   2978          return max;
   2979       }
   2980       inline int getLatestRd() const
   2981       {
   2982          return getLatest(rd);
   2983       }
   2984       inline int getLatestWr() const
   2985       {
   2986          return getLatest(wr);
   2987       }
   2988       inline int getLatest() const
   2989       {
   2990          const int a = getLatestRd();
   2991          const int b = getLatestWr();
   2992 
   2993          int max = MAX2(a, b);
   2994          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   2995             max = MAX2(res.ld[f], max);
   2996             max = MAX2(res.st[f], max);
   2997          }
   2998          max = MAX2(res.sfu, max);
   2999          max = MAX2(res.imul, max);
   3000          max = MAX2(res.tex, max);
   3001          return max;
   3002       }
   3003       void setMax(const RegScores *that)
   3004       {
   3005          for (int i = 0; i < regs; ++i) {
   3006             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
   3007             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
   3008          }
   3009          for (int i = 0; i < 8; ++i) {
   3010             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
   3011             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
   3012          }
   3013          rd.c = MAX2(rd.c, that->rd.c);
   3014          wr.c = MAX2(wr.c, that->wr.c);
   3015 
   3016          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   3017             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
   3018             res.st[f] = MAX2(res.st[f], that->res.st[f]);
   3019          }
   3020          res.sfu = MAX2(res.sfu, that->res.sfu);
   3021          res.imul = MAX2(res.imul, that->res.imul);
   3022          res.tex = MAX2(res.tex, that->res.tex);
   3023       }
   3024       void print(int cycle)
   3025       {
   3026          for (int i = 0; i < regs; ++i) {
   3027             if (rd.r[i] > cycle)
   3028                INFO("rd $r%i @ %i\n", i, rd.r[i]);
   3029             if (wr.r[i] > cycle)
   3030                INFO("wr $r%i @ %i\n", i, wr.r[i]);
   3031          }
   3032          for (int i = 0; i < 8; ++i) {
   3033             if (rd.p[i] > cycle)
   3034                INFO("rd $p%i @ %i\n", i, rd.p[i]);
   3035             if (wr.p[i] > cycle)
   3036                INFO("wr $p%i @ %i\n", i, wr.p[i]);
   3037          }
   3038          if (rd.c > cycle)
   3039             INFO("rd $c @ %i\n", rd.c);
   3040          if (wr.c > cycle)
   3041             INFO("wr $c @ %i\n", wr.c);
   3042          if (res.sfu > cycle)
   3043             INFO("sfu @ %i\n", res.sfu);
   3044          if (res.imul > cycle)
   3045             INFO("imul @ %i\n", res.imul);
   3046          if (res.tex > cycle)
   3047             INFO("tex @ %i\n", res.tex);
   3048       }
   3049    };
   3050 
   3051    RegScores *score; // for current BB
   3052    std::vector<RegScores> scoreBoards;
   3053    int prevData;
   3054    operation prevOp;
   3055 
   3056    const Target *targ;
   3057 
   3058    bool visit(Function *);
   3059    bool visit(BasicBlock *);
   3060 
   3061    void commitInsn(const Instruction *, int cycle);
   3062    int calcDelay(const Instruction *, int cycle) const;
   3063    void setDelay(Instruction *, int delay, Instruction *next);
   3064 
   3065    void recordRd(const Value *, const int ready);
   3066    void recordWr(const Value *, const int ready);
   3067    void checkRd(const Value *, int cycle, int& delay) const;
   3068    void checkWr(const Value *, int cycle, int& delay) const;
   3069 
   3070    int getCycles(const Instruction *, int origDelay) const;
   3071 };
   3072 
   3073 void
   3074 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
   3075 {
   3076    if (insn->op == OP_EXIT || insn->op == OP_RET)
   3077       delay = MAX2(delay, 14);
   3078 
   3079    if (insn->op == OP_TEXBAR) {
   3080       // TODO: except if results not used before EXIT
   3081       insn->sched = 0xc2;
   3082    } else
   3083    if (insn->op == OP_JOIN || insn->join) {
   3084       insn->sched = 0x00;
   3085    } else
   3086    if (delay >= 0 || prevData == 0x04 ||
   3087        !next || !targ->canDualIssue(insn, next)) {
   3088       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
   3089       if (prevOp == OP_EXPORT)
   3090          insn->sched |= 0x40;
   3091       else
   3092          insn->sched |= 0x20;
   3093    } else {
   3094       insn->sched = 0x04; // dual-issue
   3095    }
   3096 
   3097    if (prevData != 0x04 || prevOp != OP_EXPORT)
   3098       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
   3099          prevOp = insn->op;
   3100 
   3101    prevData = insn->sched;
   3102 }
   3103 
   3104 int
   3105 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
   3106 {
   3107    if (insn->sched & 0x80) {
   3108       int c = (insn->sched & 0x0f) * 2 + 1;
   3109       if (insn->op == OP_TEXBAR && origDelay > 0)
   3110          c += origDelay;
   3111       return c;
   3112    }
   3113    if (insn->sched & 0x60)
   3114       return (insn->sched & 0x1f) + 1;
   3115    return (insn->sched == 0x04) ? 0 : 32;
   3116 }
   3117 
   3118 bool
   3119 SchedDataCalculator::visit(Function *func)
   3120 {
   3121    int regs = targ->getFileSize(FILE_GPR) + 1;
   3122    scoreBoards.resize(func->cfg.getSize());
   3123    for (size_t i = 0; i < scoreBoards.size(); ++i)
   3124       scoreBoards[i].wipe(regs);
   3125    return true;
   3126 }
   3127 
   3128 bool
   3129 SchedDataCalculator::visit(BasicBlock *bb)
   3130 {
   3131    Instruction *insn;
   3132    Instruction *next = NULL;
   3133 
   3134    int cycle = 0;
   3135 
   3136    prevData = 0x00;
   3137    prevOp = OP_NOP;
   3138    score = &scoreBoards.at(bb->getId());
   3139 
   3140    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
   3141       // back branches will wait until all target dependencies are satisfied
   3142       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
   3143          continue;
   3144       BasicBlock *in = BasicBlock::get(ei.getNode());
   3145       if (in->getExit()) {
   3146          if (prevData != 0x04)
   3147             prevData = in->getExit()->sched;
   3148          prevOp = in->getExit()->op;
   3149       }
   3150       score->setMax(&scoreBoards.at(in->getId()));
   3151    }
   3152    if (bb->cfg.incidentCount() > 1)
   3153       prevOp = OP_NOP;
   3154 
   3155 #ifdef NVC0_DEBUG_SCHED_DATA
   3156    INFO("=== BB:%i initial scores\n", bb->getId());
   3157    score->print(cycle);
   3158 #endif
   3159 
   3160    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
   3161       next = insn->next;
   3162 
   3163       commitInsn(insn, cycle);
   3164       int delay = calcDelay(next, cycle);
   3165       setDelay(insn, delay, next);
   3166       cycle += getCycles(insn, delay);
   3167 
   3168 #ifdef NVC0_DEBUG_SCHED_DATA
   3169       INFO("cycle %i, sched %02x\n", cycle, insn->sched);
   3170       insn->print();
   3171       next->print();
   3172 #endif
   3173    }
   3174    if (!insn)
   3175       return true;
   3176    commitInsn(insn, cycle);
   3177 
   3178    int bbDelay = -1;
   3179 
   3180    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
   3181       BasicBlock *out = BasicBlock::get(ei.getNode());
   3182 
   3183       if (ei.getType() != Graph::Edge::BACK) {
   3184          // only test the first instruction of the outgoing block
   3185          next = out->getEntry();
   3186          if (next)
   3187             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
   3188       } else {
   3189          // wait until all dependencies are satisfied
   3190          const int regsFree = score->getLatest();
   3191          next = out->getFirst();
   3192          for (int c = cycle; next && c < regsFree; next = next->next) {
   3193             bbDelay = MAX2(bbDelay, calcDelay(next, c));
   3194             c += getCycles(next, bbDelay);
   3195          }
   3196          next = NULL;
   3197       }
   3198    }
   3199    if (bb->cfg.outgoingCount() != 1)
   3200       next = NULL;
   3201    setDelay(insn, bbDelay, next);
   3202    cycle += getCycles(insn, bbDelay);
   3203 
   3204    score->rebase(cycle); // common base for initializing out blocks' scores
   3205    return true;
   3206 }
   3207 
   3208 #define NVE4_MAX_ISSUE_DELAY 0x1f
   3209 int
   3210 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
   3211 {
   3212    int delay = 0, ready = cycle;
   3213 
   3214    for (int s = 0; insn->srcExists(s); ++s)
   3215       checkRd(insn->getSrc(s), cycle, delay);
   3216    // WAR & WAW don't seem to matter
   3217    // for (int s = 0; insn->srcExists(s); ++s)
   3218    //   recordRd(insn->getSrc(s), cycle);
   3219 
   3220    switch (Target::getOpClass(insn->op)) {
   3221    case OPCLASS_SFU:
   3222       ready = score->res.sfu;
   3223       break;
   3224    case OPCLASS_ARITH:
   3225       if (insn->op == OP_MUL && !isFloatType(insn->dType))
   3226          ready = score->res.imul;
   3227       break;
   3228    case OPCLASS_TEXTURE:
   3229       ready = score->res.tex;
   3230       break;
   3231    case OPCLASS_LOAD:
   3232       ready = score->res.ld[insn->src(0).getFile()];
   3233       break;
   3234    case OPCLASS_STORE:
   3235       ready = score->res.st[insn->src(0).getFile()];
   3236       break;
   3237    default:
   3238       break;
   3239    }
   3240    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
   3241       ready = MAX2(ready, score->res.tex);
   3242 
   3243    delay = MAX2(delay, ready - cycle);
   3244 
   3245    // if can issue next cycle, delay is 0, not 1
   3246    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
   3247 }
   3248 
   3249 void
   3250 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
   3251 {
   3252    const int ready = cycle + targ->getLatency(insn);
   3253 
   3254    for (int d = 0; insn->defExists(d); ++d)
   3255       recordWr(insn->getDef(d), ready);
   3256    // WAR & WAW don't seem to matter
   3257    // for (int s = 0; insn->srcExists(s); ++s)
   3258    //   recordRd(insn->getSrc(s), cycle);
   3259 
   3260    switch (Target::getOpClass(insn->op)) {
   3261    case OPCLASS_SFU:
   3262       score->res.sfu = cycle + 4;
   3263       break;
   3264    case OPCLASS_ARITH:
   3265       if (insn->op == OP_MUL && !isFloatType(insn->dType))
   3266          score->res.imul = cycle + 4;
   3267       break;
   3268    case OPCLASS_TEXTURE:
   3269       score->res.tex = cycle + 18;
   3270       break;
   3271    case OPCLASS_LOAD:
   3272       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
   3273          break;
   3274       score->res.ld[insn->src(0).getFile()] = cycle + 4;
   3275       score->res.st[insn->src(0).getFile()] = ready;
   3276       break;
   3277    case OPCLASS_STORE:
   3278       score->res.st[insn->src(0).getFile()] = cycle + 4;
   3279       score->res.ld[insn->src(0).getFile()] = ready;
   3280       break;
   3281    case OPCLASS_OTHER:
   3282       if (insn->op == OP_TEXBAR)
   3283          score->res.tex = cycle;
   3284       break;
   3285    default:
   3286       break;
   3287    }
   3288 
   3289 #ifdef NVC0_DEBUG_SCHED_DATA
   3290    score->print(cycle);
   3291 #endif
   3292 }
   3293 
   3294 void
   3295 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
   3296 {
   3297    int ready = cycle;
   3298    int a, b;
   3299 
   3300    switch (v->reg.file) {
   3301    case FILE_GPR:
   3302       a = v->reg.data.id;
   3303       b = a + v->reg.size / 4;
   3304       for (int r = a; r < b; ++r)
   3305          ready = MAX2(ready, score->rd.r[r]);
   3306       break;
   3307    case FILE_PREDICATE:
   3308       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
   3309       break;
   3310    case FILE_FLAGS:
   3311       ready = MAX2(ready, score->rd.c);
   3312       break;
   3313    case FILE_SHADER_INPUT:
   3314    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
   3315    case FILE_MEMORY_LOCAL:
   3316    case FILE_MEMORY_CONST:
   3317    case FILE_MEMORY_SHARED:
   3318    case FILE_MEMORY_GLOBAL:
   3319    case FILE_SYSTEM_VALUE:
   3320       // TODO: any restrictions here ?
   3321       break;
   3322    case FILE_IMMEDIATE:
   3323       break;
   3324    default:
   3325       assert(0);
   3326       break;
   3327    }
   3328    if (cycle < ready)
   3329       delay = MAX2(delay, ready - cycle);
   3330 }
   3331 
   3332 void
   3333 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
   3334 {
   3335    int ready = cycle;
   3336    int a, b;
   3337 
   3338    switch (v->reg.file) {
   3339    case FILE_GPR:
   3340       a = v->reg.data.id;
   3341       b = a + v->reg.size / 4;
   3342       for (int r = a; r < b; ++r)
   3343          ready = MAX2(ready, score->wr.r[r]);
   3344       break;
   3345    case FILE_PREDICATE:
   3346       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
   3347       break;
   3348    default:
   3349       assert(v->reg.file == FILE_FLAGS);
   3350       ready = MAX2(ready, score->wr.c);
   3351       break;
   3352    }
   3353    if (cycle < ready)
   3354       delay = MAX2(delay, ready - cycle);
   3355 }
   3356 
   3357 void
   3358 SchedDataCalculator::recordWr(const Value *v, const int ready)
   3359 {
   3360    int a = v->reg.data.id;
   3361 
   3362    if (v->reg.file == FILE_GPR) {
   3363       int b = a + v->reg.size / 4;
   3364       for (int r = a; r < b; ++r)
   3365          score->rd.r[r] = ready;
   3366    } else
   3367    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
   3368    if (v->reg.file == FILE_PREDICATE) {
   3369       score->rd.p[a] = ready + 4;
   3370    } else {
   3371       assert(v->reg.file == FILE_FLAGS);
   3372       score->rd.c = ready + 4;
   3373    }
   3374 }
   3375 
   3376 void
   3377 SchedDataCalculator::recordRd(const Value *v, const int ready)
   3378 {
   3379    int a = v->reg.data.id;
   3380 
   3381    if (v->reg.file == FILE_GPR) {
   3382       int b = a + v->reg.size / 4;
   3383       for (int r = a; r < b; ++r)
   3384          score->wr.r[r] = ready;
   3385    } else
   3386    if (v->reg.file == FILE_PREDICATE) {
   3387       score->wr.p[a] = ready;
   3388    } else
   3389    if (v->reg.file == FILE_FLAGS) {
   3390       score->wr.c = ready;
   3391    }
   3392 }
   3393 
   3394 bool
   3395 calculateSchedDataNVC0(const Target *targ, Function *func)
   3396 {
   3397    SchedDataCalculator sched(targ);
   3398    return sched.run(func, true, true);
   3399 }
   3400 
   3401 void
   3402 CodeEmitterNVC0::prepareEmission(Function *func)
   3403 {
   3404    CodeEmitter::prepareEmission(func);
   3405 
   3406    if (targ->hasSWSched)
   3407       calculateSchedDataNVC0(targ, func);
   3408 }
   3409 
   3410 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   3411    : CodeEmitter(target),
   3412      targNVC0(target),
   3413      writeIssueDelays(target->hasSWSched)
   3414 {
   3415    code = NULL;
   3416    codeSize = codeSizeLimit = 0;
   3417    relocInfo = NULL;
   3418 }
   3419 
   3420 CodeEmitter *
   3421 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
   3422 {
   3423    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
   3424    emit->setProgramType(type);
   3425    return emit;
   3426 }
   3427 
   3428 CodeEmitter *
   3429 TargetNVC0::getCodeEmitter(Program::Type type)
   3430 {
   3431    if (chipset >= NVISA_GK20A_CHIPSET)
   3432       return createCodeEmitterGK110(type);
   3433    return createCodeEmitterNVC0(type);
   3434 }
   3435 
   3436 } // namespace nv50_ir
   3437