Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50_ir_target_nvc0.h"
     24 
     25 namespace nv50_ir {
     26 
     27 // Argh, all these assertions ...
     28 
     29 class CodeEmitterNVC0 : public CodeEmitter
     30 {
     31 public:
     32    CodeEmitterNVC0(const TargetNVC0 *);
     33 
     34    virtual bool emitInstruction(Instruction *);
     35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
     36    virtual void prepareEmission(Function *);
     37 
     38    inline void setProgramType(Program::Type pType) { progType = pType; }
     39 
     40 private:
     41    const TargetNVC0 *targ;
     42 
     43    Program::Type progType;
     44 
     45    const bool writeIssueDelays;
     46 
     47 private:
     48    void emitForm_A(const Instruction *, uint64_t);
     49    void emitForm_B(const Instruction *, uint64_t);
     50    void emitForm_S(const Instruction *, uint32_t, bool pred);
     51 
     52    void emitPredicate(const Instruction *);
     53 
     54    void setAddress16(const ValueRef&);
     55    void setImmediate(const Instruction *, const int s); // needs op already set
     56    void setImmediateS8(const ValueRef&);
     57 
     58    void emitCondCode(CondCode cc, int pos);
     59    void emitInterpMode(const Instruction *);
     60    void emitLoadStoreType(DataType ty);
     61    void emitCachingMode(CacheMode c);
     62 
     63    void emitShortSrc2(const ValueRef&);
     64 
     65    inline uint8_t getSRegEncoding(const ValueRef&);
     66 
     67    void roundMode_A(const Instruction *);
     68    void roundMode_C(const Instruction *);
     69    void roundMode_CS(const Instruction *);
     70 
     71    void emitNegAbs12(const Instruction *);
     72 
     73    void emitNOP(const Instruction *);
     74 
     75    void emitLOAD(const Instruction *);
     76    void emitSTORE(const Instruction *);
     77    void emitMOV(const Instruction *);
     78 
     79    void emitINTERP(const Instruction *);
     80    void emitPFETCH(const Instruction *);
     81    void emitVFETCH(const Instruction *);
     82    void emitEXPORT(const Instruction *);
     83    void emitOUT(const Instruction *);
     84 
     85    void emitUADD(const Instruction *);
     86    void emitFADD(const Instruction *);
     87    void emitUMUL(const Instruction *);
     88    void emitFMUL(const Instruction *);
     89    void emitIMAD(const Instruction *);
     90    void emitISAD(const Instruction *);
     91    void emitFMAD(const Instruction *);
     92 
     93    void emitNOT(Instruction *);
     94    void emitLogicOp(const Instruction *, uint8_t subOp);
     95    void emitPOPC(const Instruction *);
     96    void emitINSBF(const Instruction *);
     97    void emitShift(const Instruction *);
     98 
     99    void emitSFnOp(const Instruction *, uint8_t subOp);
    100 
    101    void emitCVT(Instruction *);
    102    void emitMINMAX(const Instruction *);
    103    void emitPreOp(const Instruction *);
    104 
    105    void emitSET(const CmpInstruction *);
    106    void emitSLCT(const CmpInstruction *);
    107    void emitSELP(const Instruction *);
    108 
    109    void emitTEXBAR(const Instruction *);
    110    void emitTEX(const TexInstruction *);
    111    void emitTEXCSAA(const TexInstruction *);
    112    void emitTXQ(const TexInstruction *);
    113    void emitPIXLD(const TexInstruction *);
    114 
    115    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
    116 
    117    void emitFlow(const Instruction *);
    118 
    119    inline void defId(const ValueDef&, const int pos);
    120    inline void srcId(const ValueRef&, const int pos);
    121    inline void srcId(const ValueRef *, const int pos);
    122    inline void srcId(const Instruction *, int s, const int pos);
    123 
    124    inline void srcAddr32(const ValueRef&, const int pos); // address / 4
    125 
    126    inline bool isLIMM(const ValueRef&, DataType ty);
    127 };
    128 
    129 // for better visibility
    130 #define HEX64(h, l) 0x##h##l##ULL
    131 
    132 #define SDATA(a) ((a).rep()->reg.data)
    133 #define DDATA(a) ((a).rep()->reg.data)
    134 
    135 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
    136 {
    137    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
    138 }
    139 
    140 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
    141 {
    142    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
    143 }
    144 
    145 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
    146 {
    147    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
    148    code[pos / 32] |= r << (pos % 32);
    149 }
    150 
    151 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
    152 {
    153    code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
    154 }
    155 
    156 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
    157 {
    158    code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
    159 }
    160 
    161 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
    162 {
    163    const ImmediateValue *imm = ref.get()->asImm();
    164 
    165    return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
    166 }
    167 
    168 void
    169 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
    170 {
    171    switch (insn->rnd) {
    172    case ROUND_M: code[1] |= 1 << 23; break;
    173    case ROUND_P: code[1] |= 2 << 23; break;
    174    case ROUND_Z: code[1] |= 3 << 23; break;
    175    default:
    176       assert(insn->rnd == ROUND_N);
    177       break;
    178    }
    179 }
    180 
    181 void
    182 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
    183 {
    184    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
    185    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
    186    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
    187    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
    188 }
    189 
    190 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
    191 {
    192    uint8_t val;
    193 
    194    switch (cc) {
    195    case CC_LT:  val = 0x1; break;
    196    case CC_LTU: val = 0x9; break;
    197    case CC_EQ:  val = 0x2; break;
    198    case CC_EQU: val = 0xa; break;
    199    case CC_LE:  val = 0x3; break;
    200    case CC_LEU: val = 0xb; break;
    201    case CC_GT:  val = 0x4; break;
    202    case CC_GTU: val = 0xc; break;
    203    case CC_NE:  val = 0x5; break;
    204    case CC_NEU: val = 0xd; break;
    205    case CC_GE:  val = 0x6; break;
    206    case CC_GEU: val = 0xe; break;
    207    case CC_TR:  val = 0xf; break;
    208    case CC_FL:  val = 0x0; break;
    209 
    210    case CC_A:  val = 0x14; break;
    211    case CC_NA: val = 0x13; break;
    212    case CC_S:  val = 0x15; break;
    213    case CC_NS: val = 0x12; break;
    214    case CC_C:  val = 0x16; break;
    215    case CC_NC: val = 0x11; break;
    216    case CC_O:  val = 0x17; break;
    217    case CC_NO: val = 0x10; break;
    218 
    219    default:
    220       val = 0;
    221       assert(!"invalid condition code");
    222       break;
    223    }
    224    code[pos / 32] |= val << (pos % 32);
    225 }
    226 
    227 void
    228 CodeEmitterNVC0::emitPredicate(const Instruction *i)
    229 {
    230    if (i->predSrc >= 0) {
    231       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
    232       srcId(i->src(i->predSrc), 10);
    233       if (i->cc == CC_NOT_P)
    234          code[0] |= 0x2000; // negate
    235    } else {
    236       code[0] |= 0x1c00;
    237    }
    238 }
    239 
    240 void
    241 CodeEmitterNVC0::setAddress16(const ValueRef& src)
    242 {
    243    Symbol *sym = src.get()->asSym();
    244 
    245    assert(sym);
    246 
    247    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
    248    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
    249 }
    250 
    251 void
    252 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
    253 {
    254    const ImmediateValue *imm = i->src(s).get()->asImm();
    255    uint32_t u32;
    256 
    257    assert(imm);
    258    u32 = imm->reg.data.u32;
    259 
    260    if ((code[0] & 0xf) == 0x2) {
    261       // LIMM
    262       code[0] |= (u32 & 0x3f) << 26;
    263       code[1] |= u32 >> 6;
    264    } else
    265    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
    266       // integer immediate
    267       assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
    268       assert(!(code[1] & 0xc000));
    269       u32 &= 0xfffff;
    270       code[0] |= (u32 & 0x3f) << 26;
    271       code[1] |= 0xc000 | (u32 >> 6);
    272    } else {
    273       // float immediate
    274       assert(!(u32 & 0x00000fff));
    275       assert(!(code[1] & 0xc000));
    276       code[0] |= ((u32 >> 12) & 0x3f) << 26;
    277       code[1] |= 0xc000 | (u32 >> 18);
    278    }
    279 }
    280 
    281 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
    282 {
    283    const ImmediateValue *imm = ref.get()->asImm();
    284 
    285    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
    286 
    287    assert(s8 == imm->reg.data.s32);
    288 
    289    code[0] |= (s8 & 0x3f) << 26;
    290    code[0] |= (s8 >> 6) << 8;
    291 }
    292 
    293 void
    294 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
    295 {
    296    code[0] = opc;
    297    code[1] = opc >> 32;
    298 
    299    emitPredicate(i);
    300 
    301    defId(i->def(0), 14);
    302 
    303    int s1 = 26;
    304    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
    305       s1 = 49;
    306 
    307    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
    308       switch (i->getSrc(s)->reg.file) {
    309       case FILE_MEMORY_CONST:
    310          assert(!(code[1] & 0xc000));
    311          code[1] |= (s == 2) ? 0x8000 : 0x4000;
    312          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
    313          setAddress16(i->src(s));
    314          break;
    315       case FILE_IMMEDIATE:
    316          assert(s == 1 ||
    317                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
    318          assert(!(code[1] & 0xc000));
    319          setImmediate(i, s);
    320          break;
    321       case FILE_GPR:
    322          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
    323             break;
    324          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
    325          break;
    326       default:
    327          // ignore here, can be predicate or flags, but must not be address
    328          break;
    329       }
    330    }
    331 }
    332 
    333 void
    334 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
    335 {
    336    code[0] = opc;
    337    code[1] = opc >> 32;
    338 
    339    emitPredicate(i);
    340 
    341    defId(i->def(0), 14);
    342 
    343    switch (i->src(0).getFile()) {
    344    case FILE_MEMORY_CONST:
    345       assert(!(code[1] & 0xc000));
    346       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
    347       setAddress16(i->src(0));
    348       break;
    349    case FILE_IMMEDIATE:
    350       assert(!(code[1] & 0xc000));
    351       setImmediate(i, 0);
    352       break;
    353    case FILE_GPR:
    354       srcId(i->src(0), 26);
    355       break;
    356    default:
    357       // ignore here, can be predicate or flags, but must not be address
    358       break;
    359    }
    360 }
    361 
    362 void
    363 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
    364 {
    365    code[0] = opc;
    366 
    367    int ss2a = 0;
    368    if (opc == 0x0d || opc == 0x0e)
    369       ss2a = 2;
    370 
    371    defId(i->def(0), 14);
    372    srcId(i->src(0), 20);
    373 
    374    assert(pred || (i->predSrc < 0));
    375    if (pred)
    376       emitPredicate(i);
    377 
    378    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
    379       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
    380          assert(!(code[0] & (0x300 >> ss2a)));
    381          switch (i->src(s).get()->reg.fileIndex) {
    382          case 0:  code[0] |= 0x100 >> ss2a; break;
    383          case 1:  code[0] |= 0x200 >> ss2a; break;
    384          case 16: code[0] |= 0x300 >> ss2a; break;
    385          default:
    386             ERROR("invalid c[] space for short form\n");
    387             break;
    388          }
    389          if (s == 1)
    390             code[0] |= i->getSrc(s)->reg.data.offset << 24;
    391          else
    392             code[0] |= i->getSrc(s)->reg.data.offset << 6;
    393       } else
    394       if (i->src(s).getFile() == FILE_IMMEDIATE) {
    395          assert(s == 1);
    396          setImmediateS8(i->src(s));
    397       } else
    398       if (i->src(s).getFile() == FILE_GPR) {
    399          srcId(i->src(s), (s == 1) ? 26 : 8);
    400       }
    401    }
    402 }
    403 
    404 void
    405 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
    406 {
    407    if (src.getFile() == FILE_MEMORY_CONST) {
    408       switch (src.get()->reg.fileIndex) {
    409       case 0:  code[0] |= 0x100; break;
    410       case 1:  code[0] |= 0x200; break;
    411       case 16: code[0] |= 0x300; break;
    412       default:
    413          assert(!"unsupported file index for short op");
    414          break;
    415       }
    416       srcAddr32(src, 20);
    417    } else {
    418       srcId(src, 20);
    419       assert(src.getFile() == FILE_GPR);
    420    }
    421 }
    422 
    423 void
    424 CodeEmitterNVC0::emitNOP(const Instruction *i)
    425 {
    426    code[0] = 0x000001e4;
    427    code[1] = 0x40000000;
    428    emitPredicate(i);
    429 }
    430 
    431 void
    432 CodeEmitterNVC0::emitFMAD(const Instruction *i)
    433 {
    434    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
    435 
    436    if (i->encSize == 8) {
    437       if (isLIMM(i->src(1), TYPE_F32)) {
    438          emitForm_A(i, HEX64(20000000, 00000002));
    439       } else {
    440          emitForm_A(i, HEX64(30000000, 00000000));
    441 
    442          if (i->src(2).mod.neg())
    443             code[0] |= 1 << 8;
    444       }
    445       roundMode_A(i);
    446 
    447       if (neg1)
    448          code[0] |= 1 << 9;
    449 
    450       if (i->saturate)
    451          code[0] |= 1 << 5;
    452       if (i->ftz)
    453          code[0] |= 1 << 6;
    454    } else {
    455       assert(!i->saturate && !i->src(2).mod.neg());
    456       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
    457                  false);
    458       if (neg1)
    459          code[0] |= 1 << 4;
    460    }
    461 }
    462 
    463 void
    464 CodeEmitterNVC0::emitFMUL(const Instruction *i)
    465 {
    466    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
    467 
    468    assert(i->postFactor >= -3 && i->postFactor <= 3);
    469 
    470    if (i->encSize == 8) {
    471       if (isLIMM(i->src(1), TYPE_F32)) {
    472          assert(i->postFactor == 0); // constant folded, hopefully
    473          emitForm_A(i, HEX64(30000000, 00000002));
    474       } else {
    475          emitForm_A(i, HEX64(58000000, 00000000));
    476          roundMode_A(i);
    477          code[1] |= ((i->postFactor > 0) ?
    478                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
    479       }
    480       if (neg)
    481          code[1] ^= 1 << 25; // aliases with LIMM sign bit
    482 
    483       if (i->saturate)
    484          code[0] |= 1 << 5;
    485 
    486       if (i->dnz)
    487          code[0] |= 1 << 7;
    488       else
    489       if (i->ftz)
    490          code[0] |= 1 << 6;
    491    } else {
    492       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
    493       emitForm_S(i, 0xa8, true);
    494    }
    495 }
    496 
    497 void
    498 CodeEmitterNVC0::emitUMUL(const Instruction *i)
    499 {
    500    if (i->encSize == 8) {
    501       if (i->src(1).getFile() == FILE_IMMEDIATE) {
    502          emitForm_A(i, HEX64(10000000, 00000002));
    503       } else {
    504          emitForm_A(i, HEX64(50000000, 00000003));
    505       }
    506       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
    507          code[0] |= 1 << 6;
    508       if (i->sType == TYPE_S32)
    509          code[0] |= 1 << 5;
    510       if (i->dType == TYPE_S32)
    511          code[0] |= 1 << 7;
    512    } else {
    513       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
    514 
    515       if (i->sType == TYPE_S32)
    516          code[0] |= 1 << 6;
    517    }
    518 }
    519 
    520 void
    521 CodeEmitterNVC0::emitFADD(const Instruction *i)
    522 {
    523    if (i->encSize == 8) {
    524       if (isLIMM(i->src(1), TYPE_F32)) {
    525          assert(!i->saturate);
    526          emitForm_A(i, HEX64(28000000, 00000002));
    527 
    528          code[0] |= i->src(0).mod.abs() << 7;
    529          code[0] |= i->src(0).mod.neg() << 9;
    530 
    531          if (i->src(1).mod.abs())
    532             code[1] &= 0xfdffffff;
    533          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
    534             code[1] ^= 0x02000000;
    535       } else {
    536          emitForm_A(i, HEX64(50000000, 00000000));
    537 
    538          roundMode_A(i);
    539          if (i->saturate)
    540             code[1] |= 1 << 17;
    541 
    542          emitNegAbs12(i);
    543          if (i->op == OP_SUB) code[0] ^= 1 << 8;
    544       }
    545       if (i->ftz)
    546          code[0] |= 1 << 5;
    547    } else {
    548       assert(!i->saturate && i->op != OP_SUB &&
    549              !i->src(0).mod.abs() &&
    550              !i->src(1).mod.neg() && !i->src(1).mod.abs());
    551 
    552       emitForm_S(i, 0x49, true);
    553 
    554       if (i->src(0).mod.neg())
    555          code[0] |= 1 << 7;
    556    }
    557 }
    558 
    559 void
    560 CodeEmitterNVC0::emitUADD(const Instruction *i)
    561 {
    562    uint32_t addOp = 0;
    563 
    564    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
    565    assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
    566 
    567    if (i->src(0).mod.neg())
    568       addOp |= 0x200;
    569    if (i->src(1).mod.neg())
    570       addOp |= 0x100;
    571    if (i->op == OP_SUB) {
    572       addOp ^= 0x100;
    573       assert(addOp != 0x300); // would be add-plus-one
    574    }
    575 
    576    if (i->encSize == 8) {
    577       if (isLIMM(i->src(1), TYPE_U32)) {
    578          emitForm_A(i, HEX64(08000000, 00000002));
    579          if (i->defExists(1))
    580             code[1] |= 1 << 26; // write carry
    581       } else {
    582          emitForm_A(i, HEX64(48000000, 00000003));
    583          if (i->defExists(1))
    584             code[1] |= 1 << 16; // write carry
    585       }
    586       code[0] |= addOp;
    587 
    588       if (i->saturate)
    589          code[0] |= 1 << 5;
    590       if (i->flagsSrc >= 0) // add carry
    591          code[0] |= 1 << 6;
    592    } else {
    593       assert(!(addOp & 0x100));
    594       emitForm_S(i, (addOp >> 3) |
    595                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
    596    }
    597 }
    598 
    599 // TODO: shl-add
    600 void
    601 CodeEmitterNVC0::emitIMAD(const Instruction *i)
    602 {
    603    assert(i->encSize == 8);
    604    emitForm_A(i, HEX64(20000000, 00000003));
    605 
    606    if (isSignedType(i->dType))
    607       code[0] |= 1 << 7;
    608    if (isSignedType(i->sType))
    609       code[0] |= 1 << 5;
    610 
    611    code[1] |= i->saturate << 24;
    612 
    613    if (i->flagsDef >= 0) code[1] |= 1 << 16;
    614    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
    615 
    616    if (i->src(2).mod.neg()) code[0] |= 0x10;
    617    if (i->src(1).mod.neg() ^
    618        i->src(0).mod.neg()) code[0] |= 0x20;
    619 
    620    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
    621       code[0] |= 1 << 6;
    622 }
    623 
    624 void
    625 CodeEmitterNVC0::emitISAD(const Instruction *i)
    626 {
    627    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
    628    assert(i->encSize == 8);
    629 
    630    emitForm_A(i, HEX64(38000000, 00000003));
    631 
    632    if (i->dType == TYPE_S32)
    633       code[0] |= 1 << 5;
    634 }
    635 
    636 void
    637 CodeEmitterNVC0::emitNOT(Instruction *i)
    638 {
    639    assert(i->encSize == 8);
    640    i->setSrc(1, i->src(0));
    641    emitForm_A(i, HEX64(68000000, 000001c3));
    642 }
    643 
    644 void
    645 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
    646 {
    647    if (i->encSize == 8) {
    648       if (isLIMM(i->src(1), TYPE_U32)) {
    649          emitForm_A(i, HEX64(38000000, 00000002));
    650 
    651          if (i->srcExists(2))
    652             code[1] |= 1 << 26;
    653       } else {
    654          emitForm_A(i, HEX64(68000000, 00000003));
    655 
    656          if (i->srcExists(2))
    657             code[1] |= 1 << 16;
    658       }
    659       code[0] |= subOp << 6;
    660 
    661       if (i->srcExists(2)) // carry
    662          code[0] |= 1 << 5;
    663 
    664       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
    665       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
    666    } else {
    667       emitForm_S(i, (subOp << 5) |
    668                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
    669    }
    670 }
    671 
    672 void
    673 CodeEmitterNVC0::emitPOPC(const Instruction *i)
    674 {
    675    emitForm_A(i, HEX64(54000000, 00000004));
    676 
    677    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
    678    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
    679 }
    680 
    681 void
    682 CodeEmitterNVC0::emitINSBF(const Instruction *i)
    683 {
    684    emitForm_A(i, HEX64(28000000, 30000000));
    685 }
    686 
    687 void
    688 CodeEmitterNVC0::emitShift(const Instruction *i)
    689 {
    690    if (i->op == OP_SHR) {
    691       emitForm_A(i, HEX64(58000000, 00000003)
    692                  | (isSignedType(i->dType) ? 0x20 : 0x00));
    693    } else {
    694       emitForm_A(i, HEX64(60000000, 00000003));
    695    }
    696 
    697    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
    698       code[0] |= 1 << 9;
    699 }
    700 
    701 void
    702 CodeEmitterNVC0::emitPreOp(const Instruction *i)
    703 {
    704    if (i->encSize == 8) {
    705       emitForm_B(i, HEX64(60000000, 00000000));
    706 
    707       if (i->op == OP_PREEX2)
    708          code[0] |= 0x20;
    709 
    710       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
    711       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
    712    } else {
    713       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
    714    }
    715 }
    716 
    717 void
    718 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
    719 {
    720    if (i->encSize == 8) {
    721       code[0] = 0x00000000 | (subOp << 26);
    722       code[1] = 0xc8000000;
    723 
    724       emitPredicate(i);
    725 
    726       defId(i->def(0), 14);
    727       srcId(i->src(0), 20);
    728 
    729       assert(i->src(0).getFile() == FILE_GPR);
    730 
    731       if (i->saturate) code[0] |= 1 << 5;
    732 
    733       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
    734       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
    735    } else {
    736       emitForm_S(i, 0x80000008 | (subOp << 26), true);
    737 
    738       assert(!i->src(0).mod.neg());
    739       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
    740    }
    741 }
    742 
    743 void
    744 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
    745 {
    746    uint64_t op;
    747 
    748    assert(i->encSize == 8);
    749 
    750    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
    751 
    752    if (i->ftz)
    753       op |= 1 << 5;
    754    else
    755    if (!isFloatType(i->dType))
    756       op |= isSignedType(i->dType) ? 0x23 : 0x03;
    757 
    758    emitForm_A(i, op);
    759    emitNegAbs12(i);
    760 }
    761 
    762 void
    763 CodeEmitterNVC0::roundMode_C(const Instruction *i)
    764 {
    765    switch (i->rnd) {
    766    case ROUND_M:  code[1] |= 1 << 17; break;
    767    case ROUND_P:  code[1] |= 2 << 17; break;
    768    case ROUND_Z:  code[1] |= 3 << 17; break;
    769    case ROUND_NI: code[0] |= 1 << 7; break;
    770    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
    771    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
    772    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
    773    case ROUND_N: break;
    774    default:
    775       assert(!"invalid round mode");
    776       break;
    777    }
    778 }
    779 
    780 void
    781 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
    782 {
    783    switch (i->rnd) {
    784    case ROUND_M:
    785    case ROUND_MI: code[0] |= 1 << 16; break;
    786    case ROUND_P:
    787    case ROUND_PI: code[0] |= 2 << 16; break;
    788    case ROUND_Z:
    789    case ROUND_ZI: code[0] |= 3 << 16; break;
    790    default:
    791       break;
    792    }
    793 }
    794 
    795 void
    796 CodeEmitterNVC0::emitCVT(Instruction *i)
    797 {
    798    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
    799 
    800    switch (i->op) {
    801    case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
    802    case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
    803    case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
    804    default:
    805       break;
    806    }
    807 
    808    const bool sat = (i->op == OP_SAT) || i->saturate;
    809    const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
    810    const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
    811 
    812    if (i->encSize == 8) {
    813       emitForm_B(i, HEX64(10000000, 00000004));
    814 
    815       roundMode_C(i);
    816 
    817       // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
    818       code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
    819       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
    820 
    821       if (sat)
    822          code[0] |= 0x20;
    823       if (abs)
    824          code[0] |= 1 << 6;
    825       if (neg && i->op != OP_ABS)
    826          code[0] |= 1 << 8;
    827 
    828       if (i->ftz)
    829          code[1] |= 1 << 23;
    830 
    831       if (isSignedIntType(i->dType))
    832          code[0] |= 0x080;
    833       if (isSignedIntType(i->sType))
    834          code[0] |= 0x200;
    835 
    836       if (isFloatType(i->dType)) {
    837          if (!isFloatType(i->sType))
    838             code[1] |= 0x08000000;
    839       } else {
    840          if (isFloatType(i->sType))
    841             code[1] |= 0x04000000;
    842          else
    843             code[1] |= 0x0c000000;
    844       }
    845    } else {
    846       if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
    847          code[0] = 0x298;
    848       } else
    849       if (isFloatType(i->dType)) {
    850          if (isFloatType(i->sType))
    851             code[0] = 0x098;
    852          else
    853             code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
    854       } else {
    855          assert(isFloatType(i->sType));
    856 
    857          code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
    858       }
    859 
    860       if (neg) code[0] |= 1 << 16;
    861       if (sat) code[0] |= 1 << 18;
    862       if (abs) code[0] |= 1 << 19;
    863 
    864       roundMode_CS(i);
    865    }
    866 }
    867 
    868 void
    869 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
    870 {
    871    uint32_t hi;
    872    uint32_t lo = 0;
    873 
    874    if (i->sType == TYPE_F64)
    875       lo = 0x1;
    876    else
    877    if (!isFloatType(i->sType))
    878       lo = 0x3;
    879 
    880    if (isFloatType(i->dType) || isSignedIntType(i->sType))
    881       lo |= 0x20;
    882 
    883    switch (i->op) {
    884    case OP_SET_AND: hi = 0x10000000; break;
    885    case OP_SET_OR:  hi = 0x10200000; break;
    886    case OP_SET_XOR: hi = 0x10400000; break;
    887    default:
    888       hi = 0x100e0000;
    889       break;
    890    }
    891    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
    892 
    893    if (i->op != OP_SET)
    894       srcId(i->src(2), 32 + 17);
    895 
    896    if (i->def(0).getFile() == FILE_PREDICATE) {
    897       if (i->sType == TYPE_F32)
    898          code[1] += 0x10000000;
    899       else
    900          code[1] += 0x08000000;
    901 
    902       code[0] &= ~0xfc000;
    903       defId(i->def(0), 17);
    904       if (i->defExists(1))
    905          defId(i->def(1), 14);
    906       else
    907          code[0] |= 0x1c000;
    908    }
    909 
    910    if (i->ftz)
    911       code[1] |= 1 << 27;
    912 
    913    emitCondCode(i->setCond, 32 + 23);
    914    emitNegAbs12(i);
    915 }
    916 
    917 void
    918 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
    919 {
    920    uint64_t op;
    921 
    922    switch (i->dType) {
    923    case TYPE_S32:
    924       op = HEX64(30000000, 00000023);
    925       break;
    926    case TYPE_U32:
    927       op = HEX64(30000000, 00000003);
    928       break;
    929    case TYPE_F32:
    930       op = HEX64(38000000, 00000000);
    931       break;
    932    default:
    933       assert(!"invalid type for SLCT");
    934       op = 0;
    935       break;
    936    }
    937    emitForm_A(i, op);
    938 
    939    CondCode cc = i->setCond;
    940 
    941    if (i->src(2).mod.neg())
    942       cc = reverseCondCode(cc);
    943 
    944    emitCondCode(cc, 32 + 23);
    945 
    946    if (i->ftz)
    947       code[0] |= 1 << 5;
    948 }
    949 
    950 void CodeEmitterNVC0::emitSELP(const Instruction *i)
    951 {
    952    emitForm_A(i, HEX64(20000000, 00000004));
    953 
    954    if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
    955       code[1] |= 1 << 20;
    956 }
    957 
    958 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
    959 {
    960    code[0] = 0x00000006 | (i->subOp << 26);
    961    code[1] = 0xf0000000;
    962    emitPredicate(i);
    963    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
    964 }
    965 
    966 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
    967 {
    968    code[0] = 0x00000086;
    969    code[1] = 0xd0000000;
    970 
    971    code[1] |= i->tex.r;
    972    code[1] |= i->tex.s << 8;
    973 
    974    if (i->tex.liveOnly)
    975       code[0] |= 1 << 9;
    976 
    977    defId(i->def(0), 14);
    978    srcId(i->src(0), 20);
    979 }
    980 
    981 static inline bool
    982 isNextIndependentTex(const TexInstruction *i)
    983 {
    984    if (!i->next || !isTextureOp(i->next->op))
    985       return false;
    986    if (i->getDef(0)->interfers(i->next->getSrc(0)))
    987       return false;
    988    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
    989 }
    990 
    991 void
    992 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
    993 {
    994    code[0] = 0x00000006;
    995 
    996    if (isNextIndependentTex(i))
    997       code[0] |= 0x080; // t mode
    998    else
    999       code[0] |= 0x100; // p mode
   1000 
   1001    if (i->tex.liveOnly)
   1002       code[0] |= 1 << 9;
   1003 
   1004    switch (i->op) {
   1005    case OP_TEX: code[1] = 0x80000000; break;
   1006    case OP_TXB: code[1] = 0x84000000; break;
   1007    case OP_TXL: code[1] = 0x86000000; break;
   1008    case OP_TXF: code[1] = 0x90000000; break;
   1009    case OP_TXG: code[1] = 0xa0000000; break;
   1010    case OP_TXD: code[1] = 0xe0000000; break;
   1011    default:
   1012       assert(!"invalid texture op");
   1013       break;
   1014    }
   1015    if (i->op == OP_TXF) {
   1016       if (!i->tex.levelZero)
   1017          code[1] |= 0x02000000;
   1018    } else
   1019    if (i->tex.levelZero) {
   1020       code[1] |= 0x02000000;
   1021    }
   1022 
   1023    if (i->op != OP_TXD && i->tex.derivAll)
   1024       code[1] |= 1 << 13;
   1025 
   1026    defId(i->def(0), 14);
   1027    srcId(i->src(0), 20);
   1028 
   1029    emitPredicate(i);
   1030 
   1031    if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
   1032 
   1033    code[1] |= i->tex.mask << 14;
   1034 
   1035    code[1] |= i->tex.r;
   1036    code[1] |= i->tex.s << 8;
   1037    if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
   1038       code[1] |= 1 << 18; // in 1st source (with array index)
   1039 
   1040    // texture target:
   1041    code[1] |= (i->tex.target.getDim() - 1) << 20;
   1042    if (i->tex.target.isCube())
   1043       code[1] += 2 << 20;
   1044    if (i->tex.target.isArray())
   1045       code[1] |= 1 << 19;
   1046    if (i->tex.target.isShadow())
   1047       code[1] |= 1 << 24;
   1048 
   1049    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
   1050 
   1051    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
   1052       // lzero
   1053       if (i->op == OP_TXL)
   1054          code[1] &= ~(1 << 26);
   1055       else
   1056       if (i->op == OP_TXF)
   1057          code[1] &= ~(1 << 25);
   1058    }
   1059    if (i->tex.target == TEX_TARGET_2D_MS ||
   1060        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
   1061       code[1] |= 1 << 23;
   1062 
   1063    if (i->tex.useOffsets) // in vecSrc0.w
   1064       code[1] |= 1 << 22;
   1065 
   1066    srcId(i, src1, 26);
   1067 }
   1068 
   1069 void
   1070 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
   1071 {
   1072    code[0] = 0x00000086;
   1073    code[1] = 0xc0000000;
   1074 
   1075    switch (i->tex.query) {
   1076    case TXQ_DIMS:            code[1] |= 0 << 22; break;
   1077    case TXQ_TYPE:            code[1] |= 1 << 22; break;
   1078    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
   1079    case TXQ_FILTER:          code[1] |= 3 << 22; break;
   1080    case TXQ_LOD:             code[1] |= 4 << 22; break;
   1081    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
   1082    default:
   1083       assert(!"invalid texture query");
   1084       break;
   1085    }
   1086 
   1087    code[1] |= i->tex.mask << 14;
   1088 
   1089    code[1] |= i->tex.r;
   1090    code[1] |= i->tex.s << 8;
   1091    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
   1092       code[1] |= 1 << 18;
   1093 
   1094    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
   1095 
   1096    defId(i->def(0), 14);
   1097    srcId(i->src(0), 20);
   1098    srcId(i, src1, 26);
   1099 
   1100    emitPredicate(i);
   1101 }
   1102 
   1103 void
   1104 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
   1105 {
   1106    code[0] = 0x00000000 | (laneMask << 6);
   1107    code[1] = 0x48000000 | qOp;
   1108 
   1109    defId(i->def(0), 14);
   1110    srcId(i->src(0), 20);
   1111    srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
   1112 
   1113    if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
   1114       code[0] |= 1 << 9; // dall
   1115 
   1116    emitPredicate(i);
   1117 }
   1118 
   1119 void
   1120 CodeEmitterNVC0::emitFlow(const Instruction *i)
   1121 {
   1122    const FlowInstruction *f = i->asFlow();
   1123 
   1124    unsigned mask; // bit 0: predicate, bit 1: target
   1125 
   1126    code[0] = 0x00000007;
   1127 
   1128    switch (i->op) {
   1129    case OP_BRA:
   1130       code[1] = f->absolute ? 0x00000000 : 0x40000000;
   1131       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
   1132          code[0] |= 0x4000;
   1133       mask = 3;
   1134       break;
   1135    case OP_CALL:
   1136       code[1] = f->absolute ? 0x10000000 : 0x50000000;
   1137       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
   1138          code[0] |= 0x4000;
   1139       mask = 2;
   1140       break;
   1141 
   1142    case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
   1143    case OP_RET:     code[1] = 0x90000000; mask = 1; break;
   1144    case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
   1145    case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
   1146    case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
   1147 
   1148    case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
   1149    case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
   1150    case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
   1151    case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
   1152 
   1153    case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
   1154    case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
   1155    case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
   1156    default:
   1157       assert(!"invalid flow operation");
   1158       return;
   1159    }
   1160 
   1161    if (mask & 1) {
   1162       emitPredicate(i);
   1163       if (i->flagsSrc < 0)
   1164          code[0] |= 0x1e0;
   1165    }
   1166 
   1167    if (!f)
   1168       return;
   1169 
   1170    if (f->allWarp)
   1171       code[0] |= 1 << 15;
   1172    if (f->limit)
   1173       code[0] |= 1 << 16;
   1174 
   1175    if (f->op == OP_CALL) {
   1176       if (f->builtin) {
   1177          assert(f->absolute);
   1178          uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
   1179          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
   1180          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
   1181       } else {
   1182          assert(!f->absolute);
   1183          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
   1184          code[0] |= (pcRel & 0x3f) << 26;
   1185          code[1] |= (pcRel >> 6) & 0x3ffff;
   1186       }
   1187    } else
   1188    if (mask & 2) {
   1189       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
   1190       // currently we don't want absolute branches
   1191       assert(!f->absolute);
   1192       code[0] |= (pcRel & 0x3f) << 26;
   1193       code[1] |= (pcRel >> 6) & 0x3ffff;
   1194    }
   1195 }
   1196 
   1197 void
   1198 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
   1199 {
   1200    uint32_t prim = i->src(0).get()->reg.data.u32;
   1201 
   1202    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
   1203    code[1] = 0x00000000 | (prim >> 6);
   1204 
   1205    emitPredicate(i);
   1206 
   1207    defId(i->def(0), 14);
   1208    srcId(i->src(1), 20);
   1209 }
   1210 
   1211 void
   1212 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
   1213 {
   1214    code[0] = 0x00000006;
   1215    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
   1216 
   1217    if (i->perPatch)
   1218       code[0] |= 0x100;
   1219    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
   1220       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
   1221 
   1222    emitPredicate(i);
   1223 
   1224    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
   1225 
   1226    defId(i->def(0), 14);
   1227    srcId(i->src(0).getIndirect(0), 20);
   1228    srcId(i->src(0).getIndirect(1), 26); // vertex address
   1229 }
   1230 
   1231 void
   1232 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
   1233 {
   1234    unsigned int size = typeSizeof(i->dType);
   1235 
   1236    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
   1237    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
   1238 
   1239    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
   1240 
   1241    if (i->perPatch)
   1242       code[0] |= 0x100;
   1243 
   1244    emitPredicate(i);
   1245 
   1246    assert(i->src(1).getFile() == FILE_GPR);
   1247 
   1248    srcId(i->src(0).getIndirect(0), 20);
   1249    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
   1250    srcId(i->src(1), 26);
   1251 }
   1252 
   1253 void
   1254 CodeEmitterNVC0::emitOUT(const Instruction *i)
   1255 {
   1256    code[0] = 0x00000006;
   1257    code[1] = 0x1c000000;
   1258 
   1259    emitPredicate(i);
   1260 
   1261    defId(i->def(0), 14); // new secret address
   1262    srcId(i->src(0), 20); // old secret address, should be 0 initially
   1263 
   1264    assert(i->src(0).getFile() == FILE_GPR);
   1265 
   1266    if (i->op == OP_EMIT)
   1267       code[0] |= 1 << 5;
   1268    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
   1269       code[0] |= 1 << 6;
   1270 
   1271    // vertex stream
   1272    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1273       code[1] |= 0xc000;
   1274       code[0] |= SDATA(i->src(1)).u32 << 26;
   1275    } else {
   1276       srcId(i->src(1), 26);
   1277    }
   1278 }
   1279 
   1280 void
   1281 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
   1282 {
   1283    if (i->encSize == 8) {
   1284       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
   1285    } else {
   1286       if (i->getInterpMode() == NV50_IR_INTERP_SC)
   1287          code[0] |= 0x80;
   1288       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
   1289    }
   1290 }
   1291 
   1292 void
   1293 CodeEmitterNVC0::emitINTERP(const Instruction *i)
   1294 {
   1295    const uint32_t base = i->getSrc(0)->reg.data.offset;
   1296 
   1297    if (i->encSize == 8) {
   1298       code[0] = 0x00000000;
   1299       code[1] = 0xc0000000 | (base & 0xffff);
   1300 
   1301       if (i->saturate)
   1302          code[0] |= 1 << 5;
   1303 
   1304       if (i->op == OP_PINTERP)
   1305          srcId(i->src(1), 26);
   1306       else
   1307          code[0] |= 0x3f << 26;
   1308 
   1309       srcId(i->src(0).getIndirect(0), 20);
   1310    } else {
   1311       assert(i->op == OP_PINTERP);
   1312       code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
   1313       srcId(i->src(1), 20);
   1314    }
   1315    emitInterpMode(i);
   1316 
   1317    emitPredicate(i);
   1318    defId(i->def(0), 14);
   1319 
   1320    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
   1321       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
   1322    else
   1323       code[1] |= 0x3f << 17;
   1324 }
   1325 
   1326 void
   1327 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
   1328 {
   1329    uint8_t val;
   1330 
   1331    switch (ty) {
   1332    case TYPE_U8:
   1333       val = 0x00;
   1334       break;
   1335    case TYPE_S8:
   1336       val = 0x20;
   1337       break;
   1338    case TYPE_F16:
   1339    case TYPE_U16:
   1340       val = 0x40;
   1341       break;
   1342    case TYPE_S16:
   1343       val = 0x60;
   1344       break;
   1345    case TYPE_F32:
   1346    case TYPE_U32:
   1347    case TYPE_S32:
   1348       val = 0x80;
   1349       break;
   1350    case TYPE_F64:
   1351    case TYPE_U64:
   1352    case TYPE_S64:
   1353       val = 0xa0;
   1354       break;
   1355    case TYPE_B128:
   1356       val = 0xc0;
   1357       break;
   1358    default:
   1359       val = 0x80;
   1360       assert(!"invalid type");
   1361       break;
   1362    }
   1363    code[0] |= val;
   1364 }
   1365 
   1366 void
   1367 CodeEmitterNVC0::emitCachingMode(CacheMode c)
   1368 {
   1369    uint32_t val;
   1370 
   1371    switch (c) {
   1372    case CACHE_CA:
   1373 // case CACHE_WB:
   1374       val = 0x000;
   1375       break;
   1376    case CACHE_CG:
   1377       val = 0x100;
   1378       break;
   1379    case CACHE_CS:
   1380       val = 0x200;
   1381       break;
   1382    case CACHE_CV:
   1383 // case CACHE_WT:
   1384       val = 0x300;
   1385       break;
   1386    default:
   1387       val = 0;
   1388       assert(!"invalid caching mode");
   1389       break;
   1390    }
   1391    code[0] |= val;
   1392 }
   1393 
   1394 void
   1395 CodeEmitterNVC0::emitSTORE(const Instruction *i)
   1396 {
   1397    uint32_t opc;
   1398 
   1399    switch (i->src(0).getFile()) {
   1400    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
   1401    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
   1402    case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
   1403    default:
   1404       assert(!"invalid memory file");
   1405       opc = 0;
   1406       break;
   1407    }
   1408    code[0] = 0x00000005;
   1409    code[1] = opc;
   1410 
   1411    setAddress16(i->src(0));
   1412    srcId(i->src(1), 14);
   1413    srcId(i->src(0).getIndirect(0), 20);
   1414 
   1415    emitPredicate(i);
   1416 
   1417    emitLoadStoreType(i->dType);
   1418    emitCachingMode(i->cache);
   1419 }
   1420 
   1421 void
   1422 CodeEmitterNVC0::emitLOAD(const Instruction *i)
   1423 {
   1424    uint32_t opc;
   1425 
   1426    code[0] = 0x00000005;
   1427 
   1428    switch (i->src(0).getFile()) {
   1429    case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
   1430    case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
   1431    case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
   1432    case FILE_MEMORY_CONST:
   1433       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
   1434          emitMOV(i); // not sure if this is any better
   1435          return;
   1436       }
   1437       opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
   1438       code[0] = 0x00000006 | (i->subOp << 8);
   1439       break;
   1440    default:
   1441       assert(!"invalid memory file");
   1442       opc = 0;
   1443       break;
   1444    }
   1445    code[1] = opc;
   1446 
   1447    defId(i->def(0), 14);
   1448 
   1449    setAddress16(i->src(0));
   1450    srcId(i->src(0).getIndirect(0), 20);
   1451 
   1452    emitPredicate(i);
   1453 
   1454    emitLoadStoreType(i->dType);
   1455    emitCachingMode(i->cache);
   1456 }
   1457 
   1458 uint8_t
   1459 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
   1460 {
   1461    switch (SDATA(ref).sv.sv) {
   1462    case SV_LANEID:        return 0x00;
   1463    case SV_PHYSID:        return 0x03;
   1464    case SV_VERTEX_COUNT:  return 0x10;
   1465    case SV_INVOCATION_ID: return 0x11;
   1466    case SV_YDIR:          return 0x12;
   1467    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
   1468    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
   1469    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
   1470    case SV_GRIDID:        return 0x2c;
   1471    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
   1472    case SV_LBASE:         return 0x34;
   1473    case SV_SBASE:         return 0x30;
   1474    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
   1475    default:
   1476       assert(!"no sreg for system value");
   1477       return 0;
   1478    }
   1479 }
   1480 
   1481 void
   1482 CodeEmitterNVC0::emitMOV(const Instruction *i)
   1483 {
   1484    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
   1485       uint8_t sr = getSRegEncoding(i->src(0));
   1486 
   1487       if (i->encSize == 8) {
   1488          code[0] = 0x00000004 | (sr << 26);
   1489          code[1] = 0x2c000000;
   1490       } else {
   1491          code[0] = 0x40000008 | (sr << 20);
   1492       }
   1493       defId(i->def(0), 14);
   1494 
   1495       emitPredicate(i);
   1496    } else
   1497    if (i->encSize == 8) {
   1498       uint64_t opc;
   1499 
   1500       if (i->src(0).getFile() == FILE_IMMEDIATE)
   1501          opc = HEX64(18000000, 000001e2);
   1502       else
   1503       if (i->src(0).getFile() == FILE_PREDICATE)
   1504          opc = HEX64(080e0000, 1c000004);
   1505       else
   1506          opc = HEX64(28000000, 00000004);
   1507 
   1508       opc |= i->lanes << 5;
   1509 
   1510       emitForm_B(i, opc);
   1511    } else {
   1512       uint32_t imm;
   1513 
   1514       if (i->src(0).getFile() == FILE_IMMEDIATE) {
   1515          imm = SDATA(i->src(0)).u32;
   1516          if (imm & 0xfff00000) {
   1517             assert(!(imm & 0x000fffff));
   1518             code[0] = 0x00000318 | imm;
   1519          } else {
   1520             assert(imm < 0x800 || ((int32_t)imm >= -0x800));
   1521             code[0] = 0x00000118 | (imm << 20);
   1522          }
   1523       } else {
   1524          code[0] = 0x0028;
   1525          emitShortSrc2(i->src(0));
   1526       }
   1527       defId(i->def(0), 14);
   1528 
   1529       emitPredicate(i);
   1530    }
   1531 }
   1532 
   1533 bool
   1534 CodeEmitterNVC0::emitInstruction(Instruction *insn)
   1535 {
   1536    unsigned int size = insn->encSize;
   1537 
   1538    if (writeIssueDelays && !(codeSize & 0x3f))
   1539       size += 8;
   1540 
   1541    if (!insn->encSize) {
   1542       ERROR("skipping unencodable instruction: "); insn->print();
   1543       return false;
   1544    } else
   1545    if (codeSize + size > codeSizeLimit) {
   1546       ERROR("code emitter output buffer too small\n");
   1547       return false;
   1548    }
   1549 
   1550    if (writeIssueDelays) {
   1551       if (!(codeSize & 0x3f)) {
   1552          code[0] = 0x00000007; // cf issue delay "instruction"
   1553          code[1] = 0x20000000;
   1554          code += 2;
   1555          codeSize += 8;
   1556       }
   1557       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
   1558       uint32_t *data = code - (id * 2 + 2);
   1559       if (id <= 2) {
   1560          data[0] |= insn->sched << (id * 8 + 4);
   1561       } else
   1562       if (id == 3) {
   1563          data[0] |= insn->sched << 28;
   1564          data[1] |= insn->sched >> 4;
   1565       } else {
   1566          data[1] |= insn->sched << ((id - 4) * 8 + 4);
   1567       }
   1568    }
   1569 
   1570    // assert that instructions with multiple defs don't corrupt registers
   1571    for (int d = 0; insn->defExists(d); ++d)
   1572       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
   1573 
   1574    switch (insn->op) {
   1575    case OP_MOV:
   1576    case OP_RDSV:
   1577       emitMOV(insn);
   1578       break;
   1579    case OP_NOP:
   1580       break;
   1581    case OP_LOAD:
   1582       emitLOAD(insn);
   1583       break;
   1584    case OP_STORE:
   1585       emitSTORE(insn);
   1586       break;
   1587    case OP_LINTERP:
   1588    case OP_PINTERP:
   1589       emitINTERP(insn);
   1590       break;
   1591    case OP_VFETCH:
   1592       emitVFETCH(insn);
   1593       break;
   1594    case OP_EXPORT:
   1595       emitEXPORT(insn);
   1596       break;
   1597    case OP_PFETCH:
   1598       emitPFETCH(insn);
   1599       break;
   1600    case OP_EMIT:
   1601    case OP_RESTART:
   1602       emitOUT(insn);
   1603       break;
   1604    case OP_ADD:
   1605    case OP_SUB:
   1606       if (isFloatType(insn->dType))
   1607          emitFADD(insn);
   1608       else
   1609          emitUADD(insn);
   1610       break;
   1611    case OP_MUL:
   1612       if (isFloatType(insn->dType))
   1613          emitFMUL(insn);
   1614       else
   1615          emitUMUL(insn);
   1616       break;
   1617    case OP_MAD:
   1618    case OP_FMA:
   1619       if (isFloatType(insn->dType))
   1620          emitFMAD(insn);
   1621       else
   1622          emitIMAD(insn);
   1623       break;
   1624    case OP_SAD:
   1625       emitISAD(insn);
   1626       break;
   1627    case OP_NOT:
   1628       emitNOT(insn);
   1629       break;
   1630    case OP_AND:
   1631       emitLogicOp(insn, 0);
   1632       break;
   1633    case OP_OR:
   1634       emitLogicOp(insn, 1);
   1635       break;
   1636    case OP_XOR:
   1637       emitLogicOp(insn, 2);
   1638       break;
   1639    case OP_SHL:
   1640    case OP_SHR:
   1641       emitShift(insn);
   1642       break;
   1643    case OP_SET:
   1644    case OP_SET_AND:
   1645    case OP_SET_OR:
   1646    case OP_SET_XOR:
   1647       emitSET(insn->asCmp());
   1648       break;
   1649    case OP_SELP:
   1650       emitSELP(insn);
   1651       break;
   1652    case OP_SLCT:
   1653       emitSLCT(insn->asCmp());
   1654       break;
   1655    case OP_MIN:
   1656    case OP_MAX:
   1657       emitMINMAX(insn);
   1658       break;
   1659    case OP_ABS:
   1660    case OP_NEG:
   1661    case OP_CEIL:
   1662    case OP_FLOOR:
   1663    case OP_TRUNC:
   1664    case OP_CVT:
   1665    case OP_SAT:
   1666       emitCVT(insn);
   1667       break;
   1668    case OP_RSQ:
   1669       emitSFnOp(insn, 5);
   1670       break;
   1671    case OP_RCP:
   1672       emitSFnOp(insn, 4);
   1673       break;
   1674    case OP_LG2:
   1675       emitSFnOp(insn, 3);
   1676       break;
   1677    case OP_EX2:
   1678       emitSFnOp(insn, 2);
   1679       break;
   1680    case OP_SIN:
   1681       emitSFnOp(insn, 1);
   1682       break;
   1683    case OP_COS:
   1684       emitSFnOp(insn, 0);
   1685       break;
   1686    case OP_PRESIN:
   1687    case OP_PREEX2:
   1688       emitPreOp(insn);
   1689       break;
   1690    case OP_TEX:
   1691    case OP_TXB:
   1692    case OP_TXL:
   1693    case OP_TXD:
   1694    case OP_TXF:
   1695       emitTEX(insn->asTex());
   1696       break;
   1697    case OP_TXQ:
   1698       emitTXQ(insn->asTex());
   1699       break;
   1700    case OP_TEXBAR:
   1701       emitTEXBAR(insn);
   1702       break;
   1703    case OP_BRA:
   1704    case OP_CALL:
   1705    case OP_PRERET:
   1706    case OP_RET:
   1707    case OP_DISCARD:
   1708    case OP_EXIT:
   1709    case OP_PRECONT:
   1710    case OP_CONT:
   1711    case OP_PREBREAK:
   1712    case OP_BREAK:
   1713    case OP_JOINAT:
   1714    case OP_BRKPT:
   1715    case OP_QUADON:
   1716    case OP_QUADPOP:
   1717       emitFlow(insn);
   1718       break;
   1719    case OP_QUADOP:
   1720       emitQUADOP(insn, insn->subOp, insn->lanes);
   1721       break;
   1722    case OP_DFDX:
   1723       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
   1724       break;
   1725    case OP_DFDY:
   1726       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
   1727       break;
   1728    case OP_POPCNT:
   1729       emitPOPC(insn);
   1730       break;
   1731    case OP_JOIN:
   1732       emitNOP(insn);
   1733       insn->join = 1;
   1734       break;
   1735    case OP_PHI:
   1736    case OP_UNION:
   1737    case OP_CONSTRAINT:
   1738       ERROR("operation should have been eliminated");
   1739       return false;
   1740    case OP_EXP:
   1741    case OP_LOG:
   1742    case OP_SQRT:
   1743    case OP_POW:
   1744       ERROR("operation should have been lowered\n");
   1745       return false;
   1746    default:
   1747       ERROR("unknow op\n");
   1748       return false;
   1749    }
   1750 
   1751    if (insn->join) {
   1752       code[0] |= 0x10;
   1753       assert(insn->encSize == 8);
   1754    }
   1755 
   1756    code += insn->encSize / 4;
   1757    codeSize += insn->encSize;
   1758    return true;
   1759 }
   1760 
   1761 uint32_t
   1762 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
   1763 {
   1764    const Target::OpInfo &info = targ->getOpInfo(i);
   1765 
   1766    if (writeIssueDelays || info.minEncSize == 8 || 1)
   1767       return 8;
   1768 
   1769    if (i->ftz || i->saturate || i->join)
   1770       return 8;
   1771    if (i->rnd != ROUND_N)
   1772       return 8;
   1773    if (i->predSrc >= 0 && i->op == OP_MAD)
   1774       return 8;
   1775 
   1776    if (i->op == OP_PINTERP) {
   1777       if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
   1778          return 8;
   1779    } else
   1780    if (i->op == OP_MOV && i->lanes != 0xf) {
   1781       return 8;
   1782    }
   1783 
   1784    for (int s = 0; i->srcExists(s); ++s) {
   1785       if (i->src(s).isIndirect(0))
   1786          return 8;
   1787 
   1788       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
   1789          if (SDATA(i->src(s)).offset >= 0x100)
   1790             return 8;
   1791          if (i->getSrc(s)->reg.fileIndex > 1 &&
   1792              i->getSrc(s)->reg.fileIndex != 16)
   1793              return 8;
   1794       } else
   1795       if (i->src(s).getFile() == FILE_IMMEDIATE) {
   1796          if (i->dType == TYPE_F32) {
   1797             if (SDATA(i->src(s)).u32 >= 0x100)
   1798                return 8;
   1799          } else {
   1800             if (SDATA(i->src(s)).u32 > 0xff)
   1801                return 8;
   1802          }
   1803       }
   1804 
   1805       if (i->op == OP_CVT)
   1806          continue;
   1807       if (i->src(s).mod != Modifier(0)) {
   1808          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
   1809             if (i->op != OP_RSQ)
   1810                return 8;
   1811          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
   1812             if (i->op != OP_ADD || s != 0)
   1813                return 8;
   1814       }
   1815    }
   1816 
   1817    return 4;
   1818 }
   1819 
   1820 // Simplified, erring on safe side.
   1821 class SchedDataCalculator : public Pass
   1822 {
   1823 public:
   1824    SchedDataCalculator(const Target *targ) : targ(targ) { }
   1825 
   1826 private:
   1827    struct RegScores
   1828    {
   1829       struct Resource {
   1830          int st[DATA_FILE_COUNT]; // LD to LD delay 3
   1831          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
   1832          int tex; // TEX to non-TEX delay 17 (0x11)
   1833          int sfu; // SFU to SFU delay 3 (except PRE-ops)
   1834          int imul; // integer MUL to MUL delay 3
   1835       } res;
   1836       struct ScoreData {
   1837          int r[64];
   1838          int p[8];
   1839          int c;
   1840       } rd, wr;
   1841       int base;
   1842 
   1843       void rebase(const int base)
   1844       {
   1845          const int delta = this->base - base;
   1846          if (!delta)
   1847             return;
   1848          this->base = 0;
   1849 
   1850          for (int i = 0; i < 64; ++i) {
   1851             rd.r[i] += delta;
   1852             wr.r[i] += delta;
   1853          }
   1854          for (int i = 0; i < 8; ++i) {
   1855             rd.p[i] += delta;
   1856             wr.p[i] += delta;
   1857          }
   1858          rd.c += delta;
   1859          wr.c += delta;
   1860 
   1861          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   1862             res.ld[f] += delta;
   1863             res.st[f] += delta;
   1864          }
   1865          res.sfu += delta;
   1866          res.imul += delta;
   1867          res.tex += delta;
   1868       }
   1869       void wipe()
   1870       {
   1871          memset(&rd, 0, sizeof(rd));
   1872          memset(&wr, 0, sizeof(wr));
   1873          memset(&res, 0, sizeof(res));
   1874       }
   1875       int getLatest(const ScoreData& d) const
   1876       {
   1877          int max = 0;
   1878          for (int i = 0; i < 64; ++i)
   1879             if (d.r[i] > max)
   1880                max = d.r[i];
   1881          for (int i = 0; i < 8; ++i)
   1882             if (d.p[i] > max)
   1883                max = d.p[i];
   1884          if (d.c > max)
   1885             max = d.c;
   1886          return max;
   1887       }
   1888       inline int getLatestRd() const
   1889       {
   1890          return getLatest(rd);
   1891       }
   1892       inline int getLatestWr() const
   1893       {
   1894          return getLatest(wr);
   1895       }
   1896       inline int getLatest() const
   1897       {
   1898          const int a = getLatestRd();
   1899          const int b = getLatestWr();
   1900 
   1901          int max = MAX2(a, b);
   1902          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   1903             max = MAX2(res.ld[f], max);
   1904             max = MAX2(res.st[f], max);
   1905          }
   1906          max = MAX2(res.sfu, max);
   1907          max = MAX2(res.imul, max);
   1908          max = MAX2(res.tex, max);
   1909          return max;
   1910       }
   1911       void setMax(const RegScores *that)
   1912       {
   1913          for (int i = 0; i < 64; ++i) {
   1914             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
   1915             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
   1916          }
   1917          for (int i = 0; i < 8; ++i) {
   1918             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
   1919             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
   1920          }
   1921          rd.c = MAX2(rd.c, that->rd.c);
   1922          wr.c = MAX2(wr.c, that->wr.c);
   1923 
   1924          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
   1925             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
   1926             res.st[f] = MAX2(res.st[f], that->res.st[f]);
   1927          }
   1928          res.sfu = MAX2(res.sfu, that->res.sfu);
   1929          res.imul = MAX2(res.imul, that->res.imul);
   1930          res.tex = MAX2(res.tex, that->res.tex);
   1931       }
   1932       void print(int cycle)
   1933       {
   1934          for (int i = 0; i < 64; ++i) {
   1935             if (rd.r[i] > cycle)
   1936                INFO("rd $r%i @ %i\n", i, rd.r[i]);
   1937             if (wr.r[i] > cycle)
   1938                INFO("wr $r%i @ %i\n", i, wr.r[i]);
   1939          }
   1940          for (int i = 0; i < 8; ++i) {
   1941             if (rd.p[i] > cycle)
   1942                INFO("rd $p%i @ %i\n", i, rd.p[i]);
   1943             if (wr.p[i] > cycle)
   1944                INFO("wr $p%i @ %i\n", i, wr.p[i]);
   1945          }
   1946          if (rd.c > cycle)
   1947             INFO("rd $c @ %i\n", rd.c);
   1948          if (wr.c > cycle)
   1949             INFO("wr $c @ %i\n", wr.c);
   1950          if (res.sfu > cycle)
   1951             INFO("sfu @ %i\n", res.sfu);
   1952          if (res.imul > cycle)
   1953             INFO("imul @ %i\n", res.imul);
   1954          if (res.tex > cycle)
   1955             INFO("tex @ %i\n", res.tex);
   1956       }
   1957    };
   1958 
   // Scoreboard of the basic block being visited; visit(BasicBlock *) points
   // it at the matching entry of scoreBoards.
   RegScores *score; // for current BB
   // One scoreboard per CFG node, looked up by BasicBlock id; resized and
   // wiped in visit(Function *).
   std::vector<RegScores> scoreBoards;
   // NOTE(review): visit(BasicBlock *) declares a local 'cycle' that hides
   // this member and passes it around explicitly; confirm whether the member
   // is still needed.
   int cycle;
   // Scheduling byte (insn->sched) of the instruction handled last by
   // setDelay(); 0x04 is the value setDelay() uses for dual-issue.
   int prevData;
   // Opcode remembered by setDelay(); when it is OP_EXPORT the next
   // instruction's delay is tagged with 0x40 instead of 0x20.
   operation prevOp;

   // Target queried for canDualIssue() and getLatency().
   const Target *targ;

   // Sizes and clears scoreBoards for the function's CFG.
   bool visit(Function *);
   // Computes the sched byte of every instruction in the block.
   bool visit(BasicBlock *);

   // Records the effects of issuing an instruction at 'cycle' in the
   // current scoreboard.
   void commitInsn(const Instruction *, int cycle);
   // Returns the issue delay (minus one, clamped) an instruction needs if it
   // were considered at 'cycle'.
   int calcDelay(const Instruction *, int cycle) const;
   // Stores the sched byte for an instruction given the delay computed for
   // the instruction that follows it.
   void setDelay(Instruction *, int delay, Instruction *next);

   // recordRd() updates the 'wr' scores, recordWr() updates the 'rd' scores.
   void recordRd(const Value *, const int ready);
   void recordWr(const Value *, const int ready);
   // checkRd()/checkWr() raise 'delay' if the 'rd'/'wr' score of the value
   // lies after 'cycle'.
   void checkRd(const Value *, int cycle, int& delay) const;
   void checkWr(const Value *, int cycle, int& delay) const;

   // Decodes the number of cycles implied by an instruction's sched byte.
   int getCycles(const Instruction *, int origDelay) const;
   1980 };
   1981 
   1982 void
   1983 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
   1984 {
   1985    if (insn->op == OP_EXIT)
   1986       delay = MAX2(delay, 14);
   1987 
   1988    if (insn->op == OP_TEXBAR) {
   1989       // TODO: except if results not used before EXIT
   1990       insn->sched = 0xc2;
   1991    } else
   1992    if (insn->op == OP_JOIN || insn->join) {
   1993       insn->sched = 0x00;
   1994    } else
   1995    if (delay >= 0 || prevData == 0x04 ||
   1996        !next || !targ->canDualIssue(insn, next)) {
   1997       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
   1998       if (prevOp == OP_EXPORT)
   1999          insn->sched |= 0x40;
   2000       else
   2001          insn->sched |= 0x20;
   2002    } else {
   2003       insn->sched = 0x04; // dual-issue
   2004    }
   2005 
   2006    if (prevData != 0x04 || prevOp != OP_EXPORT)
   2007       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
   2008          prevOp = insn->op;
   2009 
   2010    prevData = insn->sched;
   2011 }
   2012 
   2013 int
   2014 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
   2015 {
   2016    if (insn->sched & 0x80) {
   2017       int c = (insn->sched & 0x0f) * 2 + 1;
   2018       if (insn->op == OP_TEXBAR && origDelay > 0)
   2019          c += origDelay;
   2020       return c;
   2021    }
   2022    if (insn->sched & 0x60)
   2023       return (insn->sched & 0x1f) + 1;
   2024    return (insn->sched == 0x04) ? 0 : 32;
   2025 }
   2026 
   2027 bool
   2028 SchedDataCalculator::visit(Function *func)
   2029 {
   2030    scoreBoards.resize(func->cfg.getSize());
   2031    for (size_t i = 0; i < scoreBoards.size(); ++i)
   2032       scoreBoards[i].wipe();
   2033    return true;
   2034 }
   2035 
// Compute the scheduling byte (insn->sched) for every instruction of 'bb'.
//
// The block's scoreboard is first seeded from its non-back-edge
// predecessors, then each instruction is committed to the scoreboard and
// given a delay derived from the instruction that follows it. The last
// instruction's delay is derived from the successor blocks. Always returns
// true.
bool
SchedDataCalculator::visit(BasicBlock *bb)
{
   Instruction *insn;
   Instruction *next = NULL;

   // Block-relative cycle counter. Note that this local hides the class
   // member of the same name.
   int cycle = 0;

   prevData = 0x00;
   prevOp = OP_NOP;
   score = &scoreBoards.at(bb->getId());

   // Seed prevData/prevOp from the exit instructions of the predecessors and
   // merge the scoreboards of all predecessors reached via non-back edges.
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      BasicBlock *in = BasicBlock::get(ei.getNode());
      if (in->getExit()) {
         // once a predecessor ended in dual-issue (0x04), keep that value
         if (prevData != 0x04)
            prevData = in->getExit()->sched;
         prevOp = in->getExit()->op;
      }
      if (ei.getType() != Graph::Edge::BACK)
         score->setMax(&scoreBoards.at(in->getId()));
      // back branches will wait until all target dependencies are satisfied
   }
   // with several predecessors the previous opcode is ambiguous
   if (bb->cfg.incidentCount() > 1)
      prevOp = OP_NOP;

#ifdef NVC0_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   // Handle every instruction that has a successor inside this block: the
   // delay stored on 'insn' is the one required before 'next' can issue.
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getCycles(insn, delay);

#ifdef NVC0_DEBUG_SCHED_DATA
      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
      insn->print();
      next->print();
#endif
   }
   // empty block: nothing left to schedule
   if (!insn)
      return true;
   // 'insn' is now the last instruction of the block
   commitInsn(insn, cycle);

   // -1 is the smallest value calcDelay() can return (no wait required)
   int bbDelay = -1;

   // The delay of the last instruction is the maximum required by any of
   // the successor blocks.
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // only test the first instruction of the outgoing block
         next = out->getEntry();
         if (next)
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
      } else {
         // wait until all dependencies are satisfied
         const int regsFree = score->getLatest();
         next = out->getFirst();
         // walk the target block's instructions, advancing a simulated
         // cycle count by their sched bytes, until the latest score of this
         // block's scoreboard has been reached
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
            c += getCycles(next, bbDelay);
         }
         // never offer a dual-issue partner across a back edge
         next = NULL;
      }
   }
   // a dual-issue partner is only passed on when there is exactly one
   // successor
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getCycles(insn, bbDelay);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}
   2114 
   2115 #define NVE4_MAX_ISSUE_DELAY 0x1f
   2116 int
   2117 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
   2118 {
   2119    int delay = 0, ready = cycle;
   2120 
   2121    for (int s = 0; insn->srcExists(s); ++s)
   2122       checkRd(insn->getSrc(s), cycle, delay);
   2123    // WAR & WAW don't seem to matter
   2124    // for (int s = 0; insn->srcExists(s); ++s)
   2125    //   recordRd(insn->getSrc(s), cycle);
   2126 
   2127    switch (Target::getOpClass(insn->op)) {
   2128    case OPCLASS_SFU:
   2129       ready = score->res.sfu;
   2130       break;
   2131    case OPCLASS_ARITH:
   2132       if (insn->op == OP_MUL && !isFloatType(insn->dType))
   2133          ready = score->res.imul;
   2134       break;
   2135    case OPCLASS_TEXTURE:
   2136       ready = score->res.tex;
   2137       break;
   2138    case OPCLASS_LOAD:
   2139       ready = score->res.ld[insn->src(0).getFile()];
   2140       break;
   2141    case OPCLASS_STORE:
   2142       ready = score->res.st[insn->src(0).getFile()];
   2143       break;
   2144    default:
   2145       break;
   2146    }
   2147    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
   2148       ready = MAX2(ready, score->res.tex);
   2149 
   2150    delay = MAX2(delay, ready - cycle);
   2151 
   2152    // if can issue next cycle, delay is 0, not 1
   2153    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
   2154 }
   2155 
   2156 void
   2157 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
   2158 {
   2159    const int ready = cycle + targ->getLatency(insn);
   2160 
   2161    for (int d = 0; insn->defExists(d); ++d)
   2162       recordWr(insn->getDef(d), ready);
   2163    // WAR & WAW don't seem to matter
   2164    // for (int s = 0; insn->srcExists(s); ++s)
   2165    //   recordRd(insn->getSrc(s), cycle);
   2166 
   2167    switch (Target::getOpClass(insn->op)) {
   2168    case OPCLASS_SFU:
   2169       score->res.sfu = cycle + 4;
   2170       break;
   2171    case OPCLASS_ARITH:
   2172       if (insn->op == OP_MUL && !isFloatType(insn->dType))
   2173          score->res.imul = cycle + 4;
   2174       break;
   2175    case OPCLASS_TEXTURE:
   2176       score->res.tex = cycle + 18;
   2177       break;
   2178    case OPCLASS_LOAD:
   2179       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
   2180          break;
   2181       score->res.ld[insn->src(0).getFile()] = cycle + 4;
   2182       score->res.st[insn->src(0).getFile()] = ready;
   2183       break;
   2184    case OPCLASS_STORE:
   2185       score->res.st[insn->src(0).getFile()] = cycle + 4;
   2186       score->res.ld[insn->src(0).getFile()] = ready;
   2187       break;
   2188    case OPCLASS_OTHER:
   2189       if (insn->op == OP_TEXBAR)
   2190          score->res.tex = cycle;
   2191       break;
   2192    default:
   2193       break;
   2194    }
   2195 
   2196 #ifdef NVC0_DEBUG_SCHED_DATA
   2197    score->print(cycle);
   2198 #endif
   2199 }
   2200 
   2201 void
   2202 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
   2203 {
   2204    int ready = cycle;
   2205    int a, b;
   2206 
   2207    switch (v->reg.file) {
   2208    case FILE_GPR:
   2209       a = v->reg.data.id;
   2210       b = a + v->reg.size / 4;
   2211       for (int r = a; r < b; ++r)
   2212          ready = MAX2(ready, score->rd.r[r]);
   2213       break;
   2214    case FILE_PREDICATE:
   2215       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
   2216       break;
   2217    case FILE_FLAGS:
   2218       ready = MAX2(ready, score->rd.c);
   2219       break;
   2220    case FILE_SHADER_INPUT:
   2221    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
   2222    case FILE_MEMORY_LOCAL:
   2223    case FILE_MEMORY_CONST:
   2224    case FILE_MEMORY_SHARED:
   2225    case FILE_MEMORY_GLOBAL:
   2226    case FILE_SYSTEM_VALUE:
   2227       // TODO: any restrictions here ?
   2228       break;
   2229    case FILE_IMMEDIATE:
   2230       break;
   2231    default:
   2232       assert(0);
   2233       break;
   2234    }
   2235    if (cycle < ready)
   2236       delay = MAX2(delay, ready - cycle);
   2237 }
   2238 
   2239 void
   2240 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
   2241 {
   2242    int ready = cycle;
   2243    int a, b;
   2244 
   2245    switch (v->reg.file) {
   2246    case FILE_GPR:
   2247       a = v->reg.data.id;
   2248       b = a + v->reg.size / 4;
   2249       for (int r = a; r < b; ++r)
   2250          ready = MAX2(ready, score->wr.r[r]);
   2251       break;
   2252    case FILE_PREDICATE:
   2253       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
   2254       break;
   2255    default:
   2256       assert(v->reg.file == FILE_FLAGS);
   2257       ready = MAX2(ready, score->wr.c);
   2258       break;
   2259    }
   2260    if (cycle < ready)
   2261       delay = MAX2(delay, ready - cycle);
   2262 }
   2263 
   2264 void
   2265 SchedDataCalculator::recordWr(const Value *v, const int ready)
   2266 {
   2267    int a = v->reg.data.id;
   2268 
   2269    if (v->reg.file == FILE_GPR) {
   2270       int b = a + v->reg.size / 4;
   2271       for (int r = a; r < b; ++r)
   2272          score->rd.r[r] = ready;
   2273    } else
   2274    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
   2275    if (v->reg.file == FILE_PREDICATE) {
   2276       score->rd.p[a] = ready + 4;
   2277    } else {
   2278       assert(v->reg.file == FILE_FLAGS);
   2279       score->rd.c = ready + 4;
   2280    }
   2281 }
   2282 
   2283 void
   2284 SchedDataCalculator::recordRd(const Value *v, const int ready)
   2285 {
   2286    int a = v->reg.data.id;
   2287 
   2288    if (v->reg.file == FILE_GPR) {
   2289       int b = a + v->reg.size / 4;
   2290       for (int r = a; r < b; ++r)
   2291          score->wr.r[r] = ready;
   2292    } else
   2293    if (v->reg.file == FILE_PREDICATE) {
   2294       score->wr.p[a] = ready;
   2295    } else
   2296    if (v->reg.file == FILE_FLAGS) {
   2297       score->wr.c = ready;
   2298    }
   2299 }
   2300 
   2301 void
   2302 CodeEmitterNVC0::prepareEmission(Function *func)
   2303 {
   2304    const Target *targ = func->getProgram()->getTarget();
   2305 
   2306    CodeEmitter::prepareEmission(func);
   2307 
   2308    if (targ->hasSWSched) {
   2309       SchedDataCalculator sched(targ);
   2310       sched.run(func, true, true);
   2311    }
   2312 }
   2313 
   2314 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   2315    : CodeEmitter(target),
   2316      writeIssueDelays(target->hasSWSched)
   2317 {
   2318    code = NULL;
   2319    codeSize = codeSizeLimit = 0;
   2320    relocInfo = NULL;
   2321 }
   2322 
   2323 CodeEmitter *
   2324 TargetNVC0::getCodeEmitter(Program::Type type)
   2325 {
   2326    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
   2327    emit->setProgramType(type);
   2328    return emit;
   2329 }
   2330 
   2331 } // namespace nv50_ir
   2332