Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2014 Red Hat Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * Authors: Ben Skeggs <bskeggs (at) redhat.com>
     23  */
     24 
     25 #include "codegen/nv50_ir_target_gm107.h"
     26 
     27 //#define GM107_DEBUG_SCHED_DATA
     28 
     29 namespace nv50_ir {
     30 
     31 class CodeEmitterGM107 : public CodeEmitter
     32 {
     33 public:
     34    CodeEmitterGM107(const TargetGM107 *);
     35 
     36    virtual bool emitInstruction(Instruction *);
     37    virtual uint32_t getMinEncodingSize(const Instruction *) const;
     38 
     39    virtual void prepareEmission(Program *);
     40    virtual void prepareEmission(Function *);
     41 
     42    inline void setProgramType(Program::Type pType) { progType = pType; }
     43 
     44 private:
     45    const TargetGM107 *targGM107;
     46 
     47    Program::Type progType;
     48 
     49    const Instruction *insn;
     50    const bool writeIssueDelays;
     51    uint32_t *data;
     52 
     53 private:
     54    inline void emitField(uint32_t *, int, int, uint32_t);
     55    inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
     56 
     57    inline void emitInsn(uint32_t, bool);
     58    inline void emitInsn(uint32_t o) { emitInsn(o, true); }
     59    inline void emitPred();
     60    inline void emitGPR(int, const Value *);
     61    inline void emitGPR(int pos) {
     62       emitGPR(pos, (const Value *)NULL);
     63    }
     64    inline void emitGPR(int pos, const ValueRef &ref) {
     65       emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
     66    }
     67    inline void emitGPR(int pos, const ValueRef *ref) {
     68       emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
     69    }
     70    inline void emitGPR(int pos, const ValueDef &def) {
     71       emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
     72    }
     73    inline void emitSYS(int, const Value *);
     74    inline void emitSYS(int pos, const ValueRef &ref) {
     75       emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
     76    }
     77    inline void emitPRED(int, const Value *);
     78    inline void emitPRED(int pos) {
     79       emitPRED(pos, (const Value *)NULL);
     80    }
     81    inline void emitPRED(int pos, const ValueRef &ref) {
     82       emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
     83    }
     84    inline void emitPRED(int pos, const ValueDef &def) {
     85       emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
     86    }
     87    inline void emitADDR(int, int, int, int, const ValueRef &);
     88    inline void emitCBUF(int, int, int, int, int, const ValueRef &);
     89    inline bool longIMMD(const ValueRef &);
     90    inline void emitIMMD(int, int, const ValueRef &);
     91 
     92    void emitCond3(int, CondCode);
     93    void emitCond4(int, CondCode);
     94    void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
     95    inline void emitO(int);
     96    inline void emitP(int);
     97    inline void emitSAT(int);
     98    inline void emitCC(int);
     99    inline void emitX(int);
    100    inline void emitABS(int, const ValueRef &);
    101    inline void emitNEG(int, const ValueRef &);
    102    inline void emitNEG2(int, const ValueRef &, const ValueRef &);
    103    inline void emitFMZ(int, int);
    104    inline void emitRND(int, RoundMode, int);
    105    inline void emitRND(int pos) {
    106       emitRND(pos, insn->rnd, -1);
    107    }
    108    inline void emitPDIV(int);
    109    inline void emitINV(int, const ValueRef &);
    110 
    111    void emitEXIT();
    112    void emitBRA();
    113    void emitCAL();
    114    void emitPCNT();
    115    void emitCONT();
    116    void emitPBK();
    117    void emitBRK();
    118    void emitPRET();
    119    void emitRET();
    120    void emitSSY();
    121    void emitSYNC();
    122    void emitSAM();
    123    void emitRAM();
    124 
    125    void emitMOV();
    126    void emitS2R();
    127    void emitF2F();
    128    void emitF2I();
    129    void emitI2F();
    130    void emitI2I();
    131    void emitSEL();
    132    void emitSHFL();
    133 
    134    void emitDADD();
    135    void emitDMUL();
    136    void emitDFMA();
    137    void emitDMNMX();
    138    void emitDSET();
    139    void emitDSETP();
    140 
    141    void emitFADD();
    142    void emitFMUL();
    143    void emitFFMA();
    144    void emitMUFU();
    145    void emitFMNMX();
    146    void emitRRO();
    147    void emitFCMP();
    148    void emitFSET();
    149    void emitFSETP();
    150    void emitFSWZADD();
    151 
    152    void emitLOP();
    153    void emitNOT();
    154    void emitIADD();
    155    void emitIMUL();
    156    void emitIMAD();
    157    void emitISCADD();
    158    void emitIMNMX();
    159    void emitICMP();
    160    void emitISET();
    161    void emitISETP();
    162    void emitSHL();
    163    void emitSHR();
    164    void emitPOPC();
    165    void emitBFI();
    166    void emitBFE();
    167    void emitFLO();
    168 
    169    void emitLDSTs(int, DataType);
    170    void emitLDSTc(int);
    171    void emitLDC();
    172    void emitLDL();
    173    void emitLDS();
    174    void emitLD();
    175    void emitSTL();
    176    void emitSTS();
    177    void emitST();
    178    void emitALD();
    179    void emitAST();
    180    void emitISBERD();
    181    void emitAL2P();
    182    void emitIPA();
    183    void emitATOM();
    184    void emitATOMS();
    185    void emitRED();
    186    void emitCCTL();
    187 
    188    void emitPIXLD();
    189 
    190    void emitTEXs(int);
    191    void emitTEX();
    192    void emitTLD();
    193    void emitTLD4();
    194    void emitTXD();
    195    void emitTXQ();
    196    void emitTMML();
    197    void emitDEPBAR();
    198 
    199    void emitNOP();
    200    void emitKIL();
    201    void emitOUT();
    202 
    203    void emitBAR();
    204    void emitMEMBAR();
    205 
    206    void emitVOTE();
    207 
    208    void emitSUTarget();
    209    void emitSUHandle(const int s);
    210    void emitSUSTx();
    211    void emitSULDx();
    212    void emitSUREDx();
    213 };
    214 
    215 /*******************************************************************************
    216  * general instruction layout/fields
    217  ******************************************************************************/
    218 
    219 void
    220 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
    221 {
    222    if (b >= 0) {
    223       uint32_t m = ((1ULL << s) - 1);
    224       uint64_t d = (uint64_t)(v & m) << b;
    225       assert(!(v & ~m) || (v & ~m) == ~m);
    226       data[1] |= d >> 32;
    227       data[0] |= d;
    228    }
    229 }
    230 
    231 void
    232 CodeEmitterGM107::emitPred()
    233 {
    234    if (insn->predSrc >= 0) {
    235       emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
    236       emitField(19, 1, insn->cc == CC_NOT_P);
    237    } else {
    238       emitField(16, 3, 7);
    239    }
    240 }
    241 
    242 void
    243 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
    244 {
    245    code[0] = 0x00000000;
    246    code[1] = hi;
    247    if (pred)
    248       emitPred();
    249 }
    250 
    251 void
    252 CodeEmitterGM107::emitGPR(int pos, const Value *val)
    253 {
    254    emitField(pos, 8, val ? val->reg.data.id : 255);
    255 }
    256 
    257 void
    258 CodeEmitterGM107::emitSYS(int pos, const Value *val)
    259 {
    260    int id = val ? val->reg.data.id : -1;
    261 
    262    switch (id) {
    263    case SV_LANEID         : id = 0x00; break;
    264    case SV_VERTEX_COUNT   : id = 0x10; break;
    265    case SV_INVOCATION_ID  : id = 0x11; break;
    266    case SV_THREAD_KILL    : id = 0x13; break;
    267    case SV_INVOCATION_INFO: id = 0x1d; break;
    268    case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
    269    case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
    270    default:
    271       assert(!"invalid system value");
    272       id = 0;
    273       break;
    274    }
    275 
    276    emitField(pos, 8, id);
    277 }
    278 
    279 void
    280 CodeEmitterGM107::emitPRED(int pos, const Value *val)
    281 {
    282    emitField(pos, 3, val ? val->reg.data.id : 7);
    283 }
    284 
    285 void
    286 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
    287                            const ValueRef &ref)
    288 {
    289    const Value *v = ref.get();
    290    assert(!(v->reg.data.offset & ((1 << shr) - 1)));
    291    if (gpr >= 0)
    292       emitGPR(gpr, ref.getIndirect(0));
    293    emitField(off, len, v->reg.data.offset >> shr);
    294 }
    295 
    296 void
    297 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
    298                            const ValueRef &ref)
    299 {
    300    const Value *v = ref.get();
    301    const Symbol *s = v->asSym();
    302 
    303    assert(!(s->reg.data.offset & ((1 << shr) - 1)));
    304 
    305    emitField(buf,  5, v->reg.fileIndex);
    306    if (gpr >= 0)
    307       emitGPR(gpr, ref.getIndirect(0));
    308    emitField(off, 16, s->reg.data.offset >> shr);
    309 }
    310 
    311 bool
    312 CodeEmitterGM107::longIMMD(const ValueRef &ref)
    313 {
    314    if (ref.getFile() == FILE_IMMEDIATE) {
    315       const ImmediateValue *imm = ref.get()->asImm();
    316       if (isFloatType(insn->sType)) {
    317          if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
    318             return true;
    319       } else {
    320          if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
    321              (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
    322             return true;
    323       }
    324    }
    325    return false;
    326 }
    327 
    328 void
    329 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
    330 {
    331    const ImmediateValue *imm = ref.get()->asImm();
    332    uint32_t val = imm->reg.data.u32;
    333 
    334    if (len == 19) {
    335       if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
    336          assert(!(val & 0x00000fff));
    337          val >>= 12;
    338       } else if (insn->sType == TYPE_F64) {
    339          assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
    340          val = imm->reg.data.u64 >> 44;
    341       }
    342       assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
    343       emitField( 56,   1, (val & 0x80000) >> 19);
    344       emitField(pos, len, (val & 0x7ffff));
    345    } else {
    346       emitField(pos, len, val);
    347    }
    348 }
    349 
    350 /*******************************************************************************
    351  * modifiers
    352  ******************************************************************************/
    353 
    354 void
    355 CodeEmitterGM107::emitCond3(int pos, CondCode code)
    356 {
    357    int data = 0;
    358 
    359    switch (code) {
    360    case CC_FL : data = 0x00; break;
    361    case CC_LTU:
    362    case CC_LT : data = 0x01; break;
    363    case CC_EQU:
    364    case CC_EQ : data = 0x02; break;
    365    case CC_LEU:
    366    case CC_LE : data = 0x03; break;
    367    case CC_GTU:
    368    case CC_GT : data = 0x04; break;
    369    case CC_NEU:
    370    case CC_NE : data = 0x05; break;
    371    case CC_GEU:
    372    case CC_GE : data = 0x06; break;
    373    case CC_TR : data = 0x07; break;
    374    default:
    375       assert(!"invalid cond3");
    376       break;
    377    }
    378 
    379    emitField(pos, 3, data);
    380 }
    381 
    382 void
    383 CodeEmitterGM107::emitCond4(int pos, CondCode code)
    384 {
    385    int data = 0;
    386 
    387    switch (code) {
    388    case CC_FL: data = 0x00; break;
    389    case CC_LT: data = 0x01; break;
    390    case CC_EQ: data = 0x02; break;
    391    case CC_LE: data = 0x03; break;
    392    case CC_GT: data = 0x04; break;
    393    case CC_NE: data = 0x05; break;
    394    case CC_GE: data = 0x06; break;
    395 //   case CC_NUM: data = 0x07; break;
    396 //   case CC_NAN: data = 0x08; break;
    397    case CC_LTU: data = 0x09; break;
    398    case CC_EQU: data = 0x0a; break;
    399    case CC_LEU: data = 0x0b; break;
    400    case CC_GTU: data = 0x0c; break;
    401    case CC_NEU: data = 0x0d; break;
    402    case CC_GEU: data = 0x0e; break;
    403    case CC_TR:  data = 0x0f; break;
    404    default:
    405       assert(!"invalid cond4");
    406       break;
    407    }
    408 
    409    emitField(pos, 4, data);
    410 }
    411 
    412 void
    413 CodeEmitterGM107::emitO(int pos)
    414 {
    415    emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
    416 }
    417 
    418 void
    419 CodeEmitterGM107::emitP(int pos)
    420 {
    421    emitField(pos, 1, insn->perPatch);
    422 }
    423 
    424 void
    425 CodeEmitterGM107::emitSAT(int pos)
    426 {
    427    emitField(pos, 1, insn->saturate);
    428 }
    429 
    430 void
    431 CodeEmitterGM107::emitCC(int pos)
    432 {
    433    emitField(pos, 1, insn->flagsDef >= 0);
    434 }
    435 
    436 void
    437 CodeEmitterGM107::emitX(int pos)
    438 {
    439    emitField(pos, 1, insn->flagsSrc >= 0);
    440 }
    441 
    442 void
    443 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
    444 {
    445    emitField(pos, 1, ref.mod.abs());
    446 }
    447 
    448 void
    449 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
    450 {
    451    emitField(pos, 1, ref.mod.neg());
    452 }
    453 
    454 void
    455 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
    456 {
    457    emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
    458 }
    459 
    460 void
    461 CodeEmitterGM107::emitFMZ(int pos, int len)
    462 {
    463    emitField(pos, len, insn->dnz << 1 | insn->ftz);
    464 }
    465 
    466 void
    467 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
    468 {
    469    int rm = 0, ri = 0;
    470    switch (rnd) {
    471    case ROUND_NI: ri = 1;
    472    case ROUND_N : rm = 0; break;
    473    case ROUND_MI: ri = 1;
    474    case ROUND_M : rm = 1; break;
    475    case ROUND_PI: ri = 1;
    476    case ROUND_P : rm = 2; break;
    477    case ROUND_ZI: ri = 1;
    478    case ROUND_Z : rm = 3; break;
    479    default:
    480       assert(!"invalid round mode");
    481       break;
    482    }
    483    emitField(rip, 1, ri);
    484    emitField(rmp, 2, rm);
    485 }
    486 
    487 void
    488 CodeEmitterGM107::emitPDIV(int pos)
    489 {
    490    assert(insn->postFactor >= -3 && insn->postFactor <= 3);
    491    if (insn->postFactor > 0)
    492       emitField(pos, 3, 7 - insn->postFactor);
    493    else
    494       emitField(pos, 3, 0 - insn->postFactor);
    495 }
    496 
    497 void
    498 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
    499 {
    500    emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
    501 }
    502 
    503 /*******************************************************************************
    504  * control flow
    505  ******************************************************************************/
    506 
    507 void
    508 CodeEmitterGM107::emitEXIT()
    509 {
    510    emitInsn (0xe3000000);
    511    emitCond5(0x00, CC_TR);
    512 }
    513 
    514 void
    515 CodeEmitterGM107::emitBRA()
    516 {
    517    const FlowInstruction *insn = this->insn->asFlow();
    518    int gpr = -1;
    519 
    520    if (insn->indirect) {
    521       if (insn->absolute)
    522          emitInsn(0xe2000000); // JMX
    523       else
    524          emitInsn(0xe2500000); // BRX
    525       gpr = 0x08;
    526    } else {
    527       if (insn->absolute)
    528          emitInsn(0xe2100000); // JMP
    529       else
    530          emitInsn(0xe2400000); // BRA
    531       emitField(0x07, 1, insn->allWarp);
    532    }
    533 
    534    emitField(0x06, 1, insn->limit);
    535    emitCond5(0x00, CC_TR);
    536 
    537    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    538       int32_t pos = insn->target.bb->binPos;
    539       if (writeIssueDelays && !(pos & 0x1f))
    540          pos += 8;
    541       if (!insn->absolute)
    542          emitField(0x14, 24, pos - (codeSize + 8));
    543       else
    544          emitField(0x14, 32, pos);
    545    } else {
    546       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
    547       emitField(0x05, 1, 1);
    548    }
    549 }
    550 
    551 void
    552 CodeEmitterGM107::emitCAL()
    553 {
    554    const FlowInstruction *insn = this->insn->asFlow();
    555 
    556    if (insn->absolute) {
    557       emitInsn(0xe2200000, 0); // JCAL
    558    } else {
    559       emitInsn(0xe2600000, 0); // CAL
    560    }
    561 
    562    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    563       if (!insn->absolute)
    564          emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
    565       else {
    566          if (insn->builtin) {
    567             int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
    568             addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
    569             addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
    570          } else {
    571             emitField(0x14, 32, insn->target.bb->binPos);
    572          }
    573       }
    574    } else {
    575       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
    576       emitField(0x05, 1, 1);
    577    }
    578 }
    579 
    580 void
    581 CodeEmitterGM107::emitPCNT()
    582 {
    583    const FlowInstruction *insn = this->insn->asFlow();
    584 
    585    emitInsn(0xe2b00000, 0);
    586 
    587    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    588       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
    589    } else {
    590       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
    591       emitField(0x05, 1, 1);
    592    }
    593 }
    594 
    595 void
    596 CodeEmitterGM107::emitCONT()
    597 {
    598    emitInsn (0xe3500000);
    599    emitCond5(0x00, CC_TR);
    600 }
    601 
    602 void
    603 CodeEmitterGM107::emitPBK()
    604 {
    605    const FlowInstruction *insn = this->insn->asFlow();
    606 
    607    emitInsn(0xe2a00000, 0);
    608 
    609    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    610       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
    611    } else {
    612       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
    613       emitField(0x05, 1, 1);
    614    }
    615 }
    616 
    617 void
    618 CodeEmitterGM107::emitBRK()
    619 {
    620    emitInsn (0xe3400000);
    621    emitCond5(0x00, CC_TR);
    622 }
    623 
    624 void
    625 CodeEmitterGM107::emitPRET()
    626 {
    627    const FlowInstruction *insn = this->insn->asFlow();
    628 
    629    emitInsn(0xe2700000, 0);
    630 
    631    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    632       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
    633    } else {
    634       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
    635       emitField(0x05, 1, 1);
    636    }
    637 }
    638 
    639 void
    640 CodeEmitterGM107::emitRET()
    641 {
    642    emitInsn (0xe3200000);
    643    emitCond5(0x00, CC_TR);
    644 }
    645 
    646 void
    647 CodeEmitterGM107::emitSSY()
    648 {
    649    const FlowInstruction *insn = this->insn->asFlow();
    650 
    651    emitInsn(0xe2900000, 0);
    652 
    653    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
    654       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
    655    } else {
    656       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
    657       emitField(0x05, 1, 1);
    658    }
    659 }
    660 
    661 void
    662 CodeEmitterGM107::emitSYNC()
    663 {
    664    emitInsn (0xf0f80000);
    665    emitCond5(0x00, CC_TR);
    666 }
    667 
    668 void
    669 CodeEmitterGM107::emitSAM()
    670 {
    671    emitInsn(0xe3700000, 0);
    672 }
    673 
    674 void
    675 CodeEmitterGM107::emitRAM()
    676 {
    677    emitInsn(0xe3800000, 0);
    678 }
    679 
    680 /*******************************************************************************
    681  * predicate/cc
    682  ******************************************************************************/
    683 
    684 /*******************************************************************************
    685  * movement / conversion
    686  ******************************************************************************/
    687 
    688 void
    689 CodeEmitterGM107::emitMOV()
    690 {
    691    if (insn->src(0).getFile() != FILE_IMMEDIATE) {
    692       switch (insn->src(0).getFile()) {
    693       case FILE_GPR:
    694          if (insn->def(0).getFile() == FILE_PREDICATE) {
    695             emitInsn(0x5b6a0000);
    696             emitGPR (0x08);
    697          } else {
    698             emitInsn(0x5c980000);
    699          }
    700          emitGPR (0x14, insn->src(0));
    701          break;
    702       case FILE_MEMORY_CONST:
    703          emitInsn(0x4c980000);
    704          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
    705          break;
    706       case FILE_IMMEDIATE:
    707          emitInsn(0x38980000);
    708          emitIMMD(0x14, 19, insn->src(0));
    709          break;
    710       case FILE_PREDICATE:
    711          emitInsn(0x50880000);
    712          emitPRED(0x0c, insn->src(0));
    713          emitPRED(0x1d);
    714          emitPRED(0x27);
    715          break;
    716       default:
    717          assert(!"bad src file");
    718          break;
    719       }
    720       if (insn->def(0).getFile() != FILE_PREDICATE &&
    721           insn->src(0).getFile() != FILE_PREDICATE)
    722          emitField(0x27, 4, insn->lanes);
    723    } else {
    724       emitInsn (0x01000000);
    725       emitIMMD (0x14, 32, insn->src(0));
    726       emitField(0x0c, 4, insn->lanes);
    727    }
    728 
    729    if (insn->def(0).getFile() == FILE_PREDICATE) {
    730       emitPRED(0x27);
    731       emitPRED(0x03, insn->def(0));
    732       emitPRED(0x00);
    733    } else {
    734       emitGPR(0x00, insn->def(0));
    735    }
    736 }
    737 
    738 void
    739 CodeEmitterGM107::emitS2R()
    740 {
    741    emitInsn(0xf0c80000);
    742    emitSYS (0x14, insn->src(0));
    743    emitGPR (0x00, insn->def(0));
    744 }
    745 
    746 void
    747 CodeEmitterGM107::emitF2F()
    748 {
    749    RoundMode rnd = insn->rnd;
    750 
    751    switch (insn->op) {
    752    case OP_FLOOR: rnd = ROUND_MI; break;
    753    case OP_CEIL : rnd = ROUND_PI; break;
    754    case OP_TRUNC: rnd = ROUND_ZI; break;
    755    default:
    756       break;
    757    }
    758 
    759    switch (insn->src(0).getFile()) {
    760    case FILE_GPR:
    761       emitInsn(0x5ca80000);
    762       emitGPR (0x14, insn->src(0));
    763       break;
    764    case FILE_MEMORY_CONST:
    765       emitInsn(0x4ca80000);
    766       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
    767       break;
    768    case FILE_IMMEDIATE:
    769       emitInsn(0x38a80000);
    770       emitIMMD(0x14, 19, insn->src(0));
    771       break;
    772    default:
    773       assert(!"bad src0 file");
    774       break;
    775    }
    776 
    777    emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
    778    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    779    emitCC   (0x2f);
    780    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
    781    emitFMZ  (0x2c, 1);
    782    emitField(0x29, 1, insn->subOp);
    783    emitRND  (0x27, rnd, 0x2a);
    784    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
    785    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
    786    emitGPR  (0x00, insn->def(0));
    787 }
    788 
    789 void
    790 CodeEmitterGM107::emitF2I()
    791 {
    792    RoundMode rnd = insn->rnd;
    793 
    794    switch (insn->op) {
    795    case OP_FLOOR: rnd = ROUND_M; break;
    796    case OP_CEIL : rnd = ROUND_P; break;
    797    case OP_TRUNC: rnd = ROUND_Z; break;
    798    default:
    799       break;
    800    }
    801 
    802    switch (insn->src(0).getFile()) {
    803    case FILE_GPR:
    804       emitInsn(0x5cb00000);
    805       emitGPR (0x14, insn->src(0));
    806       break;
    807    case FILE_MEMORY_CONST:
    808       emitInsn(0x4cb00000);
    809       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
    810       break;
    811    case FILE_IMMEDIATE:
    812       emitInsn(0x38b00000);
    813       emitIMMD(0x14, 19, insn->src(0));
    814       break;
    815    default:
    816       assert(!"bad src0 file");
    817       break;
    818    }
    819 
    820    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    821    emitCC   (0x2f);
    822    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
    823    emitFMZ  (0x2c, 1);
    824    emitRND  (0x27, rnd, 0x2a);
    825    emitField(0x0c, 1, isSignedType(insn->dType));
    826    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
    827    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
    828    emitGPR  (0x00, insn->def(0));
    829 }
    830 
    831 void
    832 CodeEmitterGM107::emitI2F()
    833 {
    834    RoundMode rnd = insn->rnd;
    835 
    836    switch (insn->op) {
    837    case OP_FLOOR: rnd = ROUND_M; break;
    838    case OP_CEIL : rnd = ROUND_P; break;
    839    case OP_TRUNC: rnd = ROUND_Z; break;
    840    default:
    841       break;
    842    }
    843 
    844    switch (insn->src(0).getFile()) {
    845    case FILE_GPR:
    846       emitInsn(0x5cb80000);
    847       emitGPR (0x14, insn->src(0));
    848       break;
    849    case FILE_MEMORY_CONST:
    850       emitInsn(0x4cb80000);
    851       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
    852       break;
    853    case FILE_IMMEDIATE:
    854       emitInsn(0x38b80000);
    855       emitIMMD(0x14, 19, insn->src(0));
    856       break;
    857    default:
    858       assert(!"bad src0 file");
    859       break;
    860    }
    861 
    862    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    863    emitCC   (0x2f);
    864    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
    865    emitField(0x29, 2, insn->subOp);
    866    emitRND  (0x27, rnd, -1);
    867    emitField(0x0d, 1, isSignedType(insn->sType));
    868    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
    869    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
    870    emitGPR  (0x00, insn->def(0));
    871 }
    872 
    873 void
    874 CodeEmitterGM107::emitI2I()
    875 {
    876    switch (insn->src(0).getFile()) {
    877    case FILE_GPR:
    878       emitInsn(0x5ce00000);
    879       emitGPR (0x14, insn->src(0));
    880       break;
    881    case FILE_MEMORY_CONST:
    882       emitInsn(0x4ce00000);
    883       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
    884       break;
    885    case FILE_IMMEDIATE:
    886       emitInsn(0x38e00000);
    887       emitIMMD(0x14, 19, insn->src(0));
    888       break;
    889    default:
    890       assert(!"bad src0 file");
    891       break;
    892    }
    893 
    894    emitSAT  (0x32);
    895    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    896    emitCC   (0x2f);
    897    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
    898    emitField(0x29, 2, insn->subOp);
    899    emitField(0x0d, 1, isSignedType(insn->sType));
    900    emitField(0x0c, 1, isSignedType(insn->dType));
    901    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
    902    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
    903    emitGPR  (0x00, insn->def(0));
    904 }
    905 
    906 static void
    907 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
    908 {
    909    int loc = entry->loc;
    910    if (data.force_persample_interp)
    911       code[loc + 1] |= 1 << 10;
    912    else
    913       code[loc + 1] &= ~(1 << 10);
    914 }
    915 
    916 void
    917 CodeEmitterGM107::emitSEL()
    918 {
    919    switch (insn->src(1).getFile()) {
    920    case FILE_GPR:
    921       emitInsn(0x5ca00000);
    922       emitGPR (0x14, insn->src(1));
    923       break;
    924    case FILE_MEMORY_CONST:
    925       emitInsn(0x4ca00000);
    926       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
    927       break;
    928    case FILE_IMMEDIATE:
    929       emitInsn(0x38a00000);
    930       emitIMMD(0x14, 19, insn->src(1));
    931       break;
    932    default:
    933       assert(!"bad src1 file");
    934       break;
    935    }
    936 
    937    emitINV (0x2a, insn->src(2));
    938    emitPRED(0x27, insn->src(2));
    939    emitGPR (0x08, insn->src(0));
    940    emitGPR (0x00, insn->def(0));
    941 
    942    if (insn->subOp == 1) {
    943       addInterp(0, 0, selpFlip);
    944    }
    945 }
    946 
    947 void
    948 CodeEmitterGM107::emitSHFL()
    949 {
    950    int type = 0;
    951 
    952    emitInsn (0xef100000);
    953 
    954    switch (insn->src(1).getFile()) {
    955    case FILE_GPR:
    956       emitGPR(0x14, insn->src(1));
    957       break;
    958    case FILE_IMMEDIATE:
    959       emitIMMD(0x14, 5, insn->src(1));
    960       type |= 1;
    961       break;
    962    default:
    963       assert(!"invalid src1 file");
    964       break;
    965    }
    966 
    967    /*XXX: what is this arg? hardcode immediate for now */
    968    emitField(0x22, 13, 0x1c03);
    969    type |= 2;
    970 
    971    emitPRED (0x30);
    972    emitField(0x1e, 2, insn->subOp);
    973    emitField(0x1c, 2, type);
    974    emitGPR  (0x08, insn->src(0));
    975    emitGPR  (0x00, insn->def(0));
    976 }
    977 
    978 /*******************************************************************************
    979  * double
    980  ******************************************************************************/
    981 
    982 void
    983 CodeEmitterGM107::emitDADD()
    984 {
    985    switch (insn->src(1).getFile()) {
    986    case FILE_GPR:
    987       emitInsn(0x5c700000);
    988       emitGPR (0x14, insn->src(1));
    989       break;
    990    case FILE_MEMORY_CONST:
    991       emitInsn(0x4c700000);
    992       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
    993       break;
    994    case FILE_IMMEDIATE:
    995       emitInsn(0x38700000);
    996       emitIMMD(0x14, 19, insn->src(1));
    997       break;
    998    default:
    999       assert(!"bad src1 file");
   1000       break;
   1001    }
   1002    emitABS(0x31, insn->src(1));
   1003    emitNEG(0x30, insn->src(0));
   1004    emitCC (0x2f);
   1005    emitABS(0x2e, insn->src(0));
   1006    emitNEG(0x2d, insn->src(1));
   1007 
   1008    if (insn->op == OP_SUB)
   1009       code[1] ^= 0x00002000;
   1010 
   1011    emitGPR(0x08, insn->src(0));
   1012    emitGPR(0x00, insn->def(0));
   1013 }
   1014 
   1015 void
   1016 CodeEmitterGM107::emitDMUL()
   1017 {
   1018    switch (insn->src(1).getFile()) {
   1019    case FILE_GPR:
   1020       emitInsn(0x5c800000);
   1021       emitGPR (0x14, insn->src(1));
   1022       break;
   1023    case FILE_MEMORY_CONST:
   1024       emitInsn(0x4c800000);
   1025       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1026       break;
   1027    case FILE_IMMEDIATE:
   1028       emitInsn(0x38800000);
   1029       emitIMMD(0x14, 19, insn->src(1));
   1030       break;
   1031    default:
   1032       assert(!"bad src1 file");
   1033       break;
   1034    }
   1035 
   1036    emitNEG2(0x30, insn->src(0), insn->src(1));
   1037    emitCC  (0x2f);
   1038    emitRND (0x27);
   1039    emitGPR (0x08, insn->src(0));
   1040    emitGPR (0x00, insn->def(0));
   1041 }
   1042 
   1043 void
   1044 CodeEmitterGM107::emitDFMA()
   1045 {
   1046    switch(insn->src(2).getFile()) {
   1047    case FILE_GPR:
   1048       switch (insn->src(1).getFile()) {
   1049       case FILE_GPR:
   1050          emitInsn(0x5b700000);
   1051          emitGPR (0x14, insn->src(1));
   1052          break;
   1053       case FILE_MEMORY_CONST:
   1054          emitInsn(0x4b700000);
   1055          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1056          break;
   1057       case FILE_IMMEDIATE:
   1058          emitInsn(0x36700000);
   1059          emitIMMD(0x14, 19, insn->src(1));
   1060          break;
   1061       default:
   1062          assert(!"bad src1 file");
   1063          break;
   1064       }
   1065       emitGPR (0x27, insn->src(2));
   1066       break;
   1067    case FILE_MEMORY_CONST:
   1068       emitInsn(0x53700000);
   1069       emitGPR (0x27, insn->src(1));
   1070       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1071       break;
   1072    default:
   1073       assert(!"bad src2 file");
   1074       break;
   1075    }
   1076 
   1077    emitRND (0x32);
   1078    emitNEG (0x31, insn->src(2));
   1079    emitNEG2(0x30, insn->src(0), insn->src(1));
   1080    emitCC  (0x2f);
   1081    emitGPR (0x08, insn->src(0));
   1082    emitGPR (0x00, insn->def(0));
   1083 }
   1084 
   1085 void
   1086 CodeEmitterGM107::emitDMNMX()
   1087 {
   1088    switch (insn->src(1).getFile()) {
   1089    case FILE_GPR:
   1090       emitInsn(0x5c500000);
   1091       emitGPR (0x14, insn->src(1));
   1092       break;
   1093    case FILE_MEMORY_CONST:
   1094       emitInsn(0x4c500000);
   1095       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1096       break;
   1097    case FILE_IMMEDIATE:
   1098       emitInsn(0x38500000);
   1099       emitIMMD(0x14, 19, insn->src(1));
   1100       break;
   1101    default:
   1102       assert(!"bad src1 file");
   1103       break;
   1104    }
   1105 
   1106    emitABS  (0x31, insn->src(1));
   1107    emitNEG  (0x30, insn->src(0));
   1108    emitCC   (0x2f);
   1109    emitABS  (0x2e, insn->src(0));
   1110    emitNEG  (0x2d, insn->src(1));
   1111    emitField(0x2a, 1, insn->op == OP_MAX);
   1112    emitPRED (0x27);
   1113    emitGPR  (0x08, insn->src(0));
   1114    emitGPR  (0x00, insn->def(0));
   1115 }
   1116 
   1117 void
   1118 CodeEmitterGM107::emitDSET()
   1119 {
   1120    const CmpInstruction *insn = this->insn->asCmp();
   1121 
   1122    switch (insn->src(1).getFile()) {
   1123    case FILE_GPR:
   1124       emitInsn(0x59000000);
   1125       emitGPR (0x14, insn->src(1));
   1126       break;
   1127    case FILE_MEMORY_CONST:
   1128       emitInsn(0x49000000);
   1129       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1130       break;
   1131    case FILE_IMMEDIATE:
   1132       emitInsn(0x32000000);
   1133       emitIMMD(0x14, 19, insn->src(1));
   1134       break;
   1135    default:
   1136       assert(!"bad src1 file");
   1137       break;
   1138    }
   1139 
   1140    if (insn->op != OP_SET) {
   1141       switch (insn->op) {
   1142       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1143       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1144       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1145       default:
   1146          assert(!"invalid set op");
   1147          break;
   1148       }
   1149       emitPRED(0x27, insn->src(2));
   1150    } else {
   1151       emitPRED(0x27);
   1152    }
   1153 
   1154    emitABS  (0x36, insn->src(0));
   1155    emitNEG  (0x35, insn->src(1));
   1156    emitField(0x34, 1, insn->dType == TYPE_F32);
   1157    emitCond4(0x30, insn->setCond);
   1158    emitCC   (0x2f);
   1159    emitABS  (0x2c, insn->src(1));
   1160    emitNEG  (0x2b, insn->src(0));
   1161    emitGPR  (0x08, insn->src(0));
   1162    emitGPR  (0x00, insn->def(0));
   1163 }
   1164 
   1165 void
   1166 CodeEmitterGM107::emitDSETP()
   1167 {
   1168    const CmpInstruction *insn = this->insn->asCmp();
   1169 
   1170    switch (insn->src(1).getFile()) {
   1171    case FILE_GPR:
   1172       emitInsn(0x5b800000);
   1173       emitGPR (0x14, insn->src(1));
   1174       break;
   1175    case FILE_MEMORY_CONST:
   1176       emitInsn(0x4b800000);
   1177       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1178       break;
   1179    case FILE_IMMEDIATE:
   1180       emitInsn(0x36800000);
   1181       emitIMMD(0x14, 19, insn->src(1));
   1182       break;
   1183    default:
   1184       assert(!"bad src1 file");
   1185       break;
   1186    }
   1187 
   1188    if (insn->op != OP_SET) {
   1189       switch (insn->op) {
   1190       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1191       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1192       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1193       default:
   1194          assert(!"invalid set op");
   1195          break;
   1196       }
   1197       emitPRED(0x27, insn->src(2));
   1198    } else {
   1199       emitPRED(0x27);
   1200    }
   1201 
   1202    emitCond4(0x30, insn->setCond);
   1203    emitABS  (0x2c, insn->src(1));
   1204    emitNEG  (0x2b, insn->src(0));
   1205    emitGPR  (0x08, insn->src(0));
   1206    emitABS  (0x07, insn->src(0));
   1207    emitNEG  (0x06, insn->src(1));
   1208    emitPRED (0x03, insn->def(0));
   1209    if (insn->defExists(1))
   1210       emitPRED(0x00, insn->def(1));
   1211    else
   1212       emitPRED(0x00);
   1213 }
   1214 
   1215 /*******************************************************************************
   1216  * float
   1217  ******************************************************************************/
   1218 
   1219 void
   1220 CodeEmitterGM107::emitFADD()
   1221 {
   1222    if (!longIMMD(insn->src(1))) {
   1223       switch (insn->src(1).getFile()) {
   1224       case FILE_GPR:
   1225          emitInsn(0x5c580000);
   1226          emitGPR (0x14, insn->src(1));
   1227          break;
   1228       case FILE_MEMORY_CONST:
   1229          emitInsn(0x4c580000);
   1230          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1231          break;
   1232       case FILE_IMMEDIATE:
   1233          emitInsn(0x38580000);
   1234          emitIMMD(0x14, 19, insn->src(1));
   1235          break;
   1236       default:
   1237          assert(!"bad src1 file");
   1238          break;
   1239       }
   1240       emitSAT(0x32);
   1241       emitABS(0x31, insn->src(1));
   1242       emitNEG(0x30, insn->src(0));
   1243       emitCC (0x2f);
   1244       emitABS(0x2e, insn->src(0));
   1245       emitNEG(0x2d, insn->src(1));
   1246       emitFMZ(0x2c, 1);
   1247 
   1248       if (insn->op == OP_SUB)
   1249          code[1] ^= 0x00002000;
   1250    } else {
   1251       emitInsn(0x08000000);
   1252       emitABS(0x39, insn->src(1));
   1253       emitNEG(0x38, insn->src(0));
   1254       emitFMZ(0x37, 1);
   1255       emitABS(0x36, insn->src(0));
   1256       emitNEG(0x35, insn->src(1));
   1257       emitCC  (0x34);
   1258       emitIMMD(0x14, 32, insn->src(1));
   1259 
   1260       if (insn->op == OP_SUB)
   1261          code[1] ^= 0x00080000;
   1262    }
   1263 
   1264    emitGPR(0x08, insn->src(0));
   1265    emitGPR(0x00, insn->def(0));
   1266 }
   1267 
   1268 void
   1269 CodeEmitterGM107::emitFMUL()
   1270 {
   1271    if (!longIMMD(insn->src(1))) {
   1272       switch (insn->src(1).getFile()) {
   1273       case FILE_GPR:
   1274          emitInsn(0x5c680000);
   1275          emitGPR (0x14, insn->src(1));
   1276          break;
   1277       case FILE_MEMORY_CONST:
   1278          emitInsn(0x4c680000);
   1279          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1280          break;
   1281       case FILE_IMMEDIATE:
   1282          emitInsn(0x38680000);
   1283          emitIMMD(0x14, 19, insn->src(1));
   1284          break;
   1285       default:
   1286          assert(!"bad src1 file");
   1287          break;
   1288       }
   1289       emitSAT (0x32);
   1290       emitNEG2(0x30, insn->src(0), insn->src(1));
   1291       emitCC  (0x2f);
   1292       emitFMZ (0x2c, 2);
   1293       emitPDIV(0x29);
   1294       emitRND (0x27);
   1295    } else {
   1296       emitInsn(0x1e000000);
   1297       emitSAT (0x37);
   1298       emitFMZ (0x35, 2);
   1299       emitCC  (0x34);
   1300       emitIMMD(0x14, 32, insn->src(1));
   1301       if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
   1302          code[1] ^= 0x00080000; /* flip immd sign bit */
   1303    }
   1304 
   1305    emitGPR(0x08, insn->src(0));
   1306    emitGPR(0x00, insn->def(0));
   1307 }
   1308 
   1309 void
   1310 CodeEmitterGM107::emitFFMA()
   1311 {
   1312    /*XXX: ffma32i exists, but not using it as third src overlaps dst */
   1313    switch(insn->src(2).getFile()) {
   1314    case FILE_GPR:
   1315       switch (insn->src(1).getFile()) {
   1316       case FILE_GPR:
   1317          emitInsn(0x59800000);
   1318          emitGPR (0x14, insn->src(1));
   1319          break;
   1320       case FILE_MEMORY_CONST:
   1321          emitInsn(0x49800000);
   1322          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1323          break;
   1324       case FILE_IMMEDIATE:
   1325          emitInsn(0x32800000);
   1326          emitIMMD(0x14, 19, insn->src(1));
   1327          break;
   1328       default:
   1329          assert(!"bad src1 file");
   1330          break;
   1331       }
   1332       emitGPR (0x27, insn->src(2));
   1333       break;
   1334    case FILE_MEMORY_CONST:
   1335       emitInsn(0x51800000);
   1336       emitGPR (0x27, insn->src(1));
   1337       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1338       break;
   1339    default:
   1340       assert(!"bad src2 file");
   1341       break;
   1342    }
   1343    emitRND (0x33);
   1344    emitSAT (0x32);
   1345    emitNEG (0x31, insn->src(2));
   1346    emitNEG2(0x30, insn->src(0), insn->src(1));
   1347    emitCC  (0x2f);
   1348 
   1349    emitFMZ(0x35, 2);
   1350    emitGPR(0x08, insn->src(0));
   1351    emitGPR(0x00, insn->def(0));
   1352 }
   1353 
   1354 void
   1355 CodeEmitterGM107::emitMUFU()
   1356 {
   1357    int mufu = 0;
   1358 
   1359    switch (insn->op) {
   1360    case OP_COS: mufu = 0; break;
   1361    case OP_SIN: mufu = 1; break;
   1362    case OP_EX2: mufu = 2; break;
   1363    case OP_LG2: mufu = 3; break;
   1364    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
   1365    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
   1366    default:
   1367       assert(!"invalid mufu");
   1368       break;
   1369    }
   1370 
   1371    emitInsn (0x50800000);
   1372    emitSAT  (0x32);
   1373    emitNEG  (0x30, insn->src(0));
   1374    emitABS  (0x2e, insn->src(0));
   1375    emitField(0x14, 3, mufu);
   1376    emitGPR  (0x08, insn->src(0));
   1377    emitGPR  (0x00, insn->def(0));
   1378 }
   1379 
   1380 void
   1381 CodeEmitterGM107::emitFMNMX()
   1382 {
   1383    switch (insn->src(1).getFile()) {
   1384    case FILE_GPR:
   1385       emitInsn(0x5c600000);
   1386       emitGPR (0x14, insn->src(1));
   1387       break;
   1388    case FILE_MEMORY_CONST:
   1389       emitInsn(0x4c600000);
   1390       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1391       break;
   1392    case FILE_IMMEDIATE:
   1393       emitInsn(0x38600000);
   1394       emitIMMD(0x14, 19, insn->src(1));
   1395       break;
   1396    default:
   1397       assert(!"bad src1 file");
   1398       break;
   1399    }
   1400 
   1401    emitField(0x2a, 1, insn->op == OP_MAX);
   1402    emitPRED (0x27);
   1403 
   1404    emitABS(0x31, insn->src(1));
   1405    emitNEG(0x30, insn->src(0));
   1406    emitCC (0x2f);
   1407    emitABS(0x2e, insn->src(0));
   1408    emitNEG(0x2d, insn->src(1));
   1409    emitFMZ(0x2c, 1);
   1410    emitGPR(0x08, insn->src(0));
   1411    emitGPR(0x00, insn->def(0));
   1412 }
   1413 
   1414 void
   1415 CodeEmitterGM107::emitRRO()
   1416 {
   1417    switch (insn->src(0).getFile()) {
   1418    case FILE_GPR:
   1419       emitInsn(0x5c900000);
   1420       emitGPR (0x14, insn->src(0));
   1421       break;
   1422    case FILE_MEMORY_CONST:
   1423       emitInsn(0x4c900000);
   1424       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
   1425       break;
   1426    case FILE_IMMEDIATE:
   1427       emitInsn(0x38900000);
   1428       emitIMMD(0x14, 19, insn->src(0));
   1429       break;
   1430    default:
   1431       assert(!"bad src file");
   1432       break;
   1433    }
   1434 
   1435    emitABS  (0x31, insn->src(0));
   1436    emitNEG  (0x2d, insn->src(0));
   1437    emitField(0x27, 1, insn->op == OP_PREEX2);
   1438    emitGPR  (0x00, insn->def(0));
   1439 }
   1440 
   1441 void
   1442 CodeEmitterGM107::emitFCMP()
   1443 {
   1444    const CmpInstruction *insn = this->insn->asCmp();
   1445    CondCode cc = insn->setCond;
   1446 
   1447    if (insn->src(2).mod.neg())
   1448       cc = reverseCondCode(cc);
   1449 
   1450    switch(insn->src(2).getFile()) {
   1451    case FILE_GPR:
   1452       switch (insn->src(1).getFile()) {
   1453       case FILE_GPR:
   1454          emitInsn(0x5ba00000);
   1455          emitGPR (0x14, insn->src(1));
   1456          break;
   1457       case FILE_MEMORY_CONST:
   1458          emitInsn(0x4ba00000);
   1459          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1460          break;
   1461       case FILE_IMMEDIATE:
   1462          emitInsn(0x36a00000);
   1463          emitIMMD(0x14, 19, insn->src(1));
   1464          break;
   1465       default:
   1466          assert(!"bad src1 file");
   1467          break;
   1468       }
   1469       emitGPR (0x27, insn->src(2));
   1470       break;
   1471    case FILE_MEMORY_CONST:
   1472       emitInsn(0x53a00000);
   1473       emitGPR (0x27, insn->src(1));
   1474       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1475       break;
   1476    default:
   1477       assert(!"bad src2 file");
   1478       break;
   1479    }
   1480 
   1481    emitCond4(0x30, cc);
   1482    emitFMZ  (0x2f, 1);
   1483    emitGPR  (0x08, insn->src(0));
   1484    emitGPR  (0x00, insn->def(0));
   1485 }
   1486 
   1487 void
   1488 CodeEmitterGM107::emitFSET()
   1489 {
   1490    const CmpInstruction *insn = this->insn->asCmp();
   1491 
   1492    switch (insn->src(1).getFile()) {
   1493    case FILE_GPR:
   1494       emitInsn(0x58000000);
   1495       emitGPR (0x14, insn->src(1));
   1496       break;
   1497    case FILE_MEMORY_CONST:
   1498       emitInsn(0x48000000);
   1499       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1500       break;
   1501    case FILE_IMMEDIATE:
   1502       emitInsn(0x30000000);
   1503       emitIMMD(0x14, 19, insn->src(1));
   1504       break;
   1505    default:
   1506       assert(!"bad src1 file");
   1507       break;
   1508    }
   1509 
   1510    if (insn->op != OP_SET) {
   1511       switch (insn->op) {
   1512       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1513       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1514       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1515       default:
   1516          assert(!"invalid set op");
   1517          break;
   1518       }
   1519       emitPRED(0x27, insn->src(2));
   1520    } else {
   1521       emitPRED(0x27);
   1522    }
   1523 
   1524    emitFMZ  (0x37, 1);
   1525    emitABS  (0x36, insn->src(0));
   1526    emitNEG  (0x35, insn->src(1));
   1527    emitField(0x34, 1, insn->dType == TYPE_F32);
   1528    emitCond4(0x30, insn->setCond);
   1529    emitCC   (0x2f);
   1530    emitABS  (0x2c, insn->src(1));
   1531    emitNEG  (0x2b, insn->src(0));
   1532    emitGPR  (0x08, insn->src(0));
   1533    emitGPR  (0x00, insn->def(0));
   1534 }
   1535 
   1536 void
   1537 CodeEmitterGM107::emitFSETP()
   1538 {
   1539    const CmpInstruction *insn = this->insn->asCmp();
   1540 
   1541    switch (insn->src(1).getFile()) {
   1542    case FILE_GPR:
   1543       emitInsn(0x5bb00000);
   1544       emitGPR (0x14, insn->src(1));
   1545       break;
   1546    case FILE_MEMORY_CONST:
   1547       emitInsn(0x4bb00000);
   1548       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1549       break;
   1550    case FILE_IMMEDIATE:
   1551       emitInsn(0x36b00000);
   1552       emitIMMD(0x14, 19, insn->src(1));
   1553       break;
   1554    default:
   1555       assert(!"bad src1 file");
   1556       break;
   1557    }
   1558 
   1559    if (insn->op != OP_SET) {
   1560       switch (insn->op) {
   1561       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1562       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1563       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1564       default:
   1565          assert(!"invalid set op");
   1566          break;
   1567       }
   1568       emitPRED(0x27, insn->src(2));
   1569    } else {
   1570       emitPRED(0x27);
   1571    }
   1572 
   1573    emitCond4(0x30, insn->setCond);
   1574    emitFMZ  (0x2f, 1);
   1575    emitABS  (0x2c, insn->src(1));
   1576    emitNEG  (0x2b, insn->src(0));
   1577    emitGPR  (0x08, insn->src(0));
   1578    emitABS  (0x07, insn->src(0));
   1579    emitNEG  (0x06, insn->src(1));
   1580    emitPRED (0x03, insn->def(0));
   1581    if (insn->defExists(1))
   1582       emitPRED(0x00, insn->def(1));
   1583    else
   1584       emitPRED(0x00);
   1585 }
   1586 
   1587 void
   1588 CodeEmitterGM107::emitFSWZADD()
   1589 {
   1590    emitInsn (0x50f80000);
   1591    emitCC   (0x2f);
   1592    emitFMZ  (0x2c, 1);
   1593    emitRND  (0x27);
   1594    emitField(0x26, 1, insn->lanes); /* abused for .ndv */
   1595    emitField(0x1c, 8, insn->subOp);
   1596    if (insn->predSrc != 1)
   1597       emitGPR  (0x14, insn->src(1));
   1598    else
   1599       emitGPR  (0x14);
   1600    emitGPR  (0x08, insn->src(0));
   1601    emitGPR  (0x00, insn->def(0));
   1602 }
   1603 
   1604 /*******************************************************************************
   1605  * integer
   1606  ******************************************************************************/
   1607 
   1608 void
   1609 CodeEmitterGM107::emitLOP()
   1610 {
   1611    int lop = 0;
   1612 
   1613    switch (insn->op) {
   1614    case OP_AND: lop = 0; break;
   1615    case OP_OR : lop = 1; break;
   1616    case OP_XOR: lop = 2; break;
   1617    default:
   1618       assert(!"invalid lop");
   1619       break;
   1620    }
   1621 
   1622    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
   1623       switch (insn->src(1).getFile()) {
   1624       case FILE_GPR:
   1625          emitInsn(0x5c400000);
   1626          emitGPR (0x14, insn->src(1));
   1627          break;
   1628       case FILE_MEMORY_CONST:
   1629          emitInsn(0x4c400000);
   1630          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1631          break;
   1632       case FILE_IMMEDIATE:
   1633          emitInsn(0x38400000);
   1634          emitIMMD(0x14, 19, insn->src(1));
   1635          break;
   1636       default:
   1637          assert(!"bad src1 file");
   1638          break;
   1639       }
   1640       emitPRED (0x30);
   1641       emitCC   (0x2f);
   1642       emitX    (0x2b);
   1643       emitField(0x29, 2, lop);
   1644       emitINV  (0x28, insn->src(1));
   1645       emitINV  (0x27, insn->src(0));
   1646    } else {
   1647       emitInsn (0x04000000);
   1648       emitX    (0x39);
   1649       emitINV  (0x38, insn->src(1));
   1650       emitINV  (0x37, insn->src(0));
   1651       emitField(0x35, 2, lop);
   1652       emitCC   (0x34);
   1653       emitIMMD (0x14, 32, insn->src(1));
   1654    }
   1655 
   1656    emitGPR  (0x08, insn->src(0));
   1657    emitGPR  (0x00, insn->def(0));
   1658 }
   1659 
   1660 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
   1661 void
   1662 CodeEmitterGM107::emitNOT()
   1663 {
   1664    if (!longIMMD(insn->src(0))) {
   1665       switch (insn->src(0).getFile()) {
   1666       case FILE_GPR:
   1667          emitInsn(0x5c400700);
   1668          emitGPR (0x14, insn->src(0));
   1669          break;
   1670       case FILE_MEMORY_CONST:
   1671          emitInsn(0x4c400700);
   1672          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
   1673          break;
   1674       case FILE_IMMEDIATE:
   1675          emitInsn(0x38400700);
   1676          emitIMMD(0x14, 19, insn->src(0));
   1677          break;
   1678       default:
   1679          assert(!"bad src1 file");
   1680          break;
   1681       }
   1682       emitPRED (0x30);
   1683    } else {
   1684       emitInsn (0x05600000);
   1685       emitIMMD (0x14, 32, insn->src(1));
   1686    }
   1687 
   1688    emitGPR(0x08);
   1689    emitGPR(0x00, insn->def(0));
   1690 }
   1691 
   1692 void
   1693 CodeEmitterGM107::emitIADD()
   1694 {
   1695    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
   1696       switch (insn->src(1).getFile()) {
   1697       case FILE_GPR:
   1698          emitInsn(0x5c100000);
   1699          emitGPR (0x14, insn->src(1));
   1700          break;
   1701       case FILE_MEMORY_CONST:
   1702          emitInsn(0x4c100000);
   1703          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1704          break;
   1705       case FILE_IMMEDIATE:
   1706          emitInsn(0x38100000);
   1707          emitIMMD(0x14, 19, insn->src(1));
   1708          break;
   1709       default:
   1710          assert(!"bad src1 file");
   1711          break;
   1712       }
   1713       emitSAT(0x32);
   1714       emitNEG(0x31, insn->src(0));
   1715       emitNEG(0x30, insn->src(1));
   1716       emitCC (0x2f);
   1717       emitX  (0x2b);
   1718    } else {
   1719       emitInsn(0x1c000000);
   1720       emitNEG (0x38, insn->src(0));
   1721       emitSAT (0x36);
   1722       emitX   (0x35);
   1723       emitCC  (0x34);
   1724       emitIMMD(0x14, 32, insn->src(1));
   1725    }
   1726 
   1727    if (insn->op == OP_SUB)
   1728       code[1] ^= 0x00010000;
   1729 
   1730    emitGPR(0x08, insn->src(0));
   1731    emitGPR(0x00, insn->def(0));
   1732 }
   1733 
   1734 void
   1735 CodeEmitterGM107::emitIMUL()
   1736 {
   1737    if (insn->src(1).getFile() != FILE_IMMEDIATE) {
   1738       switch (insn->src(1).getFile()) {
   1739       case FILE_GPR:
   1740          emitInsn(0x5c380000);
   1741          emitGPR (0x14, insn->src(1));
   1742          break;
   1743       case FILE_MEMORY_CONST:
   1744          emitInsn(0x4c380000);
   1745          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1746          break;
   1747       case FILE_IMMEDIATE:
   1748          emitInsn(0x38380000);
   1749          emitIMMD(0x14, 19, insn->src(1));
   1750          break;
   1751       default:
   1752          assert(!"bad src1 file");
   1753          break;
   1754       }
   1755       emitCC   (0x2f);
   1756       emitField(0x29, 1, isSignedType(insn->sType));
   1757       emitField(0x28, 1, isSignedType(insn->dType));
   1758       emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
   1759    } else {
   1760       emitInsn (0x1f000000);
   1761       emitField(0x37, 1, isSignedType(insn->sType));
   1762       emitField(0x36, 1, isSignedType(insn->dType));
   1763       emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
   1764       emitCC   (0x34);
   1765       emitIMMD (0x14, 32, insn->src(1));
   1766    }
   1767 
   1768    emitGPR(0x08, insn->src(0));
   1769    emitGPR(0x00, insn->def(0));
   1770 }
   1771 
   1772 void
   1773 CodeEmitterGM107::emitIMAD()
   1774 {
   1775    /*XXX: imad32i exists, but not using it as third src overlaps dst */
   1776    switch(insn->src(2).getFile()) {
   1777    case FILE_GPR:
   1778       switch (insn->src(1).getFile()) {
   1779       case FILE_GPR:
   1780          emitInsn(0x5a000000);
   1781          emitGPR (0x14, insn->src(1));
   1782          break;
   1783       case FILE_MEMORY_CONST:
   1784          emitInsn(0x4a000000);
   1785          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1786          break;
   1787       case FILE_IMMEDIATE:
   1788          emitInsn(0x34000000);
   1789          emitIMMD(0x14, 19, insn->src(1));
   1790          break;
   1791       default:
   1792          assert(!"bad src1 file");
   1793          break;
   1794       }
   1795       emitGPR (0x27, insn->src(2));
   1796       break;
   1797    case FILE_MEMORY_CONST:
   1798       emitInsn(0x52000000);
   1799       emitGPR (0x27, insn->src(1));
   1800       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1801       break;
   1802    default:
   1803       assert(!"bad src2 file");
   1804       break;
   1805    }
   1806 
   1807    emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
   1808    emitField(0x35, 1, isSignedType(insn->sType));
   1809    emitNEG  (0x34, insn->src(2));
   1810    emitNEG2 (0x33, insn->src(0), insn->src(1));
   1811    emitSAT  (0x32);
   1812    emitX    (0x31);
   1813    emitField(0x30, 1, isSignedType(insn->dType));
   1814    emitCC   (0x2f);
   1815    emitGPR  (0x08, insn->src(0));
   1816    emitGPR  (0x00, insn->def(0));
   1817 }
   1818 
   1819 void
   1820 CodeEmitterGM107::emitISCADD()
   1821 {
   1822    switch (insn->src(2).getFile()) {
   1823    case FILE_GPR:
   1824       emitInsn(0x5c180000);
   1825       emitGPR (0x14, insn->src(2));
   1826       break;
   1827    case FILE_MEMORY_CONST:
   1828       emitInsn(0x4c180000);
   1829       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1830       break;
   1831    case FILE_IMMEDIATE:
   1832       emitInsn(0x38180000);
   1833       emitIMMD(0x14, 19, insn->src(2));
   1834       break;
   1835    default:
   1836       assert(!"bad src1 file");
   1837       break;
   1838    }
   1839    emitNEG (0x31, insn->src(0));
   1840    emitNEG (0x30, insn->src(2));
   1841    emitCC  (0x2f);
   1842    emitIMMD(0x27, 5, insn->src(1));
   1843    emitGPR (0x08, insn->src(0));
   1844    emitGPR (0x00, insn->def(0));
   1845 }
   1846 
   1847 void
   1848 CodeEmitterGM107::emitIMNMX()
   1849 {
   1850    switch (insn->src(1).getFile()) {
   1851    case FILE_GPR:
   1852       emitInsn(0x5c200000);
   1853       emitGPR (0x14, insn->src(1));
   1854       break;
   1855    case FILE_MEMORY_CONST:
   1856       emitInsn(0x4c200000);
   1857       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1858       break;
   1859    case FILE_IMMEDIATE:
   1860       emitInsn(0x38200000);
   1861       emitIMMD(0x14, 19, insn->src(1));
   1862       break;
   1863    default:
   1864       assert(!"bad src1 file");
   1865       break;
   1866    }
   1867 
   1868    emitField(0x30, 1, isSignedType(insn->dType));
   1869    emitCC   (0x2f);
   1870    emitField(0x2a, 1, insn->op == OP_MAX);
   1871    emitPRED (0x27);
   1872    emitGPR  (0x08, insn->src(0));
   1873    emitGPR  (0x00, insn->def(0));
   1874 }
   1875 
   1876 void
   1877 CodeEmitterGM107::emitICMP()
   1878 {
   1879    const CmpInstruction *insn = this->insn->asCmp();
   1880    CondCode cc = insn->setCond;
   1881 
   1882    if (insn->src(2).mod.neg())
   1883       cc = reverseCondCode(cc);
   1884 
   1885    switch(insn->src(2).getFile()) {
   1886    case FILE_GPR:
   1887       switch (insn->src(1).getFile()) {
   1888       case FILE_GPR:
   1889          emitInsn(0x5b400000);
   1890          emitGPR (0x14, insn->src(1));
   1891          break;
   1892       case FILE_MEMORY_CONST:
   1893          emitInsn(0x4b400000);
   1894          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1895          break;
   1896       case FILE_IMMEDIATE:
   1897          emitInsn(0x36400000);
   1898          emitIMMD(0x14, 19, insn->src(1));
   1899          break;
   1900       default:
   1901          assert(!"bad src1 file");
   1902          break;
   1903       }
   1904       emitGPR (0x27, insn->src(2));
   1905       break;
   1906    case FILE_MEMORY_CONST:
   1907       emitInsn(0x53400000);
   1908       emitGPR (0x27, insn->src(1));
   1909       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   1910       break;
   1911    default:
   1912       assert(!"bad src2 file");
   1913       break;
   1914    }
   1915 
   1916    emitCond3(0x31, cc);
   1917    emitField(0x30, 1, isSignedType(insn->sType));
   1918    emitGPR  (0x08, insn->src(0));
   1919    emitGPR  (0x00, insn->def(0));
   1920 }
   1921 
   1922 void
   1923 CodeEmitterGM107::emitISET()
   1924 {
   1925    const CmpInstruction *insn = this->insn->asCmp();
   1926 
   1927    switch (insn->src(1).getFile()) {
   1928    case FILE_GPR:
   1929       emitInsn(0x5b500000);
   1930       emitGPR (0x14, insn->src(1));
   1931       break;
   1932    case FILE_MEMORY_CONST:
   1933       emitInsn(0x4b500000);
   1934       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1935       break;
   1936    case FILE_IMMEDIATE:
   1937       emitInsn(0x36500000);
   1938       emitIMMD(0x14, 19, insn->src(1));
   1939       break;
   1940    default:
   1941       assert(!"bad src1 file");
   1942       break;
   1943    }
   1944 
   1945    if (insn->op != OP_SET) {
   1946       switch (insn->op) {
   1947       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1948       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1949       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1950       default:
   1951          assert(!"invalid set op");
   1952          break;
   1953       }
   1954       emitPRED(0x27, insn->src(2));
   1955    } else {
   1956       emitPRED(0x27);
   1957    }
   1958 
   1959    emitCond3(0x31, insn->setCond);
   1960    emitField(0x30, 1, isSignedType(insn->sType));
   1961    emitCC   (0x2f);
   1962    emitField(0x2c, 1, insn->dType == TYPE_F32);
   1963    emitX    (0x2b);
   1964    emitGPR  (0x08, insn->src(0));
   1965    emitGPR  (0x00, insn->def(0));
   1966 }
   1967 
   1968 void
   1969 CodeEmitterGM107::emitISETP()
   1970 {
   1971    const CmpInstruction *insn = this->insn->asCmp();
   1972 
   1973    switch (insn->src(1).getFile()) {
   1974    case FILE_GPR:
   1975       emitInsn(0x5b600000);
   1976       emitGPR (0x14, insn->src(1));
   1977       break;
   1978    case FILE_MEMORY_CONST:
   1979       emitInsn(0x4b600000);
   1980       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   1981       break;
   1982    case FILE_IMMEDIATE:
   1983       emitInsn(0x36600000);
   1984       emitIMMD(0x14, 19, insn->src(1));
   1985       break;
   1986    default:
   1987       assert(!"bad src1 file");
   1988       break;
   1989    }
   1990 
   1991    if (insn->op != OP_SET) {
   1992       switch (insn->op) {
   1993       case OP_SET_AND: emitField(0x2d, 2, 0); break;
   1994       case OP_SET_OR : emitField(0x2d, 2, 1); break;
   1995       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
   1996       default:
   1997          assert(!"invalid set op");
   1998          break;
   1999       }
   2000       emitPRED(0x27, insn->src(2));
   2001    } else {
   2002       emitPRED(0x27);
   2003    }
   2004 
   2005    emitCond3(0x31, insn->setCond);
   2006    emitField(0x30, 1, isSignedType(insn->sType));
   2007    emitX    (0x2b);
   2008    emitGPR  (0x08, insn->src(0));
   2009    emitPRED (0x03, insn->def(0));
   2010    if (insn->defExists(1))
   2011       emitPRED(0x00, insn->def(1));
   2012    else
   2013       emitPRED(0x00);
   2014 }
   2015 
   2016 void
   2017 CodeEmitterGM107::emitSHL()
   2018 {
   2019    switch (insn->src(1).getFile()) {
   2020    case FILE_GPR:
   2021       emitInsn(0x5c480000);
   2022       emitGPR (0x14, insn->src(1));
   2023       break;
   2024    case FILE_MEMORY_CONST:
   2025       emitInsn(0x4c480000);
   2026       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   2027       break;
   2028    case FILE_IMMEDIATE:
   2029       emitInsn(0x38480000);
   2030       emitIMMD(0x14, 19, insn->src(1));
   2031       break;
   2032    default:
   2033       assert(!"bad src1 file");
   2034       break;
   2035    }
   2036 
   2037    emitCC   (0x2f);
   2038    emitX    (0x2b);
   2039    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
   2040    emitGPR  (0x08, insn->src(0));
   2041    emitGPR  (0x00, insn->def(0));
   2042 }
   2043 
   2044 void
   2045 CodeEmitterGM107::emitSHR()
   2046 {
   2047    switch (insn->src(1).getFile()) {
   2048    case FILE_GPR:
   2049       emitInsn(0x5c280000);
   2050       emitGPR (0x14, insn->src(1));
   2051       break;
   2052    case FILE_MEMORY_CONST:
   2053       emitInsn(0x4c280000);
   2054       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   2055       break;
   2056    case FILE_IMMEDIATE:
   2057       emitInsn(0x38280000);
   2058       emitIMMD(0x14, 19, insn->src(1));
   2059       break;
   2060    default:
   2061       assert(!"bad src1 file");
   2062       break;
   2063    }
   2064 
   2065    emitField(0x30, 1, isSignedType(insn->dType));
   2066    emitCC   (0x2f);
   2067    emitX    (0x2c);
   2068    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
   2069    emitGPR  (0x08, insn->src(0));
   2070    emitGPR  (0x00, insn->def(0));
   2071 }
   2072 
   2073 void
   2074 CodeEmitterGM107::emitPOPC()
   2075 {
   2076    switch (insn->src(0).getFile()) {
   2077    case FILE_GPR:
   2078       emitInsn(0x5c080000);
   2079       emitGPR (0x14, insn->src(0));
   2080       break;
   2081    case FILE_MEMORY_CONST:
   2082       emitInsn(0x4c080000);
   2083       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
   2084       break;
   2085    case FILE_IMMEDIATE:
   2086       emitInsn(0x38080000);
   2087       emitIMMD(0x14, 19, insn->src(0));
   2088       break;
   2089    default:
   2090       assert(!"bad src1 file");
   2091       break;
   2092    }
   2093 
   2094    emitINV(0x28, insn->src(0));
   2095    emitGPR(0x00, insn->def(0));
   2096 }
   2097 
   2098 void
   2099 CodeEmitterGM107::emitBFI()
   2100 {
   2101    switch(insn->src(2).getFile()) {
   2102    case FILE_GPR:
   2103       switch (insn->src(1).getFile()) {
   2104       case FILE_GPR:
   2105          emitInsn(0x5bf00000);
   2106          emitGPR (0x14, insn->src(1));
   2107          break;
   2108       case FILE_MEMORY_CONST:
   2109          emitInsn(0x4bf00000);
   2110          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   2111          break;
   2112       case FILE_IMMEDIATE:
   2113          emitInsn(0x36f00000);
   2114          emitIMMD(0x14, 19, insn->src(1));
   2115          break;
   2116       default:
   2117          assert(!"bad src1 file");
   2118          break;
   2119       }
   2120       emitGPR (0x27, insn->src(2));
   2121       break;
   2122    case FILE_MEMORY_CONST:
   2123       emitInsn(0x53f00000);
   2124       emitGPR (0x27, insn->src(1));
   2125       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
   2126       break;
   2127    default:
   2128       assert(!"bad src2 file");
   2129       break;
   2130    }
   2131 
   2132    emitCC   (0x2f);
   2133    emitGPR  (0x08, insn->src(0));
   2134    emitGPR  (0x00, insn->def(0));
   2135 }
   2136 
   2137 void
   2138 CodeEmitterGM107::emitBFE()
   2139 {
   2140    switch (insn->src(1).getFile()) {
   2141    case FILE_GPR:
   2142       emitInsn(0x5c000000);
   2143       emitGPR (0x14, insn->src(1));
   2144       break;
   2145    case FILE_MEMORY_CONST:
   2146       emitInsn(0x4c000000);
   2147       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   2148       break;
   2149    case FILE_IMMEDIATE:
   2150       emitInsn(0x38000000);
   2151       emitIMMD(0x14, 19, insn->src(1));
   2152       break;
   2153    default:
   2154       assert(!"bad src1 file");
   2155       break;
   2156    }
   2157 
   2158    emitField(0x30, 1, isSignedType(insn->dType));
   2159    emitCC   (0x2f);
   2160    emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
   2161    emitGPR  (0x08, insn->src(0));
   2162    emitGPR  (0x00, insn->def(0));
   2163 }
   2164 
   2165 void
   2166 CodeEmitterGM107::emitFLO()
   2167 {
   2168    switch (insn->src(0).getFile()) {
   2169    case FILE_GPR:
   2170       emitInsn(0x5c300000);
   2171       emitGPR (0x14, insn->src(0));
   2172       break;
   2173    case FILE_MEMORY_CONST:
   2174       emitInsn(0x4c300000);
   2175       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
   2176       break;
   2177    case FILE_IMMEDIATE:
   2178       emitInsn(0x38300000);
   2179       emitIMMD(0x14, 19, insn->src(0));
   2180       break;
   2181    default:
   2182       assert(!"bad src1 file");
   2183       break;
   2184    }
   2185 
   2186    emitField(0x30, 1, isSignedType(insn->dType));
   2187    emitCC   (0x2f);
   2188    emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
   2189    emitINV  (0x28, insn->src(0));
   2190    emitGPR  (0x00, insn->def(0));
   2191 }
   2192 
   2193 /*******************************************************************************
   2194  * memory
   2195  ******************************************************************************/
   2196 
   2197 void
   2198 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
   2199 {
   2200    int data = 0;
   2201 
   2202    switch (typeSizeof(type)) {
   2203    case  1: data = isSignedType(type) ? 1 : 0; break;
   2204    case  2: data = isSignedType(type) ? 3 : 2; break;
   2205    case  4: data = 4; break;
   2206    case  8: data = 5; break;
   2207    case 16: data = 6; break;
   2208    default:
   2209       assert(!"bad type");
   2210       break;
   2211    }
   2212 
   2213    emitField(pos, 3, data);
   2214 }
   2215 
   2216 void
   2217 CodeEmitterGM107::emitLDSTc(int pos)
   2218 {
   2219    int mode = 0;
   2220 
   2221    switch (insn->cache) {
   2222    case CACHE_CA: mode = 0; break;
   2223    case CACHE_CG: mode = 1; break;
   2224    case CACHE_CS: mode = 2; break;
   2225    case CACHE_CV: mode = 3; break;
   2226    default:
   2227       assert(!"invalid caching mode");
   2228       break;
   2229    }
   2230 
   2231    emitField(pos, 2, mode);
   2232 }
   2233 
   2234 void
   2235 CodeEmitterGM107::emitLDC()
   2236 {
   2237    emitInsn (0xef900000);
   2238    emitLDSTs(0x30, insn->dType);
   2239    emitField(0x2c, 2, insn->subOp);
   2240    emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
   2241    emitGPR  (0x00, insn->def(0));
   2242 }
   2243 
   2244 void
   2245 CodeEmitterGM107::emitLDL()
   2246 {
   2247    emitInsn (0xef400000);
   2248    emitLDSTs(0x30, insn->dType);
   2249    emitLDSTc(0x2c);
   2250    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   2251    emitGPR  (0x00, insn->def(0));
   2252 }
   2253 
   2254 void
   2255 CodeEmitterGM107::emitLDS()
   2256 {
   2257    emitInsn (0xef480000);
   2258    emitLDSTs(0x30, insn->dType);
   2259    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   2260    emitGPR  (0x00, insn->def(0));
   2261 }
   2262 
   2263 void
   2264 CodeEmitterGM107::emitLD()
   2265 {
   2266    emitInsn (0x80000000);
   2267    emitPRED (0x3a);
   2268    emitLDSTc(0x38);
   2269    emitLDSTs(0x35, insn->dType);
   2270    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   2271    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
   2272    emitGPR  (0x00, insn->def(0));
   2273 }
   2274 
   2275 void
   2276 CodeEmitterGM107::emitSTL()
   2277 {
   2278    emitInsn (0xef500000);
   2279    emitLDSTs(0x30, insn->dType);
   2280    emitLDSTc(0x2c);
   2281    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   2282    emitGPR  (0x00, insn->src(1));
   2283 }
   2284 
   2285 void
   2286 CodeEmitterGM107::emitSTS()
   2287 {
   2288    emitInsn (0xef580000);
   2289    emitLDSTs(0x30, insn->dType);
   2290    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   2291    emitGPR  (0x00, insn->src(1));
   2292 }
   2293 
   2294 void
   2295 CodeEmitterGM107::emitST()
   2296 {
   2297    emitInsn (0xa0000000);
   2298    emitPRED (0x3a);
   2299    emitLDSTc(0x38);
   2300    emitLDSTs(0x35, insn->dType);
   2301    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   2302    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
   2303    emitGPR  (0x00, insn->src(1));
   2304 }
   2305 
   2306 void
   2307 CodeEmitterGM107::emitALD()
   2308 {
   2309    emitInsn (0xefd80000);
   2310    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
   2311    emitGPR  (0x27, insn->src(0).getIndirect(1));
   2312    emitO    (0x20);
   2313    emitP    (0x1f);
   2314    emitADDR (0x08, 20, 10, 0, insn->src(0));
   2315    emitGPR  (0x00, insn->def(0));
   2316 }
   2317 
   2318 void
   2319 CodeEmitterGM107::emitAST()
   2320 {
   2321    emitInsn (0xeff00000);
   2322    emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
   2323    emitGPR  (0x27, insn->src(0).getIndirect(1));
   2324    emitP    (0x1f);
   2325    emitADDR (0x08, 20, 10, 0, insn->src(0));
   2326    emitGPR  (0x00, insn->src(1));
   2327 }
   2328 
   2329 void
   2330 CodeEmitterGM107::emitISBERD()
   2331 {
   2332    emitInsn(0xefd00000);
   2333    emitGPR (0x08, insn->src(0));
   2334    emitGPR (0x00, insn->def(0));
   2335 }
   2336 
   2337 void
   2338 CodeEmitterGM107::emitAL2P()
   2339 {
   2340    emitInsn (0xefa00000);
   2341    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
   2342    emitPRED (0x2c);
   2343    emitO    (0x20);
   2344    emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
   2345    emitGPR  (0x08, insn->src(0).getIndirect(0));
   2346    emitGPR  (0x00, insn->def(0));
   2347 }
   2348 
   2349 static void
   2350 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
   2351 {
   2352    int ipa = entry->ipa;
   2353    int reg = entry->reg;
   2354    int loc = entry->loc;
   2355 
   2356    if (data.flatshade &&
   2357        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
   2358       ipa = NV50_IR_INTERP_FLAT;
   2359       reg = 0xff;
   2360    } else if (data.force_persample_interp &&
   2361               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
   2362               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
   2363       ipa |= NV50_IR_INTERP_CENTROID;
   2364    }
   2365    code[loc + 1] &= ~(0xf << 0x14);
   2366    code[loc + 1] |= (ipa & 0x3) << 0x16;
   2367    code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
   2368    code[loc + 0] &= ~(0xff << 0x14);
   2369    code[loc + 0] |= reg << 0x14;
   2370 }
   2371 
   2372 void
   2373 CodeEmitterGM107::emitIPA()
   2374 {
   2375    int ipam = 0, ipas = 0;
   2376 
   2377    switch (insn->getInterpMode()) {
   2378    case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
   2379    case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
   2380    case NV50_IR_INTERP_FLAT       : ipam = 2; break;
   2381    case NV50_IR_INTERP_SC         : ipam = 3; break;
   2382    default:
   2383       assert(!"invalid ipa mode");
   2384       break;
   2385    }
   2386 
   2387    switch (insn->getSampleMode()) {
   2388    case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
   2389    case NV50_IR_INTERP_CENTROID: ipas = 1; break;
   2390    case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
   2391    default:
   2392       assert(!"invalid ipa sample mode");
   2393       break;
   2394    }
   2395 
   2396    emitInsn (0xe0000000);
   2397    emitField(0x36, 2, ipam);
   2398    emitField(0x34, 2, ipas);
   2399    emitSAT  (0x33);
   2400    emitField(0x2f, 3, 7);
   2401    emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
   2402    if ((code[0] & 0x0000ff00) != 0x0000ff00)
   2403       code[1] |= 0x00000040; /* .idx */
   2404    emitGPR(0x00, insn->def(0));
   2405 
   2406    if (insn->op == OP_PINTERP) {
   2407       emitGPR(0x14, insn->src(1));
   2408       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
   2409          emitGPR(0x27, insn->src(2));
   2410       addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
   2411    } else {
   2412       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
   2413          emitGPR(0x27, insn->src(1));
   2414       emitGPR(0x14);
   2415       addInterp(insn->ipa, 0xff, interpApply);
   2416    }
   2417 
   2418    if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
   2419       emitGPR(0x27);
   2420 }
   2421 
   2422 void
   2423 CodeEmitterGM107::emitATOM()
   2424 {
   2425    unsigned dType, subOp;
   2426 
   2427    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
   2428       switch (insn->dType) {
   2429       case TYPE_U32: dType = 0; break;
   2430       case TYPE_U64: dType = 1; break;
   2431       default: assert(!"unexpected dType"); dType = 0; break;
   2432       }
   2433       subOp = 15;
   2434 
   2435       emitInsn (0xee000000);
   2436    } else {
   2437       switch (insn->dType) {
   2438       case TYPE_U32: dType = 0; break;
   2439       case TYPE_S32: dType = 1; break;
   2440       case TYPE_U64: dType = 2; break;
   2441       case TYPE_F32: dType = 3; break;
   2442       case TYPE_B128: dType = 4; break;
   2443       case TYPE_S64: dType = 5; break;
   2444       default: assert(!"unexpected dType"); dType = 0; break;
   2445       }
   2446       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
   2447          subOp = 8;
   2448       else
   2449          subOp = insn->subOp;
   2450 
   2451       emitInsn (0xed000000);
   2452    }
   2453 
   2454    emitField(0x34, 4, subOp);
   2455    emitField(0x31, 3, dType);
   2456    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   2457    emitGPR  (0x14, insn->src(1));
   2458    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
   2459    emitGPR  (0x00, insn->def(0));
   2460 }
   2461 
   2462 void
   2463 CodeEmitterGM107::emitATOMS()
   2464 {
   2465    unsigned dType, subOp;
   2466 
   2467    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
   2468       switch (insn->dType) {
   2469       case TYPE_U32: dType = 0; break;
   2470       case TYPE_U64: dType = 1; break;
   2471       default: assert(!"unexpected dType"); dType = 0; break;
   2472       }
   2473       subOp = 4;
   2474 
   2475       emitInsn (0xee000000);
   2476       emitField(0x34, 1, dType);
   2477    } else {
   2478       switch (insn->dType) {
   2479       case TYPE_U32: dType = 0; break;
   2480       case TYPE_S32: dType = 1; break;
   2481       case TYPE_U64: dType = 2; break;
   2482       case TYPE_S64: dType = 3; break;
   2483       default: assert(!"unexpected dType"); dType = 0; break;
   2484       }
   2485 
   2486       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
   2487          subOp = 8;
   2488       else
   2489          subOp = insn->subOp;
   2490 
   2491       emitInsn (0xec000000);
   2492       emitField(0x1c, 3, dType);
   2493    }
   2494 
   2495    emitField(0x34, 4, subOp);
   2496    emitGPR  (0x14, insn->src(1));
   2497    emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
   2498    emitGPR  (0x00, insn->def(0));
   2499 }
   2500 
   2501 void
   2502 CodeEmitterGM107::emitRED()
   2503 {
   2504    unsigned dType;
   2505 
   2506    switch (insn->dType) {
   2507    case TYPE_U32: dType = 0; break;
   2508    case TYPE_S32: dType = 1; break;
   2509    case TYPE_U64: dType = 2; break;
   2510    case TYPE_F32: dType = 3; break;
   2511    case TYPE_B128: dType = 4; break;
   2512    case TYPE_S64: dType = 5; break;
   2513    default: assert(!"unexpected dType"); dType = 0; break;
   2514    }
   2515 
   2516    emitInsn (0xebf80000);
   2517    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   2518    emitField(0x17, 3, insn->subOp);
   2519    emitField(0x14, 3, dType);
   2520    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
   2521    emitGPR  (0x00, insn->src(1));
   2522 }
   2523 
   2524 void
   2525 CodeEmitterGM107::emitCCTL()
   2526 {
   2527    unsigned width;
   2528    if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
   2529       emitInsn(0xef600000);
   2530       width = 30;
   2531    } else {
   2532       emitInsn(0xef800000);
   2533       width = 22;
   2534    }
   2535    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   2536    emitADDR (0x08, 0x16, width, 2, insn->src(0));
   2537    emitField(0x00, 4, insn->subOp);
   2538 }
   2539 
   2540 /*******************************************************************************
   2541  * surface
   2542  ******************************************************************************/
   2543 
   2544 void
   2545 CodeEmitterGM107::emitPIXLD()
   2546 {
   2547    emitInsn (0xefe80000);
   2548    emitPRED (0x2d);
   2549    emitField(0x1f, 3, insn->subOp);
   2550    emitGPR  (0x08, insn->src(0));
   2551    emitGPR  (0x00, insn->def(0));
   2552 }
   2553 
   2554 /*******************************************************************************
   2555  * texture
   2556  ******************************************************************************/
   2557 
   2558 void
   2559 CodeEmitterGM107::emitTEXs(int pos)
   2560 {
   2561    int src1 = insn->predSrc == 1 ? 2 : 1;
   2562    if (insn->srcExists(src1))
   2563       emitGPR(pos, insn->src(src1));
   2564    else
   2565       emitGPR(pos);
   2566 }
   2567 
   2568 void
   2569 CodeEmitterGM107::emitTEX()
   2570 {
   2571    const TexInstruction *insn = this->insn->asTex();
   2572    int lodm = 0;
   2573 
   2574    if (!insn->tex.levelZero) {
   2575       switch (insn->op) {
   2576       case OP_TEX: lodm = 0; break;
   2577       case OP_TXB: lodm = 2; break;
   2578       case OP_TXL: lodm = 3; break;
   2579       default:
   2580          assert(!"invalid tex op");
   2581          break;
   2582       }
   2583    } else {
   2584       lodm = 1;
   2585    }
   2586 
   2587    if (insn->tex.rIndirectSrc >= 0) {
   2588       emitInsn (0xdeb80000);
   2589       emitField(0x25, 2, lodm);
   2590       emitField(0x24, 1, insn->tex.useOffsets == 1);
   2591    } else {
   2592       emitInsn (0xc0380000);
   2593       emitField(0x37, 2, lodm);
   2594       emitField(0x36, 1, insn->tex.useOffsets == 1);
   2595       emitField(0x24, 13, insn->tex.r);
   2596    }
   2597 
   2598    emitField(0x32, 1, insn->tex.target.isShadow());
   2599    emitField(0x31, 1, insn->tex.liveOnly);
   2600    emitField(0x23, 1, insn->tex.derivAll);
   2601    emitField(0x1f, 4, insn->tex.mask);
   2602    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
   2603                       insn->tex.target.getDim() - 1);
   2604    emitField(0x1c, 1, insn->tex.target.isArray());
   2605    emitTEXs (0x14);
   2606    emitGPR  (0x08, insn->src(0));
   2607    emitGPR  (0x00, insn->def(0));
   2608 }
   2609 
   2610 void
   2611 CodeEmitterGM107::emitTLD()
   2612 {
   2613    const TexInstruction *insn = this->insn->asTex();
   2614 
   2615    if (insn->tex.rIndirectSrc >= 0) {
   2616       emitInsn (0xdd380000);
   2617    } else {
   2618       emitInsn (0xdc380000);
   2619       emitField(0x24, 13, insn->tex.r);
   2620    }
   2621 
   2622    emitField(0x37, 1, insn->tex.levelZero == 0);
   2623    emitField(0x32, 1, insn->tex.target.isMS());
   2624    emitField(0x31, 1, insn->tex.liveOnly);
   2625    emitField(0x23, 1, insn->tex.useOffsets == 1);
   2626    emitField(0x1f, 4, insn->tex.mask);
   2627    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
   2628                       insn->tex.target.getDim() - 1);
   2629    emitField(0x1c, 1, insn->tex.target.isArray());
   2630    emitTEXs (0x14);
   2631    emitGPR  (0x08, insn->src(0));
   2632    emitGPR  (0x00, insn->def(0));
   2633 }
   2634 
   2635 void
   2636 CodeEmitterGM107::emitTLD4()
   2637 {
   2638    const TexInstruction *insn = this->insn->asTex();
   2639 
   2640    if (insn->tex.rIndirectSrc >= 0) {
   2641       emitInsn (0xdef80000);
   2642       emitField(0x26, 2, insn->tex.gatherComp);
   2643       emitField(0x25, 2, insn->tex.useOffsets == 4);
   2644       emitField(0x24, 2, insn->tex.useOffsets == 1);
   2645    } else {
   2646       emitInsn (0xc8380000);
   2647       emitField(0x38, 2, insn->tex.gatherComp);
   2648       emitField(0x37, 2, insn->tex.useOffsets == 4);
   2649       emitField(0x36, 2, insn->tex.useOffsets == 1);
   2650       emitField(0x24, 13, insn->tex.r);
   2651    }
   2652 
   2653    emitField(0x32, 1, insn->tex.target.isShadow());
   2654    emitField(0x31, 1, insn->tex.liveOnly);
   2655    emitField(0x23, 1, insn->tex.derivAll);
   2656    emitField(0x1f, 4, insn->tex.mask);
   2657    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
   2658                       insn->tex.target.getDim() - 1);
   2659    emitField(0x1c, 1, insn->tex.target.isArray());
   2660    emitTEXs (0x14);
   2661    emitGPR  (0x08, insn->src(0));
   2662    emitGPR  (0x00, insn->def(0));
   2663 }
   2664 
   2665 void
   2666 CodeEmitterGM107::emitTXD()
   2667 {
   2668    const TexInstruction *insn = this->insn->asTex();
   2669 
   2670    if (insn->tex.rIndirectSrc >= 0) {
   2671       emitInsn (0xde780000);
   2672    } else {
   2673       emitInsn (0xde380000);
   2674       emitField(0x24, 13, insn->tex.r);
   2675    }
   2676 
   2677    emitField(0x31, 1, insn->tex.liveOnly);
   2678    emitField(0x23, 1, insn->tex.useOffsets == 1);
   2679    emitField(0x1f, 4, insn->tex.mask);
   2680    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
   2681                       insn->tex.target.getDim() - 1);
   2682    emitField(0x1c, 1, insn->tex.target.isArray());
   2683    emitTEXs (0x14);
   2684    emitGPR  (0x08, insn->src(0));
   2685    emitGPR  (0x00, insn->def(0));
   2686 }
   2687 
   2688 void
   2689 CodeEmitterGM107::emitTMML()
   2690 {
   2691    const TexInstruction *insn = this->insn->asTex();
   2692 
   2693    if (insn->tex.rIndirectSrc >= 0) {
   2694       emitInsn (0xdf600000);
   2695    } else {
   2696       emitInsn (0xdf580000);
   2697       emitField(0x24, 13, insn->tex.r);
   2698    }
   2699 
   2700    emitField(0x31, 1, insn->tex.liveOnly);
   2701    emitField(0x23, 1, insn->tex.derivAll);
   2702    emitField(0x1f, 4, insn->tex.mask);
   2703    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
   2704                       insn->tex.target.getDim() - 1);
   2705    emitField(0x1c, 1, insn->tex.target.isArray());
   2706    emitTEXs (0x14);
   2707    emitGPR  (0x08, insn->src(0));
   2708    emitGPR  (0x00, insn->def(0));
   2709 }
   2710 
   2711 void
   2712 CodeEmitterGM107::emitTXQ()
   2713 {
   2714    const TexInstruction *insn = this->insn->asTex();
   2715    int type = 0;
   2716 
   2717    switch (insn->tex.query) {
   2718    case TXQ_DIMS           : type = 0x01; break;
   2719    case TXQ_TYPE           : type = 0x02; break;
   2720    case TXQ_SAMPLE_POSITION: type = 0x05; break;
   2721    case TXQ_FILTER         : type = 0x10; break;
   2722    case TXQ_LOD            : type = 0x12; break;
   2723    case TXQ_WRAP           : type = 0x14; break;
   2724    case TXQ_BORDER_COLOUR  : type = 0x16; break;
   2725    default:
   2726       assert(!"invalid txq query");
   2727       break;
   2728    }
   2729 
   2730    if (insn->tex.rIndirectSrc >= 0) {
   2731       emitInsn (0xdf500000);
   2732    } else {
   2733       emitInsn (0xdf480000);
   2734       emitField(0x24, 13, insn->tex.r);
   2735    }
   2736 
   2737    emitField(0x31, 1, insn->tex.liveOnly);
   2738    emitField(0x1f, 4, insn->tex.mask);
   2739    emitField(0x16, 6, type);
   2740    emitGPR  (0x08, insn->src(0));
   2741    emitGPR  (0x00, insn->def(0));
   2742 }
   2743 
   2744 void
   2745 CodeEmitterGM107::emitDEPBAR()
   2746 {
   2747    emitInsn (0xf0f00000);
   2748    emitField(0x1d, 1, 1); /* le */
   2749    emitField(0x1a, 3, 5);
   2750    emitField(0x14, 6, insn->subOp);
   2751    emitField(0x00, 6, insn->subOp);
   2752 }
   2753 
   2754 /*******************************************************************************
   2755  * misc
   2756  ******************************************************************************/
   2757 
   2758 void
   2759 CodeEmitterGM107::emitNOP()
   2760 {
   2761    emitInsn(0x50b00000);
   2762 }
   2763 
   2764 void
   2765 CodeEmitterGM107::emitKIL()
   2766 {
   2767    emitInsn (0xe3300000);
   2768    emitCond5(0x00, CC_TR);
   2769 }
   2770 
   2771 void
   2772 CodeEmitterGM107::emitOUT()
   2773 {
   2774    const int cut  = insn->op == OP_RESTART || insn->subOp;
   2775    const int emit = insn->op == OP_EMIT;
   2776 
   2777    switch (insn->src(1).getFile()) {
   2778    case FILE_GPR:
   2779       emitInsn(0xfbe00000);
   2780       emitGPR (0x14, insn->src(1));
   2781       break;
   2782    case FILE_IMMEDIATE:
   2783       emitInsn(0xf6e00000);
   2784       emitIMMD(0x14, 19, insn->src(1));
   2785       break;
   2786    case FILE_MEMORY_CONST:
   2787       emitInsn(0xebe00000);
   2788       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
   2789       break;
   2790    default:
   2791       assert(!"bad src1 file");
   2792       break;
   2793    }
   2794 
   2795    emitField(0x27, 2, (cut << 1) | emit);
   2796    emitGPR  (0x08, insn->src(0));
   2797    emitGPR  (0x00, insn->def(0));
   2798 }
   2799 
   2800 void
   2801 CodeEmitterGM107::emitBAR()
   2802 {
   2803    uint8_t subop;
   2804 
   2805    emitInsn (0xf0a80000);
   2806 
   2807    switch (insn->subOp) {
   2808    case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
   2809    case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
   2810    case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
   2811    case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
   2812    default:
   2813       subop = 0x80;
   2814       assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
   2815       break;
   2816    }
   2817 
   2818    emitField(0x20, 8, subop);
   2819 
   2820    // barrier id
   2821    if (insn->src(0).getFile() == FILE_GPR) {
   2822       emitGPR(0x08, insn->src(0));
   2823    } else {
   2824       ImmediateValue *imm = insn->getSrc(0)->asImm();
   2825       assert(imm);
   2826       emitField(0x08, 8, imm->reg.data.u32);
   2827       emitField(0x2b, 1, 1);
   2828    }
   2829 
   2830    // thread count
   2831    if (insn->src(1).getFile() == FILE_GPR) {
   2832       emitGPR(0x14, insn->src(1));
   2833    } else {
   2834       ImmediateValue *imm = insn->getSrc(0)->asImm();
   2835       assert(imm);
   2836       emitField(0x14, 12, imm->reg.data.u32);
   2837       emitField(0x2c, 1, 1);
   2838    }
   2839 
   2840    if (insn->srcExists(2) && (insn->predSrc != 2)) {
   2841       emitPRED (0x27, insn->src(2));
   2842       emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
   2843    } else {
   2844       emitField(0x27, 3, 7);
   2845    }
   2846 }
   2847 
   2848 void
   2849 CodeEmitterGM107::emitMEMBAR()
   2850 {
   2851    emitInsn (0xef980000);
   2852    emitField(0x08, 2, insn->subOp >> 2);
   2853 }
   2854 
   2855 void
   2856 CodeEmitterGM107::emitVOTE()
   2857 {
   2858    assert(insn->src(0).getFile() == FILE_PREDICATE);
   2859 
   2860    int r = -1, p = -1;
   2861    for (int i = 0; insn->defExists(i); i++) {
   2862       if (insn->def(i).getFile() == FILE_GPR)
   2863          r = i;
   2864       else if (insn->def(i).getFile() == FILE_PREDICATE)
   2865          p = i;
   2866    }
   2867 
   2868    emitInsn (0x50d80000);
   2869    emitField(0x30, 2, insn->subOp);
   2870    if (r >= 0)
   2871       emitGPR  (0x00, insn->def(r));
   2872    else
   2873       emitGPR  (0x00);
   2874    if (p >= 0)
   2875       emitPRED (0x2d, insn->def(p));
   2876    else
   2877       emitPRED (0x2d);
   2878    emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
   2879    emitPRED (0x27, insn->src(0));
   2880 }
   2881 
   2882 void
   2883 CodeEmitterGM107::emitSUTarget()
   2884 {
   2885    const TexInstruction *insn = this->insn->asTex();
   2886    int target = 0;
   2887 
   2888    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
   2889 
   2890    if (insn->tex.target == TEX_TARGET_BUFFER) {
   2891       target = 2;
   2892    } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
   2893       target = 4;
   2894    } else if (insn->tex.target == TEX_TARGET_2D ||
   2895               insn->tex.target == TEX_TARGET_RECT) {
   2896       target = 6;
   2897    } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
   2898               insn->tex.target == TEX_TARGET_CUBE ||
   2899               insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
   2900       target = 8;
   2901    } else if (insn->tex.target == TEX_TARGET_3D) {
   2902       target = 10;
   2903    } else {
   2904       assert(insn->tex.target == TEX_TARGET_1D);
   2905    }
   2906    emitField(0x20, 4, target);
   2907 }
   2908 
   2909 void
   2910 CodeEmitterGM107::emitSUHandle(const int s)
   2911 {
   2912    const TexInstruction *insn = this->insn->asTex();
   2913 
   2914    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
   2915 
   2916    if (insn->src(s).getFile() == FILE_GPR) {
   2917       emitGPR(0x27, insn->src(s));
   2918    } else {
   2919       ImmediateValue *imm = insn->getSrc(s)->asImm();
   2920       assert(imm);
   2921       emitField(0x33, 1, 1);
   2922       emitField(0x24, 13, imm->reg.data.u32);
   2923    }
   2924 }
   2925 
   2926 void
   2927 CodeEmitterGM107::emitSUSTx()
   2928 {
           // Emits a surface store; emitInstruction() routes OP_SUSTB and
           // OP_SUSTP here. Both share the 0xeb200000 base, and OP_SUSTB
           // additionally sets the one-bit field at 0x34.
   2929    const TexInstruction *insn = this->insn->asTex();
   2930 
   2931    emitInsn(0xeb200000);
   2932    if (insn->op == OP_SUSTB)
   2933       emitField(0x34, 1, 1);
   2934    emitSUTarget();
   2935 
   2936    emitLDSTc(0x18);
   2937    emitField(0x14, 4, 0xf); // rgba
           // Source 0 goes to 0x08, source 1 to 0x00; source 2 is the
           // surface handle.
   2938    emitGPR  (0x08, insn->src(0));
   2939    emitGPR  (0x00, insn->src(1));
   2940 
   2941    emitSUHandle(2);
   2942 }
   2943 
   2944 void
   2945 CodeEmitterGM107::emitSULDx()
   2946 {
           // Emits a surface load; emitInstruction() routes OP_SULDB and
           // OP_SULDP here. Both share the 0xeb000000 base, and OP_SULDB
           // additionally sets the one-bit field at 0x34.
   2947    const TexInstruction *insn = this->insn->asTex();
   2948    int type = 0;
   2949 
   2950    emitInsn(0xeb000000);
   2951    if (insn->op == OP_SULDB)
   2952       emitField(0x34, 1, 1);
   2953    emitSUTarget();
   2954 
           // Map the destination type to the 3-bit code written at 0x14.
           // TYPE_U8 is the only type allowed to fall through with code 0.
   2955    switch (insn->dType) {
   2956    case TYPE_S8:   type = 1; break;
   2957    case TYPE_U16:  type = 2; break;
   2958    case TYPE_S16:  type = 3; break;
   2959    case TYPE_U32:  type = 4; break;
   2960    case TYPE_U64:  type = 5; break;
   2961    case TYPE_B128: type = 6; break;
   2962    default:
   2963       assert(insn->dType == TYPE_U8);
   2964       break;
   2965    }
   2966    emitLDSTc(0x18);
   2967    emitField(0x14, 3, type);
           // Destination goes to 0x00, source 0 to 0x08; source 1 is the
           // surface handle.
   2968    emitGPR  (0x00, insn->def(0));
   2969    emitGPR  (0x08, insn->src(0));
   2970 
   2971    emitSUHandle(1);
   2972 }
   2973 
   2974 void
   2975 CodeEmitterGM107::emitSUREDx()
   2976 {
           // Emits a surface reduction/atomic; emitInstruction() routes
           // OP_SUREDB and OP_SUREDP here. Compare-and-swap uses its own base
           // opcode (0xeac00000); every other sub-operation uses 0xea600000.
   2977    const TexInstruction *insn = this->insn->asTex();
   2978    uint8_t type = 0, subOp;
   2979 
   2980    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
   2981       emitInsn(0xeac00000);
   2982    else
   2983       emitInsn(0xea600000);
   2984 
   2985    if (insn->op == OP_SUREDB)
   2986       emitField(0x34, 1, 1);
   2987    emitSUTarget();
   2988 
   2989    // destination type
           // TYPE_U32 is the only type allowed to fall through with code 0;
           // code 4 is not assigned by this switch.
   2990    switch (insn->dType) {
   2991    case TYPE_S32: type = 1; break;
   2992    case TYPE_U64: type = 2; break;
   2993    case TYPE_F32: type = 3; break;
   2994    case TYPE_S64: type = 5; break;
   2995    default:
   2996       assert(insn->dType == TYPE_U32);
   2997       break;
   2998    }
   2999 
   3000    // atomic operation
           // CAS is encoded as 0 and EXCH as 8; every other sub-operation is
           // written with its NV50_IR_SUBOP_ATOM_* value unchanged.
   3001    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
   3002       subOp = 0;
   3003    } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
   3004       subOp = 8;
   3005    } else {
   3006       subOp = insn->subOp;
   3007    }
   3008 
   3009    emitField(0x24, 3, type);
   3010    emitField(0x1d, 4, subOp);
           // Source 1 goes to 0x14, source 0 to 0x08, the destination to
           // 0x00; source 2 is the surface handle.
   3011    emitGPR  (0x14, insn->src(1));
   3012    emitGPR  (0x08, insn->src(0));
   3013    emitGPR  (0x00, insn->def(0));
   3014 
   3015    emitSUHandle(2);
   3016 }
   3017 
   3018 /*******************************************************************************
   3019  * assembler front-end
   3020  ******************************************************************************/
   3021 
   3022 bool
   3023 CodeEmitterGM107::emitInstruction(Instruction *i)
   3024 {
           // Encodes instruction `i` at the current output position.
           //
           // Returns false without emitting anything when i->encSize is not 8
           // or when the output buffer is too small. For an opcode that has no
           // case below, a NOP is emitted in its place and false is returned.
           // Returns true otherwise.
           //
           // An instruction occupies 8 bytes. When issue delays are written and
           // codeSize is a multiple of 32, 8 more bytes are needed for the
           // scheduling word that is inserted in front of it (see below).
   3025    const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
   3026    bool ret = true;
   3027 
   3028    insn = i;
   3029 
   3030    if (insn->encSize != 8) {
   3031       ERROR("skipping undecodable instruction: "); insn->print();
   3032       return false;
   3033    } else
   3034    if (codeSize + size > codeSizeLimit) {
   3035       ERROR("code emitter output buffer too small\n");
   3036       return false;
   3037    }
   3038 
   3039    if (writeIssueDelays) {
              // n is the slot of this instruction inside the current 32-byte
              // group, counted after the group's leading 8-byte word. n == -1
              // means the group is just starting: reserve and zero that leading
              // word, remember it in `data`, and use slot 0.
   3040       int n = ((codeSize & 0x1f) / 8) - 1;
   3041       if (n < 0) {
   3042          data = code;
   3043          data[0] = 0x00000000;
   3044          data[1] = 0x00000000;
   3045          code += 2;
   3046          codeSize += 8;
   3047          n++;
   3048       }
   3049 
              // Each instruction's sched value takes a 21-bit field at n * 21
              // inside the scheduling word.
   3050       emitField(data, n * 21, 21, insn->sched);
   3051    }
   3052 
           // Dispatch on the IR opcode to the matching hardware encoder.
   3053    switch (insn->op) {
   3054    case OP_EXIT:
   3055       emitEXIT();
   3056       break;
   3057    case OP_BRA:
   3058       emitBRA();
   3059       break;
   3060    case OP_CALL:
   3061       emitCAL();
   3062       break;
   3063    case OP_PRECONT:
   3064       emitPCNT();
   3065       break;
   3066    case OP_CONT:
   3067       emitCONT();
   3068       break;
   3069    case OP_PREBREAK:
   3070       emitPBK();
   3071       break;
   3072    case OP_BREAK:
   3073       emitBRK();
   3074       break;
   3075    case OP_PRERET:
   3076       emitPRET();
   3077       break;
   3078    case OP_RET:
   3079       emitRET();
   3080       break;
   3081    case OP_JOINAT:
   3082       emitSSY();
   3083       break;
   3084    case OP_JOIN:
   3085       emitSYNC();
   3086       break;
   3087    case OP_QUADON:
   3088       emitSAM();
   3089       break;
   3090    case OP_QUADPOP:
   3091       emitRAM();
   3092       break;
   3093    case OP_MOV:
   3094       emitMOV();
   3095       break;
   3096    case OP_RDSV:
   3097       emitS2R();
   3098       break;
   3099    case OP_ABS:
   3100    case OP_NEG:
   3101    case OP_SAT:
   3102    case OP_FLOOR:
   3103    case OP_CEIL:
   3104    case OP_TRUNC:
   3105    case OP_CVT:
              // A CVT that reads or writes a predicate is emitted as a MOV.
              // Everything else in this group picks one of the four conversion
              // encoders from the float-ness of the destination and source
              // types.
   3106       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
   3107                                  insn->src(0).getFile() == FILE_PREDICATE)) {
   3108          emitMOV();
   3109       } else if (isFloatType(insn->dType)) {
   3110          if (isFloatType(insn->sType))
   3111             emitF2F();
   3112          else
   3113             emitI2F();
   3114       } else {
   3115          if (isFloatType(insn->sType))
   3116             emitF2I();
   3117          else
   3118             emitI2I();
   3119       }
   3120       break;
   3121    case OP_SHFL:
   3122       emitSHFL();
   3123       break;
   3124    case OP_ADD:
   3125    case OP_SUB:
   3126       if (isFloatType(insn->dType)) {
   3127          if (insn->dType == TYPE_F64)
   3128             emitDADD();
   3129          else
   3130             emitFADD();
   3131       } else {
   3132          emitIADD();
   3133       }
   3134       break;
   3135    case OP_MUL:
   3136       if (isFloatType(insn->dType)) {
   3137          if (insn->dType == TYPE_F64)
   3138             emitDMUL();
   3139          else
   3140             emitFMUL();
   3141       } else {
   3142          emitIMUL();
   3143       }
   3144       break;
   3145    case OP_MAD:
   3146    case OP_FMA:
   3147       if (isFloatType(insn->dType)) {
   3148          if (insn->dType == TYPE_F64)
   3149             emitDFMA();
   3150          else
   3151             emitFFMA();
   3152       } else {
   3153          emitIMAD();
   3154       }
   3155       break;
   3156    case OP_SHLADD:
   3157       emitISCADD();
   3158       break;
   3159    case OP_MIN:
   3160    case OP_MAX:
   3161       if (isFloatType(insn->dType)) {
   3162          if (insn->dType == TYPE_F64)
   3163             emitDMNMX();
   3164          else
   3165             emitFMNMX();
   3166       } else {
   3167          emitIMNMX();
   3168       }
   3169       break;
   3170    case OP_SHL:
   3171       emitSHL();
   3172       break;
   3173    case OP_SHR:
   3174       emitSHR();
   3175       break;
   3176    case OP_POPCNT:
   3177       emitPOPC();
   3178       break;
   3179    case OP_INSBF:
   3180       emitBFI();
   3181       break;
   3182    case OP_EXTBF:
   3183       emitBFE();
   3184       break;
   3185    case OP_BFIND:
   3186       emitFLO();
   3187       break;
   3188    case OP_SLCT:
   3189       if (isFloatType(insn->dType))
   3190          emitFCMP();
   3191       else
   3192          emitICMP();
   3193       break;
   3194    case OP_SET:
   3195    case OP_SET_AND:
   3196    case OP_SET_OR:
   3197    case OP_SET_XOR:
              // Comparisons writing a predicate use the *SETP encoders, all
              // other destinations use *SET. The D/F/I variant follows the
              // source type (F64, other float, non-float).
   3198       if (insn->def(0).getFile() != FILE_PREDICATE) {
   3199          if (isFloatType(insn->sType))
   3200             if (insn->sType == TYPE_F64)
   3201                emitDSET();
   3202             else
   3203                emitFSET();
   3204          else
   3205             emitISET();
   3206       } else {
   3207          if (isFloatType(insn->sType))
   3208             if (insn->sType == TYPE_F64)
   3209                emitDSETP();
   3210             else
   3211                emitFSETP();
   3212          else
   3213             emitISETP();
   3214       }
   3215       break;
   3216    case OP_SELP:
   3217       emitSEL();
   3218       break;
   3219    case OP_PRESIN:
   3220    case OP_PREEX2:
   3221       emitRRO();
   3222       break;
   3223    case OP_COS:
   3224    case OP_SIN:
   3225    case OP_EX2:
   3226    case OP_LG2:
   3227    case OP_RCP:
   3228    case OP_RSQ:
   3229       emitMUFU();
   3230       break;
   3231    case OP_AND:
   3232    case OP_OR:
   3233    case OP_XOR:
   3234       emitLOP();
   3235       break;
   3236    case OP_NOT:
   3237       emitNOT();
   3238       break;
   3239    case OP_LOAD:
              // The load encoder depends on the memory file of source 0.
   3240       switch (insn->src(0).getFile()) {
   3241       case FILE_MEMORY_CONST : emitLDC(); break;
   3242       case FILE_MEMORY_LOCAL : emitLDL(); break;
   3243       case FILE_MEMORY_SHARED: emitLDS(); break;
   3244       case FILE_MEMORY_GLOBAL: emitLD(); break;
   3245       default:
   3246          assert(!"invalid load");
   3247          emitNOP();
   3248          break;
   3249       }
   3250       break;
   3251    case OP_STORE:
              // The store encoder depends on the memory file of source 0.
   3252       switch (insn->src(0).getFile()) {
   3253       case FILE_MEMORY_LOCAL : emitSTL(); break;
   3254       case FILE_MEMORY_SHARED: emitSTS(); break;
   3255       case FILE_MEMORY_GLOBAL: emitST(); break;
   3256       default:
   3257          assert(!"invalid store");
   3258          emitNOP();
   3259          break;
   3260       }
   3261       break;
   3262    case OP_ATOM:
              // Shared-memory atomics use ATOMS. Otherwise an atomic without a
              // destination whose sub-operation is below
              // NV50_IR_SUBOP_ATOM_CAS is emitted as RED, and anything else
              // as ATOM.
   3263       if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
   3264          emitATOMS();
   3265       else
   3266          if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
   3267             emitRED();
   3268          else
   3269             emitATOM();
   3270       break;
   3271    case OP_CCTL:
   3272       emitCCTL();
   3273       break;
   3274    case OP_VFETCH:
   3275       emitALD();
   3276       break;
   3277    case OP_EXPORT:
   3278       emitAST();
   3279       break;
   3280    case OP_PFETCH:
   3281       emitISBERD();
   3282       break;
   3283    case OP_AFETCH:
   3284       emitAL2P();
   3285       break;
   3286    case OP_LINTERP:
   3287    case OP_PINTERP:
   3288       emitIPA();
   3289       break;
   3290    case OP_PIXLD:
   3291       emitPIXLD();
   3292       break;
   3293    case OP_TEX:
   3294    case OP_TXB:
   3295    case OP_TXL:
   3296       emitTEX();
   3297       break;
   3298    case OP_TXF:
   3299       emitTLD();
   3300       break;
   3301    case OP_TXG:
   3302       emitTLD4();
   3303       break;
   3304    case OP_TXD:
   3305       emitTXD();
   3306       break;
   3307    case OP_TXQ:
   3308       emitTXQ();
   3309       break;
   3310    case OP_TXLQ:
   3311       emitTMML();
   3312       break;
   3313    case OP_TEXBAR:
   3314       emitDEPBAR();
   3315       break;
   3316    case OP_QUADOP:
   3317       emitFSWZADD();
   3318       break;
   3319    case OP_NOP:
   3320       emitNOP();
   3321       break;
   3322    case OP_DISCARD:
   3323       emitKIL();
   3324       break;
   3325    case OP_EMIT:
   3326    case OP_RESTART:
   3327       emitOUT();
   3328       break;
   3329    case OP_BAR:
   3330       emitBAR();
   3331       break;
   3332    case OP_MEMBAR:
   3333       emitMEMBAR();
   3334       break;
   3335    case OP_VOTE:
   3336       emitVOTE();
   3337       break;
   3338    case OP_SUSTB:
   3339    case OP_SUSTP:
   3340       emitSUSTx();
   3341       break;
   3342    case OP_SULDB:
   3343    case OP_SULDP:
   3344       emitSULDx();
   3345       break;
   3346    case OP_SUREDB:
   3347    case OP_SUREDP:
   3348       emitSUREDx();
   3349       break;
   3350    default:
              // Unhandled opcode: a NOP still fills the slot, so the output
              // position advances below, but failure is reported to the caller.
   3351       assert(!"invalid opcode");
   3352       emitNOP();
   3353       ret = false;
   3354       break;
   3355    }
   3356 
           // Placeholder: nothing is emitted for insn->join here.
   3357    if (insn->join) {
   3358       /*XXX*/
   3359    }
   3360 
           // Step past the 8 bytes just written (two elements of `code`).
   3361    code += 2;
   3362    codeSize += 8;
   3363    return ret;
   3364 }
   3365 
   3366 uint32_t
   3367 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
   3368 {
           // Always 8, whatever the instruction; `i` is not inspected.
           // emitInstruction() likewise rejects any encSize other than 8.
   3369    return 8;
   3370 }
   3371 
   3372 /*******************************************************************************
   3373  * sched data calculator
   3374  ******************************************************************************/
   3375 
   3376 class SchedDataCalculatorGM107 : public Pass
   3377 {
        // Pass that computes the per-instruction scheduling word
        // (Instruction::sched). Its emit* helpers OR stall counts, the yield
        // flag, dependency-barrier ids, wait masks and reuse flags into that
        // word; the get* helpers read the same fields back.
   3378 public:
   3379    SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
   3380 
   3381 private:
           // Scoreboard of cycle numbers, one per register.
   3382    struct RegScores
   3383    {
              // r[] is indexed by GPR id, p[] by predicate id, and c is the
              // single entry used for FILE_FLAGS values (see recordWr() and
              // checkRd()).
   3384       struct ScoreData {
   3385          int r[256];
   3386          int p[8];
   3387          int c;
   3388       } rd, wr;
   3389       int base;
   3390 
              // Adds (this->base - base) to every score in rd and wr. Does
              // nothing when the two bases are already equal.
              // NOTE(review): this->base is then reset to 0 rather than set to
              // `base`, and wipe() never initialises it - confirm that this is
              // intended.
   3391       void rebase(const int base)
   3392       {
   3393          const int delta = this->base - base;
   3394          if (!delta)
   3395             return;
   3396          this->base = 0;
   3397 
   3398          for (int i = 0; i < 256; ++i) {
   3399             rd.r[i] += delta;
   3400             wr.r[i] += delta;
   3401          }
   3402          for (int i = 0; i < 8; ++i) {
   3403             rd.p[i] += delta;
   3404             wr.p[i] += delta;
   3405          }
   3406          rd.c += delta;
   3407          wr.c += delta;
   3408       }
              // Zeroes every score in rd and wr; `base` is left untouched.
   3409       void wipe()
   3410       {
   3411          memset(&rd, 0, sizeof(rd));
   3412          memset(&wr, 0, sizeof(wr));
   3413       }
              // Returns the largest score stored in `d`, or 0 when no score is
              // positive.
   3414       int getLatest(const ScoreData& d) const
   3415       {
   3416          int max = 0;
   3417          for (int i = 0; i < 256; ++i)
   3418             if (d.r[i] > max)
   3419                max = d.r[i];
   3420          for (int i = 0; i < 8; ++i)
   3421             if (d.p[i] > max)
   3422                max = d.p[i];
   3423          if (d.c > max)
   3424             max = d.c;
   3425          return max;
   3426       }
   3427       inline int getLatestRd() const
   3428       {
   3429          return getLatest(rd);
   3430       }
   3431       inline int getLatestWr() const
   3432       {
   3433          return getLatest(wr);
   3434       }
              // Largest score across both rd and wr.
   3435       inline int getLatest() const
   3436       {
   3437          return MAX2(getLatestRd(), getLatestWr());
   3438       }
              // Raises every score to the element-wise maximum of this
              // scoreboard and `that`; `base` is not merged.
   3439       void setMax(const RegScores *that)
   3440       {
   3441          for (int i = 0; i < 256; ++i) {
   3442             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
   3443             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
   3444          }
   3445          for (int i = 0; i < 8; ++i) {
   3446             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
   3447             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
   3448          }
   3449          rd.c = MAX2(rd.c, that->rd.c);
   3450          wr.c = MAX2(wr.c, that->wr.c);
   3451       }
              // Debug dump: logs every score that is strictly greater than
              // `cycle`.
   3452       void print(int cycle)
   3453       {
   3454          for (int i = 0; i < 256; ++i) {
   3455             if (rd.r[i] > cycle)
   3456                INFO("rd $r%i @ %i\n", i, rd.r[i]);
   3457             if (wr.r[i] > cycle)
   3458                INFO("wr $r%i @ %i\n", i, wr.r[i]);
   3459          }
   3460          for (int i = 0; i < 8; ++i) {
   3461             if (rd.p[i] > cycle)
   3462                INFO("rd $p%i @ %i\n", i, rd.p[i]);
   3463             if (wr.p[i] > cycle)
   3464                INFO("wr $p%i @ %i\n", i, wr.p[i]);
   3465          }
   3466          if (rd.c > cycle)
   3467             INFO("rd $c @ %i\n", rd.c);
   3468          if (wr.c > cycle)
   3469             INFO("wr $c @ %i\n", wr.c);
   3470       }
   3471    };
   3472 
   3473    RegScores *score; // for current BB
   3474    std::vector<RegScores> scoreBoards;
   3475 
   3476    const TargetGM107 *targ;
   3477    bool visit(Function *);
   3478    bool visit(BasicBlock *);
   3479 
           // Scoreboard bookkeeping and stall-count computation.
   3480    void commitInsn(const Instruction *, int);
   3481    int calcDelay(const Instruction *, int) const;
   3482    void setDelay(Instruction *, int, const Instruction *);
   3483    void recordWr(const Value *, int, int);
   3484    void checkRd(const Value *, int, int&) const;
   3485 
           // Writers for the individual fields of Instruction::sched.
   3486    inline void emitYield(Instruction *);
   3487    inline void emitStall(Instruction *, uint8_t);
   3488    inline void emitReuse(Instruction *, uint8_t);
   3489    inline void emitWrDepBar(Instruction *, uint8_t);
   3490    inline void emitRdDepBar(Instruction *, uint8_t);
   3491    inline void emitWtDepBar(Instruction *, uint8_t);
   3492 
           // Readers for the individual fields of Instruction::sched.
   3493    inline int getStall(const Instruction *) const;
   3494    inline int getWrDepBar(const Instruction *) const;
   3495    inline int getRdDepBar(const Instruction *) const;
   3496    inline int getWtDepBar(const Instruction *) const;
   3497 
   3498    void setReuseFlag(Instruction *);
   3499 
   3500    inline void printSchedInfo(int, const Instruction *) const;
   3501 
           // A live write barrier: `insn` owns the barrier and `usei` is the
           // instruction found by findFirstUse(insn). insertBarriers() releases
           // the barrier once it reaches usei.
   3502    struct LiveBarUse {
   3503       LiveBarUse(Instruction *insn, Instruction *usei)
   3504          : insn(insn), usei(usei) { }
   3505       Instruction *insn;
   3506       Instruction *usei;
   3507    };
   3508 
           // A live read barrier: `insn` owns the barrier and insertBarriers()
           // releases it once it reaches `defi`.
           // NOTE(review): defi is presumably the result of
           // findFirstDef(insn); the code that sets it is not in view here.
   3509    struct LiveBarDef {
   3510       LiveBarDef(Instruction *insn, Instruction *defi)
   3511          : insn(insn), defi(defi) { }
   3512       Instruction *insn;
   3513       Instruction *defi;
   3514    };
   3515 
   3516    bool insertBarriers(BasicBlock *);
   3517 
   3518    Instruction *findFirstUse(const Instruction *) const;
   3519    Instruction *findFirstDef(const Instruction *) const;
   3520 
   3521    bool needRdDepBar(const Instruction *) const;
   3522    bool needWrDepBar(const Instruction *) const;
   3523 };
   3524 
   3525 inline void
   3526 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
   3527 {
           // ORs the stall count into bits 0-3 of insn->sched. The value is
           // OR-ed in, not overwritten, so bits already set there are kept.
   3528    assert(cnt < 16);
   3529    insn->sched |= cnt;
   3530 }
   3531 
   3532 inline void
   3533 SchedDataCalculatorGM107::emitYield(Instruction *insn)
   3534 {
           // Sets the yield flag, bit 4 of insn->sched.
   3535    insn->sched |= 1 << 4;
   3536 }
   3537 
   3538 inline void
   3539 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
   3540 {
           // Stores write-barrier id `id` (0..5) in bits 5-7 of insn->sched.
           // If the field currently holds 7 (all three bits set) it is cleared
           // first, so the OR below yields exactly `id`.
           // NOTE(review): 7 presumably means "no barrier" - confirm where
           // sched is initialised.
   3541    assert(id < 6);
   3542    if ((insn->sched & 0xe0) == 0xe0)
   3543       insn->sched ^= 0xe0;
   3544    insn->sched |= id << 5;
   3545 }
   3546 
   3547 inline void
   3548 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
   3549 {
           // Stores read-barrier id `id` (0..5) in bits 8-10 of insn->sched.
           // If the field currently holds 7 (all three bits set) it is cleared
           // first, so the OR below yields exactly `id`.
           // NOTE(review): 7 presumably means "no barrier" - confirm where
           // sched is initialised.
   3550    assert(id < 6);
   3551    if ((insn->sched & 0x700) == 0x700)
   3552       insn->sched ^= 0x700;
   3553    insn->sched |= id << 8;
   3554 }
   3555 
   3556 inline void
   3557 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
   3558 {
           // Adds barrier `id` (0..5) to the wait mask: one bit per barrier,
           // starting at bit 11 of insn->sched.
   3559    assert(id < 6);
   3560    insn->sched |= 1 << (11 + id);
   3561 }
   3562 
   3563 inline void
   3564 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
   3565 {
           // Sets the reuse flag for operand slot `id` (0..3): one bit per
           // slot, starting at bit 17 of insn->sched.
   3566    assert(id < 4);
   3567    insn->sched |= 1 << (17 + id);
   3568 }
   3569 
   3570 inline void
   3571 SchedDataCalculatorGM107::printSchedInfo(int cycle,
   3572                                          const Instruction *insn) const
   3573 {
           // Debug helper: splits insn->sched into its fields and logs them
           // together with `cycle`.
   3574    uint8_t st, yl, wr, rd, wt, ru;
   3575 
           // st: stall count (bits 0-3)        yl: yield flag (bit 4)
           // wr: write barrier id (bits 5-7)   rd: read barrier id (bits 8-10)
           // wt: wait mask (bits 11-16)        ru: reuse flags (bits 17-20)
   3576    st = (insn->sched & 0x00000f) >> 0;
   3577    yl = (insn->sched & 0x000010) >> 4;
   3578    wr = (insn->sched & 0x0000e0) >> 5;
   3579    rd = (insn->sched & 0x000700) >> 8;
   3580    wt = (insn->sched & 0x01f800) >> 11;
   3581    ru = (insn->sched & 0x1e0000) >> 17;
   3582 
   3583    INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
   3584         cycle, st, yl, wr, rd, wt, ru);
   3585 }
   3586 
   3587 inline int
   3588 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
   3589 {
           // Stall count: bits 0-3 of insn->sched, as written by emitStall().
   3590    return insn->sched & 0xf;
   3591 }
   3592 
   3593 inline int
   3594 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
   3595 {
           // Write-barrier id: bits 5-7 of insn->sched, as written by
           // emitWrDepBar().
   3596    return (insn->sched & 0x0000e0) >> 5;
   3597 }
   3598 
   3599 inline int
   3600 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
   3601 {
           // Read-barrier id: bits 8-10 of insn->sched, as written by
           // emitRdDepBar().
   3602    return (insn->sched & 0x000700) >> 8;
   3603 }
   3604 
   3605 inline int
   3606 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
   3607 {
           // Wait mask: bits 11-16 of insn->sched, one bit per barrier id, as
           // written by emitWtDepBar().
   3608    return (insn->sched & 0x01f800) >> 11;
   3609 }
   3610 
   3611 // Emit the reuse flag which allows to make use of the new memory hierarchy
   3612 // introduced since Maxwell, the operand reuse cache.
   3613 //
   3614 // It allows to reduce bank conflicts by caching operands. Each time you issue
   3615 // an instruction, that flag can tell the hw which operands are going to be
   3616 // re-used by the next instruction. Note that the next instruction has to use
   3617 // the same GPR id in the same operand slot.
        //
        // `insn->next` is dereferenced without a NULL check as soon as a source
        // passes the earlier filters, so callers must only pass instructions
        // that have a successor.
   3618 void
   3619 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
   3620 {
   3621    Instruction *next = insn->next;
   3622    BitSet defs(255, 1);
   3623 
   3624    if (!targ->isReuseSupported(insn))
   3625       return;
   3626 
           // Collect the GPRs this instruction writes. Only destinations whose
           // dType is 4 bytes wide and whose register id is not 255 are
           // recorded.
   3627    for (int d = 0; insn->defExists(d); ++d) {
   3628       const Value *def = insn->def(d).rep();
   3629       if (insn->def(d).getFile() != FILE_GPR)
   3630          continue;
   3631       if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
   3632          continue;
   3633       defs.set(def->reg.data.id);
   3634    }
   3635 
           // Flag source slot s when it is a 4-byte GPR (id != 255) that this
           // instruction does not overwrite, and the next instruction reads the
           // same register id as a GPR in the same slot.
   3636    for (int s = 0; insn->srcExists(s); s++) {
   3637       const Value *src = insn->src(s).rep();
   3638       if (insn->src(s).getFile() != FILE_GPR)
   3639          continue;
   3640       if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
   3641          continue;
   3642       if (defs.test(src->reg.data.id))
   3643          continue;
   3644       if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
   3645          continue;
   3646       if (src->reg.data.id != next->getSrc(s)->reg.data.id)
   3647          continue;
              // Only four reuse bits exist (see emitReuse()).
   3648       assert(s < 4);
   3649       emitReuse(insn, s);
   3650    }
   3651 }
   3652 
   3653 void
   3654 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
   3655 {
           // Records in the current scoreboard the cycle at which value `v`,
           // written by an instruction issued at `cycle`, can be read. The
           // scores are stored in score->rd, which is what checkRd() consults.
           // Values in files other than GPR, predicate and flags are ignored.
   3656    int a = v->reg.data.id, b;
   3657 
   3658    switch (v->reg.file) {
   3659    case FILE_GPR:
              // A value spans reg.size / 4 consecutive registers from id `a`.
   3660       b = a + v->reg.size / 4;
   3661       for (int r = a; r < b; ++r)
   3662          score->rd.r[r] = ready;
   3663       break;
   3664    case FILE_PREDICATE:
   3665       // To immediately use a predicate set by any instructions, the minimum
   3666       // number of stall counts is 13.
   3667       score->rd.p[a] = cycle + 13;
   3668       break;
   3669    case FILE_FLAGS:
   3670       score->rd.c = ready;
   3671       break;
   3672    default:
   3673       break;
   3674    }
   3675 }
   3676 
   3677 void
   3678 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
   3679 {
           // Raises `delay` to the number of cycles that must pass after
           // `cycle` before value `v` is readable according to score->rd.
           // `delay` is never lowered, and it is left alone when `v` is already
           // ready or lives in a file that is not tracked.
   3680    int a = v->reg.data.id, b;
   3681    int ready = cycle;
   3682 
   3683    switch (v->reg.file) {
   3684    case FILE_GPR:
              // Take the latest score over every register the value covers.
   3685       b = a + v->reg.size / 4;
   3686       for (int r = a; r < b; ++r)
   3687          ready = MAX2(ready, score->rd.r[r]);
   3688       break;
   3689    case FILE_PREDICATE:
   3690       ready = MAX2(ready, score->rd.p[a]);
   3691       break;
   3692    case FILE_FLAGS:
   3693       ready = MAX2(ready, score->rd.c);
   3694       break;
   3695    default:
   3696       break;
   3697    }
   3698    if (cycle < ready)
   3699       delay = MAX2(delay, ready - cycle);
   3700 }
   3701 
   3702 void
   3703 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
   3704 {
           // Marks every destination of `insn`, issued at `cycle`, as ready at
           // cycle + latency, where the latency comes from the target.
           // recordWr() overrides this for predicate destinations.
   3705    const int ready = cycle + targ->getLatency(insn);
   3706 
   3707    for (int d = 0; insn->defExists(d); ++d)
   3708       recordWr(insn->getDef(d), cycle, ready);
   3709 
   3710 #ifdef GM107_DEBUG_SCHED_DATA
   3711    score->print(cycle);
   3712 #endif
   3713 }
   3714 
   3715 #define GM107_MIN_ISSUE_DELAY 0x1
   3716 #define GM107_MAX_ISSUE_DELAY 0xf
   3717 
   3718 int
   3719 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
   3720 {
           // Returns how many cycles after `cycle` the issue of `insn` must
           // wait so that all of its sources are readable; 0 when they
           // already are.
   3721    int delay = 0, ready = cycle;
   3722 
   3723    for (int s = 0; insn->srcExists(s); ++s)
   3724       checkRd(insn->getSrc(s), cycle, delay);
   3725 
   3726    // TODO: make use of getReadLatency()!
   3727 
           // `ready` is never changed from `cycle`, so ready - cycle is 0 and
           // the result is simply `delay`.
   3728    return MAX2(delay, ready - cycle);
   3729 }
   3730 
   3731 void
   3732 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
   3733                                    const Instruction *next)
   3734 {
           // Chooses the final stall count for `insn` and stores it with
           // emitStall(). `delay` is the stall computed from data
           // dependencies; `next` is the following instruction and may be
           // NULL.
   3735    const OpClass cl = targ->getOpClass(insn->op);
   3736    int wr, rd;
   3737 
           // Fixed stalls: EXIT, BAR and MEMBAR always get the maximum;
           // QUADON/QUADPOP, flow-class ops and joining instructions get 0xd.
   3738    if (insn->op == OP_EXIT ||
   3739        insn->op == OP_BAR ||
   3740        insn->op == OP_MEMBAR) {
   3741       delay = GM107_MAX_ISSUE_DELAY;
   3742    } else
   3743    if (insn->op == OP_QUADON ||
   3744        insn->op == OP_QUADPOP) {
   3745       delay = 0xd;
   3746    } else
   3747    if (cl == OPCLASS_FLOW || insn->join) {
   3748       delay = 0xd;
   3749    }
   3750 
           // A stall of 0 is used only for dual-issue with `next`; otherwise
           // the stall is clamped to the [MIN, MAX] issue-delay range.
   3751    if (!next || !targ->canDualIssue(insn, next)) {
   3752       delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
   3753    } else {
   3754       delay = 0x0; // dual-issue
   3755    }
   3756 
   3757    wr = getWrDepBar(insn);
   3758    rd = getRdDepBar(insn);
   3759 
           // (wr & rd) != 7 holds unless both barrier fields are 7, i.e. when
           // at least one of them names a barrier.
   3760    if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
   3761       // Barriers take one additional clock cycle to become active on top of
   3762       // the clock consumed by the instruction producing it.
              // The extra cycle is added when there is no next instruction in
              // the same block, or when the next one waits on one of the
              // barriers set here.
   3763       if (!next || insn->bb != next->bb) {
   3764          delay = 0x2;
   3765       } else {
   3766          int wt = getWtDepBar(next);
   3767          if ((wt & (1 << wr)) | (wt & (1 << rd)))
   3768             delay = 0x2;
   3769       }
   3770    }
   3771 
   3772    emitStall(insn, delay);
   3773 }
   3774 
   3775 
   3776 // Return true when the given instruction needs to emit a read dependency
   3777 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
   3778 // setting the maximum number of stall counts is not enough.
        //
        // This is the case when the target reports that the instruction
        // requires a barrier and it reads at least one GPR (other than id 255)
        // that it does not also write.
   3779 bool
   3780 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
   3781 {
   3782    BitSet srcs(255, 1), defs(255, 1);
   3783    int a, b;
   3784 
   3785    if (!targ->isBarrierRequired(insn))
   3786       return false;
   3787 
   3788    // Do not emit a read dependency barrier when the instruction doesn't use
   3789    // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
   3790    for (int s = 0; insn->srcExists(s); ++s) {
   3791       const Value *src = insn->src(s).rep();
   3792       if (insn->src(s).getFile() != FILE_GPR)
   3793          continue;
   3794       if (src->reg.data.id == 255)
   3795          continue;
   3796 
              // Mark every register covered by this source (reg.size / 4
              // registers starting at its id).
   3797       a = src->reg.data.id;
   3798       b = a + src->reg.size / 4;
   3799       for (int r = a; r < b; ++r)
   3800          srcs.set(r);
   3801    }
   3802 
   3803    if (!srcs.popCount())
   3804       return false;
   3805 
   3806    // Do not emit a read dependency barrier when the output GPRs are equal to
   3807    // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
   3808    // be produced and WaR hazards are prevented.
   3809    for (int d = 0; insn->defExists(d); ++d) {
   3810       const Value *def = insn->def(d).rep();
   3811       if (insn->def(d).getFile() != FILE_GPR)
   3812          continue;
   3813       if (def->reg.data.id == 255)
   3814          continue;
   3815 
   3816       a = def->reg.data.id;
   3817       b = a + def->reg.size / 4;
   3818       for (int r = a; r < b; ++r)
   3819          defs.set(r);
   3820    }
   3821 
           // Drop the sources that are also written; a barrier is needed only
           // if some source register remains.
   3822    srcs.andNot(defs);
   3823    if (!srcs.popCount())
   3824       return false;
   3825 
   3826    return true;
   3827 }
   3828 
   3829 // Return true when the given instruction needs to emit a write dependency
   3830 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
   3831 // setting the maximum number of stall counts is not enough. This is only legal
   3832 // if the instruction output something.
        //
        // "Output something" here means that at least one destination is a GPR
        // or a predicate.
   3833 bool
   3834 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
   3835 {
   3836    if (!targ->isBarrierRequired(insn))
   3837       return false;
   3838 
   3839    for (int d = 0; insn->defExists(d); ++d) {
   3840       if (insn->def(d).getFile() == FILE_GPR ||
   3841           insn->def(d).getFile() == FILE_PREDICATE)
   3842          return true;
   3843    }
   3844    return false;
   3845 }
   3846 
   3847 // Find the next instruction inside the same basic block which uses the output
   3848 // of the given instruction in order to avoid RaW hazards.
        //
        // Only the first destination, def(0), of `bari` is considered. Returns
        // NULL when `bari` has no destination or when no later instruction on
        // the `next` chain reads it.
   3849 Instruction *
   3850 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
   3851 {
   3852    Instruction *insn, *next;
   3853    int minGPR, maxGPR;
   3854 
   3855    if (!bari->defExists(0))
   3856       return NULL;
   3857 
           // Register range covered by def(0). For a predicate destination
           // minGPR is simply the predicate id.
   3858    minGPR = bari->def(0).rep()->reg.data.id;
   3859    maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
   3860 
   3861    for (insn = bari->next; insn != NULL; insn = next) {
   3862       next = insn->next;
   3863 
   3864       for (int s = 0; insn->srcExists(s); ++s) {
   3865          const Value *src = insn->src(s).rep();
   3866          if (bari->def(0).getFile() == FILE_GPR) {
                    // Skip non-GPR sources and sources whose register range
                    // does not overlap [minGPR, maxGPR].
   3867             if (insn->src(s).getFile() != FILE_GPR ||
   3868                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
   3869                 src->reg.data.id > maxGPR)
   3870                continue;
   3871             return insn;
   3872          } else
   3873          if (bari->def(0).getFile() == FILE_PREDICATE) {
   3874             if (insn->src(s).getFile() != FILE_PREDICATE ||
   3875                 src->reg.data.id != minGPR)
   3876                continue;
   3877             return insn;
   3878          }
   3879       }
   3880    }
   3881    return NULL;
   3882 }
   3883 
   3884 // Find the next instruction inside the same basic block which overwrites, at
   3885 // least, one source of the given instruction in order to avoid WaR hazards.
        //
        // Only GPR destinations and GPR sources are compared. Returns NULL when
        // no later instruction on the `next` chain writes a register that
        // `bari` reads.
   3886 Instruction *
   3887 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
   3888 {
   3889    Instruction *insn, *next;
   3890    int minGPR, maxGPR;
   3891 
   3892    for (insn = bari->next; insn != NULL; insn = next) {
   3893       next = insn->next;
   3894 
   3895       for (int d = 0; insn->defExists(d); ++d) {
   3896          const Value *def = insn->def(d).rep();
   3897          if (insn->def(d).getFile() != FILE_GPR)
   3898             continue;
   3899 
                 // Register range written by this destination.
   3900          minGPR = def->reg.data.id;
   3901          maxGPR = minGPR + def->reg.size / 4 - 1;
   3902 
   3903          for (int s = 0; bari->srcExists(s); ++s) {
   3904             const Value *src = bari->src(s).rep();
                    // Skip non-GPR sources and sources whose register range
                    // does not overlap [minGPR, maxGPR].
   3905             if (bari->src(s).getFile() != FILE_GPR ||
   3906                 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
   3907                 src->reg.data.id > maxGPR)
   3908                continue;
   3909             return insn;
   3910          }
   3911       }
   3912    }
   3913    return NULL;
   3914 }
   3915 
   3916 // Dependency barriers:
   3917 // This pass is a bit ugly and could probably be improved by performing a
   3918 // better allocation.
   3919 //
   3920 // The main idea is to avoid WaR and RaW hazards by emitting read/write
   3921 // dependency barriers using the control codes.
   3922 bool
   3923 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
   3924 {
   3925    std::list<LiveBarUse> live_uses;
   3926    std::list<LiveBarDef> live_defs;
   3927    Instruction *insn, *next;
   3928    BitSet bars(6, 1);
   3929    int bar_id;
   3930 
   3931    for (insn = bb->getEntry(); insn != NULL; insn = next) {
   3932       Instruction *usei = NULL, *defi = NULL;
   3933       bool need_wr_bar, need_rd_bar;
   3934 
   3935       next = insn->next;
   3936 
   3937       // Expire old barrier uses.
   3938       for (std::list<LiveBarUse>::iterator it = live_uses.begin();
   3939            it != live_uses.end();) {
   3940          if (insn->serial >= it->usei->serial) {
   3941             int wr = getWrDepBar(it->insn);
   3942             emitWtDepBar(insn, wr);
   3943             bars.clr(wr); // free barrier
   3944             it = live_uses.erase(it);
   3945             continue;
   3946          }
   3947          ++it;
   3948       }
   3949 
   3950       // Expire old barrier defs.
   3951       for (std::list<LiveBarDef>::iterator it = live_defs.begin();
   3952            it != live_defs.end();) {
   3953          if (insn->serial >= it->defi->serial) {
   3954             int rd = getRdDepBar(it->insn);
   3955             emitWtDepBar(insn, rd);
   3956             bars.clr(rd); // free barrier
   3957             it = live_defs.erase(it);
   3958             continue;
   3959          }
   3960          ++it;
   3961       }
   3962 
   3963       need_wr_bar = needWrDepBar(insn);
   3964       need_rd_bar = needRdDepBar(insn);
   3965 
   3966       if (need_wr_bar) {
   3967          // When the instruction requires to emit a write dependency barrier
   3968          // (all which write something at a variable latency), find the next
   3969          // instruction which reads the outputs.
   3970          usei = findFirstUse(insn);
   3971 
   3972          // Allocate and emit a new barrier.
   3973          bar_id = bars.findFreeRange(1);
   3974          if (bar_id == -1)
   3975             bar_id = 5;
   3976          bars.set(bar_id);
   3977          emitWrDepBar(insn, bar_id);
   3978          if (usei)
   3979             live_uses.push_back(LiveBarUse(insn, usei));
   3980       }
   3981 
   3982       if (need_rd_bar) {
   3983          // When the instruction requires to emit a read dependency barrier
   3984          // (all which read something at a variable latency), find the next
   3985          // instruction which will write the inputs.
   3986          defi = findFirstDef(insn);
   3987 
   3988          if (usei && defi && usei->serial <= defi->serial)
   3989             continue;
   3990 
   3991          // Allocate and emit a new barrier.
   3992          bar_id = bars.findFreeRange(1);
   3993          if (bar_id == -1)
   3994             bar_id = 5;
   3995          bars.set(bar_id);
   3996          emitRdDepBar(insn, bar_id);
   3997          if (defi)
   3998             live_defs.push_back(LiveBarDef(insn, defi));
   3999       }
   4000    }
   4001 
   4002    // Remove unnecessary barrier waits.
   4003    BitSet alive_bars(6, 1);
   4004    for (insn = bb->getEntry(); insn != NULL; insn = next) {
   4005       int wr, rd, wt;
   4006 
   4007       next = insn->next;
   4008 
   4009       wr = getWrDepBar(insn);
   4010       rd = getRdDepBar(insn);
   4011       wt = getWtDepBar(insn);
   4012 
   4013       for (int idx = 0; idx < 6; ++idx) {
   4014          if (!(wt & (1 << idx)))
   4015             continue;
   4016          if (!alive_bars.test(idx)) {
   4017             insn->sched &= ~(1 << (11  + idx));
   4018          } else {
   4019             alive_bars.clr(idx);
   4020          }
   4021       }
   4022 
   4023       if (wr < 6)
   4024          alive_bars.set(wr);
   4025       if (rd < 6)
   4026          alive_bars.set(rd);
   4027    }
   4028 
   4029    return true;
   4030 }
   4031 
   4032 bool
   4033 SchedDataCalculatorGM107::visit(Function *func)
   4034 {
   4035    ArrayList insns;
   4036 
   4037    func->orderInstructions(insns);
   4038 
   4039    scoreBoards.resize(func->cfg.getSize());
   4040    for (size_t i = 0; i < scoreBoards.size(); ++i)
   4041       scoreBoards[i].wipe();
   4042    return true;
   4043 }
   4044 
   4045 bool
   4046 SchedDataCalculatorGM107::visit(BasicBlock *bb)
   4047 {
   4048    Instruction *insn, *next = NULL;
   4049    int cycle = 0;
   4050 
   4051    for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
   4052       /*XXX*/
   4053       insn->sched = 0x7e0;
   4054    }
   4055 
   4056    if (!debug_get_bool_option("NV50_PROG_SCHED", true))
   4057       return true;
   4058 
   4059    // Insert read/write dependency barriers for instructions which don't
   4060    // operate at a fixed latency.
   4061    insertBarriers(bb);
   4062 
   4063    score = &scoreBoards.at(bb->getId());
   4064 
   4065    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
   4066       // back branches will wait until all target dependencies are satisfied
   4067       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
   4068          continue;
   4069       BasicBlock *in = BasicBlock::get(ei.getNode());
   4070       score->setMax(&scoreBoards.at(in->getId()));
   4071    }
   4072 
   4073 #ifdef GM107_DEBUG_SCHED_DATA
   4074    INFO("=== BB:%i initial scores\n", bb->getId());
   4075    score->print(cycle);
   4076 #endif
   4077 
   4078    // Because barriers are allocated locally (intra-BB), we have to make sure
   4079    // that all produced barriers have been consumed before entering inside a
   4080    // new basic block. The best way is to do a global allocation pre RA but
   4081    // it's really more difficult, especially because of the phi nodes. Anyways,
   4082    // it seems like that waiting on a barrier which has already been consumed
   4083    // doesn't add any additional cost, it's just not elegant!
   4084    Instruction *start = bb->getEntry();
   4085    if (start && bb->cfg.incidentCount() > 0) {
   4086       for (int b = 0; b < 6; b++)
   4087          emitWtDepBar(start, b);
   4088    }
   4089 
   4090    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
   4091       next = insn->next;
   4092 
   4093       commitInsn(insn, cycle);
   4094       int delay = calcDelay(next, cycle);
   4095       setDelay(insn, delay, next);
   4096       cycle += getStall(insn);
   4097 
   4098       setReuseFlag(insn);
   4099 
   4100       // XXX: The yield flag seems to destroy a bunch of things when it is
   4101       // set on every instruction, need investigation.
   4102       //emitYield(insn);
   4103 
   4104 #ifdef GM107_DEBUG_SCHED_DATA
   4105       printSchedInfo(cycle, insn);
   4106       insn->print();
   4107       next->print();
   4108 #endif
   4109    }
   4110 
   4111    if (!insn)
   4112       return true;
   4113    commitInsn(insn, cycle);
   4114 
   4115    int bbDelay = -1;
   4116 
   4117 #ifdef GM107_DEBUG_SCHED_DATA
   4118    fprintf(stderr, "last instruction is : ");
   4119    insn->print();
   4120    fprintf(stderr, "cycle=%d\n", cycle);
   4121 #endif
   4122 
   4123    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
   4124       BasicBlock *out = BasicBlock::get(ei.getNode());
   4125 
   4126       if (ei.getType() != Graph::Edge::BACK) {
   4127          // Only test the first instruction of the outgoing block.
   4128          next = out->getEntry();
   4129          if (next) {
   4130             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
   4131          } else {
   4132             // When the outgoing BB is empty, make sure to set the number of
   4133             // stall counts needed by the instruction because we don't know the
   4134             // next instruction.
   4135             bbDelay = MAX2(bbDelay, targ->getLatency(insn));
   4136          }
   4137       } else {
   4138          // Wait until all dependencies are satisfied.
   4139          const int regsFree = score->getLatest();
   4140          next = out->getFirst();
   4141          for (int c = cycle; next && c < regsFree; next = next->next) {
   4142             bbDelay = MAX2(bbDelay, calcDelay(next, c));
   4143             c += getStall(next);
   4144          }
   4145          next = NULL;
   4146       }
   4147    }
   4148    if (bb->cfg.outgoingCount() != 1)
   4149       next = NULL;
   4150    setDelay(insn, bbDelay, next);
   4151    cycle += getStall(insn);
   4152 
   4153    score->rebase(cycle); // common base for initializing out blocks' scores
   4154    return true;
   4155 }
   4156 
   4157 /*******************************************************************************
   4158  * main
   4159  ******************************************************************************/
   4160 
   4161 void
   4162 CodeEmitterGM107::prepareEmission(Function *func)
   4163 {
   4164    SchedDataCalculatorGM107 sched(targGM107);
   4165    CodeEmitter::prepareEmission(func);
   4166    sched.run(func, true, true);
   4167 }
   4168 
   4169 static inline uint32_t sizeToBundlesGM107(uint32_t size)
   4170 {
   4171    return (size + 23) / 24;
   4172 }
   4173 
   4174 void
   4175 CodeEmitterGM107::prepareEmission(Program *prog)
   4176 {
   4177    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
   4178         !fi.end(); fi.next()) {
   4179       Function *func = reinterpret_cast<Function *>(fi.get());
   4180       func->binPos = prog->binSize;
   4181       prepareEmission(func);
   4182 
   4183       // adjust sizes & positions for schedulding info:
   4184       if (prog->getTarget()->hasSWSched) {
   4185          uint32_t adjPos = func->binPos;
   4186          BasicBlock *bb = NULL;
   4187          for (int i = 0; i < func->bbCount; ++i) {
   4188             bb = func->bbArray[i];
   4189             int32_t adjSize = bb->binSize;
   4190             if (adjPos % 32) {
   4191                adjSize -= 32 - adjPos % 32;
   4192                if (adjSize < 0)
   4193                   adjSize = 0;
   4194             }
   4195             adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
   4196             bb->binPos = adjPos;
   4197             bb->binSize = adjSize;
   4198             adjPos += adjSize;
   4199          }
   4200          if (bb)
   4201             func->binSize = adjPos - func->binPos;
   4202       }
   4203 
   4204       prog->binSize += func->binSize;
   4205    }
   4206 }
   4207 
   4208 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
   4209    : CodeEmitter(target),
   4210      targGM107(target),
   4211      writeIssueDelays(target->hasSWSched)
   4212 {
   4213    code = NULL;
   4214    codeSize = codeSizeLimit = 0;
   4215    relocInfo = NULL;
   4216 }
   4217 
   4218 CodeEmitter *
   4219 TargetGM107::createCodeEmitterGM107(Program::Type type)
   4220 {
   4221    CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
   4222    emit->setProgramType(type);
   4223    return emit;
   4224 }
   4225 
   4226 } // namespace nv50_ir
   4227