Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "codegen/nv50_ir.h"
     24 #include "codegen/nv50_ir_target_nv50.h"
     25 
     26 namespace nv50_ir {
     27 
// Encoding form selectors. They are passed as the 'enc' argument of
// CodeEmitterNV50::setSrcFileBits() by the emitForm_* helpers.
#define NV50_OP_ENC_LONG     0 // passed by emitForm_MAD
#define NV50_OP_ENC_SHORT    1 // passed by emitForm_MUL
#define NV50_OP_ENC_IMM      2 // passed by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3 // passed by emitForm_ADD
     32 
     33 class CodeEmitterNV50 : public CodeEmitter
     34 {
     35 public:
     36    CodeEmitterNV50(const TargetNV50 *);
     37 
     38    virtual bool emitInstruction(Instruction *);
     39 
     40    virtual uint32_t getMinEncodingSize(const Instruction *) const;
     41 
     42    inline void setProgramType(Program::Type pType) { progType = pType; }
     43 
     44    virtual void prepareEmission(Function *);
     45 
     46 private:
     47    Program::Type progType;
     48 
     49    const TargetNV50 *targNV50;
     50 
     51 private:
     52    inline void defId(const ValueDef&, const int pos);
     53    inline void srcId(const ValueRef&, const int pos);
     54    inline void srcId(const ValueRef *, const int pos);
     55 
     56    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
     57    inline void srcAddr8(const ValueRef&, const int pos);
     58 
     59    void emitFlagsRd(const Instruction *);
     60    void emitFlagsWr(const Instruction *);
     61 
     62    void emitCondCode(CondCode cc, DataType ty, int pos);
     63 
     64    inline void setARegBits(unsigned int);
     65 
     66    void setAReg16(const Instruction *, int s);
     67    void setImmediate(const Instruction *, int s);
     68 
     69    void setDst(const Value *);
     70    void setDst(const Instruction *, int d);
     71    void setSrcFileBits(const Instruction *, int enc);
     72    void setSrc(const Instruction *, unsigned int s, int slot);
     73 
     74    void emitForm_MAD(const Instruction *);
     75    void emitForm_ADD(const Instruction *);
     76    void emitForm_MUL(const Instruction *);
     77    void emitForm_IMM(const Instruction *);
     78 
     79    void emitLoadStoreSizeLG(DataType ty, int pos);
     80    void emitLoadStoreSizeCS(DataType ty);
     81 
     82    void roundMode_MAD(const Instruction *);
     83    void roundMode_CVT(RoundMode);
     84 
     85    void emitMNeg12(const Instruction *);
     86 
     87    void emitLOAD(const Instruction *);
     88    void emitSTORE(const Instruction *);
     89    void emitMOV(const Instruction *);
     90    void emitRDSV(const Instruction *);
     91    void emitNOP();
     92    void emitINTERP(const Instruction *);
     93    void emitPFETCH(const Instruction *);
     94    void emitOUT(const Instruction *);
     95 
     96    void emitUADD(const Instruction *);
     97    void emitAADD(const Instruction *);
     98    void emitFADD(const Instruction *);
     99    void emitDADD(const Instruction *);
    100    void emitIMUL(const Instruction *);
    101    void emitFMUL(const Instruction *);
    102    void emitDMUL(const Instruction *);
    103    void emitFMAD(const Instruction *);
    104    void emitDMAD(const Instruction *);
    105    void emitIMAD(const Instruction *);
    106    void emitISAD(const Instruction *);
    107 
    108    void emitMINMAX(const Instruction *);
    109 
    110    void emitPreOp(const Instruction *);
    111    void emitSFnOp(const Instruction *, uint8_t subOp);
    112 
    113    void emitShift(const Instruction *);
    114    void emitARL(const Instruction *, unsigned int shl);
    115    void emitLogicOp(const Instruction *);
    116    void emitNOT(const Instruction *);
    117 
    118    void emitCVT(const Instruction *);
    119    void emitSET(const Instruction *);
    120 
    121    void emitTEX(const TexInstruction *);
    122    void emitTXQ(const TexInstruction *);
    123    void emitTEXPREP(const TexInstruction *);
    124 
    125    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
    126 
    127    void emitFlow(const Instruction *, uint8_t flowOp);
    128    void emitPRERETEmu(const FlowInstruction *);
    129    void emitBAR(const Instruction *);
    130 
    131    void emitATOM(const Instruction *);
    132 };
    133 
// Shorthands for the register data of the representative value (rep()) of a
// reference. Both macros expand to the same expression; in this file SDATA is
// applied to source references and DDATA to definitions.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
    136 
    137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
    138 {
    139    assert(src.get());
    140    code[pos / 32] |= SDATA(src).id << (pos % 32);
    141 }
    142 
    143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
    144 {
    145    assert(src->get());
    146    code[pos / 32] |= SDATA(*src).id << (pos % 32);
    147 }
    148 
    149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
    150 {
    151    assert(src.get());
    152 
    153    int32_t offset = SDATA(src).offset;
    154 
    155    assert(!adj || src.get()->reg.size <= 4);
    156    if (adj)
    157       offset /= src.get()->reg.size;
    158 
    159    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
    160 
    161    if (offset < 0)
    162       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
    163 
    164    code[pos / 32] |= offset << (pos % 32);
    165 }
    166 
    167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
    168 {
    169    assert(src.get());
    170 
    171    uint32_t offset = SDATA(src).offset;
    172 
    173    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
    174 
    175    code[pos / 32] |= (offset >> 2) << (pos % 32);
    176 }
    177 
    178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
    179 {
    180    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
    181 
    182    code[pos / 32] |= DDATA(def).id << (pos % 32);
    183 }
    184 
    185 void
    186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
    187 {
    188    switch (insn->rnd) {
    189    case ROUND_M: code[1] |= 1 << 22; break;
    190    case ROUND_P: code[1] |= 2 << 22; break;
    191    case ROUND_Z: code[1] |= 3 << 22; break;
    192    default:
    193       assert(insn->rnd == ROUND_N);
    194       break;
    195    }
    196 }
    197 
    198 void
    199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
    200 {
    201    code[1] |= i->src(0).mod.neg() << 26;
    202    code[1] |= i->src(1).mod.neg() << 27;
    203 }
    204 
    205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
    206 {
    207    uint8_t enc;
    208 
    209    assert(pos >= 32 || pos <= 27);
    210 
    211    switch (cc) {
    212    case CC_LT:  enc = 0x1; break;
    213    case CC_LTU: enc = 0x9; break;
    214    case CC_EQ:  enc = 0x2; break;
    215    case CC_EQU: enc = 0xa; break;
    216    case CC_LE:  enc = 0x3; break;
    217    case CC_LEU: enc = 0xb; break;
    218    case CC_GT:  enc = 0x4; break;
    219    case CC_GTU: enc = 0xc; break;
    220    case CC_NE:  enc = 0x5; break;
    221    case CC_NEU: enc = 0xd; break;
    222    case CC_GE:  enc = 0x6; break;
    223    case CC_GEU: enc = 0xe; break;
    224    case CC_TR:  enc = 0xf; break;
    225    case CC_FL:  enc = 0x0; break;
    226 
    227    case CC_O:  enc = 0x10; break;
    228    case CC_C:  enc = 0x11; break;
    229    case CC_A:  enc = 0x12; break;
    230    case CC_S:  enc = 0x13; break;
    231    case CC_NS: enc = 0x1c; break;
    232    case CC_NA: enc = 0x1d; break;
    233    case CC_NC: enc = 0x1e; break;
    234    case CC_NO: enc = 0x1f; break;
    235 
    236    default:
    237       enc = 0;
    238       assert(!"invalid condition code");
    239       break;
    240    }
    241    if (ty != TYPE_NONE && !isFloatType(ty))
    242       enc &= ~0x8; // unordered only exists for float types
    243 
    244    code[pos / 32] |= enc << (pos % 32);
    245 }
    246 
    247 void
    248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
    249 {
    250    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
    251 
    252    assert(!(code[1] & 0x00003f80));
    253 
    254    if (s >= 0) {
    255       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
    256       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
    257       srcId(i->src(s), 32 + 12);
    258    } else {
    259       code[1] |= 0x0780;
    260    }
    261 }
    262 
    263 void
    264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
    265 {
    266    assert(!(code[1] & 0x70));
    267 
    268    int flagsDef = i->flagsDef;
    269 
    270    // find flags definition and check that it is the last def
    271    if (flagsDef < 0) {
    272       for (int d = 0; i->defExists(d); ++d)
    273          if (i->def(d).getFile() == FILE_FLAGS)
    274             flagsDef = d;
    275       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
    276          WARN("Instruction::flagsDef was not set properly\n");
    277    }
    278    if (flagsDef == 0 && i->defExists(1))
    279       WARN("flags def should not be the primary definition\n");
    280 
    281    if (flagsDef >= 0)
    282       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
    283 
    284 }
    285 
    286 void
    287 CodeEmitterNV50::setARegBits(unsigned int u)
    288 {
    289    code[0] |= (u & 3) << 26;
    290    code[1] |= (u & 4);
    291 }
    292 
    293 void
    294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
    295 {
    296    if (i->srcExists(s)) {
    297       s = i->src(s).indirect[0];
    298       if (s >= 0)
    299          setARegBits(SDATA(i->src(s)).id + 1);
    300    }
    301 }
    302 
    303 void
    304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
    305 {
    306    const ImmediateValue *imm = i->src(s).get()->asImm();
    307    assert(imm);
    308 
    309    uint32_t u = imm->reg.data.u32;
    310 
    311    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
    312       u = ~u;
    313 
    314    code[1] |= 3;
    315    code[0] |= (u & 0x3f) << 16;
    316    code[1] |= (u >> 6) << 2;
    317 }
    318 
    319 void
    320 CodeEmitterNV50::setDst(const Value *dst)
    321 {
    322    const Storage *reg = &dst->join->reg;
    323 
    324    assert(reg->file != FILE_ADDRESS);
    325 
    326    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
    327       code[0] |= (127 << 2) | 1;
    328       code[1] |= 8;
    329    } else {
    330       int id;
    331       if (reg->file == FILE_SHADER_OUTPUT) {
    332          code[1] |= 8;
    333          id = reg->data.offset / 4;
    334       } else {
    335          id = reg->data.id;
    336       }
    337       code[0] |= id << 2;
    338    }
    339 }
    340 
    341 void
    342 CodeEmitterNV50::setDst(const Instruction *i, int d)
    343 {
    344    if (i->defExists(d)) {
    345       setDst(i->getDef(d));
    346    } else
    347    if (!d) {
    348       code[0] |= 0x01fc; // bit bucket
    349       code[1] |= 0x0008;
    350    }
    351 }
    352 
    353 // 3 * 2 bits:
    354 // 0: r
    355 // 1: a/s
    356 // 2: c
    357 // 3: i
    358 void
    359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
    360 {
    361    uint8_t mode = 0;
    362 
    363    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
    364       switch (i->src(s).getFile()) {
    365       case FILE_GPR:
    366          break;
    367       case FILE_MEMORY_SHARED:
    368       case FILE_SHADER_INPUT:
    369          mode |= 1 << (s * 2);
    370          break;
    371       case FILE_MEMORY_CONST:
    372          mode |= 2 << (s * 2);
    373          break;
    374       case FILE_IMMEDIATE:
    375          mode |= 3 << (s * 2);
    376          break;
    377       default:
    378          ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
    379          assert(0);
    380          break;
    381       }
    382    }
    383    switch (mode) {
    384    case 0x00: // rrr
    385       break;
    386    case 0x01: // arr/grr
    387       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
    388          code[0] |= 0x01800000;
    389          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
    390             code[1] |= 0x00200000;
    391       } else {
    392          if (enc == NV50_OP_ENC_SHORT)
    393             code[0] |= 0x01000000;
    394          else
    395             code[1] |= 0x00200000;
    396       }
    397       break;
    398    case 0x03: // irr
    399       assert(i->op == OP_MOV);
    400       return;
    401    case 0x0c: // rir
    402       break;
    403    case 0x0d: // gir
    404       assert(progType == Program::TYPE_GEOMETRY ||
    405              progType == Program::TYPE_COMPUTE);
    406       code[0] |= 0x01000000;
    407       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
    408          int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
    409          assert(reg < 3);
    410          code[0] |= (reg + 1) << 26;
    411       }
    412       break;
    413    case 0x08: // rcr
    414       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
    415       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
    416       break;
    417    case 0x09: // acr/gcr
    418       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
    419          code[0] |= 0x01800000;
    420       } else {
    421          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
    422          code[1] |= 0x00200000;
    423       }
    424       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
    425       break;
    426    case 0x20: // rrc
    427       code[0] |= 0x01000000;
    428       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
    429       break;
    430    case 0x21: // arc
    431       code[0] |= 0x01000000;
    432       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
    433       assert(progType != Program::TYPE_GEOMETRY);
    434       break;
    435    default:
    436       ERROR("not encodable: %x\n", mode);
    437       assert(0);
    438       break;
    439    }
    440    if (progType != Program::TYPE_COMPUTE)
    441       return;
    442 
    443    if ((mode & 3) == 1) {
    444       const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
    445 
    446       switch (i->sType) {
    447       case TYPE_U8:
    448          break;
    449       case TYPE_U16:
    450          code[0] |= 1 << pos;
    451          break;
    452       case TYPE_S16:
    453          code[0] |= 2 << pos;
    454          break;
    455       default:
    456          code[0] |= 3 << pos;
    457          assert(i->getSrc(0)->reg.size == 4);
    458          break;
    459       }
    460    }
    461 }
    462 
    463 void
    464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
    465 {
    466    if (Target::operationSrcNr[i->op] <= s)
    467       return;
    468    const Storage *reg = &i->src(s).rep()->reg;
    469 
    470    unsigned int id = (reg->file == FILE_GPR) ?
    471       reg->data.id :
    472       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
    473 
    474    switch (slot) {
    475    case 0: code[0] |= id << 9; break;
    476    case 1: code[0] |= id << 16; break;
    477    case 2: code[1] |= id << 14; break;
    478    default:
    479       assert(0);
    480       break;
    481    }
    482 }
    483 
    484 // the default form:
    485 //  - long instruction
    486 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
    487 //  - address & flags
    488 void
    489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
    490 {
    491    assert(i->encSize == 8);
    492    code[0] |= 1;
    493 
    494    emitFlagsRd(i);
    495    emitFlagsWr(i);
    496 
    497    setDst(i, 0);
    498 
    499    setSrcFileBits(i, NV50_OP_ENC_LONG);
    500    setSrc(i, 0, 0);
    501    setSrc(i, 1, 1);
    502    setSrc(i, 2, 2);
    503 
    504    if (i->getIndirect(0, 0)) {
    505       assert(!i->srcExists(1) || !i->getIndirect(1, 0));
    506       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
    507       setAReg16(i, 0);
    508    } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
    509       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
    510       setAReg16(i, 1);
    511    } else {
    512       setAReg16(i, 2);
    513    }
    514 }
    515 
    516 // like default form, but 2nd source in slot 2, and no 3rd source
    517 void
    518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
    519 {
    520    assert(i->encSize == 8);
    521    code[0] |= 1;
    522 
    523    emitFlagsRd(i);
    524    emitFlagsWr(i);
    525 
    526    setDst(i, 0);
    527 
    528    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
    529    setSrc(i, 0, 0);
    530    if (i->predSrc != 1)
    531       setSrc(i, 1, 2);
    532 
    533    if (i->getIndirect(0, 0)) {
    534       assert(!i->getIndirect(1, 0));
    535       setAReg16(i, 0);
    536    } else {
    537       setAReg16(i, 1);
    538    }
    539 }
    540 
    541 // default short form (rr, ar, rc, gr)
    542 void
    543 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
    544 {
    545    assert(i->encSize == 4 && !(code[0] & 1));
    546    assert(i->defExists(0));
    547    assert(!i->getPredicate());
    548 
    549    setDst(i, 0);
    550 
    551    setSrcFileBits(i, NV50_OP_ENC_SHORT);
    552    setSrc(i, 0, 0);
    553    setSrc(i, 1, 1);
    554 }
    555 
    556 // usual immediate form
    557 // - 1 to 3 sources where second is immediate (rir, gir)
    558 // - no address or predicate possible
    559 void
    560 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
    561 {
    562    assert(i->encSize == 8);
    563    code[0] |= 1;
    564 
    565    assert(i->defExists(0) && i->srcExists(0));
    566 
    567    setDst(i, 0);
    568 
    569    setSrcFileBits(i, NV50_OP_ENC_IMM);
    570    if (Target::operationSrcNr[i->op] > 1) {
    571       setSrc(i, 0, 0);
    572       setImmediate(i, 1);
    573       // If there is another source, it has to be the same as the dest reg.
    574    } else {
    575       setImmediate(i, 0);
    576    }
    577 }
    578 
    579 void
    580 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
    581 {
    582    uint8_t enc;
    583 
    584    switch (ty) {
    585    case TYPE_F32: // fall through
    586    case TYPE_S32: // fall through
    587    case TYPE_U32:  enc = 0x6; break;
    588    case TYPE_B128: enc = 0x5; break;
    589    case TYPE_F64: // fall through
    590    case TYPE_S64: // fall through
    591    case TYPE_U64:  enc = 0x4; break;
    592    case TYPE_S16:  enc = 0x3; break;
    593    case TYPE_U16:  enc = 0x2; break;
    594    case TYPE_S8:   enc = 0x1; break;
    595    case TYPE_U8:   enc = 0x0; break;
    596    default:
    597       enc = 0;
    598       assert(!"invalid load/store type");
    599       break;
    600    }
    601    code[pos / 32] |= enc << (pos % 32);
    602 }
    603 
    604 void
    605 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
    606 {
    607    switch (ty) {
    608    case TYPE_U8: break;
    609    case TYPE_U16: code[1] |= 0x4000; break;
    610    case TYPE_S16: code[1] |= 0x8000; break;
    611    case TYPE_F32:
    612    case TYPE_S32:
    613    case TYPE_U32: code[1] |= 0xc000; break;
    614    default:
    615       assert(0);
    616       break;
    617    }
    618 }
    619 
    620 void
    621 CodeEmitterNV50::emitLOAD(const Instruction *i)
    622 {
    623    DataFile sf = i->src(0).getFile();
    624    MAYBE_UNUSED int32_t offset = i->getSrc(0)->reg.data.offset;
    625 
    626    switch (sf) {
    627    case FILE_SHADER_INPUT:
    628       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
    629          code[0] = 0x11800001;
    630       else
    631          // use 'mov' where we can
    632          code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
    633       code[1] = 0x00200000 | (i->lanes << 14);
    634       if (typeSizeof(i->dType) == 4)
    635          code[1] |= 0x04000000;
    636       break;
    637    case FILE_MEMORY_SHARED:
    638       if (targ->getChipset() >= 0x84) {
    639          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
    640          code[0] = 0x10000001;
    641          code[1] = 0x40000000;
    642 
    643          if (typeSizeof(i->dType) == 4)
    644             code[1] |= 0x04000000;
    645 
    646          emitLoadStoreSizeCS(i->sType);
    647       } else {
    648          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
    649          code[0] = 0x10000001;
    650          code[1] = 0x00200000 | (i->lanes << 14);
    651          emitLoadStoreSizeCS(i->sType);
    652       }
    653       break;
    654    case FILE_MEMORY_CONST:
    655       code[0] = 0x10000001;
    656       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
    657       if (typeSizeof(i->dType) == 4)
    658          code[1] |= 0x04000000;
    659       emitLoadStoreSizeCS(i->sType);
    660       break;
    661    case FILE_MEMORY_LOCAL:
    662       code[0] = 0xd0000001;
    663       code[1] = 0x40000000;
    664       break;
    665    case FILE_MEMORY_GLOBAL:
    666       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
    667       code[1] = 0x80000000;
    668       break;
    669    default:
    670       assert(!"invalid load source file");
    671       break;
    672    }
    673    if (sf == FILE_MEMORY_LOCAL ||
    674        sf == FILE_MEMORY_GLOBAL)
    675       emitLoadStoreSizeLG(i->sType, 21 + 32);
    676 
    677    setDst(i, 0);
    678 
    679    emitFlagsRd(i);
    680    emitFlagsWr(i);
    681 
    682    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
    683       srcId(*i->src(0).getIndirect(0), 9);
    684    } else {
    685       setAReg16(i, 0);
    686       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
    687    }
    688 }
    689 
    690 void
    691 CodeEmitterNV50::emitSTORE(const Instruction *i)
    692 {
    693    DataFile f = i->getSrc(0)->reg.file;
    694    int32_t offset = i->getSrc(0)->reg.data.offset;
    695 
    696    switch (f) {
    697    case FILE_SHADER_OUTPUT:
    698       code[0] = 0x00000001 | ((offset >> 2) << 9);
    699       code[1] = 0x80c00000;
    700       srcId(i->src(1), 32 + 14);
    701       break;
    702    case FILE_MEMORY_GLOBAL:
    703       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
    704       code[1] = 0xa0000000;
    705       emitLoadStoreSizeLG(i->dType, 21 + 32);
    706       srcId(i->src(1), 2);
    707       break;
    708    case FILE_MEMORY_LOCAL:
    709       code[0] = 0xd0000001;
    710       code[1] = 0x60000000;
    711       emitLoadStoreSizeLG(i->dType, 21 + 32);
    712       srcId(i->src(1), 2);
    713       break;
    714    case FILE_MEMORY_SHARED:
    715       code[0] = 0x00000001;
    716       code[1] = 0xe0000000;
    717       switch (typeSizeof(i->dType)) {
    718       case 1:
    719          code[0] |= offset << 9;
    720          code[1] |= 0x00400000;
    721          break;
    722       case 2:
    723          code[0] |= (offset >> 1) << 9;
    724          break;
    725       case 4:
    726          code[0] |= (offset >> 2) << 9;
    727          code[1] |= 0x04200000;
    728          break;
    729       default:
    730          assert(0);
    731          break;
    732       }
    733       srcId(i->src(1), 32 + 14);
    734       break;
    735    default:
    736       assert(!"invalid store destination file");
    737       break;
    738    }
    739 
    740    if (f == FILE_MEMORY_GLOBAL)
    741       srcId(*i->src(0).getIndirect(0), 9);
    742    else
    743       setAReg16(i, 0);
    744 
    745    if (f == FILE_MEMORY_LOCAL)
    746       srcAddr16(i->src(0), false, 9);
    747 
    748    emitFlagsRd(i);
    749 }
    750 
    751 void
    752 CodeEmitterNV50::emitMOV(const Instruction *i)
    753 {
    754    DataFile sf = i->getSrc(0)->reg.file;
    755    DataFile df = i->getDef(0)->reg.file;
    756 
    757    assert(sf == FILE_GPR || df == FILE_GPR);
    758 
    759    if (sf == FILE_FLAGS) {
    760       assert(i->flagsSrc >= 0);
    761       code[0] = 0x00000001;
    762       code[1] = 0x20000000;
    763       defId(i->def(0), 2);
    764       emitFlagsRd(i);
    765    } else
    766    if (sf == FILE_ADDRESS) {
    767       code[0] = 0x00000001;
    768       code[1] = 0x40000000;
    769       defId(i->def(0), 2);
    770       setARegBits(SDATA(i->src(0)).id + 1);
    771       emitFlagsRd(i);
    772    } else
    773    if (df == FILE_FLAGS) {
    774       assert(i->flagsDef >= 0);
    775       code[0] = 0x00000001;
    776       code[1] = 0xa0000000;
    777       srcId(i->src(0), 9);
    778       emitFlagsRd(i);
    779       emitFlagsWr(i);
    780    } else
    781    if (sf == FILE_IMMEDIATE) {
    782       code[0] = 0x10008001;
    783       code[1] = 0x00000003;
    784       emitForm_IMM(i);
    785    } else {
    786       if (i->encSize == 4) {
    787          code[0] = 0x10008000;
    788       } else {
    789          code[0] = 0x10000001;
    790          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
    791          code[1] |= (i->lanes << 14);
    792          emitFlagsRd(i);
    793       }
    794       defId(i->def(0), 2);
    795       srcId(i->src(0), 9);
    796    }
    797    if (df == FILE_SHADER_OUTPUT) {
    798       assert(i->encSize == 8);
    799       code[1] |= 0x8;
    800    }
    801 }
    802 
    803 static inline uint8_t getSRegEncoding(const ValueRef &ref)
    804 {
    805    switch (SDATA(ref).sv.sv) {
    806    case SV_PHYSID:        return 0;
    807    case SV_CLOCK:         return 1;
    808    case SV_VERTEX_STRIDE: return 3;
    809 // case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
    810    case SV_SAMPLE_INDEX:  return 8;
    811    default:
    812       assert(!"no sreg for system value");
    813       return 0;
    814    }
    815 }
    816 
    817 void
    818 CodeEmitterNV50::emitRDSV(const Instruction *i)
    819 {
    820    code[0] = 0x00000001;
    821    code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
    822    defId(i->def(0), 2);
    823    emitFlagsRd(i);
    824 }
    825 
    826 void
    827 CodeEmitterNV50::emitNOP()
    828 {
    829    code[0] = 0xf0000001;
    830    code[1] = 0xe0000000;
    831 }
    832 
    833 void
    834 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
    835 {
    836    code[0] = 0xc0000000 | (lane << 16);
    837    code[1] = 0x80000000;
    838 
    839    code[0] |= (quOp & 0x03) << 20;
    840    code[1] |= (quOp & 0xfc) << 20;
    841 
    842    emitForm_ADD(i);
    843 
    844    if (!i->srcExists(1) || i->predSrc == 1)
    845       srcId(i->src(0), 32 + 14);
    846 }
    847 
    848 /* NOTE: This returns the base address of a vertex inside the primitive.
    849  * src0 is an immediate, the index (not offset) of the vertex
    850  * inside the primitive. XXX: signed or unsigned ?
    851  * src1 (may be NULL) should use whatever units the hardware requires
    852  * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
    853  */
    854 void
    855 CodeEmitterNV50::emitPFETCH(const Instruction *i)
    856 {
    857    const uint32_t prim = i->src(0).get()->reg.data.u32;
    858    assert(prim <= 127);
    859 
    860    if (i->def(0).getFile() == FILE_ADDRESS) {
    861       // shl $aX a[] 0
    862       code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
    863       code[1] = 0xc0200000;
    864       code[0] |= prim << 9;
    865       assert(!i->srcExists(1));
    866    } else
    867    if (i->srcExists(1)) {
    868       // ld b32 $rX a[$aX+base]
    869       code[0] = 0x00000001;
    870       code[1] = 0x04200000 | (0xf << 14);
    871       defId(i->def(0), 2);
    872       code[0] |= prim << 9;
    873       setARegBits(SDATA(i->src(1)).id + 1);
    874    } else {
    875       // mov b32 $rX a[]
    876       code[0] = 0x10000001;
    877       code[1] = 0x04200000 | (0xf << 14);
    878       defId(i->def(0), 2);
    879       code[0] |= prim << 9;
    880    }
    881    emitFlagsRd(i);
    882 }
    883 
    884 static void
    885 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
    886 {
    887    int ipa = entry->ipa;
    888    int encSize = entry->reg;
    889    int loc = entry->loc;
    890 
    891    if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
    892        (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
    893       if (data.force_persample_interp) {
    894          if (encSize == 8)
    895             code[loc + 1] |= 1 << 16;
    896          else
    897             code[loc + 0] |= 1 << 24;
    898       } else {
    899          if (encSize == 8)
    900             code[loc + 1] &= ~(1 << 16);
    901          else
    902             code[loc + 0] &= ~(1 << 24);
    903       }
    904    }
    905 }
    906 
    907 void
    908 CodeEmitterNV50::emitINTERP(const Instruction *i)
    909 {
    910    code[0] = 0x80000000;
    911 
    912    defId(i->def(0), 2);
    913    srcAddr8(i->src(0), 16);
    914    setAReg16(i, 0);
    915 
    916    if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
    917       code[0] |= 1 << 8;
    918    } else {
    919       if (i->op == OP_PINTERP) {
    920          code[0] |= 1 << 25;
    921          srcId(i->src(1), 9);
    922       }
    923       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
    924          code[0] |= 1 << 24;
    925    }
    926 
    927    if (i->encSize == 8) {
    928       if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
    929          code[1] = 4 << 16;
    930       else
    931          code[1] = (code[0] & (3 << 24)) >> (24 - 16);
    932       code[0] &= ~0x03000000;
    933       code[0] |= 1;
    934       emitFlagsRd(i);
    935    }
    936 
    937    addInterp(i->ipa, i->encSize, interpApply);
    938 }
    939 
    940 void
    941 CodeEmitterNV50::emitMINMAX(const Instruction *i)
    942 {
    943    if (i->dType == TYPE_F64) {
    944       code[0] = 0xe0000000;
    945       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
    946    } else {
    947       code[0] = 0x30000000;
    948       code[1] = 0x80000000;
    949       if (i->op == OP_MIN)
    950          code[1] |= 0x20000000;
    951 
    952       switch (i->dType) {
    953       case TYPE_F32: code[0] |= 0x80000000; break;
    954       case TYPE_S32: code[1] |= 0x8c000000; break;
    955       case TYPE_U32: code[1] |= 0x84000000; break;
    956       case TYPE_S16: code[1] |= 0x80000000; break;
    957       case TYPE_U16: break;
    958       default:
    959          assert(0);
    960          break;
    961       }
    962    }
    963 
    964    code[1] |= i->src(0).mod.abs() << 20;
    965    code[1] |= i->src(0).mod.neg() << 26;
    966    code[1] |= i->src(1).mod.abs() << 19;
    967    code[1] |= i->src(1).mod.neg() << 27;
    968 
    969    emitForm_MAD(i);
    970 }
    971 
    972 void
    973 CodeEmitterNV50::emitFMAD(const Instruction *i)
    974 {
    975    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
    976    const int neg_add = i->src(2).mod.neg();
    977 
    978    code[0] = 0xe0000000;
    979 
    980    if (i->src(1).getFile() == FILE_IMMEDIATE) {
    981       code[1] = 0;
    982       emitForm_IMM(i);
    983       code[0] |= neg_mul << 15;
    984       code[0] |= neg_add << 22;
    985       if (i->saturate)
    986          code[0] |= 1 << 8;
    987    } else
    988    if (i->encSize == 4) {
    989       emitForm_MUL(i);
    990       code[0] |= neg_mul << 15;
    991       code[0] |= neg_add << 22;
    992       if (i->saturate)
    993          code[0] |= 1 << 8;
    994    } else {
    995       code[1]  = neg_mul << 26;
    996       code[1] |= neg_add << 27;
    997       if (i->saturate)
    998          code[1] |= 1 << 29;
    999       emitForm_MAD(i);
   1000    }
   1001 }
   1002 
   1003 void
   1004 CodeEmitterNV50::emitDMAD(const Instruction *i)
   1005 {
   1006    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
   1007    const int neg_add = i->src(2).mod.neg();
   1008 
   1009    assert(i->encSize == 8);
   1010    assert(!i->saturate);
   1011 
   1012    code[1] = 0x40000000;
   1013    code[0] = 0xe0000000;
   1014 
   1015    code[1] |= neg_mul << 26;
   1016    code[1] |= neg_add << 27;
   1017 
   1018    roundMode_MAD(i);
   1019 
   1020    emitForm_MAD(i);
   1021 }
   1022 
   1023 void
   1024 CodeEmitterNV50::emitFADD(const Instruction *i)
   1025 {
   1026    const int neg0 = i->src(0).mod.neg();
   1027    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
   1028 
   1029    code[0] = 0xb0000000;
   1030 
   1031    assert(!(i->src(0).mod | i->src(1).mod).abs());
   1032 
   1033    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1034       code[1] = 0;
   1035       emitForm_IMM(i);
   1036       code[0] |= neg0 << 15;
   1037       code[0] |= neg1 << 22;
   1038       if (i->saturate)
   1039          code[0] |= 1 << 8;
   1040    } else
   1041    if (i->encSize == 8) {
   1042       code[1] = 0;
   1043       emitForm_ADD(i);
   1044       code[1] |= neg0 << 26;
   1045       code[1] |= neg1 << 27;
   1046       if (i->saturate)
   1047          code[1] |= 1 << 29;
   1048    } else {
   1049       emitForm_MUL(i);
   1050       code[0] |= neg0 << 15;
   1051       code[0] |= neg1 << 22;
   1052       if (i->saturate)
   1053          code[0] |= 1 << 8;
   1054    }
   1055 }
   1056 
   1057 void
   1058 CodeEmitterNV50::emitDADD(const Instruction *i)
   1059 {
   1060    const int neg0 = i->src(0).mod.neg();
   1061    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
   1062 
   1063    assert(!(i->src(0).mod | i->src(1).mod).abs());
   1064    assert(!i->saturate);
   1065    assert(i->encSize == 8);
   1066 
   1067    code[1] = 0x60000000;
   1068    code[0] = 0xe0000000;
   1069 
   1070    emitForm_ADD(i);
   1071 
   1072    code[1] |= neg0 << 26;
   1073    code[1] |= neg1 << 27;
   1074 }
   1075 
   1076 void
   1077 CodeEmitterNV50::emitUADD(const Instruction *i)
   1078 {
   1079    const int neg0 = i->src(0).mod.neg();
   1080    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
   1081 
   1082    code[0] = 0x20008000;
   1083 
   1084    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1085       code[1] = 0;
   1086       emitForm_IMM(i);
   1087    } else
   1088    if (i->encSize == 8) {
   1089       code[0] = 0x20000000;
   1090       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
   1091       emitForm_ADD(i);
   1092    } else {
   1093       emitForm_MUL(i);
   1094    }
   1095    assert(!(neg0 && neg1));
   1096    code[0] |= neg0 << 28;
   1097    code[0] |= neg1 << 22;
   1098 
   1099    if (i->flagsSrc >= 0) {
   1100       // addc == sub | subr
   1101       assert(!(code[0] & 0x10400000) && !i->getPredicate());
   1102       code[0] |= 0x10400000;
   1103       srcId(i->src(i->flagsSrc), 32 + 12);
   1104    }
   1105 }
   1106 
   1107 void
   1108 CodeEmitterNV50::emitAADD(const Instruction *i)
   1109 {
   1110    const int s = (i->op == OP_MOV) ? 0 : 1;
   1111 
   1112    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
   1113    code[1] = 0x20000000;
   1114 
   1115    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
   1116 
   1117    emitFlagsRd(i);
   1118 
   1119    if (s && i->srcExists(0))
   1120       setARegBits(SDATA(i->src(0)).id + 1);
   1121 }
   1122 
   1123 void
   1124 CodeEmitterNV50::emitIMUL(const Instruction *i)
   1125 {
   1126    code[0] = 0x40000000;
   1127 
   1128    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1129       if (i->sType == TYPE_S16)
   1130          code[0] |= 0x8100;
   1131       code[1] = 0;
   1132       emitForm_IMM(i);
   1133    } else
   1134    if (i->encSize == 8) {
   1135       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
   1136       emitForm_MAD(i);
   1137    } else {
   1138       if (i->sType == TYPE_S16)
   1139          code[0] |= 0x8100;
   1140       emitForm_MUL(i);
   1141    }
   1142 }
   1143 
   1144 void
   1145 CodeEmitterNV50::emitFMUL(const Instruction *i)
   1146 {
   1147    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
   1148 
   1149    code[0] = 0xc0000000;
   1150 
   1151    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1152       code[1] = 0;
   1153       emitForm_IMM(i);
   1154       if (neg)
   1155          code[0] |= 0x8000;
   1156       if (i->saturate)
   1157          code[0] |= 1 << 8;
   1158    } else
   1159    if (i->encSize == 8) {
   1160       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
   1161       if (neg)
   1162          code[1] |= 0x08000000;
   1163       if (i->saturate)
   1164          code[1] |= 1 << 20;
   1165       emitForm_MAD(i);
   1166    } else {
   1167       emitForm_MUL(i);
   1168       if (neg)
   1169          code[0] |= 0x8000;
   1170       if (i->saturate)
   1171          code[0] |= 1 << 8;
   1172    }
   1173 }
   1174 
   1175 void
   1176 CodeEmitterNV50::emitDMUL(const Instruction *i)
   1177 {
   1178    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
   1179 
   1180    assert(!i->saturate);
   1181    assert(i->encSize == 8);
   1182 
   1183    code[1] = 0x80000000;
   1184    code[0] = 0xe0000000;
   1185 
   1186    if (neg)
   1187       code[1] |= 0x08000000;
   1188 
   1189    roundMode_CVT(i->rnd);
   1190 
   1191    emitForm_MAD(i);
   1192 }
   1193 
   1194 void
   1195 CodeEmitterNV50::emitIMAD(const Instruction *i)
   1196 {
   1197    int mode;
   1198    code[0] = 0x60000000;
   1199 
   1200    assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
   1201    if (!isSignedType(i->sType))
   1202       mode = 0;
   1203    else if (i->saturate)
   1204       mode = 2;
   1205    else
   1206       mode = 1;
   1207 
   1208    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1209       code[1] = 0;
   1210       emitForm_IMM(i);
   1211       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
   1212       if (i->flagsSrc >= 0) {
   1213          assert(!(code[0] & 0x10400000));
   1214          assert(SDATA(i->src(i->flagsSrc)).id == 0);
   1215          code[0] |= 0x10400000;
   1216       }
   1217    } else
   1218    if (i->encSize == 4) {
   1219       emitForm_MUL(i);
   1220       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
   1221       if (i->flagsSrc >= 0) {
   1222          assert(!(code[0] & 0x10400000));
   1223          assert(SDATA(i->src(i->flagsSrc)).id == 0);
   1224          code[0] |= 0x10400000;
   1225       }
   1226    } else {
   1227       code[1] = mode << 29;
   1228       emitForm_MAD(i);
   1229 
   1230       if (i->flagsSrc >= 0) {
   1231          // add with carry from $cX
   1232          assert(!(code[1] & 0x0c000000) && !i->getPredicate());
   1233          code[1] |= 0xc << 24;
   1234          srcId(i->src(i->flagsSrc), 32 + 12);
   1235       }
   1236    }
   1237 }
   1238 
   1239 void
   1240 CodeEmitterNV50::emitISAD(const Instruction *i)
   1241 {
   1242    if (i->encSize == 8) {
   1243       code[0] = 0x50000000;
   1244       switch (i->sType) {
   1245       case TYPE_U32: code[1] = 0x04000000; break;
   1246       case TYPE_S32: code[1] = 0x0c000000; break;
   1247       case TYPE_U16: code[1] = 0x00000000; break;
   1248       case TYPE_S16: code[1] = 0x08000000; break;
   1249       default:
   1250          assert(0);
   1251          break;
   1252       }
   1253       emitForm_MAD(i);
   1254    } else {
   1255       switch (i->sType) {
   1256       case TYPE_U32: code[0] = 0x50008000; break;
   1257       case TYPE_S32: code[0] = 0x50008100; break;
   1258       case TYPE_U16: code[0] = 0x50000000; break;
   1259       case TYPE_S16: code[0] = 0x50000100; break;
   1260       default:
   1261          assert(0);
   1262          break;
   1263       }
   1264       emitForm_MUL(i);
   1265    }
   1266 }
   1267 
   1268 static void
   1269 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
   1270 {
   1271    int loc = entry->loc;
   1272    int enc;
   1273 
   1274    switch (data.alphatest) {
   1275    case PIPE_FUNC_NEVER: enc = 0x0; break;
   1276    case PIPE_FUNC_LESS: enc = 0x1; break;
   1277    case PIPE_FUNC_EQUAL: enc = 0x2; break;
   1278    case PIPE_FUNC_LEQUAL: enc = 0x3; break;
   1279    case PIPE_FUNC_GREATER: enc = 0x4; break;
   1280    case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
   1281    case PIPE_FUNC_GEQUAL: enc = 0x6; break;
   1282    default:
   1283    case PIPE_FUNC_ALWAYS: enc = 0xf; break;
   1284    }
   1285 
   1286    code[loc + 1] &= ~(0x1f << 14);
   1287    code[loc + 1] |= enc << 14;
   1288 }
   1289 
   1290 void
   1291 CodeEmitterNV50::emitSET(const Instruction *i)
   1292 {
   1293    code[0] = 0x30000000;
   1294    code[1] = 0x60000000;
   1295 
   1296    switch (i->sType) {
   1297    case TYPE_F64:
   1298       code[0] = 0xe0000000;
   1299       code[1] = 0xe0000000;
   1300       break;
   1301    case TYPE_F32: code[0] |= 0x80000000; break;
   1302    case TYPE_S32: code[1] |= 0x0c000000; break;
   1303    case TYPE_U32: code[1] |= 0x04000000; break;
   1304    case TYPE_S16: code[1] |= 0x08000000; break;
   1305    case TYPE_U16: break;
   1306    default:
   1307       assert(0);
   1308       break;
   1309    }
   1310 
   1311    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
   1312 
   1313    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
   1314    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
   1315    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
   1316    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
   1317 
   1318    emitForm_MAD(i);
   1319 
   1320    if (i->subOp == 1) {
   1321       addInterp(0, 0, alphatestSet);
   1322    }
   1323 }
   1324 
   1325 void
   1326 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
   1327 {
   1328    switch (rnd) {
   1329    case ROUND_NI: code[1] |= 0x08000000; break;
   1330    case ROUND_M:  code[1] |= 0x00020000; break;
   1331    case ROUND_MI: code[1] |= 0x08020000; break;
   1332    case ROUND_P:  code[1] |= 0x00040000; break;
   1333    case ROUND_PI: code[1] |= 0x08040000; break;
   1334    case ROUND_Z:  code[1] |= 0x00060000; break;
   1335    case ROUND_ZI: code[1] |= 0x08060000; break;
   1336    default:
   1337       assert(rnd == ROUND_N);
   1338       break;
   1339    }
   1340 }
   1341 
   1342 void
   1343 CodeEmitterNV50::emitCVT(const Instruction *i)
   1344 {
   1345    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   1346    RoundMode rnd;
   1347    DataType dType;
   1348 
   1349    switch (i->op) {
   1350    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
   1351    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
   1352    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   1353    default:
   1354       rnd = i->rnd;
   1355       break;
   1356    }
   1357 
   1358    if (i->op == OP_NEG && i->dType == TYPE_U32)
   1359       dType = TYPE_S32;
   1360    else
   1361       dType = i->dType;
   1362 
   1363    code[0] = 0xa0000000;
   1364 
   1365    switch (dType) {
   1366    case TYPE_F64:
   1367       switch (i->sType) {
   1368       case TYPE_F64: code[1] = 0xc4404000; break;
   1369       case TYPE_S64: code[1] = 0x44414000; break;
   1370       case TYPE_U64: code[1] = 0x44404000; break;
   1371       case TYPE_F32: code[1] = 0xc4400000; break;
   1372       case TYPE_S32: code[1] = 0x44410000; break;
   1373       case TYPE_U32: code[1] = 0x44400000; break;
   1374       default:
   1375          assert(0);
   1376          break;
   1377       }
   1378       break;
   1379    case TYPE_S64:
   1380       switch (i->sType) {
   1381       case TYPE_F64: code[1] = 0x8c404000; break;
   1382       case TYPE_F32: code[1] = 0x8c400000; break;
   1383       default:
   1384          assert(0);
   1385          break;
   1386       }
   1387       break;
   1388    case TYPE_U64:
   1389       switch (i->sType) {
   1390       case TYPE_F64: code[1] = 0x84404000; break;
   1391       case TYPE_F32: code[1] = 0x84400000; break;
   1392       default:
   1393          assert(0);
   1394          break;
   1395       }
   1396       break;
   1397    case TYPE_F32:
   1398       switch (i->sType) {
   1399       case TYPE_F64: code[1] = 0xc0404000; break;
   1400       case TYPE_S64: code[1] = 0x40414000; break;
   1401       case TYPE_U64: code[1] = 0x40404000; break;
   1402       case TYPE_F32: code[1] = 0xc4004000; break;
   1403       case TYPE_S32: code[1] = 0x44014000; break;
   1404       case TYPE_U32: code[1] = 0x44004000; break;
   1405       case TYPE_F16: code[1] = 0xc4000000; break;
   1406       case TYPE_U16: code[1] = 0x44000000; break;
   1407       default:
   1408          assert(0);
   1409          break;
   1410       }
   1411       break;
   1412    case TYPE_S32:
   1413       switch (i->sType) {
   1414       case TYPE_F64: code[1] = 0x88404000; break;
   1415       case TYPE_F32: code[1] = 0x8c004000; break;
   1416       case TYPE_S32: code[1] = 0x0c014000; break;
   1417       case TYPE_U32: code[1] = 0x0c004000; break;
   1418       case TYPE_F16: code[1] = 0x8c000000; break;
   1419       case TYPE_S16: code[1] = 0x0c010000; break;
   1420       case TYPE_U16: code[1] = 0x0c000000; break;
   1421       case TYPE_S8:  code[1] = 0x0c018000; break;
   1422       case TYPE_U8:  code[1] = 0x0c008000; break;
   1423       default:
   1424          assert(0);
   1425          break;
   1426       }
   1427       break;
   1428    case TYPE_U32:
   1429       switch (i->sType) {
   1430       case TYPE_F64: code[1] = 0x80404000; break;
   1431       case TYPE_F32: code[1] = 0x84004000; break;
   1432       case TYPE_S32: code[1] = 0x04014000; break;
   1433       case TYPE_U32: code[1] = 0x04004000; break;
   1434       case TYPE_F16: code[1] = 0x84000000; break;
   1435       case TYPE_S16: code[1] = 0x04010000; break;
   1436       case TYPE_U16: code[1] = 0x04000000; break;
   1437       case TYPE_S8:  code[1] = 0x04018000; break;
   1438       case TYPE_U8:  code[1] = 0x04008000; break;
   1439       default:
   1440          assert(0);
   1441          break;
   1442       }
   1443       break;
   1444    case TYPE_S16:
   1445    case TYPE_U16:
   1446    case TYPE_S8:
   1447    case TYPE_U8:
   1448    default:
   1449       assert(0);
   1450       break;
   1451    }
   1452    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
   1453       code[1] |= 0x00004000;
   1454 
   1455    roundMode_CVT(rnd);
   1456 
   1457    switch (i->op) {
   1458    case OP_ABS: code[1] |= 1 << 20; break;
   1459    case OP_SAT: code[1] |= 1 << 19; break;
   1460    case OP_NEG: code[1] |= 1 << 29; break;
   1461    default:
   1462       break;
   1463    }
   1464    code[1] ^= i->src(0).mod.neg() << 29;
   1465    code[1] |= i->src(0).mod.abs() << 20;
   1466    if (i->saturate)
   1467       code[1] |= 1 << 19;
   1468 
   1469    assert(i->op != OP_ABS || !i->src(0).mod.neg());
   1470 
   1471    emitForm_MAD(i);
   1472 }
   1473 
   1474 void
   1475 CodeEmitterNV50::emitPreOp(const Instruction *i)
   1476 {
   1477    code[0] = 0xb0000000;
   1478    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
   1479 
   1480    code[1] |= i->src(0).mod.abs() << 20;
   1481    code[1] |= i->src(0).mod.neg() << 26;
   1482 
   1483    emitForm_MAD(i);
   1484 }
   1485 
   1486 void
   1487 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
   1488 {
   1489    code[0] = 0x90000000;
   1490 
   1491    if (i->encSize == 4) {
   1492       assert(i->op == OP_RCP);
   1493       assert(!i->saturate);
   1494       code[0] |= i->src(0).mod.abs() << 15;
   1495       code[0] |= i->src(0).mod.neg() << 22;
   1496       emitForm_MUL(i);
   1497    } else {
   1498       code[1] = subOp << 29;
   1499       code[1] |= i->src(0).mod.abs() << 20;
   1500       code[1] |= i->src(0).mod.neg() << 26;
   1501       if (i->saturate) {
   1502          assert(subOp == 6 && i->op == OP_EX2);
   1503          code[1] |= 1 << 27;
   1504       }
   1505       emitForm_MAD(i);
   1506    }
   1507 }
   1508 
   1509 void
   1510 CodeEmitterNV50::emitNOT(const Instruction *i)
   1511 {
   1512    code[0] = 0xd0000000;
   1513    code[1] = 0x0002c000;
   1514 
   1515    switch (i->sType) {
   1516    case TYPE_U32:
   1517    case TYPE_S32:
   1518       code[1] |= 0x04000000;
   1519       break;
   1520    default:
   1521       break;
   1522    }
   1523    emitForm_MAD(i);
   1524    setSrc(i, 0, 1);
   1525 }
   1526 
   1527 void
   1528 CodeEmitterNV50::emitLogicOp(const Instruction *i)
   1529 {
   1530    code[0] = 0xd0000000;
   1531    code[1] = 0;
   1532 
   1533    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1534       switch (i->op) {
   1535       case OP_OR:  code[0] |= 0x0100; break;
   1536       case OP_XOR: code[0] |= 0x8000; break;
   1537       default:
   1538          assert(i->op == OP_AND);
   1539          break;
   1540       }
   1541       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
   1542          code[0] |= 1 << 22;
   1543 
   1544       emitForm_IMM(i);
   1545    } else {
   1546       switch (i->op) {
   1547       case OP_AND: code[1] = 0x04000000; break;
   1548       case OP_OR:  code[1] = 0x04004000; break;
   1549       case OP_XOR: code[1] = 0x04008000; break;
   1550       default:
   1551          assert(0);
   1552          break;
   1553       }
   1554       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
   1555          code[1] |= 1 << 16;
   1556       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
   1557          code[1] |= 1 << 17;
   1558 
   1559       emitForm_MAD(i);
   1560    }
   1561 }
   1562 
   1563 void
   1564 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
   1565 {
   1566    code[0] = 0x00000001 | (shl << 16);
   1567    code[1] = 0xc0000000;
   1568 
   1569    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
   1570 
   1571    setSrcFileBits(i, NV50_OP_ENC_IMM);
   1572    setSrc(i, 0, 0);
   1573    emitFlagsRd(i);
   1574 }
   1575 
   1576 void
   1577 CodeEmitterNV50::emitShift(const Instruction *i)
   1578 {
   1579    if (i->def(0).getFile() == FILE_ADDRESS) {
   1580       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
   1581       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
   1582    } else {
   1583       code[0] = 0x30000001;
   1584       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
   1585       if (i->op == OP_SHR && isSignedType(i->sType))
   1586           code[1] |= 1 << 27;
   1587 
   1588       if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1589          code[1] |= 1 << 20;
   1590          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
   1591          defId(i->def(0), 2);
   1592          srcId(i->src(0), 9);
   1593          emitFlagsRd(i);
   1594       } else {
   1595          emitForm_MAD(i);
   1596       }
   1597    }
   1598 }
   1599 
   1600 void
   1601 CodeEmitterNV50::emitOUT(const Instruction *i)
   1602 {
   1603    code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
   1604    code[1] = 0xc0000000;
   1605 
   1606    emitFlagsRd(i);
   1607 }
   1608 
   1609 void
   1610 CodeEmitterNV50::emitTEX(const TexInstruction *i)
   1611 {
   1612    code[0] = 0xf0000001;
   1613    code[1] = 0x00000000;
   1614 
   1615    switch (i->op) {
   1616    case OP_TXB:
   1617       code[1] = 0x20000000;
   1618       break;
   1619    case OP_TXL:
   1620       code[1] = 0x40000000;
   1621       break;
   1622    case OP_TXF:
   1623       code[0] |= 0x01000000;
   1624       break;
   1625    case OP_TXG:
   1626       code[0] |= 0x01000000;
   1627       code[1] = 0x80000000;
   1628       break;
   1629    case OP_TXLQ:
   1630       code[1] = 0x60020000;
   1631       break;
   1632    default:
   1633       assert(i->op == OP_TEX);
   1634       break;
   1635    }
   1636 
   1637    code[0] |= i->tex.r << 9;
   1638    code[0] |= i->tex.s << 17;
   1639 
   1640    int argc = i->tex.target.getArgCount();
   1641 
   1642    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
   1643       argc += 1;
   1644    if (i->tex.target.isShadow())
   1645       argc += 1;
   1646    assert(argc <= 4);
   1647 
   1648    code[0] |= (argc - 1) << 22;
   1649 
   1650    if (i->tex.target.isCube()) {
   1651       code[0] |= 0x08000000;
   1652    } else
   1653    if (i->tex.useOffsets) {
   1654       code[1] |= (i->tex.offset[0] & 0xf) << 24;
   1655       code[1] |= (i->tex.offset[1] & 0xf) << 20;
   1656       code[1] |= (i->tex.offset[2] & 0xf) << 16;
   1657    }
   1658 
   1659    code[0] |= (i->tex.mask & 0x3) << 25;
   1660    code[1] |= (i->tex.mask & 0xc) << 12;
   1661 
   1662    if (i->tex.liveOnly)
   1663       code[1] |= 1 << 2;
   1664    if (i->tex.derivAll)
   1665       code[1] |= 1 << 3;
   1666 
   1667    defId(i->def(0), 2);
   1668 
   1669    emitFlagsRd(i);
   1670 }
   1671 
   1672 void
   1673 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
   1674 {
   1675    assert(i->tex.query == TXQ_DIMS);
   1676 
   1677    code[0] = 0xf0000001;
   1678    code[1] = 0x60000000;
   1679 
   1680    code[0] |= i->tex.r << 9;
   1681    code[0] |= i->tex.s << 17;
   1682 
   1683    code[0] |= (i->tex.mask & 0x3) << 25;
   1684    code[1] |= (i->tex.mask & 0xc) << 12;
   1685 
   1686    defId(i->def(0), 2);
   1687 
   1688    emitFlagsRd(i);
   1689 }
   1690 
   1691 void
   1692 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
   1693 {
   1694    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
   1695    code[1] = 0x60010000;
   1696 
   1697    code[0] |= (i->tex.mask & 0x3) << 25;
   1698    code[1] |= (i->tex.mask & 0xc) << 12;
   1699    defId(i->def(0), 2);
   1700 
   1701    emitFlagsRd(i);
   1702 }
   1703 
   1704 void
   1705 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
   1706 {
   1707    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
   1708 
   1709    code[0] = 0x10000003; // bra
   1710    code[1] = 0x00000780; // always
   1711 
   1712    switch (i->subOp) {
   1713    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
   1714       break;
   1715    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
   1716       pos += 8;
   1717       break;
   1718    default:
   1719       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
   1720       code[0] = 0x20000003; // call
   1721       code[1] = 0x00000000; // no predicate
   1722       break;
   1723    }
   1724    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
   1725    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
   1726 }
   1727 
   1728 void
   1729 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
   1730 {
   1731    const FlowInstruction *f = i->asFlow();
   1732    bool hasPred = false;
   1733    bool hasTarg = false;
   1734 
   1735    code[0] = 0x00000003 | (flowOp << 28);
   1736    code[1] = 0x00000000;
   1737 
   1738    switch (i->op) {
   1739    case OP_BRA:
   1740       hasPred = true;
   1741       hasTarg = true;
   1742       break;
   1743    case OP_BREAK:
   1744    case OP_BRKPT:
   1745    case OP_DISCARD:
   1746    case OP_RET:
   1747       hasPred = true;
   1748       break;
   1749    case OP_CALL:
   1750    case OP_PREBREAK:
   1751    case OP_JOINAT:
   1752       hasTarg = true;
   1753       break;
   1754    case OP_PRERET:
   1755       hasTarg = true;
   1756       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
   1757          emitPRERETEmu(f);
   1758          return;
   1759       }
   1760       break;
   1761    default:
   1762       break;
   1763    }
   1764 
   1765    if (hasPred)
   1766       emitFlagsRd(i);
   1767 
   1768    if (hasTarg && f) {
   1769       uint32_t pos;
   1770 
   1771       if (f->op == OP_CALL) {
   1772          if (f->builtin) {
   1773             pos = targNV50->getBuiltinOffset(f->target.builtin);
   1774          } else {
   1775             pos = f->target.fn->binPos;
   1776          }
   1777       } else {
   1778          pos = f->target.bb->binPos;
   1779       }
   1780 
   1781       code[0] |= ((pos >>  2) & 0xffff) << 11;
   1782       code[1] |= ((pos >> 18) & 0x003f) << 14;
   1783 
   1784       RelocEntry::Type relocTy;
   1785 
   1786       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
   1787 
   1788       addReloc(relocTy, 0, pos, 0x07fff800, 9);
   1789       addReloc(relocTy, 1, pos, 0x000fc000, -4);
   1790    }
   1791 }
   1792 
   1793 void
   1794 CodeEmitterNV50::emitBAR(const Instruction *i)
   1795 {
   1796    ImmediateValue *barId = i->getSrc(0)->asImm();
   1797    assert(barId);
   1798 
   1799    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
   1800    code[1] = 0x00004000;
   1801 
   1802    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
   1803       code[0] |= 1 << 26;
   1804 }
   1805 
   1806 void
   1807 CodeEmitterNV50::emitATOM(const Instruction *i)
   1808 {
   1809    uint8_t subOp;
   1810    switch (i->subOp) {
   1811    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
   1812    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
   1813    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
   1814    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
   1815    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
   1816    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
   1817    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
   1818    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
   1819    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
   1820    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
   1821    default:
   1822       assert(!"invalid subop");
   1823       return;
   1824    }
   1825    code[0] = 0xd0000001;
   1826    code[1] = 0xe0c00000 | (subOp << 2);
   1827    if (isSignedType(i->dType))
   1828       code[1] |= 1 << 21;
   1829 
   1830    // args
   1831    emitFlagsRd(i);
   1832    setDst(i, 0);
   1833    setSrc(i, 1, 1);
   1834    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
   1835       setSrc(i, 2, 2);
   1836 
   1837    // g[] pointer
   1838    code[0] |= i->getSrc(0)->reg.fileIndex << 23;
   1839    srcId(i->getIndirect(0, 0), 9);
   1840 }
   1841 
   1842 bool
   1843 CodeEmitterNV50::emitInstruction(Instruction *insn)
   1844 {
   1845    if (!insn->encSize) {
   1846       ERROR("skipping unencodable instruction: "); insn->print();
   1847       return false;
   1848    } else
   1849    if (codeSize + insn->encSize > codeSizeLimit) {
   1850       ERROR("code emitter output buffer too small\n");
   1851       return false;
   1852    }
   1853 
   1854    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
   1855       INFO("EMIT: "); insn->print();
   1856    }
   1857 
   1858    switch (insn->op) {
   1859    case OP_MOV:
   1860       emitMOV(insn);
   1861       break;
   1862    case OP_EXIT:
   1863    case OP_NOP:
   1864    case OP_JOIN:
   1865       emitNOP();
   1866       break;
   1867    case OP_VFETCH:
   1868    case OP_LOAD:
   1869       emitLOAD(insn);
   1870       break;
   1871    case OP_EXPORT:
   1872    case OP_STORE:
   1873       emitSTORE(insn);
   1874       break;
   1875    case OP_PFETCH:
   1876       emitPFETCH(insn);
   1877       break;
   1878    case OP_RDSV:
   1879       emitRDSV(insn);
   1880       break;
   1881    case OP_LINTERP:
   1882    case OP_PINTERP:
   1883       emitINTERP(insn);
   1884       break;
   1885    case OP_ADD:
   1886    case OP_SUB:
   1887       if (insn->dType == TYPE_F64)
   1888          emitDADD(insn);
   1889       else if (isFloatType(insn->dType))
   1890          emitFADD(insn);
   1891       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
   1892          emitAADD(insn);
   1893       else
   1894          emitUADD(insn);
   1895       break;
   1896    case OP_MUL:
   1897       if (insn->dType == TYPE_F64)
   1898          emitDMUL(insn);
   1899       else if (isFloatType(insn->dType))
   1900          emitFMUL(insn);
   1901       else
   1902          emitIMUL(insn);
   1903       break;
   1904    case OP_MAD:
   1905    case OP_FMA:
   1906       if (insn->dType == TYPE_F64)
   1907          emitDMAD(insn);
   1908       else if (isFloatType(insn->dType))
   1909          emitFMAD(insn);
   1910       else
   1911          emitIMAD(insn);
   1912       break;
   1913    case OP_SAD:
   1914       emitISAD(insn);
   1915       break;
   1916    case OP_NOT:
   1917       emitNOT(insn);
   1918       break;
   1919    case OP_AND:
   1920    case OP_OR:
   1921    case OP_XOR:
   1922       emitLogicOp(insn);
   1923       break;
   1924    case OP_SHL:
   1925    case OP_SHR:
   1926       emitShift(insn);
   1927       break;
   1928    case OP_SET:
   1929       emitSET(insn);
   1930       break;
   1931    case OP_MIN:
   1932    case OP_MAX:
   1933       emitMINMAX(insn);
   1934       break;
   1935    case OP_CEIL:
   1936    case OP_FLOOR:
   1937    case OP_TRUNC:
   1938    case OP_ABS:
   1939    case OP_NEG:
   1940    case OP_SAT:
   1941       emitCVT(insn);
   1942       break;
   1943    case OP_CVT:
   1944       if (insn->def(0).getFile() == FILE_ADDRESS)
   1945          emitARL(insn, 0);
   1946       else
   1947       if (insn->def(0).getFile() == FILE_FLAGS ||
   1948           insn->src(0).getFile() == FILE_FLAGS ||
   1949           insn->src(0).getFile() == FILE_ADDRESS)
   1950          emitMOV(insn);
   1951       else
   1952          emitCVT(insn);
   1953       break;
   1954    case OP_RCP:
   1955       emitSFnOp(insn, 0);
   1956       break;
   1957    case OP_RSQ:
   1958       emitSFnOp(insn, 2);
   1959       break;
   1960    case OP_LG2:
   1961       emitSFnOp(insn, 3);
   1962       break;
   1963    case OP_SIN:
   1964       emitSFnOp(insn, 4);
   1965       break;
   1966    case OP_COS:
   1967       emitSFnOp(insn, 5);
   1968       break;
   1969    case OP_EX2:
   1970       emitSFnOp(insn, 6);
   1971       break;
   1972    case OP_PRESIN:
   1973    case OP_PREEX2:
   1974       emitPreOp(insn);
   1975       break;
   1976    case OP_TEX:
   1977    case OP_TXB:
   1978    case OP_TXL:
   1979    case OP_TXF:
   1980    case OP_TXG:
   1981    case OP_TXLQ:
   1982       emitTEX(insn->asTex());
   1983       break;
   1984    case OP_TXQ:
   1985       emitTXQ(insn->asTex());
   1986       break;
   1987    case OP_TEXPREP:
   1988       emitTEXPREP(insn->asTex());
   1989       break;
   1990    case OP_EMIT:
   1991    case OP_RESTART:
   1992       emitOUT(insn);
   1993       break;
   1994    case OP_DISCARD:
   1995       emitFlow(insn, 0x0);
   1996       break;
   1997    case OP_BRA:
   1998       emitFlow(insn, 0x1);
   1999       break;
   2000    case OP_CALL:
   2001       emitFlow(insn, 0x2);
   2002       break;
   2003    case OP_RET:
   2004       emitFlow(insn, 0x3);
   2005       break;
   2006    case OP_PREBREAK:
   2007       emitFlow(insn, 0x4);
   2008       break;
   2009    case OP_BREAK:
   2010       emitFlow(insn, 0x5);
   2011       break;
   2012    case OP_QUADON:
   2013       emitFlow(insn, 0x6);
   2014       break;
   2015    case OP_QUADPOP:
   2016       emitFlow(insn, 0x7);
   2017       break;
   2018    case OP_JOINAT:
   2019       emitFlow(insn, 0xa);
   2020       break;
   2021    case OP_PRERET:
   2022       emitFlow(insn, 0xd);
   2023       break;
   2024    case OP_QUADOP:
   2025       emitQUADOP(insn, insn->lanes, insn->subOp);
   2026       break;
   2027    case OP_DFDX:
   2028       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
   2029       break;
   2030    case OP_DFDY:
   2031       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
   2032       break;
   2033    case OP_ATOM:
   2034       emitATOM(insn);
   2035       break;
   2036    case OP_BAR:
   2037       emitBAR(insn);
   2038       break;
   2039    case OP_PHI:
   2040    case OP_UNION:
   2041    case OP_CONSTRAINT:
   2042       ERROR("operation should have been eliminated\n");
   2043       return false;
   2044    case OP_EXP:
   2045    case OP_LOG:
   2046    case OP_SQRT:
   2047    case OP_POW:
   2048    case OP_SELP:
   2049    case OP_SLCT:
   2050    case OP_TXD:
   2051    case OP_PRECONT:
   2052    case OP_CONT:
   2053    case OP_POPCNT:
   2054    case OP_INSBF:
   2055    case OP_EXTBF:
   2056       ERROR("operation should have been lowered\n");
   2057       return false;
   2058    default:
   2059       ERROR("unknown op: %u\n", insn->op);
   2060       return false;
   2061    }
   2062    if (insn->join || insn->op == OP_JOIN)
   2063       code[1] |= 0x2;
   2064    else
   2065    if (insn->exit || insn->op == OP_EXIT)
   2066       code[1] |= 0x1;
   2067 
   2068    assert((insn->encSize == 8) == (code[0] & 1));
   2069 
   2070    code += insn->encSize / 4;
   2071    codeSize += insn->encSize;
   2072    return true;
   2073 }
   2074 
   2075 uint32_t
   2076 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
   2077 {
   2078    const Target::OpInfo &info = targ->getOpInfo(i);
   2079 
   2080    if (info.minEncSize > 4 || i->dType == TYPE_F64)
   2081       return 8;
   2082 
   2083    // check constraints on dst and src operands
   2084    for (int d = 0; i->defExists(d); ++d) {
   2085       if (i->def(d).rep()->reg.data.id > 63 ||
   2086           i->def(d).rep()->reg.file != FILE_GPR)
   2087          return 8;
   2088    }
   2089 
   2090    for (int s = 0; i->srcExists(s); ++s) {
   2091       DataFile sf = i->src(s).getFile();
   2092       if (sf != FILE_GPR)
   2093          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
   2094             return 8;
   2095       if (i->src(s).rep()->reg.data.id > 63)
   2096          return 8;
   2097    }
   2098 
   2099    // check modifiers & rounding
   2100    if (i->join || i->lanes != 0xf || i->exit)
   2101       return 8;
   2102    if (i->op == OP_MUL && i->rnd != ROUND_N)
   2103       return 8;
   2104 
   2105    if (i->asTex())
   2106       return 8; // TODO: short tex encoding
   2107 
   2108    // check constraints on short MAD
   2109    if (info.srcNr >= 2 && i->srcExists(2)) {
   2110       if (!i->defExists(0) ||
   2111           (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
   2112           DDATA(i->def(0)).id != SDATA(i->src(2)).id)
   2113          return 8;
   2114    }
   2115 
   2116    return info.minEncSize;
   2117 }
   2118 
   2119 // Change the encoding size of an instruction after BBs have been scheduled.
   2120 static void
   2121 makeInstructionLong(Instruction *insn)
   2122 {
   2123    if (insn->encSize == 8)
   2124       return;
   2125    Function *fn = insn->bb->getFunction();
   2126    int n = 0;
   2127    int adj = 4;
   2128 
   2129    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
   2130 
   2131    if (n & 1) {
   2132       adj = 8;
   2133       insn->next->encSize = 8;
   2134    } else
   2135    if (insn->prev && insn->prev->encSize == 4) {
   2136       adj = 8;
   2137       insn->prev->encSize = 8;
   2138    }
   2139    insn->encSize = 8;
   2140 
   2141    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
   2142       fn->bbArray[i]->binPos += adj;
   2143    }
   2144    fn->binSize += adj;
   2145    insn->bb->binSize += adj;
   2146 }
   2147 
   2148 static bool
   2149 trySetExitModifier(Instruction *insn)
   2150 {
   2151    if (insn->op == OP_DISCARD ||
   2152        insn->op == OP_QUADON ||
   2153        insn->op == OP_QUADPOP)
   2154       return false;
   2155    for (int s = 0; insn->srcExists(s); ++s)
   2156       if (insn->src(s).getFile() == FILE_IMMEDIATE)
   2157          return false;
   2158    if (insn->asFlow()) {
   2159       if (insn->op == OP_CALL) // side effects !
   2160          return false;
   2161       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
   2162          return false;
   2163       insn->op = OP_EXIT;
   2164    }
   2165    insn->exit = 1;
   2166    makeInstructionLong(insn);
   2167    return true;
   2168 }
   2169 
   2170 static void
   2171 replaceExitWithModifier(Function *func)
   2172 {
   2173    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
   2174 
   2175    if (!epilogue->getExit() ||
   2176        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
   2177       return;
   2178 
   2179    if (epilogue->getEntry()->op != OP_EXIT) {
   2180       Instruction *insn = epilogue->getExit()->prev;
   2181       if (!insn || !trySetExitModifier(insn))
   2182          return;
   2183       insn->exit = 1;
   2184    } else {
   2185       for (Graph::EdgeIterator ei = func->cfgExit->incident();
   2186            !ei.end(); ei.next()) {
   2187          BasicBlock *bb = BasicBlock::get(ei.getNode());
   2188          Instruction *i = bb->getExit();
   2189 
   2190          if (!i || !trySetExitModifier(i))
   2191             return;
   2192       }
   2193    }
   2194 
   2195    int adj = epilogue->getExit()->encSize;
   2196    epilogue->binSize -= adj;
   2197    func->binSize -= adj;
   2198    delete_Instruction(func->getProgram(), epilogue->getExit());
   2199 
   2200    // There may be BB's that are laid out after the exit block
   2201    for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
   2202       func->bbArray[i]->binPos -= adj;
   2203    }
   2204 }
   2205 
   2206 void
   2207 CodeEmitterNV50::prepareEmission(Function *func)
   2208 {
   2209    CodeEmitter::prepareEmission(func);
   2210 
   2211    replaceExitWithModifier(func);
   2212 }
   2213 
   2214 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
   2215    CodeEmitter(target), targNV50(target)
   2216 {
   2217    targ = target; // specialized
   2218    code = NULL;
   2219    codeSize = codeSizeLimit = 0;
   2220    relocInfo = NULL;
   2221 }
   2222 
   2223 CodeEmitter *
   2224 TargetNV50::getCodeEmitter(Program::Type type)
   2225 {
   2226    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
   2227    emit->setProgramType(type);
   2228    return emit;
   2229 }
   2230 
   2231 } // namespace nv50_ir
   2232