Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "nv50_ir.h"
     24 #include "nv50_ir_target_nv50.h"
     25 
     26 namespace nv50_ir {
     27 
// Encoding form selectors, passed as the `enc` argument of
// CodeEmitterNV50::setSrcFileBits() by the emitForm_*() helpers:
//   LONG     - used by emitForm_MAD()
//   SHORT    - used by emitForm_MUL()
//   IMM      - used by emitForm_IMM()
//   LONG_ALT - used by emitForm_ADD()
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
     32 
// Code emitter for the NV50 target: each emit*() method fills the inherited
// code[] words (code[0], and code[1] for 8-byte encodings) for one
// instruction.
class CodeEmitterNV50 : public CodeEmitter
{
public:
   CodeEmitterNV50(const TargetNV50 *);

   virtual bool emitInstruction(Instruction *);

   virtual uint32_t getMinEncodingSize(const Instruction *) const;

   // Selects the program type consulted by setSrcFileBits() (geometry and
   // compute programs use different source-file encodings).
   inline void setProgramType(Program::Type pType) { progType = pType; }

   virtual void prepareEmission(Function *);

private:
   Program::Type progType;

   const TargetNV50 *targ;

private:
   // Low-level field writers. `pos` is an absolute bit index into code[]:
   // word = pos / 32, bit = pos % 32.
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);

   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
   inline void srcAddr8(const ValueRef&, const int pos);

   // Condition-code register read / write fields in code[1].
   void emitFlagsRd(const Instruction *);
   void emitFlagsWr(const Instruction *);

   void emitCondCode(CondCode cc, DataType ty, int pos);

   // Address register selection (split across code[0] and code[1]).
   inline void setARegBits(unsigned int);

   void setAReg16(const Instruction *, int s);
   void setImmediate(const Instruction *, int s);

   // Destination and source operand fields.
   void setDst(const Value *);
   void setDst(const Instruction *, int d);
   void setSrcFileBits(const Instruction *, int enc);
   void setSrc(const Instruction *, unsigned int s, int slot);

   // Common operand layouts shared by several opcodes.
   void emitForm_MAD(const Instruction *);
   void emitForm_ADD(const Instruction *);
   void emitForm_MUL(const Instruction *);
   void emitForm_IMM(const Instruction *);

   // Access-size fields: LG is used for local/global memory, CS for
   // const/shared memory (see emitLOAD()/emitSTORE()).
   void emitLoadStoreSizeLG(DataType ty, int pos);
   void emitLoadStoreSizeCS(DataType ty);

   void roundMode_MAD(const Instruction *);
   void roundMode_CVT(RoundMode);

   void emitMNeg12(const Instruction *);

   // Per-opcode emitters.
   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitNOP();
   void emitINTERP(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitAADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitIMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);

   void emitMINMAX(const Instruction *);

   void emitPreOp(const Instruction *);
   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitShift(const Instruction *);
   void emitARL(const Instruction *, unsigned int shl);
   void emitLogicOp(const Instruction *);
   void emitNOT(const Instruction *);

   void emitCVT(const Instruction *);
   void emitSET(const Instruction *);

   void emitTEX(const TexInstruction *);
   void emitTXQ(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);

   void emitFlow(const Instruction *, uint8_t flowOp);
   void emitPRERETEmu(const FlowInstruction *);
};
    125 
// Shorthand for the register data (id / offset / ...) of the representative
// value behind an operand. Both macros expand identically; SDATA is used
// with source ValueRefs and DDATA with destination ValueDefs.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
    128 
    129 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
    130 {
    131    assert(src.get());
    132    code[pos / 32] |= SDATA(src).id << (pos % 32);
    133 }
    134 
    135 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
    136 {
    137    assert(src->get());
    138    code[pos / 32] |= SDATA(*src).id << (pos % 32);
    139 }
    140 
    141 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
    142 {
    143    assert(src.get());
    144 
    145    int32_t offset = SDATA(src).offset;
    146 
    147    assert(!adj || src.get()->reg.size <= 4);
    148    if (adj)
    149       offset /= src.get()->reg.size;
    150 
    151    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
    152 
    153    if (offset < 0)
    154       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
    155 
    156    code[pos / 32] |= offset << (pos % 32);
    157 }
    158 
    159 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
    160 {
    161    assert(src.get());
    162 
    163    uint32_t offset = SDATA(src).offset;
    164 
    165    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
    166 
    167    code[pos / 32] |= (offset >> 2) << (pos % 32);
    168 }
    169 
    170 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
    171 {
    172    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
    173 
    174    code[pos / 32] |= DDATA(def).id << (pos % 32);
    175 }
    176 
    177 void
    178 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
    179 {
    180    switch (insn->rnd) {
    181    case ROUND_M: code[1] |= 1 << 22; break;
    182    case ROUND_P: code[1] |= 2 << 22; break;
    183    case ROUND_Z: code[1] |= 3 << 22; break;
    184    default:
    185       assert(insn->rnd == ROUND_N);
    186       break;
    187    }
    188 }
    189 
    190 void
    191 CodeEmitterNV50::emitMNeg12(const Instruction *i)
    192 {
    193    code[1] |= i->src(0).mod.neg() << 26;
    194    code[1] |= i->src(1).mod.neg() << 27;
    195 }
    196 
    197 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
    198 {
    199    uint8_t enc;
    200 
    201    assert(pos >= 32 || pos <= 27);
    202 
    203    switch (cc) {
    204    case CC_LT:  enc = 0x1; break;
    205    case CC_LTU: enc = 0x9; break;
    206    case CC_EQ:  enc = 0x2; break;
    207    case CC_EQU: enc = 0xa; break;
    208    case CC_LE:  enc = 0x3; break;
    209    case CC_LEU: enc = 0xb; break;
    210    case CC_GT:  enc = 0x4; break;
    211    case CC_GTU: enc = 0xc; break;
    212    case CC_NE:  enc = 0x5; break;
    213    case CC_NEU: enc = 0xd; break;
    214    case CC_GE:  enc = 0x6; break;
    215    case CC_GEU: enc = 0xe; break;
    216    case CC_TR:  enc = 0xf; break;
    217    case CC_FL:  enc = 0x0; break;
    218 
    219    case CC_O:  enc = 0x10; break;
    220    case CC_C:  enc = 0x11; break;
    221    case CC_A:  enc = 0x12; break;
    222    case CC_S:  enc = 0x13; break;
    223    case CC_NS: enc = 0x1c; break;
    224    case CC_NA: enc = 0x1d; break;
    225    case CC_NC: enc = 0x1e; break;
    226    case CC_NO: enc = 0x1f; break;
    227 
    228    default:
    229       enc = 0;
    230       assert(!"invalid condition code");
    231       break;
    232    }
    233    if (ty != TYPE_NONE && !isFloatType(ty))
    234       enc &= ~0x8; // unordered only exists for float types
    235 
    236    code[pos / 32] |= enc << (pos % 32);
    237 }
    238 
    239 void
    240 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
    241 {
    242    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
    243 
    244    assert(!(code[1] & 0x00003f80));
    245 
    246    if (s >= 0) {
    247       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
    248       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
    249       srcId(i->src(s), 32 + 12);
    250    } else {
    251       code[1] |= 0x0780;
    252    }
    253 }
    254 
    255 void
    256 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
    257 {
    258    assert(!(code[1] & 0x70));
    259 
    260    int flagsDef = i->flagsDef;
    261 
    262    // find flags definition and check that it is the last def
    263    if (flagsDef < 0) {
    264       for (int d = 0; i->defExists(d); ++d)
    265          if (i->def(d).getFile() == FILE_FLAGS)
    266             flagsDef = d;
    267       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
    268          WARN("Instruction::flagsDef was not set properly\n");
    269    }
    270    if (flagsDef == 0 && i->defExists(1))
    271       WARN("flags def should not be the primary definition\n");
    272 
    273    if (flagsDef >= 0)
    274       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
    275 
    276 }
    277 
    278 void
    279 CodeEmitterNV50::setARegBits(unsigned int u)
    280 {
    281    code[0] |= (u & 3) << 26;
    282    code[1] |= (u & 4);
    283 }
    284 
    285 void
    286 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
    287 {
    288    if (i->srcExists(s)) {
    289       s = i->src(s).indirect[0];
    290       if (s >= 0)
    291          setARegBits(SDATA(i->src(s)).id + 1);
    292    }
    293 }
    294 
    295 void
    296 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
    297 {
    298    const ImmediateValue *imm = i->src(s).get()->asImm();
    299    assert(imm);
    300 
    301    uint32_t u = imm->reg.data.u32;
    302 
    303    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
    304       u = ~u;
    305 
    306    code[1] |= 3;
    307    code[0] |= (u & 0x3f) << 16;
    308    code[1] |= (u >> 6) << 2;
    309 }
    310 
    311 void
    312 CodeEmitterNV50::setDst(const Value *dst)
    313 {
    314    const Storage *reg = &dst->join->reg;
    315 
    316    assert(reg->file != FILE_ADDRESS);
    317 
    318    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
    319       code[0] |= (127 << 2) | 1;
    320       code[1] |= 8;
    321    } else {
    322       int id;
    323       if (reg->file == FILE_SHADER_OUTPUT) {
    324          code[1] |= 8;
    325          id = reg->data.offset / 4;
    326       } else {
    327          id = reg->data.id;
    328       }
    329       code[0] |= id << 2;
    330    }
    331 }
    332 
    333 void
    334 CodeEmitterNV50::setDst(const Instruction *i, int d)
    335 {
    336    if (i->defExists(d)) {
    337       setDst(i->getDef(d));
    338    } else
    339    if (!d) {
    340       code[0] |= 0x01fc; // bit bucket
    341       code[1] |= 0x0008;
    342    }
    343 }
    344 
    345 // 3 * 2 bits:
    346 // 0: r
    347 // 1: a/s
    348 // 2: c
    349 // 3: i
    350 void
    351 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
    352 {
    353    uint8_t mode = 0;
    354 
    355    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
    356       switch (i->src(s).getFile()) {
    357       case FILE_GPR:
    358          break;
    359       case FILE_MEMORY_SHARED:
    360       case FILE_SHADER_INPUT:
    361          mode |= 1 << (s * 2);
    362          break;
    363       case FILE_MEMORY_CONST:
    364          mode |= 2 << (s * 2);
    365          break;
    366       case FILE_IMMEDIATE:
    367          mode |= 3 << (s * 2);
    368          break;
    369       default:
    370 	      ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
    371          assert(0);
    372          break;
    373       }
    374    }
    375    switch (mode) {
    376    case 0x00: // rrr
    377       break;
    378    case 0x01: // arr/grr
    379       if (progType == Program::TYPE_GEOMETRY) {
    380          code[0] |= 0x01800000;
    381          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
    382             code[1] |= 0x00200000;
    383       } else {
    384          if (enc == NV50_OP_ENC_SHORT)
    385             code[0] |= 0x01000000;
    386          else
    387             code[1] |= 0x00200000;
    388       }
    389       break;
    390    case 0x03: // irr
    391       assert(i->op == OP_MOV);
    392       return;
    393    case 0x0c: // rir
    394       break;
    395    case 0x0d: // gir
    396       code[0] |= 0x01000000;
    397       assert(progType == Program::TYPE_GEOMETRY ||
    398              progType == Program::TYPE_COMPUTE);
    399       break;
    400    case 0x08: // rcr
    401       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
    402       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
    403       break;
    404    case 0x09: // acr/gcr
    405       if (progType == Program::TYPE_GEOMETRY) {
    406          code[0] |= 0x01800000;
    407       } else {
    408          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
    409          code[1] |= 0x00200000;
    410       }
    411       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
    412       break;
    413    case 0x20: // rrc
    414       code[0] |= 0x01000000;
    415       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
    416       break;
    417    case 0x21: // arc
    418       code[0] |= 0x01000000;
    419       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
    420       assert(progType != Program::TYPE_GEOMETRY);
    421       break;
    422    default:
    423       ERROR("not encodable: %x\n", mode);
    424       assert(0);
    425       break;
    426    }
    427    if (progType != Program::TYPE_COMPUTE)
    428       return;
    429 
    430    if ((mode & 3) == 1) {
    431       const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
    432 
    433       switch (i->getSrc(0)->reg.type) {
    434       case TYPE_U8:
    435          break;
    436       case TYPE_U16:
    437          code[0] |= 1 << pos;
    438          break;
    439       case TYPE_S16:
    440          code[0] |= 2 << pos;
    441          break;
    442       default:
    443          code[0] |= 3 << pos;
    444          assert(i->getSrc(0)->reg.size == 4);
    445          break;
    446       }
    447    }
    448 }
    449 
    450 void
    451 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
    452 {
    453    if (Target::operationSrcNr[i->op] <= s)
    454       return;
    455    const Storage *reg = &i->src(s).rep()->reg;
    456 
    457    unsigned int id = (reg->file == FILE_GPR) ?
    458       reg->data.id :
    459       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
    460 
    461    switch (slot) {
    462    case 0: code[0] |= id << 9; break;
    463    case 1: code[0] |= id << 16; break;
    464    case 2: code[1] |= id << 14; break;
    465    default:
    466       assert(0);
    467       break;
    468    }
    469 }
    470 
    471 // the default form:
    472 //  - long instruction
    473 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
    474 //  - address & flags
    475 void
    476 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
    477 {
    478    assert(i->encSize == 8);
    479    code[0] |= 1;
    480 
    481    emitFlagsRd(i);
    482    emitFlagsWr(i);
    483 
    484    setDst(i, 0);
    485 
    486    setSrcFileBits(i, NV50_OP_ENC_LONG);
    487    setSrc(i, 0, 0);
    488    setSrc(i, 1, 1);
    489    setSrc(i, 2, 2);
    490 
    491    setAReg16(i, 1);
    492 }
    493 
    494 // like default form, but 2nd source in slot 2, and no 3rd source
    495 void
    496 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
    497 {
    498    assert(i->encSize == 8);
    499    code[0] |= 1;
    500 
    501    emitFlagsRd(i);
    502    emitFlagsWr(i);
    503 
    504    setDst(i, 0);
    505 
    506    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
    507    setSrc(i, 0, 0);
    508    setSrc(i, 1, 2);
    509 
    510    setAReg16(i, 1);
    511 }
    512 
    513 // default short form (rr, ar, rc, gr)
    514 void
    515 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
    516 {
    517    assert(i->encSize == 4 && !(code[0] & 1));
    518    assert(i->defExists(0));
    519    assert(!i->getPredicate());
    520 
    521    setDst(i, 0);
    522 
    523    setSrcFileBits(i, NV50_OP_ENC_SHORT);
    524    setSrc(i, 0, 0);
    525    setSrc(i, 1, 1);
    526 }
    527 
    528 // usual immediate form
    529 // - 1 to 3 sources where last is immediate (rir, gir)
    530 // - no address or predicate possible
    531 void
    532 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
    533 {
    534    assert(i->encSize == 8);
    535    code[0] |= 1;
    536 
    537    assert(i->defExists(0) && i->srcExists(0));
    538 
    539    setDst(i, 0);
    540 
    541    setSrcFileBits(i, NV50_OP_ENC_IMM);
    542    if (Target::operationSrcNr[i->op] > 1) {
    543       setSrc(i, 0, 0);
    544       setImmediate(i, 1);
    545       setSrc(i, 2, 1);
    546    } else {
    547       setImmediate(i, 0);
    548    }
    549 }
    550 
    551 void
    552 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
    553 {
    554    uint8_t enc;
    555 
    556    switch (ty) {
    557    case TYPE_F32: // fall through
    558    case TYPE_S32: // fall through
    559    case TYPE_U32:  enc = 0x6; break;
    560    case TYPE_B128: enc = 0x5; break;
    561    case TYPE_F64: // fall through
    562    case TYPE_S64: // fall through
    563    case TYPE_U64:  enc = 0x4; break;
    564    case TYPE_S16:  enc = 0x3; break;
    565    case TYPE_U16:  enc = 0x2; break;
    566    case TYPE_S8:   enc = 0x1; break;
    567    case TYPE_U8:   enc = 0x0; break;
    568    default:
    569       enc = 0;
    570       assert(!"invalid load/store type");
    571       break;
    572    }
    573    code[pos / 32] |= enc << (pos % 32);
    574 }
    575 
    576 void
    577 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
    578 {
    579    switch (ty) {
    580    case TYPE_U8: break;
    581    case TYPE_U16: code[1] |= 0x4000; break;
    582    case TYPE_S16: code[1] |= 0x8000; break;
    583    case TYPE_F32:
    584    case TYPE_S32:
    585    case TYPE_U32: code[1] |= 0xc000; break;
    586    default:
    587       assert(0);
    588       break;
    589    }
    590 }
    591 
    592 void
    593 CodeEmitterNV50::emitLOAD(const Instruction *i)
    594 {
    595    DataFile sf = i->src(0).getFile();
    596    int32_t offset = i->getSrc(0)->reg.data.offset;
    597 
    598    switch (sf) {
    599    case FILE_SHADER_INPUT:
    600       // use 'mov' where we can
    601       code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
    602       code[1] = 0x00200000 | (i->lanes << 14);
    603       if (typeSizeof(i->dType) == 4)
    604          code[1] |= 0x04000000;
    605       break;
    606    case FILE_MEMORY_SHARED:
    607       if (targ->getChipset() >= 0x84) {
    608          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
    609          code[0] = 0x10000001;
    610          code[1] = 0x40000000;
    611 
    612          if (typeSizeof(i->dType) == 4)
    613             code[1] |= 0x04000000;
    614 
    615          emitLoadStoreSizeCS(i->sType);
    616       } else {
    617          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
    618          code[0] = 0x10000001;
    619          code[1] = 0x00200000 | (i->lanes << 14);
    620          emitLoadStoreSizeCS(i->sType);
    621       }
    622       break;
    623    case FILE_MEMORY_CONST:
    624       code[0] = 0x10000001;
    625       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
    626       if (typeSizeof(i->dType) == 4)
    627          code[1] |= 0x04000000;
    628       emitLoadStoreSizeCS(i->sType);
    629       break;
    630    case FILE_MEMORY_LOCAL:
    631       code[0] = 0xd0000001;
    632       code[1] = 0x40000000;
    633       break;
    634    case FILE_MEMORY_GLOBAL:
    635       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
    636       code[1] = 0x80000000;
    637       break;
    638    default:
    639       assert(!"invalid load source file");
    640       break;
    641    }
    642    if (sf == FILE_MEMORY_LOCAL ||
    643        sf == FILE_MEMORY_GLOBAL)
    644       emitLoadStoreSizeLG(i->sType, 21 + 32);
    645 
    646    setDst(i, 0);
    647 
    648    emitFlagsRd(i);
    649    emitFlagsWr(i);
    650 
    651    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
    652       srcId(*i->src(0).getIndirect(0), 9);
    653    } else {
    654       setAReg16(i, 0);
    655       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
    656    }
    657 }
    658 
    659 void
    660 CodeEmitterNV50::emitSTORE(const Instruction *i)
    661 {
    662    DataFile f = i->getSrc(0)->reg.file;
    663    int32_t offset = i->getSrc(0)->reg.data.offset;
    664 
    665    switch (f) {
    666    case FILE_SHADER_OUTPUT:
    667       code[0] = 0x00000001 | ((offset >> 2) << 9);
    668       code[1] = 0x80c00000;
    669       srcId(i->src(1), 32 + 14);
    670       break;
    671    case FILE_MEMORY_GLOBAL:
    672       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
    673       code[1] = 0xa0000000;
    674       emitLoadStoreSizeLG(i->dType, 21 + 32);
    675       srcId(i->src(1), 2);
    676       break;
    677    case FILE_MEMORY_LOCAL:
    678       code[0] = 0xd0000001;
    679       code[1] = 0x60000000;
    680       emitLoadStoreSizeLG(i->dType, 21 + 32);
    681       srcId(i->src(1), 2);
    682       break;
    683    case FILE_MEMORY_SHARED:
    684       code[0] = 0x00000001;
    685       code[1] = 0xe0000000;
    686       switch (typeSizeof(i->dType)) {
    687       case 1:
    688          code[0] |= offset << 9;
    689          code[1] |= 0x00400000;
    690          break;
    691       case 2:
    692          code[0] |= (offset >> 1) << 9;
    693          break;
    694       case 4:
    695          code[0] |= (offset >> 2) << 9;
    696          code[1] |= 0x04200000;
    697          break;
    698       default:
    699          assert(0);
    700          break;
    701       }
    702       srcId(i->src(1), 32 + 14);
    703       break;
    704    default:
    705       assert(!"invalid store destination file");
    706       break;
    707    }
    708 
    709    if (f == FILE_MEMORY_GLOBAL)
    710       srcId(*i->src(0).getIndirect(0), 9);
    711    else
    712       setAReg16(i, 0);
    713 
    714    if (f == FILE_MEMORY_LOCAL)
    715       srcAddr16(i->src(0), false, 9);
    716 
    717    emitFlagsRd(i);
    718 }
    719 
    720 void
    721 CodeEmitterNV50::emitMOV(const Instruction *i)
    722 {
    723    DataFile sf = i->getSrc(0)->reg.file;
    724    DataFile df = i->getDef(0)->reg.file;
    725 
    726    assert(sf == FILE_GPR || df == FILE_GPR);
    727 
    728    if (sf == FILE_FLAGS) {
    729       code[0] = 0x00000001;
    730       code[1] = 0x20000000;
    731       defId(i->def(0), 2);
    732       srcId(i->src(0), 12);
    733       emitFlagsRd(i);
    734    } else
    735    if (sf == FILE_ADDRESS) {
    736       code[0] = 0x00000001;
    737       code[1] = 0x40000000;
    738       defId(i->def(0), 2);
    739       setARegBits(SDATA(i->src(0)).id + 1);
    740       emitFlagsRd(i);
    741    } else
    742    if (df == FILE_FLAGS) {
    743       code[0] = 0x00000001;
    744       code[1] = 0xa0000000;
    745       defId(i->def(0), 4);
    746       srcId(i->src(0), 9);
    747       emitFlagsRd(i);
    748    } else
    749    if (sf == FILE_IMMEDIATE) {
    750       code[0] = 0x10008001;
    751       code[1] = 0x00000003;
    752       emitForm_IMM(i);
    753    } else {
    754       if (i->encSize == 4) {
    755          code[0] = 0x10008000;
    756       } else {
    757          code[0] = 0x10000001;
    758          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
    759          code[1] |= (i->lanes << 14);
    760          emitFlagsRd(i);
    761       }
    762       defId(i->def(0), 2);
    763       srcId(i->src(0), 9);
    764    }
    765    if (df == FILE_SHADER_OUTPUT) {
    766       assert(i->encSize == 8);
    767       code[1] |= 0x8;
    768    }
    769 }
    770 
    771 void
    772 CodeEmitterNV50::emitNOP()
    773 {
    774    code[0] = 0xf0000001;
    775    code[1] = 0xe0000000;
    776 }
    777 
    778 void
    779 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
    780 {
    781    code[0] = 0xc0000000 | (lane << 16);
    782    code[1] = 0x80000000;
    783 
    784    code[0] |= (quOp & 0x03) << 20;
    785    code[1] |= (quOp & 0xfc) << 20;
    786 
    787    emitForm_ADD(i);
    788 
    789    if (!i->srcExists(1))
    790       srcId(i->src(0), 32 + 14);
    791 }
    792 
    793 void
    794 CodeEmitterNV50::emitPFETCH(const Instruction *i)
    795 {
    796    code[0] = 0x11800001;
    797    code[1] = 0x04200000 | (0xf << 14);
    798 
    799    defId(i->def(0), 2);
    800    srcAddr8(i->src(0), 9);
    801    setAReg16(i, 0);
    802 }
    803 
    804 void
    805 CodeEmitterNV50::emitINTERP(const Instruction *i)
    806 {
    807    code[0] = 0x80000000;
    808 
    809    defId(i->def(0), 2);
    810    srcAddr8(i->src(0), 16);
    811 
    812    if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
    813       code[0] |= 1 << 8;
    814    } else {
    815       if (i->op == OP_PINTERP) {
    816          code[0] |= 1 << 25;
    817          srcId(i->src(1), 9);
    818       }
    819       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
    820          code[0] |= 1 << 24;
    821    }
    822 
    823    if (i->encSize == 8) {
    824       code[1] =
    825          (code[0] & (3 << 24)) >> (24 - 16) |
    826          (code[0] & (1 <<  8)) << (18 -  8);
    827       code[0] &= ~0x03000100;
    828       code[0] |= 1;
    829       emitFlagsRd(i);
    830    }
    831 }
    832 
    833 void
    834 CodeEmitterNV50::emitMINMAX(const Instruction *i)
    835 {
    836    if (i->dType == TYPE_F64) {
    837       code[0] = 0xe0000000;
    838       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
    839    } else {
    840       code[0] = 0x30000000;
    841       code[1] = 0x80000000;
    842       if (i->op == OP_MIN)
    843          code[1] |= 0x20000000;
    844 
    845       switch (i->dType) {
    846       case TYPE_F32: code[0] |= 0x80000000; break;
    847       case TYPE_S32: code[1] |= 0x8c000000; break;
    848       case TYPE_U32: code[1] |= 0x84000000; break;
    849       case TYPE_S16: code[1] |= 0x80000000; break;
    850       case TYPE_U16: break;
    851       default:
    852          assert(0);
    853          break;
    854       }
    855       code[1] |= i->src(0).mod.abs() << 20;
    856       code[1] |= i->src(1).mod.abs() << 19;
    857    }
    858    emitForm_MAD(i);
    859 }
    860 
    861 void
    862 CodeEmitterNV50::emitFMAD(const Instruction *i)
    863 {
    864    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
    865    const int neg_add = i->src(2).mod.neg();
    866 
    867    code[0] = 0xe0000000;
    868 
    869    if (i->encSize == 4) {
    870       emitForm_MUL(i);
    871       assert(!neg_mul && !neg_add);
    872    } else {
    873       code[1]  = neg_mul << 26;
    874       code[1] |= neg_add << 27;
    875       if (i->saturate)
    876          code[1] |= 1 << 29;
    877       emitForm_MAD(i);
    878    }
    879 }
    880 
    881 void
    882 CodeEmitterNV50::emitFADD(const Instruction *i)
    883 {
    884    const int neg0 = i->src(0).mod.neg();
    885    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
    886 
    887    code[0] = 0xb0000000;
    888 
    889    assert(!(i->src(0).mod | i->src(1).mod).abs());
    890 
    891    if (i->src(1).getFile() == FILE_IMMEDIATE) {
    892       code[1] = 0;
    893       emitForm_IMM(i);
    894       code[0] |= neg0 << 15;
    895       code[0] |= neg1 << 22;
    896       if (i->saturate)
    897          code[0] |= 1 << 8;
    898    } else
    899    if (i->encSize == 8) {
    900       code[1] = 0;
    901       emitForm_ADD(i);
    902       code[1] |= neg0 << 26;
    903       code[1] |= neg1 << 27;
    904       if (i->saturate)
    905          code[1] |= 1 << 29;
    906    } else {
    907       emitForm_MUL(i);
    908       code[0] |= neg0 << 15;
    909       code[0] |= neg1 << 22;
    910       if (i->saturate)
    911          code[0] |= 1 << 8;
    912    }
    913 }
    914 
    915 void
    916 CodeEmitterNV50::emitUADD(const Instruction *i)
    917 {
    918    const int neg0 = i->src(0).mod.neg();
    919    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
    920 
    921    code[0] = 0x20008000;
    922 
    923    if (i->src(1).getFile() == FILE_IMMEDIATE) {
    924       code[1] = 0;
    925       emitForm_IMM(i);
    926    } else
    927    if (i->encSize == 8) {
    928       code[0] = 0x20000000;
    929       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
    930       emitForm_ADD(i);
    931    } else {
    932       emitForm_MUL(i);
    933    }
    934    assert(!(neg0 && neg1));
    935    code[0] |= neg0 << 28;
    936    code[0] |= neg1 << 22;
    937 
    938    if (i->flagsSrc >= 0) {
    939       // addc == sub | subr
    940       assert(!(code[0] & 0x10400000) && !i->getPredicate());
    941       code[0] |= 0x10400000;
    942       srcId(i->src(i->flagsSrc), 32 + 12);
    943    }
    944 }
    945 
    946 void
    947 CodeEmitterNV50::emitAADD(const Instruction *i)
    948 {
    949    const int s = (i->op == OP_MOV) ? 0 : 1;
    950 
    951    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
    952    code[1] = 0x20000000;
    953 
    954    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
    955 
    956    emitFlagsRd(i);
    957 
    958    if (s && i->srcExists(0))
    959       setARegBits(SDATA(i->src(0)).id + 1);
    960 }
    961 
    962 void
    963 CodeEmitterNV50::emitIMUL(const Instruction *i)
    964 {
    965    code[0] = 0x40000000;
    966 
    967    if (i->encSize == 8) {
    968       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
    969       emitForm_MAD(i);
    970    } else {
    971       if (i->sType == TYPE_S16)
    972          code[0] |= 0x8100;
    973       emitForm_MUL(i);
    974    }
    975 }
    976 
    977 void
    978 CodeEmitterNV50::emitFMUL(const Instruction *i)
    979 {
    980    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
    981 
    982    code[0] = 0xc0000000;
    983 
    984    if (i->src(1).getFile() == FILE_IMMEDIATE) {
    985       code[1] = 0;
    986       emitForm_IMM(i);
    987       if (neg)
    988          code[0] |= 0x8000;
    989    } else
    990    if (i->encSize == 8) {
    991       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
    992       if (neg)
    993          code[1] |= 0x08000000;
    994       emitForm_MAD(i);
    995    } else {
    996       emitForm_MUL(i);
    997       if (neg)
    998          code[0] |= 0x8000;
    999    }
   1000 }
   1001 
   1002 void
   1003 CodeEmitterNV50::emitIMAD(const Instruction *i)
   1004 {
   1005    code[0] = 0x60000000;
   1006    if (isSignedType(i->sType))
   1007       code[1] = i->saturate ? 0x40000000 : 0x20000000;
   1008    else
   1009       code[1] = 0x00000000;
   1010 
   1011    int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
   1012    int neg2 = i->src(2).mod.neg();
   1013 
   1014    assert(!(neg1 & neg2));
   1015    code[1] |= neg1 << 27;
   1016    code[1] |= neg2 << 26;
   1017 
   1018    emitForm_MAD(i);
   1019 
   1020    if (i->flagsSrc >= 0) {
   1021       // add with carry from $cX
   1022       assert(!(code[1] & 0x0c000000) && !i->getPredicate());
   1023       code[1] |= 0xc << 24;
   1024       srcId(i->src(i->flagsSrc), 32 + 12);
   1025    }
   1026 }
   1027 
   1028 void
   1029 CodeEmitterNV50::emitISAD(const Instruction *i)
   1030 {
   1031    if (i->encSize == 8) {
   1032       code[0] = 0x50000000;
   1033       switch (i->sType) {
   1034       case TYPE_U32: code[1] = 0x04000000; break;
   1035       case TYPE_S32: code[1] = 0x0c000000; break;
   1036       case TYPE_U16: code[1] = 0x00000000; break;
   1037       case TYPE_S16: code[1] = 0x08000000; break;
   1038       default:
   1039          assert(0);
   1040          break;
   1041       }
   1042       emitForm_MAD(i);
   1043    } else {
   1044       switch (i->sType) {
   1045       case TYPE_U32: code[0] = 0x50008000; break;
   1046       case TYPE_S32: code[0] = 0x50008100; break;
   1047       case TYPE_U16: code[0] = 0x50000000; break;
   1048       case TYPE_S16: code[0] = 0x50000100; break;
   1049       default:
   1050          assert(0);
   1051          break;
   1052       }
   1053       emitForm_MUL(i);
   1054    }
   1055 }
   1056 
   1057 void
   1058 CodeEmitterNV50::emitSET(const Instruction *i)
   1059 {
   1060    code[0] = 0x30000000;
   1061    code[1] = 0x60000000;
   1062 
   1063    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
   1064 
   1065    switch (i->sType) {
   1066    case TYPE_F32: code[0] |= 0x80000000; break;
   1067    case TYPE_S32: code[1] |= 0x0c000000; break;
   1068    case TYPE_U32: code[1] |= 0x04000000; break;
   1069    case TYPE_S16: code[1] |= 0x08000000; break;
   1070    case TYPE_U16: break;
   1071    default:
   1072       assert(0);
   1073       break;
   1074    }
   1075    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
   1076    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
   1077    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
   1078    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
   1079 
   1080    emitForm_MAD(i);
   1081 }
   1082 
   1083 void
   1084 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
   1085 {
   1086    switch (rnd) {
   1087    case ROUND_NI: code[1] |= 0x08000000; break;
   1088    case ROUND_M:  code[1] |= 0x00020000; break;
   1089    case ROUND_MI: code[1] |= 0x08020000; break;
   1090    case ROUND_P:  code[1] |= 0x00040000; break;
   1091    case ROUND_PI: code[1] |= 0x08040000; break;
   1092    case ROUND_Z:  code[1] |= 0x00060000; break;
   1093    case ROUND_ZI: code[1] |= 0x08060000; break;
   1094    default:
   1095       assert(rnd == ROUND_N);
   1096       break;
   1097    }
   1098 }
   1099 
   1100 void
   1101 CodeEmitterNV50::emitCVT(const Instruction *i)
   1102 {
   1103    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   1104    RoundMode rnd;
   1105 
   1106    switch (i->op) {
   1107    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
   1108    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
   1109    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   1110    default:
   1111       rnd = i->rnd;
   1112       break;
   1113    }
   1114 
   1115    code[0] = 0xa0000000;
   1116 
   1117    switch (i->dType) {
   1118    case TYPE_F64:
   1119       switch (i->sType) {
   1120       case TYPE_F64: code[1] = 0xc4404000; break;
   1121       case TYPE_S64: code[1] = 0x44414000; break;
   1122       case TYPE_U64: code[1] = 0x44404000; break;
   1123       case TYPE_F32: code[1] = 0xc4400000; break;
   1124       case TYPE_S32: code[1] = 0x44410000; break;
   1125       case TYPE_U32: code[1] = 0x44400000; break;
   1126       default:
   1127          assert(0);
   1128          break;
   1129       }
   1130       break;
   1131    case TYPE_S64:
   1132       switch (i->sType) {
   1133       case TYPE_F64: code[1] = 0x8c404000; break;
   1134       case TYPE_F32: code[1] = 0x8c400000; break;
   1135       default:
   1136          assert(0);
   1137          break;
   1138       }
   1139       break;
   1140    case TYPE_U64:
   1141       switch (i->sType) {
   1142       case TYPE_F64: code[1] = 0x84404000; break;
   1143       case TYPE_F32: code[1] = 0x84400000; break;
   1144       default:
   1145          assert(0);
   1146          break;
   1147       }
   1148       break;
   1149    case TYPE_F32:
   1150       switch (i->sType) {
   1151       case TYPE_F64: code[1] = 0xc0404000; break;
   1152       case TYPE_S64: code[1] = 0x40414000; break;
   1153       case TYPE_U64: code[1] = 0x40404000; break;
   1154       case TYPE_F32: code[1] = 0xc4004000; break;
   1155       case TYPE_S32: code[1] = 0x44014000; break;
   1156       case TYPE_U32: code[1] = 0x44004000; break;
   1157       case TYPE_F16: code[1] = 0xc4000000; break;
   1158       default:
   1159          assert(0);
   1160          break;
   1161       }
   1162       break;
   1163    case TYPE_S32:
   1164       switch (i->sType) {
   1165       case TYPE_F64: code[1] = 0x88404000; break;
   1166       case TYPE_F32: code[1] = 0x8c004000; break;
   1167       case TYPE_S32: code[1] = 0x0c014000; break;
   1168       case TYPE_U32: code[1] = 0x0c004000; break;
   1169       case TYPE_F16: code[1] = 0x8c000000; break;
   1170       case TYPE_S16: code[1] = 0x0c010000; break;
   1171       case TYPE_U16: code[1] = 0x0c000000; break;
   1172       case TYPE_S8:  code[1] = 0x0c018000; break;
   1173       case TYPE_U8:  code[1] = 0x0c008000; break;
   1174       default:
   1175          assert(0);
   1176          break;
   1177       }
   1178       break;
   1179    case TYPE_U32:
   1180       switch (i->sType) {
   1181       case TYPE_F64: code[1] = 0x80404000; break;
   1182       case TYPE_F32: code[1] = 0x84004000; break;
   1183       case TYPE_S32: code[1] = 0x04014000; break;
   1184       case TYPE_U32: code[1] = 0x04004000; break;
   1185       case TYPE_F16: code[1] = 0x84000000; break;
   1186       case TYPE_S16: code[1] = 0x04010000; break;
   1187       case TYPE_U16: code[1] = 0x04000000; break;
   1188       case TYPE_S8:  code[1] = 0x04018000; break;
   1189       case TYPE_U8:  code[1] = 0x04008000; break;
   1190       default:
   1191          assert(0);
   1192          break;
   1193       }
   1194       break;
   1195    case TYPE_S16:
   1196    case TYPE_U16:
   1197    case TYPE_S8:
   1198    case TYPE_U8:
   1199    default:
   1200       assert(0);
   1201       break;
   1202    }
   1203    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
   1204       code[1] |= 0x00004000;
   1205 
   1206    roundMode_CVT(rnd);
   1207 
   1208    switch (i->op) {
   1209    case OP_ABS: code[1] |= 1 << 20; break;
   1210    case OP_SAT: code[1] |= 1 << 19; break;
   1211    case OP_NEG: code[1] |= 1 << 29; break;
   1212    default:
   1213       break;
   1214    }
   1215    code[1] ^= i->src(0).mod.neg() << 29;
   1216    code[1] |= i->src(0).mod.abs() << 20;
   1217    if (i->saturate)
   1218       code[1] |= 1 << 19;
   1219 
   1220    assert(i->op != OP_ABS || !i->src(0).mod.neg());
   1221 
   1222    emitForm_MAD(i);
   1223 }
   1224 
   1225 void
   1226 CodeEmitterNV50::emitPreOp(const Instruction *i)
   1227 {
   1228    code[0] = 0xb0000000;
   1229    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
   1230 
   1231    code[1] |= i->src(0).mod.abs() << 20;
   1232    code[1] |= i->src(0).mod.neg() << 26;
   1233 
   1234    emitForm_MAD(i);
   1235 }
   1236 
   1237 void
   1238 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
   1239 {
   1240    code[0] = 0x90000000;
   1241 
   1242    if (i->encSize == 4) {
   1243       assert(i->op == OP_RCP);
   1244       code[0] |= i->src(0).mod.abs() << 15;
   1245       code[0] |= i->src(0).mod.neg() << 22;
   1246       emitForm_MUL(i);
   1247    } else {
   1248       code[1] = subOp << 29;
   1249       code[1] |= i->src(0).mod.abs() << 20;
   1250       code[1] |= i->src(0).mod.neg() << 26;
   1251       emitForm_MAD(i);
   1252    }
   1253 }
   1254 
   1255 void
   1256 CodeEmitterNV50::emitNOT(const Instruction *i)
   1257 {
   1258    code[0] = 0xd0000000;
   1259    code[1] = 0x0002c000;
   1260 
   1261    switch (i->sType) {
   1262    case TYPE_U32:
   1263    case TYPE_S32:
   1264       code[1] |= 0x04000000;
   1265       break;
   1266    default:
   1267       break;
   1268    }
   1269    emitForm_MAD(i);
   1270    setSrc(i, 0, 1);
   1271 }
   1272 
   1273 void
   1274 CodeEmitterNV50::emitLogicOp(const Instruction *i)
   1275 {
   1276    code[0] = 0xd0000000;
   1277    code[1] = 0;
   1278 
   1279    if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1280       switch (i->op) {
   1281       case OP_OR:  code[0] |= 0x0100; break;
   1282       case OP_XOR: code[0] |= 0x8000; break;
   1283       default:
   1284          assert(i->op == OP_AND);
   1285          break;
   1286       }
   1287       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
   1288          code[0] |= 1 << 22;
   1289 
   1290       emitForm_IMM(i);
   1291    } else {
   1292       switch (i->op) {
   1293       case OP_AND: code[1] = 0x04000000; break;
   1294       case OP_OR:  code[1] = 0x04004000; break;
   1295       case OP_XOR: code[1] = 0x04008000; break;
   1296       default:
   1297          assert(0);
   1298          break;
   1299       }
   1300       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
   1301          code[1] |= 1 << 16;
   1302       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
   1303          code[1] |= 1 << 17;
   1304 
   1305       emitForm_MAD(i);
   1306    }
   1307 }
   1308 
   1309 void
   1310 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
   1311 {
   1312    code[0] = 0x00000001 | (shl << 16);
   1313    code[1] = 0xc0000000;
   1314 
   1315    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
   1316 
   1317    setSrcFileBits(i, NV50_OP_ENC_IMM);
   1318    setSrc(i, 0, 0);
   1319    emitFlagsRd(i);
   1320 }
   1321 
   1322 void
   1323 CodeEmitterNV50::emitShift(const Instruction *i)
   1324 {
   1325    if (i->def(0).getFile() == FILE_ADDRESS) {
   1326       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
   1327       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
   1328    } else {
   1329       code[0] = 0x30000001;
   1330       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
   1331       if (i->op == OP_SHR && isSignedType(i->sType))
   1332           code[1] |= 1 << 27;
   1333 
   1334       if (i->src(1).getFile() == FILE_IMMEDIATE) {
   1335          code[1] |= 1 << 20;
   1336          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
   1337          defId(i->def(0), 2);
   1338          srcId(i->src(0), 9);
   1339          emitFlagsRd(i);
   1340       } else {
   1341          emitForm_MAD(i);
   1342       }
   1343    }
   1344 }
   1345 
   1346 void
   1347 CodeEmitterNV50::emitOUT(const Instruction *i)
   1348 {
   1349    code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
   1350    code[1] = 0xc0000001;
   1351 
   1352    emitFlagsRd(i);
   1353 }
   1354 
   1355 void
   1356 CodeEmitterNV50::emitTEX(const TexInstruction *i)
   1357 {
   1358    code[0] = 0xf0000001;
   1359    code[1] = 0x00000000;
   1360 
   1361    switch (i->op) {
   1362    case OP_TXB:
   1363       code[1] = 0x20000000;
   1364       break;
   1365    case OP_TXL:
   1366       code[1] = 0x40000000;
   1367       break;
   1368    case OP_TXF:
   1369       code[0] |= 0x01000000;
   1370       break;
   1371    case OP_TXG:
   1372       code[0] = 0x01000000;
   1373       code[1] = 0x80000000;
   1374       break;
   1375    default:
   1376       assert(i->op == OP_TEX);
   1377       break;
   1378    }
   1379 
   1380    code[0] |= i->tex.r << 9;
   1381    code[0] |= i->tex.s << 17;
   1382 
   1383    int argc = i->tex.target.getArgCount();
   1384 
   1385    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
   1386       argc += 1;
   1387    if (i->tex.target.isShadow())
   1388       argc += 1;
   1389    assert(argc <= 4);
   1390 
   1391    code[0] |= (argc - 1) << 22;
   1392 
   1393    if (i->tex.target.isCube()) {
   1394       code[0] |= 0x08000000;
   1395    } else
   1396    if (i->tex.useOffsets) {
   1397       code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
   1398       code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
   1399       code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
   1400    }
   1401 
   1402    code[0] |= (i->tex.mask & 0x3) << 25;
   1403    code[1] |= (i->tex.mask & 0xc) << 12;
   1404 
   1405    if (i->tex.liveOnly)
   1406       code[1] |= 4;
   1407 
   1408    defId(i->def(0), 2);
   1409 
   1410    emitFlagsRd(i);
   1411 }
   1412 
   1413 void
   1414 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
   1415 {
   1416    assert(i->tex.query == TXQ_DIMS);
   1417 
   1418    code[0] = 0xf0000001;
   1419    code[1] = 0x60000000;
   1420 
   1421    code[0] |= i->tex.r << 9;
   1422    code[0] |= i->tex.s << 17;
   1423 
   1424    code[0] |= (i->tex.mask & 0x3) << 25;
   1425    code[1] |= (i->tex.mask & 0xc) << 12;
   1426 
   1427    defId(i->def(0), 2);
   1428 
   1429    emitFlagsRd(i);
   1430 }
   1431 
   1432 void
   1433 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
   1434 {
   1435    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
   1436 
   1437    code[0] = 0x10000003; // bra
   1438    code[1] = 0x00000780; // always
   1439 
   1440    switch (i->subOp) {
   1441    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
   1442       break;
   1443    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
   1444       pos += 8;
   1445       break;
   1446    default:
   1447       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
   1448       code[0] = 0x20000003; // call
   1449       code[1] = 0x00000000; // no predicate
   1450       break;
   1451    }
   1452    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
   1453    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
   1454 }
   1455 
   1456 void
   1457 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
   1458 {
   1459    const FlowInstruction *f = i->asFlow();
   1460    bool hasPred = false;
   1461    bool hasTarg = false;
   1462 
   1463    code[0] = 0x00000003 | (flowOp << 28);
   1464    code[1] = 0x00000000;
   1465 
   1466    switch (i->op) {
   1467    case OP_BRA:
   1468       hasPred = true;
   1469       hasTarg = true;
   1470       break;
   1471    case OP_BREAK:
   1472    case OP_BRKPT:
   1473    case OP_DISCARD:
   1474    case OP_RET:
   1475       hasPred = true;
   1476       break;
   1477    case OP_CALL:
   1478    case OP_PREBREAK:
   1479    case OP_JOINAT:
   1480       hasTarg = true;
   1481       break;
   1482    case OP_PRERET:
   1483       hasTarg = true;
   1484       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
   1485          emitPRERETEmu(f);
   1486          return;
   1487       }
   1488       break;
   1489    default:
   1490       break;
   1491    }
   1492 
   1493    if (hasPred)
   1494       emitFlagsRd(i);
   1495 
   1496    if (hasTarg && f) {
   1497       uint32_t pos;
   1498 
   1499       if (f->op == OP_CALL) {
   1500          if (f->builtin) {
   1501             pos = targ->getBuiltinOffset(f->target.builtin);
   1502          } else {
   1503             pos = f->target.fn->binPos;
   1504          }
   1505       } else {
   1506          pos = f->target.bb->binPos;
   1507       }
   1508 
   1509       code[0] |= ((pos >>  2) & 0xffff) << 11;
   1510       code[1] |= ((pos >> 18) & 0x003f) << 14;
   1511 
   1512       RelocEntry::Type relocTy;
   1513 
   1514       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
   1515 
   1516       addReloc(relocTy, 0, pos, 0x07fff800, 9);
   1517       addReloc(relocTy, 1, pos, 0x000fc000, -4);
   1518    }
   1519 }
   1520 
   1521 bool
   1522 CodeEmitterNV50::emitInstruction(Instruction *insn)
   1523 {
   1524    if (!insn->encSize) {
   1525       ERROR("skipping unencodable instruction: "); insn->print();
   1526       return false;
   1527    } else
   1528    if (codeSize + insn->encSize > codeSizeLimit) {
   1529       ERROR("code emitter output buffer too small\n");
   1530       return false;
   1531    }
   1532 
   1533    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
   1534       INFO("EMIT: "); insn->print();
   1535    }
   1536 
   1537    switch (insn->op) {
   1538    case OP_MOV:
   1539       emitMOV(insn);
   1540       break;
   1541    case OP_EXIT:
   1542    case OP_NOP:
   1543    case OP_JOIN:
   1544       emitNOP();
   1545       break;
   1546    case OP_VFETCH:
   1547    case OP_LOAD:
   1548       emitLOAD(insn);
   1549       break;
   1550    case OP_EXPORT:
   1551    case OP_STORE:
   1552       emitSTORE(insn);
   1553       break;
   1554    case OP_PFETCH:
   1555       emitPFETCH(insn);
   1556       break;
   1557    case OP_LINTERP:
   1558    case OP_PINTERP:
   1559       emitINTERP(insn);
   1560       break;
   1561    case OP_ADD:
   1562    case OP_SUB:
   1563       if (isFloatType(insn->dType))
   1564          emitFADD(insn);
   1565       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
   1566          emitAADD(insn);
   1567       else
   1568          emitUADD(insn);
   1569       break;
   1570    case OP_MUL:
   1571       if (isFloatType(insn->dType))
   1572          emitFMUL(insn);
   1573       else
   1574          emitIMUL(insn);
   1575       break;
   1576    case OP_MAD:
   1577    case OP_FMA:
   1578       if (isFloatType(insn->dType))
   1579          emitFMAD(insn);
   1580       else
   1581          emitIMAD(insn);
   1582       break;
   1583    case OP_SAD:
   1584       emitISAD(insn);
   1585       break;
   1586    case OP_NOT:
   1587       emitNOT(insn);
   1588       break;
   1589    case OP_AND:
   1590    case OP_OR:
   1591    case OP_XOR:
   1592       emitLogicOp(insn);
   1593       break;
   1594    case OP_SHL:
   1595    case OP_SHR:
   1596       emitShift(insn);
   1597       break;
   1598    case OP_SET:
   1599       emitSET(insn);
   1600       break;
   1601    case OP_MIN:
   1602    case OP_MAX:
   1603       emitMINMAX(insn);
   1604       break;
   1605    case OP_CEIL:
   1606    case OP_FLOOR:
   1607    case OP_TRUNC:
   1608    case OP_ABS:
   1609    case OP_NEG:
   1610    case OP_SAT:
   1611       emitCVT(insn);
   1612       break;
   1613    case OP_CVT:
   1614       if (insn->def(0).getFile() == FILE_ADDRESS)
   1615          emitARL(insn, 0);
   1616       else
   1617       if (insn->def(0).getFile() == FILE_FLAGS ||
   1618           insn->src(0).getFile() == FILE_FLAGS ||
   1619           insn->src(0).getFile() == FILE_ADDRESS)
   1620          emitMOV(insn);
   1621       else
   1622          emitCVT(insn);
   1623       break;
   1624    case OP_RCP:
   1625       emitSFnOp(insn, 0);
   1626       break;
   1627    case OP_RSQ:
   1628       emitSFnOp(insn, 2);
   1629       break;
   1630    case OP_LG2:
   1631       emitSFnOp(insn, 3);
   1632       break;
   1633    case OP_SIN:
   1634       emitSFnOp(insn, 4);
   1635       break;
   1636    case OP_COS:
   1637       emitSFnOp(insn, 5);
   1638       break;
   1639    case OP_EX2:
   1640       emitSFnOp(insn, 6);
   1641       break;
   1642    case OP_PRESIN:
   1643    case OP_PREEX2:
   1644       emitPreOp(insn);
   1645       break;
   1646    case OP_TEX:
   1647    case OP_TXB:
   1648    case OP_TXL:
   1649    case OP_TXF:
   1650       emitTEX(insn->asTex());
   1651       break;
   1652    case OP_TXQ:
   1653       emitTXQ(insn->asTex());
   1654       break;
   1655    case OP_EMIT:
   1656    case OP_RESTART:
   1657       emitOUT(insn);
   1658       break;
   1659    case OP_DISCARD:
   1660       emitFlow(insn, 0x0);
   1661       break;
   1662    case OP_BRA:
   1663       emitFlow(insn, 0x1);
   1664       break;
   1665    case OP_CALL:
   1666       emitFlow(insn, 0x2);
   1667       break;
   1668    case OP_RET:
   1669       emitFlow(insn, 0x3);
   1670       break;
   1671    case OP_PREBREAK:
   1672       emitFlow(insn, 0x4);
   1673       break;
   1674    case OP_BREAK:
   1675       emitFlow(insn, 0x5);
   1676       break;
   1677    case OP_QUADON:
   1678       emitFlow(insn, 0x6);
   1679       break;
   1680    case OP_QUADPOP:
   1681       emitFlow(insn, 0x7);
   1682       break;
   1683    case OP_JOINAT:
   1684       emitFlow(insn, 0xa);
   1685       break;
   1686    case OP_PRERET:
   1687       emitFlow(insn, 0xd);
   1688       break;
   1689    case OP_QUADOP:
   1690       emitQUADOP(insn, insn->lanes, insn->subOp);
   1691       break;
   1692    case OP_DFDX:
   1693       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
   1694       break;
   1695    case OP_DFDY:
   1696       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
   1697       break;
   1698    case OP_PHI:
   1699    case OP_UNION:
   1700    case OP_CONSTRAINT:
   1701       ERROR("operation should have been eliminated\n");
   1702       return false;
   1703    case OP_EXP:
   1704    case OP_LOG:
   1705    case OP_SQRT:
   1706    case OP_POW:
   1707    case OP_SELP:
   1708    case OP_SLCT:
   1709    case OP_TXD:
   1710    case OP_PRECONT:
   1711    case OP_CONT:
   1712    case OP_POPCNT:
   1713    case OP_INSBF:
   1714    case OP_EXTBF:
   1715       ERROR("operation should have been lowered\n");
   1716       return false;
   1717    default:
   1718       ERROR("unknown op: %u\n", insn->op);
   1719       return false;
   1720    }
   1721    if (insn->join || insn->op == OP_JOIN)
   1722       code[1] |= 0x2;
   1723    else
   1724    if (insn->exit || insn->op == OP_EXIT)
   1725       code[1] |= 0x1;
   1726 
   1727    assert((insn->encSize == 8) == (code[0] & 1));
   1728 
   1729    code += insn->encSize / 4;
   1730    codeSize += insn->encSize;
   1731    return true;
   1732 }
   1733 
   1734 uint32_t
   1735 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
   1736 {
   1737    const Target::OpInfo &info = targ->getOpInfo(i);
   1738 
   1739    if (info.minEncSize > 4)
   1740       return 8;
   1741 
   1742    // check constraints on dst and src operands
   1743    for (int d = 0; i->defExists(d); ++d) {
   1744       if (i->def(d).rep()->reg.data.id > 63 ||
   1745           i->def(d).rep()->reg.file != FILE_GPR)
   1746          return 8;
   1747    }
   1748 
   1749    for (int s = 0; i->srcExists(s); ++s) {
   1750       DataFile sf = i->src(s).getFile();
   1751       if (sf != FILE_GPR)
   1752          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
   1753             return 8;
   1754       if (i->src(s).rep()->reg.data.id > 63)
   1755          return 8;
   1756    }
   1757 
   1758    // check modifiers & rounding
   1759    if (i->join || i->lanes != 0xf || i->exit)
   1760       return 8;
   1761    if (i->op == OP_MUL && i->rnd != ROUND_N)
   1762       return 8;
   1763 
   1764    if (i->asTex())
   1765       return 8; // TODO: short tex encoding
   1766 
   1767    // check constraints on short MAD
   1768    if (info.srcNr >= 2 && i->srcExists(2)) {
   1769       if (i->saturate || i->src(2).mod)
   1770          return 8;
   1771       if ((i->src(0).mod ^ i->src(1).mod) ||
   1772           (i->src(0).mod | i->src(1).mod).abs())
   1773          return 8;
   1774       if (!i->defExists(0) ||
   1775           i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
   1776          return 8;
   1777    }
   1778 
   1779    return info.minEncSize;
   1780 }
   1781 
   1782 // Change the encoding size of an instruction after BBs have been scheduled.
   1783 static void
   1784 makeInstructionLong(Instruction *insn)
   1785 {
   1786    if (insn->encSize == 8)
   1787       return;
   1788    Function *fn = insn->bb->getFunction();
   1789    int n = 0;
   1790    int adj = 4;
   1791 
   1792    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
   1793 
   1794    if (n & 1) {
   1795       adj = 8;
   1796       insn->next->encSize = 8;
   1797    } else
   1798    if (insn->prev && insn->prev->encSize == 4) {
   1799       adj = 8;
   1800       insn->prev->encSize = 8;
   1801    }
   1802    insn->encSize = 8;
   1803 
   1804    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
   1805       fn->bbArray[i]->binPos += 4;
   1806    }
   1807    fn->binSize += adj;
   1808    insn->bb->binSize += adj;
   1809 }
   1810 
   1811 static bool
   1812 trySetExitModifier(Instruction *insn)
   1813 {
   1814    if (insn->op == OP_DISCARD ||
   1815        insn->op == OP_QUADON ||
   1816        insn->op == OP_QUADPOP)
   1817       return false;
   1818    for (int s = 0; insn->srcExists(s); ++s)
   1819       if (insn->src(s).getFile() == FILE_IMMEDIATE)
   1820          return false;
   1821    if (insn->asFlow()) {
   1822       if (insn->op == OP_CALL) // side effects !
   1823          return false;
   1824       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
   1825          return false;
   1826       insn->op = OP_EXIT;
   1827    }
   1828    insn->exit = 1;
   1829    makeInstructionLong(insn);
   1830    return true;
   1831 }
   1832 
   1833 static void
   1834 replaceExitWithModifier(Function *func)
   1835 {
   1836    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
   1837 
   1838    if (!epilogue->getExit() ||
   1839        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
   1840       return;
   1841 
   1842    if (epilogue->getEntry()->op != OP_EXIT) {
   1843       Instruction *insn = epilogue->getExit()->prev;
   1844       if (!insn || !trySetExitModifier(insn))
   1845          return;
   1846       insn->exit = 1;
   1847    } else {
   1848       for (Graph::EdgeIterator ei = func->cfgExit->incident();
   1849            !ei.end(); ei.next()) {
   1850          BasicBlock *bb = BasicBlock::get(ei.getNode());
   1851          Instruction *i = bb->getExit();
   1852 
   1853          if (!i || !trySetExitModifier(i))
   1854             return;
   1855       }
   1856    }
   1857    epilogue->binSize -= 8;
   1858    func->binSize -= 8;
   1859    delete_Instruction(func->getProgram(), epilogue->getExit());
   1860 }
   1861 
   1862 void
   1863 CodeEmitterNV50::prepareEmission(Function *func)
   1864 {
   1865    CodeEmitter::prepareEmission(func);
   1866 
   1867    replaceExitWithModifier(func);
   1868 }
   1869 
   1870 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
   1871 {
   1872    targ = target; // specialized
   1873    code = NULL;
   1874    codeSize = codeSizeLimit = 0;
   1875    relocInfo = NULL;
   1876 }
   1877 
   1878 CodeEmitter *
   1879 TargetNV50::getCodeEmitter(Program::Type type)
   1880 {
   1881    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
   1882    emit->setProgramType(type);
   1883    return emit;
   1884 }
   1885 
   1886 } // namespace nv50_ir
   1887