Home | History | Annotate | Download | only in codegen
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 extern "C" {
     24 #include "tgsi/tgsi_dump.h"
     25 #include "tgsi/tgsi_scan.h"
     26 }
     27 
     28 #include "nv50_ir.h"
     29 #include "nv50_ir_util.h"
     30 #include "nv50_ir_build_util.h"
     31 
     32 namespace tgsi {
     33 
     34 class Source;
     35 
     36 static nv50_ir::operation translateOpcode(uint opcode);
     37 static nv50_ir::DataFile translateFile(uint file);
     38 static nv50_ir::TexTarget translateTexture(uint texTarg);
     39 static nv50_ir::SVSemantic translateSysVal(uint sysval);
     40 
     41 class Instruction
     42 {
     43 public:
     44    Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
     45 
     46    class SrcRegister
     47    {
     48    public:
     49       SrcRegister(const struct tgsi_full_src_register *src)
     50          : reg(src->Register),
     51            fsr(src)
     52       { }
     53 
     54       SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
     55 
     56       struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
     57       {
     58          struct tgsi_src_register reg;
     59          memset(&reg, 0, sizeof(reg));
     60          reg.Index = off.Index;
     61          reg.File = off.File;
     62          reg.SwizzleX = off.SwizzleX;
     63          reg.SwizzleY = off.SwizzleY;
     64          reg.SwizzleZ = off.SwizzleZ;
     65          return reg;
     66       }
     67 
     68       SrcRegister(const struct tgsi_texture_offset& off) :
     69          reg(offsetToSrc(off)),
     70          fsr(NULL)
     71       { }
     72 
     73       uint getFile() const { return reg.File; }
     74 
     75       bool is2D() const { return reg.Dimension; }
     76 
     77       bool isIndirect(int dim) const
     78       {
     79          return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
     80       }
     81 
     82       int getIndex(int dim) const
     83       {
     84          return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
     85       }
     86 
     87       int getSwizzle(int chan) const
     88       {
     89          return tgsi_util_get_src_register_swizzle(&reg, chan);
     90       }
     91 
     92       nv50_ir::Modifier getMod(int chan) const;
     93 
     94       SrcRegister getIndirect(int dim) const
     95       {
     96          assert(fsr && isIndirect(dim));
     97          if (dim)
     98             return SrcRegister(fsr->DimIndirect);
     99          return SrcRegister(fsr->Indirect);
    100       }
    101 
    102       uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
    103       {
    104          assert(reg.File == TGSI_FILE_IMMEDIATE);
    105          assert(!reg.Absolute);
    106          assert(!reg.Negate);
    107          return info->immd.data[reg.Index * 4 + getSwizzle(c)];
    108       }
    109 
    110    private:
    111       const struct tgsi_src_register reg;
    112       const struct tgsi_full_src_register *fsr;
    113    };
    114 
    115    class DstRegister
    116    {
    117    public:
    118       DstRegister(const struct tgsi_full_dst_register *dst)
    119          : reg(dst->Register),
    120            fdr(dst)
    121       { }
    122 
    123       DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
    124 
    125       uint getFile() const { return reg.File; }
    126 
    127       bool is2D() const { return reg.Dimension; }
    128 
    129       bool isIndirect(int dim) const
    130       {
    131          return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
    132       }
    133 
    134       int getIndex(int dim) const
    135       {
    136          return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
    137       }
    138 
    139       unsigned int getMask() const { return reg.WriteMask; }
    140 
    141       bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
    142 
    143       SrcRegister getIndirect(int dim) const
    144       {
    145          assert(fdr && isIndirect(dim));
    146          if (dim)
    147             return SrcRegister(fdr->DimIndirect);
    148          return SrcRegister(fdr->Indirect);
    149       }
    150 
    151    private:
    152       const struct tgsi_dst_register reg;
    153       const struct tgsi_full_dst_register *fdr;
    154    };
    155 
    156    inline uint getOpcode() const { return insn->Instruction.Opcode; }
    157 
    158    unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
    159    unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
    160 
    161    // mask of used components of source s
    162    unsigned int srcMask(unsigned int s) const;
    163 
    164    SrcRegister getSrc(unsigned int s) const
    165    {
    166       assert(s < srcCount());
    167       return SrcRegister(&insn->Src[s]);
    168    }
    169 
    170    DstRegister getDst(unsigned int d) const
    171    {
    172       assert(d < dstCount());
    173       return DstRegister(&insn->Dst[d]);
    174    }
    175 
    176    SrcRegister getTexOffset(unsigned int i) const
    177    {
    178       assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
    179       return SrcRegister(insn->TexOffsets[i]);
    180    }
    181 
    182    unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
    183 
    184    bool checkDstSrcAliasing() const;
    185 
    186    inline nv50_ir::operation getOP() const {
    187       return translateOpcode(getOpcode()); }
    188 
    189    nv50_ir::DataType inferSrcType() const;
    190    nv50_ir::DataType inferDstType() const;
    191 
    192    nv50_ir::CondCode getSetCond() const;
    193 
    194    nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
    195 
    196    inline uint getLabel() { return insn->Label.Label; }
    197 
    198    unsigned getSaturate() const { return insn->Instruction.Saturate; }
    199 
    200    void print() const
    201    {
    202       tgsi_dump_instruction(insn, 1);
    203    }
    204 
    205 private:
    206    const struct tgsi_full_instruction *insn;
    207 };
    208 
    209 unsigned int Instruction::srcMask(unsigned int s) const
    210 {
    211    unsigned int mask = insn->Dst[0].Register.WriteMask;
    212 
    213    switch (insn->Instruction.Opcode) {
    214    case TGSI_OPCODE_COS:
    215    case TGSI_OPCODE_SIN:
    216       return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
    217    case TGSI_OPCODE_DP2:
    218       return 0x3;
    219    case TGSI_OPCODE_DP3:
    220       return 0x7;
    221    case TGSI_OPCODE_DP4:
    222    case TGSI_OPCODE_DPH:
    223    case TGSI_OPCODE_KIL: /* WriteMask ignored */
    224       return 0xf;
    225    case TGSI_OPCODE_DST:
    226       return mask & (s ? 0xa : 0x6);
    227    case TGSI_OPCODE_EX2:
    228    case TGSI_OPCODE_EXP:
    229    case TGSI_OPCODE_LG2:
    230    case TGSI_OPCODE_LOG:
    231    case TGSI_OPCODE_POW:
    232    case TGSI_OPCODE_RCP:
    233    case TGSI_OPCODE_RSQ:
    234    case TGSI_OPCODE_SCS:
    235       return 0x1;
    236    case TGSI_OPCODE_IF:
    237       return 0x1;
    238    case TGSI_OPCODE_LIT:
    239       return 0xb;
    240    case TGSI_OPCODE_TEX:
    241    case TGSI_OPCODE_TXB:
    242    case TGSI_OPCODE_TXD:
    243    case TGSI_OPCODE_TXL:
    244    case TGSI_OPCODE_TXP:
    245    {
    246       const struct tgsi_instruction_texture *tex = &insn->Texture;
    247 
    248       assert(insn->Instruction.Texture);
    249 
    250       mask = 0x7;
    251       if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
    252           insn->Instruction.Opcode != TGSI_OPCODE_TXD)
    253          mask |= 0x8; /* bias, lod or proj */
    254 
    255       switch (tex->Texture) {
    256       case TGSI_TEXTURE_1D:
    257          mask &= 0x9;
    258          break;
    259       case TGSI_TEXTURE_SHADOW1D:
    260          mask &= 0xd;
    261          break;
    262       case TGSI_TEXTURE_1D_ARRAY:
    263       case TGSI_TEXTURE_2D:
    264       case TGSI_TEXTURE_RECT:
    265          mask &= 0xb;
    266          break;
    267       default:
    268          break;
    269       }
    270    }
    271       return mask;
    272    case TGSI_OPCODE_XPD:
    273    {
    274       unsigned int x = 0;
    275       if (mask & 1) x |= 0x6;
    276       if (mask & 2) x |= 0x5;
    277       if (mask & 4) x |= 0x3;
    278       return x;
    279    }
    280    default:
    281       break;
    282    }
    283 
    284    return mask;
    285 }
    286 
    287 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
    288 {
    289    nv50_ir::Modifier m(0);
    290 
    291    if (reg.Absolute)
    292       m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
    293    if (reg.Negate)
    294       m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
    295    return m;
    296 }
    297 
    298 static nv50_ir::DataFile translateFile(uint file)
    299 {
    300    switch (file) {
    301    case TGSI_FILE_CONSTANT:        return nv50_ir::FILE_MEMORY_CONST;
    302    case TGSI_FILE_INPUT:           return nv50_ir::FILE_SHADER_INPUT;
    303    case TGSI_FILE_OUTPUT:          return nv50_ir::FILE_SHADER_OUTPUT;
    304    case TGSI_FILE_TEMPORARY:       return nv50_ir::FILE_GPR;
    305    case TGSI_FILE_ADDRESS:         return nv50_ir::FILE_ADDRESS;
    306    case TGSI_FILE_PREDICATE:       return nv50_ir::FILE_PREDICATE;
    307    case TGSI_FILE_IMMEDIATE:       return nv50_ir::FILE_IMMEDIATE;
    308    case TGSI_FILE_SYSTEM_VALUE:    return nv50_ir::FILE_SYSTEM_VALUE;
    309    case TGSI_FILE_IMMEDIATE_ARRAY: return nv50_ir::FILE_IMMEDIATE;
    310    case TGSI_FILE_TEMPORARY_ARRAY: return nv50_ir::FILE_MEMORY_LOCAL;
    311    case TGSI_FILE_RESOURCE:        return nv50_ir::FILE_MEMORY_GLOBAL;
    312    case TGSI_FILE_SAMPLER:
    313    case TGSI_FILE_NULL:
    314    default:
    315       return nv50_ir::FILE_NULL;
    316    }
    317 }
    318 
    319 static nv50_ir::SVSemantic translateSysVal(uint sysval)
    320 {
    321    switch (sysval) {
    322    case TGSI_SEMANTIC_FACE:       return nv50_ir::SV_FACE;
    323    case TGSI_SEMANTIC_PSIZE:      return nv50_ir::SV_POINT_SIZE;
    324    case TGSI_SEMANTIC_PRIMID:     return nv50_ir::SV_PRIMITIVE_ID;
    325    case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
    326    case TGSI_SEMANTIC_VERTEXID:   return nv50_ir::SV_VERTEX_ID;
    327    default:
    328       assert(0);
    329       return nv50_ir::SV_CLOCK;
    330    }
    331 }
    332 
    333 #define NV50_IR_TEX_TARG_CASE(a, b) \
    334    case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
    335 
    336 static nv50_ir::TexTarget translateTexture(uint tex)
    337 {
    338    switch (tex) {
    339    NV50_IR_TEX_TARG_CASE(1D, 1D);
    340    NV50_IR_TEX_TARG_CASE(2D, 2D);
    341    NV50_IR_TEX_TARG_CASE(3D, 3D);
    342    NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
    343    NV50_IR_TEX_TARG_CASE(RECT, RECT);
    344    NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
    345    NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
    346    NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
    347    NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
    348    NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
    349    NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
    350    NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
    351    NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
    352    NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
    353 
    354    case TGSI_TEXTURE_UNKNOWN:
    355    default:
    356       assert(!"invalid texture target");
    357       return nv50_ir::TEX_TARGET_2D;
    358    }
    359 }
    360 
    361 nv50_ir::DataType Instruction::inferSrcType() const
    362 {
    363    switch (getOpcode()) {
    364    case TGSI_OPCODE_AND:
    365    case TGSI_OPCODE_OR:
    366    case TGSI_OPCODE_XOR:
    367    case TGSI_OPCODE_NOT:
    368    case TGSI_OPCODE_U2F:
    369    case TGSI_OPCODE_UADD:
    370    case TGSI_OPCODE_UDIV:
    371    case TGSI_OPCODE_UMOD:
    372    case TGSI_OPCODE_UMAD:
    373    case TGSI_OPCODE_UMUL:
    374    case TGSI_OPCODE_UMAX:
    375    case TGSI_OPCODE_UMIN:
    376    case TGSI_OPCODE_USEQ:
    377    case TGSI_OPCODE_USGE:
    378    case TGSI_OPCODE_USLT:
    379    case TGSI_OPCODE_USNE:
    380    case TGSI_OPCODE_USHR:
    381    case TGSI_OPCODE_UCMP:
    382       return nv50_ir::TYPE_U32;
    383    case TGSI_OPCODE_I2F:
    384    case TGSI_OPCODE_IDIV:
    385    case TGSI_OPCODE_IMAX:
    386    case TGSI_OPCODE_IMIN:
    387    case TGSI_OPCODE_IABS:
    388    case TGSI_OPCODE_INEG:
    389    case TGSI_OPCODE_ISGE:
    390    case TGSI_OPCODE_ISHR:
    391    case TGSI_OPCODE_ISLT:
    392    case TGSI_OPCODE_ISSG:
    393    case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
    394    case TGSI_OPCODE_MOD:
    395    case TGSI_OPCODE_UARL:
    396       return nv50_ir::TYPE_S32;
    397    default:
    398       return nv50_ir::TYPE_F32;
    399    }
    400 }
    401 
    402 nv50_ir::DataType Instruction::inferDstType() const
    403 {
    404    switch (getOpcode()) {
    405    case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
    406    case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
    407    case TGSI_OPCODE_I2F:
    408    case TGSI_OPCODE_U2F:
    409       return nv50_ir::TYPE_F32;
    410    default:
    411       return inferSrcType();
    412    }
    413 }
    414 
    415 nv50_ir::CondCode Instruction::getSetCond() const
    416 {
    417    using namespace nv50_ir;
    418 
    419    switch (getOpcode()) {
    420    case TGSI_OPCODE_SLT:
    421    case TGSI_OPCODE_ISLT:
    422    case TGSI_OPCODE_USLT:
    423       return CC_LT;
    424    case TGSI_OPCODE_SLE:
    425       return CC_LE;
    426    case TGSI_OPCODE_SGE:
    427    case TGSI_OPCODE_ISGE:
    428    case TGSI_OPCODE_USGE:
    429       return CC_GE;
    430    case TGSI_OPCODE_SGT:
    431       return CC_GT;
    432    case TGSI_OPCODE_SEQ:
    433    case TGSI_OPCODE_USEQ:
    434       return CC_EQ;
    435    case TGSI_OPCODE_SNE:
    436       return CC_NEU;
    437    case TGSI_OPCODE_USNE:
    438       return CC_NE;
    439    case TGSI_OPCODE_SFL:
    440       return CC_NEVER;
    441    case TGSI_OPCODE_STR:
    442    default:
    443       return CC_ALWAYS;
    444    }
    445 }
    446 
    447 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
    448 
    449 static nv50_ir::operation translateOpcode(uint opcode)
    450 {
    451    switch (opcode) {
    452    NV50_IR_OPCODE_CASE(ARL, SHL);
    453    NV50_IR_OPCODE_CASE(MOV, MOV);
    454 
    455    NV50_IR_OPCODE_CASE(RCP, RCP);
    456    NV50_IR_OPCODE_CASE(RSQ, RSQ);
    457 
    458    NV50_IR_OPCODE_CASE(MUL, MUL);
    459    NV50_IR_OPCODE_CASE(ADD, ADD);
    460 
    461    NV50_IR_OPCODE_CASE(MIN, MIN);
    462    NV50_IR_OPCODE_CASE(MAX, MAX);
    463    NV50_IR_OPCODE_CASE(SLT, SET);
    464    NV50_IR_OPCODE_CASE(SGE, SET);
    465    NV50_IR_OPCODE_CASE(MAD, MAD);
    466    NV50_IR_OPCODE_CASE(SUB, SUB);
    467 
    468    NV50_IR_OPCODE_CASE(FLR, FLOOR);
    469    NV50_IR_OPCODE_CASE(ROUND, CVT);
    470    NV50_IR_OPCODE_CASE(EX2, EX2);
    471    NV50_IR_OPCODE_CASE(LG2, LG2);
    472    NV50_IR_OPCODE_CASE(POW, POW);
    473 
    474    NV50_IR_OPCODE_CASE(ABS, ABS);
    475 
    476    NV50_IR_OPCODE_CASE(COS, COS);
    477    NV50_IR_OPCODE_CASE(DDX, DFDX);
    478    NV50_IR_OPCODE_CASE(DDY, DFDY);
    479    NV50_IR_OPCODE_CASE(KILP, DISCARD);
    480 
    481    NV50_IR_OPCODE_CASE(SEQ, SET);
    482    NV50_IR_OPCODE_CASE(SFL, SET);
    483    NV50_IR_OPCODE_CASE(SGT, SET);
    484    NV50_IR_OPCODE_CASE(SIN, SIN);
    485    NV50_IR_OPCODE_CASE(SLE, SET);
    486    NV50_IR_OPCODE_CASE(SNE, SET);
    487    NV50_IR_OPCODE_CASE(STR, SET);
    488    NV50_IR_OPCODE_CASE(TEX, TEX);
    489    NV50_IR_OPCODE_CASE(TXD, TXD);
    490    NV50_IR_OPCODE_CASE(TXP, TEX);
    491 
    492    NV50_IR_OPCODE_CASE(BRA, BRA);
    493    NV50_IR_OPCODE_CASE(CAL, CALL);
    494    NV50_IR_OPCODE_CASE(RET, RET);
    495    NV50_IR_OPCODE_CASE(CMP, SLCT);
    496 
    497    NV50_IR_OPCODE_CASE(TXB, TXB);
    498 
    499    NV50_IR_OPCODE_CASE(DIV, DIV);
    500 
    501    NV50_IR_OPCODE_CASE(TXL, TXL);
    502 
    503    NV50_IR_OPCODE_CASE(CEIL, CEIL);
    504    NV50_IR_OPCODE_CASE(I2F, CVT);
    505    NV50_IR_OPCODE_CASE(NOT, NOT);
    506    NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
    507    NV50_IR_OPCODE_CASE(SHL, SHL);
    508 
    509    NV50_IR_OPCODE_CASE(AND, AND);
    510    NV50_IR_OPCODE_CASE(OR, OR);
    511    NV50_IR_OPCODE_CASE(MOD, MOD);
    512    NV50_IR_OPCODE_CASE(XOR, XOR);
    513    NV50_IR_OPCODE_CASE(SAD, SAD);
    514    NV50_IR_OPCODE_CASE(TXF, TXF);
    515    NV50_IR_OPCODE_CASE(TXQ, TXQ);
    516 
    517    NV50_IR_OPCODE_CASE(EMIT, EMIT);
    518    NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
    519 
    520    NV50_IR_OPCODE_CASE(KIL, DISCARD);
    521 
    522    NV50_IR_OPCODE_CASE(F2I, CVT);
    523    NV50_IR_OPCODE_CASE(IDIV, DIV);
    524    NV50_IR_OPCODE_CASE(IMAX, MAX);
    525    NV50_IR_OPCODE_CASE(IMIN, MIN);
    526    NV50_IR_OPCODE_CASE(IABS, ABS);
    527    NV50_IR_OPCODE_CASE(INEG, NEG);
    528    NV50_IR_OPCODE_CASE(ISGE, SET);
    529    NV50_IR_OPCODE_CASE(ISHR, SHR);
    530    NV50_IR_OPCODE_CASE(ISLT, SET);
    531    NV50_IR_OPCODE_CASE(F2U, CVT);
    532    NV50_IR_OPCODE_CASE(U2F, CVT);
    533    NV50_IR_OPCODE_CASE(UADD, ADD);
    534    NV50_IR_OPCODE_CASE(UDIV, DIV);
    535    NV50_IR_OPCODE_CASE(UMAD, MAD);
    536    NV50_IR_OPCODE_CASE(UMAX, MAX);
    537    NV50_IR_OPCODE_CASE(UMIN, MIN);
    538    NV50_IR_OPCODE_CASE(UMOD, MOD);
    539    NV50_IR_OPCODE_CASE(UMUL, MUL);
    540    NV50_IR_OPCODE_CASE(USEQ, SET);
    541    NV50_IR_OPCODE_CASE(USGE, SET);
    542    NV50_IR_OPCODE_CASE(USHR, SHR);
    543    NV50_IR_OPCODE_CASE(USLT, SET);
    544    NV50_IR_OPCODE_CASE(USNE, SET);
    545 
    546    NV50_IR_OPCODE_CASE(LOAD, TXF);
    547    NV50_IR_OPCODE_CASE(SAMPLE, TEX);
    548    NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
    549    NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
    550    NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
    551    NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
    552    NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
    553    NV50_IR_OPCODE_CASE(GATHER4, TXG);
    554    NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
    555 
    556    NV50_IR_OPCODE_CASE(END, EXIT);
    557 
    558    default:
    559       return nv50_ir::OP_NOP;
    560    }
    561 }
    562 
    563 bool Instruction::checkDstSrcAliasing() const
    564 {
    565    if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
    566       return false;
    567 
    568    for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
    569       if (insn->Src[s].Register.File == TGSI_FILE_NULL)
    570          break;
    571       if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
    572           insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
    573          return true;
    574    }
    575    return false;
    576 }
    577 
    578 class Source
    579 {
    580 public:
    581    Source(struct nv50_ir_prog_info *);
    582    ~Source();
    583 
    584 public:
    585    bool scanSource();
    586    unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
    587 
    588 public:
    589    struct tgsi_shader_info scan;
    590    struct tgsi_full_instruction *insns;
    591    const struct tgsi_token *tokens;
    592    struct nv50_ir_prog_info *info;
    593 
    594    nv50_ir::DynArray tempArrays;
    595    nv50_ir::DynArray immdArrays;
    596    int tempArrayCount;
    597    int immdArrayCount;
    598 
    599    bool mainTempsInLMem;
    600 
    601    int clipVertexOutput;
    602 
    603    uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
    604    unsigned samplerViewCount;
    605 
    606 private:
    607    int inferSysValDirection(unsigned sn) const;
    608    bool scanDeclaration(const struct tgsi_full_declaration *);
    609    bool scanInstruction(const struct tgsi_full_instruction *);
    610    void scanProperty(const struct tgsi_full_property *);
    611    void scanImmediate(const struct tgsi_full_immediate *);
    612 
    613    inline bool isEdgeFlagPassthrough(const Instruction&) const;
    614 };
    615 
    616 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
    617 {
    618    tokens = (const struct tgsi_token *)info->bin.source;
    619 
    620    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
    621       tgsi_dump(tokens, 0);
    622 
    623    samplerViewTargets = NULL;
    624 
    625    mainTempsInLMem = FALSE;
    626 }
    627 
    628 Source::~Source()
    629 {
    630    if (insns)
    631       FREE(insns);
    632 
    633    if (info->immd.data)
    634       FREE(info->immd.data);
    635    if (info->immd.type)
    636       FREE(info->immd.type);
    637 
    638    if (samplerViewTargets)
    639       delete[] samplerViewTargets;
    640 }
    641 
    642 bool Source::scanSource()
    643 {
    644    unsigned insnCount = 0;
    645    struct tgsi_parse_context parse;
    646 
    647    tgsi_scan_shader(tokens, &scan);
    648 
    649    insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
    650                                                   sizeof(insns[0]));
    651    if (!insns)
    652       return false;
    653 
    654    clipVertexOutput = -1;
    655 
    656    samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
    657    samplerViewTargets = new uint8_t[samplerViewCount];
    658 
    659    info->immd.bufSize = 0;
    660    tempArrayCount = 0;
    661    immdArrayCount = 0;
    662 
    663    info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
    664    info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
    665    info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
    666 
    667    if (info->type == PIPE_SHADER_FRAGMENT) {
    668       info->prop.fp.writesDepth = scan.writes_z;
    669       info->prop.fp.usesDiscard = scan.uses_kill;
    670    } else
    671    if (info->type == PIPE_SHADER_GEOMETRY) {
    672       info->prop.gp.instanceCount = 1; // default value
    673    }
    674 
    675    info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
    676    info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
    677 
    678    tgsi_parse_init(&parse, tokens);
    679    while (!tgsi_parse_end_of_tokens(&parse)) {
    680       tgsi_parse_token(&parse);
    681 
    682       switch (parse.FullToken.Token.Type) {
    683       case TGSI_TOKEN_TYPE_IMMEDIATE:
    684          scanImmediate(&parse.FullToken.FullImmediate);
    685          break;
    686       case TGSI_TOKEN_TYPE_DECLARATION:
    687          scanDeclaration(&parse.FullToken.FullDeclaration);
    688          break;
    689       case TGSI_TOKEN_TYPE_INSTRUCTION:
    690          insns[insnCount++] = parse.FullToken.FullInstruction;
    691          scanInstruction(&parse.FullToken.FullInstruction);
    692          break;
    693       case TGSI_TOKEN_TYPE_PROPERTY:
    694          scanProperty(&parse.FullToken.FullProperty);
    695          break;
    696       default:
    697          INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
    698          break;
    699       }
    700    }
    701    tgsi_parse_free(&parse);
    702 
    703    if (mainTempsInLMem)
    704       info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
    705 
    706    if (info->io.genUserClip > 0) {
    707       info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
    708 
    709       for (unsigned int n = 0; n < ((info->io.genUserClip + 3) / 4); ++n) {
    710          unsigned int i = info->numOutputs++;
    711          info->out[i].id = i;
    712          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
    713          info->out[i].si = n;
    714          info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
    715       }
    716    }
    717 
    718    return info->assignSlots(info) == 0;
    719 }
    720 
    721 void Source::scanProperty(const struct tgsi_full_property *prop)
    722 {
    723    switch (prop->Property.PropertyName) {
    724    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
    725       info->prop.gp.outputPrim = prop->u[0].Data;
    726       break;
    727    case TGSI_PROPERTY_GS_INPUT_PRIM:
    728       info->prop.gp.inputPrim = prop->u[0].Data;
    729       break;
    730    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
    731       info->prop.gp.maxVertices = prop->u[0].Data;
    732       break;
    733 #if 0
    734    case TGSI_PROPERTY_GS_INSTANCE_COUNT:
    735       info->prop.gp.instanceCount = prop->u[0].Data;
    736       break;
    737 #endif
    738    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
    739       info->prop.fp.separateFragData = TRUE;
    740       break;
    741    case TGSI_PROPERTY_FS_COORD_ORIGIN:
    742    case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
    743       // we don't care
    744       break;
    745    case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
    746       info->io.genUserClip = -1;
    747       break;
    748    default:
    749       INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
    750       break;
    751    }
    752 }
    753 
    754 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
    755 {
    756    const unsigned n = info->immd.count++;
    757 
    758    assert(n < scan.immediate_count);
    759 
    760    for (int c = 0; c < 4; ++c)
    761       info->immd.data[n * 4 + c] = imm->u[c].Uint;
    762 
    763    info->immd.type[n] = imm->Immediate.DataType;
    764 }
    765 
    766 int Source::inferSysValDirection(unsigned sn) const
    767 {
    768    switch (sn) {
    769    case TGSI_SEMANTIC_INSTANCEID:
    770    case TGSI_SEMANTIC_VERTEXID:
    771       return 1;
    772 #if 0
    773    case TGSI_SEMANTIC_LAYER:
    774    case TGSI_SEMANTIC_VIEWPORTINDEX:
    775       return 0;
    776 #endif
    777    case TGSI_SEMANTIC_PRIMID:
    778       return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
    779    default:
    780       return 0;
    781    }
    782 }
    783 
    784 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
    785 {
    786    unsigned i;
    787    unsigned sn = TGSI_SEMANTIC_GENERIC;
    788    unsigned si = 0;
    789    const unsigned first = decl->Range.First, last = decl->Range.Last;
    790 
    791    if (decl->Declaration.Semantic) {
    792       sn = decl->Semantic.Name;
    793       si = decl->Semantic.Index;
    794    }
    795 
    796    switch (decl->Declaration.File) {
    797    case TGSI_FILE_INPUT:
    798       if (info->type == PIPE_SHADER_VERTEX) {
    799          // all vertex attributes are equal
    800          for (i = first; i <= last; ++i) {
    801             info->in[i].sn = TGSI_SEMANTIC_GENERIC;
    802             info->in[i].si = i;
    803          }
    804       } else {
    805          for (i = first; i <= last; ++i, ++si) {
    806             info->in[i].id = i;
    807             info->in[i].sn = sn;
    808             info->in[i].si = si;
    809             if (info->type == PIPE_SHADER_FRAGMENT) {
    810                // translate interpolation mode
    811                switch (decl->Interp.Interpolate) {
    812                case TGSI_INTERPOLATE_CONSTANT:
    813                   info->in[i].flat = 1;
    814                   break;
    815                case TGSI_INTERPOLATE_COLOR:
    816                   info->in[i].sc = 1;
    817                   break;
    818                case TGSI_INTERPOLATE_LINEAR:
    819                   info->in[i].linear = 1;
    820                   break;
    821                default:
    822                   break;
    823                }
    824                if (decl->Interp.Centroid)
    825                   info->in[i].centroid = 1;
    826             }
    827          }
    828       }
    829       break;
    830    case TGSI_FILE_OUTPUT:
    831       for (i = first; i <= last; ++i, ++si) {
    832          switch (sn) {
    833          case TGSI_SEMANTIC_POSITION:
    834             if (info->type == PIPE_SHADER_FRAGMENT)
    835                info->io.fragDepth = i;
    836             else
    837             if (clipVertexOutput < 0)
    838                clipVertexOutput = i;
    839             break;
    840          case TGSI_SEMANTIC_COLOR:
    841             if (info->type == PIPE_SHADER_FRAGMENT)
    842                info->prop.fp.numColourResults++;
    843             break;
    844          case TGSI_SEMANTIC_EDGEFLAG:
    845             info->io.edgeFlagOut = i;
    846             break;
    847          case TGSI_SEMANTIC_CLIPVERTEX:
    848             clipVertexOutput = i;
    849             break;
    850          case TGSI_SEMANTIC_CLIPDIST:
    851             info->io.clipDistanceMask |=
    852                decl->Declaration.UsageMask << (si * 4);
    853             info->io.genUserClip = -1;
    854             break;
    855          default:
    856             break;
    857          }
    858          info->out[i].id = i;
    859          info->out[i].sn = sn;
    860          info->out[i].si = si;
    861       }
    862       break;
    863    case TGSI_FILE_SYSTEM_VALUE:
    864       switch (sn) {
    865       case TGSI_SEMANTIC_INSTANCEID:
    866          info->io.instanceId = first;
    867          break;
    868       case TGSI_SEMANTIC_VERTEXID:
    869          info->io.vertexId = first;
    870          break;
    871       default:
    872          break;
    873       }
    874       for (i = first; i <= last; ++i, ++si) {
    875          info->sv[i].sn = sn;
    876          info->sv[i].si = si;
    877          info->sv[i].input = inferSysValDirection(sn);
    878       }
    879       break;
    880    case TGSI_FILE_SAMPLER_VIEW:
    881       for (i = first; i <= last; ++i)
    882          samplerViewTargets[i] = decl->SamplerView.Resource;
    883       break;
    884    case TGSI_FILE_IMMEDIATE_ARRAY:
    885    {
    886       if (decl->Dim.Index2D >= immdArrayCount)
    887          immdArrayCount = decl->Dim.Index2D + 1;
    888       immdArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
    889       int c;
    890       uint32_t base, count;
    891       switch (decl->Declaration.UsageMask) {
    892       case 0x1: c = 1; break;
    893       case 0x3: c = 2; break;
    894       default:
    895          c = 4;
    896          break;
    897       }
    898       immdArrays[decl->Dim.Index2D].u32 |= c;
    899       count = (last + 1) * c;
    900       base = info->immd.bufSize / 4;
    901       info->immd.bufSize = (info->immd.bufSize + count * 4 + 0xf) & ~0xf;
    902       info->immd.buf = (uint32_t *)REALLOC(info->immd.buf, base * 4,
    903                                            info->immd.bufSize);
    904       // NOTE: this assumes array declarations are ordered by Dim.Index2D
    905       for (i = 0; i < count; ++i)
    906          info->immd.buf[base + i] = decl->ImmediateData.u[i].Uint;
    907    }
    908       break;
    909    case TGSI_FILE_TEMPORARY_ARRAY:
    910    {
    911       if (decl->Dim.Index2D >= tempArrayCount)
    912          tempArrayCount = decl->Dim.Index2D + 1;
    913       tempArrays[decl->Dim.Index2D].u32 = (last + 1) << 2;
    914       int c;
    915       uint32_t count;
    916       switch (decl->Declaration.UsageMask) {
    917       case 0x1: c = 1; break;
    918       case 0x3: c = 2; break;
    919       default:
    920          c = 4;
    921          break;
    922       }
    923       tempArrays[decl->Dim.Index2D].u32 |= c;
    924       count = (last + 1) * c;
    925       info->bin.tlsSpace += (info->bin.tlsSpace + count * 4 + 0xf) & ~0xf;
    926    }
    927       break;
    928    case TGSI_FILE_NULL:
    929    case TGSI_FILE_TEMPORARY:
    930    case TGSI_FILE_ADDRESS:
    931    case TGSI_FILE_CONSTANT:
    932    case TGSI_FILE_IMMEDIATE:
    933    case TGSI_FILE_PREDICATE:
    934    case TGSI_FILE_SAMPLER:
    935       break;
    936    default:
    937       ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
    938       return false;
    939    }
    940    return true;
    941 }
    942 
    943 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
    944 {
    945    return insn.getOpcode() == TGSI_OPCODE_MOV &&
    946       insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
    947       insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
    948 }
    949 
    950 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
    951 {
    952    Instruction insn(inst);
    953 
    954    if (insn.dstCount()) {
    955       if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
    956          Instruction::DstRegister dst = insn.getDst(0);
    957 
    958          if (dst.isIndirect(0))
    959             for (unsigned i = 0; i < info->numOutputs; ++i)
    960                info->out[i].mask = 0xf;
    961          else
    962             info->out[dst.getIndex(0)].mask |= dst.getMask();
    963 
    964          if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
    965             info->out[dst.getIndex(0)].mask &= 1;
    966 
    967          if (isEdgeFlagPassthrough(insn))
    968             info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
    969       } else
    970       if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
    971          if (insn.getDst(0).isIndirect(0))
    972             mainTempsInLMem = TRUE;
    973       }
    974    }
    975 
    976    for (unsigned s = 0; s < insn.srcCount(); ++s) {
    977       Instruction::SrcRegister src = insn.getSrc(s);
    978       if (src.getFile() == TGSI_FILE_TEMPORARY)
    979          if (src.isIndirect(0))
    980             mainTempsInLMem = TRUE;
    981       if (src.getFile() != TGSI_FILE_INPUT)
    982          continue;
    983       unsigned mask = insn.srcMask(s);
    984 
    985       if (src.isIndirect(0)) {
    986          for (unsigned i = 0; i < info->numInputs; ++i)
    987             info->in[i].mask = 0xf;
    988       } else {
    989          for (unsigned c = 0; c < 4; ++c) {
    990             if (!(mask & (1 << c)))
    991                continue;
    992             int k = src.getSwizzle(c);
    993             int i = src.getIndex(0);
    994             if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
    995                if (k <= TGSI_SWIZZLE_W)
    996                   info->in[i].mask |= 1 << k;
    997          }
    998       }
    999    }
   1000    return true;
   1001 }
   1002 
   1003 nv50_ir::TexInstruction::Target
   1004 Instruction::getTexture(const tgsi::Source *code, int s) const
   1005 {
   1006    switch (getSrc(s).getFile()) {
   1007    case TGSI_FILE_SAMPLER_VIEW: {
   1008       // XXX: indirect access
   1009       unsigned int r = getSrc(s).getIndex(0);
   1010       assert(r < code->samplerViewCount);
   1011       return translateTexture(code->samplerViewTargets[r]);
   1012    }
   1013    default:
   1014       return translateTexture(insn->Texture.Texture);
   1015    }
   1016 }
   1017 
   1018 } // namespace tgsi
   1019 
   1020 namespace {
   1021 
   1022 using namespace nv50_ir;
   1023 
// Converts the instructions of a scanned tgsi::Source into nv50 IR for a
// Program. The per-instruction state (current TGSI instruction, inferred
// types) lives in members that handleInstruction() refreshes.
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // A function being built together with the value map used when accessing
   // register arrays inside it.
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   Value *getVertexBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   // fetch component c of source s / destination d of the current instruction
   Value *fetchSrc(int s, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   // variants working on an explicit register, ptr is the indirect address
   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   Value *applySrcMod(Value *, int s, int c);

   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R);
   void handleTXQ(Value *dst0[4], enum TexQuery);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   Value *buildDot(int dim);

   // Pass declared here to rewrite call arguments and function prototypes;
   // its member definitions are not part of this declaration.
   class BindArgumentsPass : public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const struct tgsi::Source *code;
   const struct nv50_ir_prog_info *info;

   struct {
      std::map<unsigned, Subroutine> map; // keyed by instruction pointer / label
      Subroutine *cur;
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi; // instruction currently being converted

   DataType dstTy;
   DataType srcTy;

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray pData; // TGSI_FILE_PREDICATE
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
   std::vector<DataArray> lData; // TGSI_FILE_TEMPORARY_ARRAY
   std::vector<DataArray> iData; // TGSI_FILE_IMMEDIATE_ARRAY

   Value *zero;
   Value *fragCoord[4];
   Value *clipVtx[4];

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid; // bit s set when vtxBase[s] has been computed

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop
};
   1134 
   1135 Symbol *
   1136 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
   1137 {
   1138    const int swz = src.getSwizzle(c);
   1139 
   1140    return makeSym(src.getFile(),
   1141                   src.is2D() ? src.getIndex(1) : 0,
   1142                   src.isIndirect(0) ? -1 : src.getIndex(0), swz,
   1143                   src.getIndex(0) * 16 + swz * 4);
   1144 }
   1145 
   1146 Symbol *
   1147 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
   1148 {
   1149    return makeSym(dst.getFile(),
   1150                   dst.is2D() ? dst.getIndex(1) : 0,
   1151                   dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
   1152                   dst.getIndex(0) * 16 + c * 4);
   1153 }
   1154 
   1155 Symbol *
   1156 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
   1157 {
   1158    Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
   1159 
   1160    sym->reg.fileIndex = fileIdx;
   1161 
   1162    if (idx >= 0) {
   1163       if (sym->reg.file == FILE_SHADER_INPUT)
   1164          sym->setOffset(info->in[idx].slot[c] * 4);
   1165       else
   1166       if (sym->reg.file == FILE_SHADER_OUTPUT)
   1167          sym->setOffset(info->out[idx].slot[c] * 4);
   1168       else
   1169       if (sym->reg.file == FILE_SYSTEM_VALUE)
   1170          sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
   1171       else
   1172          sym->setOffset(address);
   1173    } else {
   1174       sym->setOffset(address);
   1175    }
   1176    return sym;
   1177 }
   1178 
   1179 static inline uint8_t
   1180 translateInterpMode(const struct nv50_ir_varying *var, operation& op)
   1181 {
   1182    uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
   1183 
   1184    if (var->flat)
   1185       mode = NV50_IR_INTERP_FLAT;
   1186    else
   1187    if (var->linear)
   1188       mode = NV50_IR_INTERP_LINEAR;
   1189    else
   1190    if (var->sc)
   1191       mode = NV50_IR_INTERP_SC;
   1192 
   1193    op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
   1194       ? OP_PINTERP : OP_LINTERP;
   1195 
   1196    if (var->centroid)
   1197       mode |= NV50_IR_INTERP_CENTROID;
   1198 
   1199    return mode;
   1200 }
   1201 
   1202 Value *
   1203 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
   1204 {
   1205    operation op;
   1206 
   1207    // XXX: no way to know interpolation mode if we don't know what's accessed
   1208    const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
   1209                                                       src.getIndex(0)], op);
   1210 
   1211    Instruction *insn = new_Instruction(func, op, TYPE_F32);
   1212 
   1213    insn->setDef(0, getScratch());
   1214    insn->setSrc(0, srcToSym(src, c));
   1215    if (op == OP_PINTERP)
   1216       insn->setSrc(1, fragCoord[3]);
   1217    if (ptr)
   1218       insn->setIndirect(0, 0, ptr);
   1219 
   1220    insn->setInterpolate(mode);
   1221 
   1222    bb->insertTail(insn);
   1223    return insn->getDef(0);
   1224 }
   1225 
   1226 Value *
   1227 Converter::applySrcMod(Value *val, int s, int c)
   1228 {
   1229    Modifier m = tgsi.getSrc(s).getMod(c);
   1230    DataType ty = tgsi.inferSrcType();
   1231 
   1232    if (m & Modifier(NV50_IR_MOD_ABS))
   1233       val = mkOp1v(OP_ABS, ty, getScratch(), val);
   1234 
   1235    if (m & Modifier(NV50_IR_MOD_NEG))
   1236       val = mkOp1v(OP_NEG, ty, getScratch(), val);
   1237 
   1238    return val;
   1239 }
   1240 
   1241 Value *
   1242 Converter::getVertexBase(int s)
   1243 {
   1244    assert(s < 5);
   1245    if (!(vtxBaseValid & (1 << s))) {
   1246       const int index = tgsi.getSrc(s).getIndex(1);
   1247       Value *rel = NULL;
   1248       if (tgsi.getSrc(s).isIndirect(1))
   1249          rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
   1250       vtxBaseValid |= 1 << s;
   1251       vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);
   1252    }
   1253    return vtxBase[s];
   1254 }
   1255 
   1256 Value *
   1257 Converter::fetchSrc(int s, int c)
   1258 {
   1259    Value *res;
   1260    Value *ptr = NULL, *dimRel = NULL;
   1261 
   1262    tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
   1263 
   1264    if (src.isIndirect(0))
   1265       ptr = fetchSrc(src.getIndirect(0), 0, NULL);
   1266 
   1267    if (src.is2D()) {
   1268       switch (src.getFile()) {
   1269       case TGSI_FILE_INPUT:
   1270          dimRel = getVertexBase(s);
   1271          break;
   1272       case TGSI_FILE_CONSTANT:
   1273          // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
   1274          if (src.isIndirect(1))
   1275             dimRel = fetchSrc(src.getIndirect(1), 0, 0);
   1276          break;
   1277       default:
   1278          break;
   1279       }
   1280    }
   1281 
   1282    res = fetchSrc(src, c, ptr);
   1283 
   1284    if (dimRel)
   1285       res->getInsn()->setIndirect(0, 1, dimRel);
   1286 
   1287    return applySrcMod(res, s, c);
   1288 }
   1289 
   1290 Converter::DataArray *
   1291 Converter::getArrayForFile(unsigned file, int idx)
   1292 {
   1293    switch (file) {
   1294    case TGSI_FILE_TEMPORARY:
   1295       return &tData;
   1296    case TGSI_FILE_PREDICATE:
   1297       return &pData;
   1298    case TGSI_FILE_ADDRESS:
   1299       return &aData;
   1300    case TGSI_FILE_TEMPORARY_ARRAY:
   1301       assert(idx < code->tempArrayCount);
   1302       return &lData[idx];
   1303    case TGSI_FILE_IMMEDIATE_ARRAY:
   1304       assert(idx < code->immdArrayCount);
   1305       return &iData[idx];
   1306    case TGSI_FILE_OUTPUT:
   1307       assert(prog->getType() == Program::TYPE_FRAGMENT);
   1308       return &oData;
   1309    default:
   1310       assert(!"invalid/unhandled TGSI source file");
   1311       return NULL;
   1312    }
   1313 }
   1314 
   1315 Value *
   1316 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
   1317 {
   1318    const int idx2d = src.is2D() ? src.getIndex(1) : 0;
   1319    const int idx = src.getIndex(0);
   1320    const int swz = src.getSwizzle(c);
   1321 
   1322    switch (src.getFile()) {
   1323    case TGSI_FILE_IMMEDIATE:
   1324       assert(!ptr);
   1325       return loadImm(NULL, info->immd.data[idx * 4 + swz]);
   1326    case TGSI_FILE_CONSTANT:
   1327       return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
   1328    case TGSI_FILE_INPUT:
   1329       if (prog->getType() == Program::TYPE_FRAGMENT) {
   1330          // don't load masked inputs, won't be assigned a slot
   1331          if (!ptr && !(info->in[idx].mask & (1 << swz)))
   1332             return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
   1333 	 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
   1334             return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
   1335          return interpolate(src, c, ptr);
   1336       }
   1337       return mkLoad(TYPE_U32, srcToSym(src, c), ptr);
   1338    case TGSI_FILE_OUTPUT:
   1339       assert(!"load from output file");
   1340       return NULL;
   1341    case TGSI_FILE_SYSTEM_VALUE:
   1342       assert(!ptr);
   1343       return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
   1344    default:
   1345       return getArrayForFile(src.getFile(), idx2d)->load(
   1346          sub.cur->values, idx, swz, ptr);
   1347    }
   1348 }
   1349 
   1350 Value *
   1351 Converter::acquireDst(int d, int c)
   1352 {
   1353    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   1354    const unsigned f = dst.getFile();
   1355    const int idx = dst.getIndex(0);
   1356    const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
   1357 
   1358    if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
   1359       return NULL;
   1360 
   1361    if (dst.isIndirect(0) ||
   1362        f == TGSI_FILE_TEMPORARY_ARRAY ||
   1363        f == TGSI_FILE_SYSTEM_VALUE ||
   1364        (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
   1365       return getScratch();
   1366 
   1367    return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
   1368 }
   1369 
   1370 void
   1371 Converter::storeDst(int d, int c, Value *val)
   1372 {
   1373    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   1374 
   1375    switch (tgsi.getSaturate()) {
   1376    case TGSI_SAT_NONE:
   1377       break;
   1378    case TGSI_SAT_ZERO_ONE:
   1379       mkOp1(OP_SAT, dstTy, val, val);
   1380       break;
   1381    case TGSI_SAT_MINUS_PLUS_ONE:
   1382       mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
   1383       mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
   1384       break;
   1385    default:
   1386       assert(!"invalid saturation mode");
   1387       break;
   1388    }
   1389 
   1390    Value *ptr = dst.isIndirect(0) ?
   1391       fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
   1392 
   1393    if (info->io.genUserClip > 0 &&
   1394        dst.getFile() == TGSI_FILE_OUTPUT &&
   1395        !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
   1396       mkMov(clipVtx[c], val);
   1397       val = clipVtx[c];
   1398    }
   1399 
   1400    storeDst(dst, c, val, ptr);
   1401 }
   1402 
   1403 void
   1404 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
   1405                     Value *val, Value *ptr)
   1406 {
   1407    const unsigned f = dst.getFile();
   1408    const int idx = dst.getIndex(0);
   1409    const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
   1410 
   1411    if (f == TGSI_FILE_SYSTEM_VALUE) {
   1412       assert(!ptr);
   1413       mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
   1414    } else
   1415    if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
   1416       if (ptr || (info->out[idx].mask & (1 << c)))
   1417          mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
   1418    } else
   1419    if (f == TGSI_FILE_TEMPORARY ||
   1420        f == TGSI_FILE_TEMPORARY_ARRAY ||
   1421        f == TGSI_FILE_PREDICATE ||
   1422        f == TGSI_FILE_ADDRESS ||
   1423        f == TGSI_FILE_OUTPUT) {
   1424       getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
   1425    } else {
   1426       assert(!"invalid dst file");
   1427    }
   1428 }
   1429 
// Iterates chan over 0..3 and executes the statement following the macro
// only for channels that destination d of inst does not mask out.
// chan must be a variable declared by the caller.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
   for (chan = 0; chan < 4; ++chan)                 \
      if (!inst.getDst(d).isMasked(chan))
   1433 
   1434 Value *
   1435 Converter::buildDot(int dim)
   1436 {
   1437    assert(dim > 0);
   1438 
   1439    Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
   1440    Value *dotp = getScratch();
   1441 
   1442    mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
   1443 
   1444    for (int c = 1; c < dim; ++c) {
   1445       src0 = fetchSrc(0, c);
   1446       src1 = fetchSrc(1, c);
   1447       mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
   1448    }
   1449    return dotp;
   1450 }
   1451 
   1452 void
   1453 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
   1454 {
   1455    FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
   1456    join->fixed = 1;
   1457    conv->insertHead(join);
   1458 
   1459    fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
   1460    fork->insertBefore(fork->getExit(), fork->joinAt);
   1461 }
   1462 
   1463 void
   1464 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
   1465 {
   1466    unsigned rIdx = 0, sIdx = 0;
   1467 
   1468    if (R >= 0)
   1469       rIdx = tgsi.getSrc(R).getIndex(0);
   1470    if (S >= 0)
   1471       sIdx = tgsi.getSrc(S).getIndex(0);
   1472 
   1473    tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
   1474 
   1475    if (tgsi.getSrc(R).isIndirect(0)) {
   1476       tex->tex.rIndirectSrc = s;
   1477       tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
   1478    }
   1479    if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
   1480       tex->tex.sIndirectSrc = s;
   1481       tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
   1482    }
   1483 }
   1484 
   1485 void
   1486 Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
   1487 {
   1488    TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
   1489    tex->tex.query = query;
   1490    unsigned int c, d;
   1491 
   1492    for (d = 0, c = 0; c < 4; ++c) {
   1493       if (!dst0[c])
   1494          continue;
   1495       tex->tex.mask |= 1 << c;
   1496       tex->setDef(d++, dst0[c]);
   1497    }
   1498    tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
   1499 
   1500    setTexRS(tex, c, 1, -1);
   1501 
   1502    bb->insertTail(tex);
   1503 }
   1504 
   1505 void
   1506 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
   1507 {
   1508    Value *proj = fetchSrc(0, 3);
   1509    Instruction *insn = proj->getUniqueInsn();
   1510    int c;
   1511 
   1512    if (insn->op == OP_PINTERP) {
   1513       bb->insertTail(insn = cloneForward(func, insn));
   1514       insn->op = OP_LINTERP;
   1515       insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
   1516       insn->setSrc(1, NULL);
   1517       proj = insn->getDef(0);
   1518    }
   1519    proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
   1520 
   1521    for (c = 0; c < 4; ++c) {
   1522       if (!(mask & (1 << c)))
   1523          continue;
   1524       if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
   1525          continue;
   1526       mask &= ~(1 << c);
   1527 
   1528       bb->insertTail(insn = cloneForward(func, insn));
   1529       insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
   1530       insn->setSrc(1, proj);
   1531       dst[c] = insn->getDef(0);
   1532    }
   1533    if (!mask)
   1534       return;
   1535 
   1536    proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
   1537 
   1538    for (c = 0; c < 4; ++c)
   1539       if (mask & (1 << c))
   1540          dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
   1541 }
   1542 
// order of nv50 ir sources: x y z layer lod/bias shadow
// order of TGSI TEX sources: x y z layer shadow lod/bias
//  lowering will finally set the hw specific order (like array first on nvc0)
//
// Emits the texture instruction for the current TGSI instruction.
//  dst        : per-component destinations, NULL for unwritten components
//  R, S       : TGSI sources holding the resource and the sampler
//  L, C       : location of the lod/bias and of the shadow reference; the
//               high nibble is the TGSI source, the low bits the component
//               (C == 0x0f asks for the component to be guessed)
//  Dx, Dy     : location of the derivatives, same encoding, used by OP_TXD
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
   Value *val;
   Value *arg[4], *src[8];
   Value *lod = NULL, *shd = NULL;
   unsigned int s, c, d;
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());

   TexInstruction::Target tgt = tgsi.getTexture(code, R);

   // arg[] keeps the coordinates as fetched, src[] what is finally passed on
   for (s = 0; s < tgt.getArgCount(); ++s)
      arg[s] = src[s] = fetchSrc(0, s);

   if (texi->op == OP_TXL || texi->op == OP_TXB)
      lod = fetchSrc(L >> 4, L & 3);

   if (C == 0x0f)
      C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src

   if (tgt.isShadow())
      shd = fetchSrc(C >> 4, C & 3);

   if (texi->op == OP_TXD) {
      for (c = 0; c < tgt.getDim(); ++c) {
         texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
         texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
      }
   }

   // cube textures don't care about projection value, it's divided out
   if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
      unsigned int n = tgt.getDim();
      if (shd) {
         // project the shadow reference together with the coordinates
         arg[n] = shd;
         ++n;
         assert(tgt.getDim() == tgt.getArgCount());
      }
      loadProjTexCoords(src, arg, (1 << n) - 1);
      if (shd)
         shd = src[n - 1];
   }

   if (tgt.isCube()) {
      // scale the coordinates by the reciprocal of the largest magnitude
      for (c = 0; c < 3; ++c)
         src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
      val = getScratch();
      mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
      mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
      mkOp1(OP_RCP, TYPE_F32, val, val);
      for (c = 0; c < 3; ++c)
         src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
   }

   for (c = 0, d = 0; c < 4; ++c) {
      if (dst[c]) {
         texi->setDef(d++, dst[c]);
         texi->tex.mask |= 1 << c;
      } else {
         // NOTE: maybe hook up def too, for CSE
      }
   }
   // s ends up at the first free source slot and is reused below
   for (s = 0; s < tgt.getArgCount(); ++s)
      texi->setSrc(s, src[s]);
   if (lod)
      texi->setSrc(s++, lod);
   if (shd)
      texi->setSrc(s++, shd);

   setTexRS(texi, s, R, S);

   if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
      texi->tex.levelZero = true;

   bb->insertTail(texi);
}
   1622 
   1623 // 1st source: xyz = coordinates, w = lod
   1624 // 2nd source: offset
   1625 void
   1626 Converter::handleTXF(Value *dst[4], int R)
   1627 {
   1628    TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
   1629    unsigned int c, d, s;
   1630 
   1631    texi->tex.target = tgsi.getTexture(code, R);
   1632 
   1633    for (c = 0, d = 0; c < 4; ++c) {
   1634       if (dst[c]) {
   1635          texi->setDef(d++, dst[c]);
   1636          texi->tex.mask |= 1 << c;
   1637       }
   1638    }
   1639    for (c = 0; c < texi->tex.target.getArgCount(); ++c)
   1640       texi->setSrc(c, fetchSrc(0, c));
   1641    texi->setSrc(c++, fetchSrc(0, 3)); // lod
   1642 
   1643    setTexRS(texi, c, R, -1);
   1644 
   1645    for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
   1646       for (c = 0; c < 3; ++c) {
   1647          texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
   1648          if (texi->tex.offset[s][c])
   1649             texi->tex.useOffsets = s + 1;
   1650       }
   1651    }
   1652 
   1653    bb->insertTail(texi);
   1654 }
   1655 
   1656 void
   1657 Converter::handleLIT(Value *dst0[4])
   1658 {
   1659    Value *val0 = NULL;
   1660    unsigned int mask = tgsi.getDst(0).getMask();
   1661 
   1662    if (mask & (1 << 0))
   1663       loadImm(dst0[0], 1.0f);
   1664 
   1665    if (mask & (1 << 3))
   1666       loadImm(dst0[3], 1.0f);
   1667 
   1668    if (mask & (3 << 1)) {
   1669       val0 = getScratch();
   1670       mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
   1671       if (mask & (1 << 1))
   1672          mkMov(dst0[1], val0);
   1673    }
   1674 
   1675    if (mask & (1 << 2)) {
   1676       Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
   1677       Value *val1 = getScratch(), *val3 = getScratch();
   1678 
   1679       Value *pos128 = loadImm(NULL, +127.999999f);
   1680       Value *neg128 = loadImm(NULL, -127.999999f);
   1681 
   1682       mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
   1683       mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
   1684       mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
   1685       mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
   1686 
   1687       mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
   1688    }
   1689 }
   1690 
   1691 Converter::Subroutine *
   1692 Converter::getSubroutine(unsigned ip)
   1693 {
   1694    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
   1695 
   1696    if (it == sub.map.end())
   1697       it = sub.map.insert(std::make_pair(
   1698               ip, Subroutine(new Function(prog, "SUB", ip)))).first;
   1699 
   1700    return &it->second;
   1701 }
   1702 
   1703 Converter::Subroutine *
   1704 Converter::getSubroutine(Function *f)
   1705 {
   1706    unsigned ip = f->getLabel();
   1707    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
   1708 
   1709    if (it == sub.map.end())
   1710       it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
   1711 
   1712    return &it->second;
   1713 }
   1714 
   1715 bool
   1716 Converter::isEndOfSubroutine(uint ip)
   1717 {
   1718    assert(ip < code->scan.num_instructions);
   1719    tgsi::Instruction insn(&code->insns[ip]);
   1720    return (insn.getOpcode() == TGSI_OPCODE_END ||
   1721            insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
   1722            // does END occur at end of main or the very end ?
   1723            insn.getOpcode() == TGSI_OPCODE_BGNSUB);
   1724 }
   1725 
   1726 bool
   1727 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
   1728 {
   1729    Value *dst0[4], *rDst0[4];
   1730    Value *src0, *src1, *src2;
   1731    Value *val0, *val1;
   1732    int c;
   1733 
   1734    tgsi = tgsi::Instruction(insn);
   1735 
   1736    bool useScratchDst = tgsi.checkDstSrcAliasing();
   1737 
   1738    operation op = tgsi.getOP();
   1739    dstTy = tgsi.inferDstType();
   1740    srcTy = tgsi.inferSrcType();
   1741 
   1742    unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
   1743 
   1744    if (tgsi.dstCount()) {
   1745       for (c = 0; c < 4; ++c) {
   1746          rDst0[c] = acquireDst(0, c);
   1747          dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
   1748       }
   1749    }
   1750 
   1751    switch (tgsi.getOpcode()) {
   1752    case TGSI_OPCODE_ADD:
   1753    case TGSI_OPCODE_UADD:
   1754    case TGSI_OPCODE_AND:
   1755    case TGSI_OPCODE_DIV:
   1756    case TGSI_OPCODE_IDIV:
   1757    case TGSI_OPCODE_UDIV:
   1758    case TGSI_OPCODE_MAX:
   1759    case TGSI_OPCODE_MIN:
   1760    case TGSI_OPCODE_IMAX:
   1761    case TGSI_OPCODE_IMIN:
   1762    case TGSI_OPCODE_UMAX:
   1763    case TGSI_OPCODE_UMIN:
   1764    case TGSI_OPCODE_MOD:
   1765    case TGSI_OPCODE_UMOD:
   1766    case TGSI_OPCODE_MUL:
   1767    case TGSI_OPCODE_UMUL:
   1768    case TGSI_OPCODE_OR:
   1769    case TGSI_OPCODE_POW:
   1770    case TGSI_OPCODE_SHL:
   1771    case TGSI_OPCODE_ISHR:
   1772    case TGSI_OPCODE_USHR:
   1773    case TGSI_OPCODE_SUB:
   1774    case TGSI_OPCODE_XOR:
   1775       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1776          src0 = fetchSrc(0, c);
   1777          src1 = fetchSrc(1, c);
   1778          mkOp2(op, dstTy, dst0[c], src0, src1);
   1779       }
   1780       break;
   1781    case TGSI_OPCODE_MAD:
   1782    case TGSI_OPCODE_UMAD:
   1783    case TGSI_OPCODE_SAD:
   1784       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1785          src0 = fetchSrc(0, c);
   1786          src1 = fetchSrc(1, c);
   1787          src2 = fetchSrc(2, c);
   1788          mkOp3(op, dstTy, dst0[c], src0, src1, src2);
   1789       }
   1790       break;
   1791    case TGSI_OPCODE_MOV:
   1792    case TGSI_OPCODE_ABS:
   1793    case TGSI_OPCODE_CEIL:
   1794    case TGSI_OPCODE_FLR:
   1795    case TGSI_OPCODE_TRUNC:
   1796    case TGSI_OPCODE_RCP:
   1797    case TGSI_OPCODE_IABS:
   1798    case TGSI_OPCODE_INEG:
   1799    case TGSI_OPCODE_NOT:
   1800    case TGSI_OPCODE_DDX:
   1801    case TGSI_OPCODE_DDY:
   1802       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1803          mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
   1804       break;
   1805    case TGSI_OPCODE_RSQ:
   1806       src0 = fetchSrc(0, 0);
   1807       val0 = getScratch();
   1808       mkOp1(OP_ABS, TYPE_F32, val0, src0);
   1809       mkOp1(OP_RSQ, TYPE_F32, val0, val0);
   1810       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1811          mkMov(dst0[c], val0);
   1812       break;
   1813    case TGSI_OPCODE_ARL:
   1814       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1815          src0 = fetchSrc(0, c);
   1816          mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
   1817          mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
   1818       }
   1819       break;
   1820    case TGSI_OPCODE_UARL:
   1821       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1822          mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
   1823       break;
   1824    case TGSI_OPCODE_EX2:
   1825    case TGSI_OPCODE_LG2:
   1826       val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
   1827       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1828          mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
   1829       break;
   1830    case TGSI_OPCODE_COS:
   1831    case TGSI_OPCODE_SIN:
   1832       val0 = getScratch();
   1833       if (mask & 7) {
   1834          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
   1835          mkOp1(op, TYPE_F32, val0, val0);
   1836          for (c = 0; c < 3; ++c)
   1837             if (dst0[c])
   1838                mkMov(dst0[c], val0);
   1839       }
   1840       if (dst0[3]) {
   1841          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
   1842          mkOp1(op, TYPE_F32, dst0[3], val0);
   1843       }
   1844       break;
   1845    case TGSI_OPCODE_SCS:
   1846       if (mask & 3) {
   1847          val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
   1848          if (dst0[0])
   1849             mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
   1850          if (dst0[1])
   1851             mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
   1852       }
   1853       if (dst0[2])
   1854          loadImm(dst0[2], 0.0f);
   1855       if (dst0[3])
   1856          loadImm(dst0[3], 1.0f);
   1857       break;
   1858    case TGSI_OPCODE_EXP:
   1859       src0 = fetchSrc(0, 0);
   1860       val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
   1861       if (dst0[1])
   1862          mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
   1863       if (dst0[0])
   1864          mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
   1865       if (dst0[2])
   1866          mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
   1867       if (dst0[3])
   1868          loadImm(dst0[3], 1.0f);
   1869       break;
   1870    case TGSI_OPCODE_LOG:
   1871       src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
   1872       val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
   1873       if (dst0[0] || dst0[1])
   1874          val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
   1875       if (dst0[1]) {
   1876          mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
   1877          mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
   1878          mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
   1879       }
   1880       if (dst0[3])
   1881          loadImm(dst0[3], 1.0f);
   1882       break;
   1883    case TGSI_OPCODE_DP2:
   1884       val0 = buildDot(2);
   1885       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1886          mkMov(dst0[c], val0);
   1887       break;
   1888    case TGSI_OPCODE_DP3:
   1889       val0 = buildDot(3);
   1890       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1891          mkMov(dst0[c], val0);
   1892       break;
   1893    case TGSI_OPCODE_DP4:
   1894       val0 = buildDot(4);
   1895       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1896          mkMov(dst0[c], val0);
   1897       break;
   1898    case TGSI_OPCODE_DPH:
   1899       val0 = buildDot(3);
   1900       src1 = fetchSrc(1, 3);
   1901       mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
   1902       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1903          mkMov(dst0[c], val0);
   1904       break;
   1905    case TGSI_OPCODE_DST:
   1906       if (dst0[0])
   1907          loadImm(dst0[0], 1.0f);
   1908       if (dst0[1]) {
   1909          src0 = fetchSrc(0, 1);
   1910          src1 = fetchSrc(1, 1);
   1911          mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
   1912       }
   1913       if (dst0[2])
   1914          mkMov(dst0[2], fetchSrc(0, 2));
   1915       if (dst0[3])
   1916          mkMov(dst0[3], fetchSrc(1, 3));
   1917       break;
   1918    case TGSI_OPCODE_LRP:
   1919       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1920          src0 = fetchSrc(0, c);
   1921          src1 = fetchSrc(1, c);
   1922          src2 = fetchSrc(2, c);
   1923          mkOp3(OP_MAD, TYPE_F32, dst0[c],
   1924                mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
   1925       }
   1926       break;
   1927    case TGSI_OPCODE_LIT:
   1928       handleLIT(dst0);
   1929       break;
   1930    case TGSI_OPCODE_XPD:
   1931       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1932          if (c < 3) {
   1933             val0 = getSSA();
   1934             src0 = fetchSrc(1, (c + 1) % 3);
   1935             src1 = fetchSrc(0, (c + 2) % 3);
   1936             mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
   1937             mkOp1(OP_NEG, TYPE_F32, val0, val0);
   1938 
   1939             src0 = fetchSrc(0, (c + 1) % 3);
   1940             src1 = fetchSrc(1, (c + 2) % 3);
   1941             mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
   1942          } else {
   1943             loadImm(dst0[c], 1.0f);
   1944          }
   1945       }
   1946       break;
   1947    case TGSI_OPCODE_ISSG:
   1948    case TGSI_OPCODE_SSG:
   1949       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1950          src0 = fetchSrc(0, c);
   1951          val0 = getScratch();
   1952          val1 = getScratch();
   1953          mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
   1954          mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
   1955          if (srcTy == TYPE_F32)
   1956             mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
   1957          else
   1958             mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
   1959       }
   1960       break;
   1961    case TGSI_OPCODE_UCMP:
   1962    case TGSI_OPCODE_CMP:
   1963       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1964          src0 = fetchSrc(0, c);
   1965          src1 = fetchSrc(1, c);
   1966          src2 = fetchSrc(2, c);
   1967          if (src1 == src2)
   1968             mkMov(dst0[c], src1);
   1969          else
   1970             mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
   1971                   srcTy, dst0[c], src1, src2, src0);
   1972       }
   1973       break;
   1974    case TGSI_OPCODE_FRC:
   1975       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1976          src0 = fetchSrc(0, c);
   1977          val0 = getScratch();
   1978          mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
   1979          mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
   1980       }
   1981       break;
   1982    case TGSI_OPCODE_ROUND:
   1983       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   1984          mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
   1985          ->rnd = ROUND_NI;
   1986       break;
   1987    case TGSI_OPCODE_CLAMP:
   1988       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   1989          src0 = fetchSrc(0, c);
   1990          src1 = fetchSrc(1, c);
   1991          src2 = fetchSrc(2, c);
   1992          val0 = getScratch();
   1993          mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
   1994          mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
   1995       }
   1996       break;
   1997    case TGSI_OPCODE_SLT:
   1998    case TGSI_OPCODE_SGE:
   1999    case TGSI_OPCODE_SEQ:
   2000    case TGSI_OPCODE_SFL:
   2001    case TGSI_OPCODE_SGT:
   2002    case TGSI_OPCODE_SLE:
   2003    case TGSI_OPCODE_SNE:
   2004    case TGSI_OPCODE_STR:
   2005    case TGSI_OPCODE_ISGE:
   2006    case TGSI_OPCODE_ISLT:
   2007    case TGSI_OPCODE_USEQ:
   2008    case TGSI_OPCODE_USGE:
   2009    case TGSI_OPCODE_USLT:
   2010    case TGSI_OPCODE_USNE:
   2011       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
   2012          src0 = fetchSrc(0, c);
   2013          src1 = fetchSrc(1, c);
   2014          mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
   2015       }
   2016       break;
   2017    case TGSI_OPCODE_KIL:
   2018       val0 = new_LValue(func, FILE_PREDICATE);
   2019       for (c = 0; c < 4; ++c) {
   2020          mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
   2021          mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
   2022       }
   2023       break;
   2024    case TGSI_OPCODE_KILP:
   2025       mkOp(OP_DISCARD, TYPE_NONE, NULL);
   2026       break;
   2027    case TGSI_OPCODE_TEX:
   2028    case TGSI_OPCODE_TXB:
   2029    case TGSI_OPCODE_TXL:
   2030    case TGSI_OPCODE_TXP:
   2031       //              R  S     L     C    Dx    Dy
   2032       handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
   2033       break;
   2034    case TGSI_OPCODE_TXD:
   2035       handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
   2036       break;
   2037    case TGSI_OPCODE_SAMPLE:
   2038    case TGSI_OPCODE_SAMPLE_B:
   2039    case TGSI_OPCODE_SAMPLE_D:
   2040    case TGSI_OPCODE_SAMPLE_L:
   2041    case TGSI_OPCODE_SAMPLE_C:
   2042    case TGSI_OPCODE_SAMPLE_C_LZ:
   2043       handleTEX(dst0, 1, 2, 0x30, 0x31, 0x40, 0x50);
   2044       break;
   2045    case TGSI_OPCODE_TXF:
   2046    case TGSI_OPCODE_LOAD:
   2047       handleTXF(dst0, 1);
   2048       break;
   2049    case TGSI_OPCODE_TXQ:
   2050    case TGSI_OPCODE_SVIEWINFO:
   2051       handleTXQ(dst0, TXQ_DIMS);
   2052       break;
   2053    case TGSI_OPCODE_F2I:
   2054    case TGSI_OPCODE_F2U:
   2055       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   2056          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
   2057       break;
   2058    case TGSI_OPCODE_I2F:
   2059    case TGSI_OPCODE_U2F:
   2060       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
   2061          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
   2062       break;
   2063    case TGSI_OPCODE_EMIT:
   2064    case TGSI_OPCODE_ENDPRIM:
   2065       // get vertex stream if specified (must be immediate)
   2066       src0 = tgsi.srcCount() ?
   2067          mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
   2068       mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
   2069       break;
   2070    case TGSI_OPCODE_IF:
   2071    {
   2072       BasicBlock *ifBB = new BasicBlock(func);
   2073 
   2074       bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
   2075       condBBs.push(bb);
   2076       joinBBs.push(bb);
   2077 
   2078       mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
   2079 
   2080       setPosition(ifBB, true);
   2081    }
   2082       break;
   2083    case TGSI_OPCODE_ELSE:
   2084    {
   2085       BasicBlock *elseBB = new BasicBlock(func);
   2086       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
   2087 
   2088       forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
   2089       condBBs.push(bb);
   2090 
   2091       forkBB->getExit()->asFlow()->target.bb = elseBB;
   2092       if (!bb->isTerminated())
   2093          mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
   2094 
   2095       setPosition(elseBB, true);
   2096    }
   2097       break;
   2098    case TGSI_OPCODE_ENDIF:
   2099    {
   2100       BasicBlock *convBB = new BasicBlock(func);
   2101       BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
   2102       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
   2103 
   2104       if (!bb->isTerminated()) {
   2105          // we only want join if none of the clauses ended with CONT/BREAK/RET
   2106          if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
   2107             insertConvergenceOps(convBB, forkBB);
   2108          mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
   2109          bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
   2110       }
   2111 
   2112       if (prevBB->getExit()->op == OP_BRA) {
   2113          prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
   2114          prevBB->getExit()->asFlow()->target.bb = convBB;
   2115       }
   2116       setPosition(convBB, true);
   2117    }
   2118       break;
   2119    case TGSI_OPCODE_BGNLOOP:
   2120    {
   2121       BasicBlock *lbgnBB = new BasicBlock(func);
   2122       BasicBlock *lbrkBB = new BasicBlock(func);
   2123 
   2124       loopBBs.push(lbgnBB);
   2125       breakBBs.push(lbrkBB);
   2126       if (loopBBs.getSize() > func->loopNestingBound)
   2127          func->loopNestingBound++;
   2128 
   2129       mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
   2130 
   2131       bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
   2132       setPosition(lbgnBB, true);
   2133       mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
   2134    }
   2135       break;
   2136    case TGSI_OPCODE_ENDLOOP:
   2137    {
   2138       BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
   2139 
   2140       if (!bb->isTerminated()) {
   2141          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
   2142          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
   2143       }
   2144       setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
   2145    }
   2146       break;
   2147    case TGSI_OPCODE_BRK:
   2148    {
   2149       if (bb->isTerminated())
   2150          break;
   2151       BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
   2152       mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
   2153       bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
   2154    }
   2155       break;
   2156    case TGSI_OPCODE_CONT:
   2157    {
   2158       if (bb->isTerminated())
   2159          break;
   2160       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
   2161       mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
   2162       contBB->explicitCont = true;
   2163       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
   2164    }
   2165       break;
   2166    case TGSI_OPCODE_BGNSUB:
   2167    {
   2168       Subroutine *s = getSubroutine(ip);
   2169       BasicBlock *entry = new BasicBlock(s->f);
   2170       BasicBlock *leave = new BasicBlock(s->f);
   2171 
   2172       // multiple entrypoints possible, keep the graph connected
   2173       if (prog->getType() == Program::TYPE_COMPUTE)
   2174          prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
   2175 
   2176       sub.cur = s;
   2177       s->f->setEntry(entry);
   2178       s->f->setExit(leave);
   2179       setPosition(entry, true);
   2180       return true;
   2181    }
   2182    case TGSI_OPCODE_ENDSUB:
   2183    {
   2184       sub.cur = getSubroutine(prog->main);
   2185       setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
   2186       return true;
   2187    }
   2188    case TGSI_OPCODE_CAL:
   2189    {
   2190       Subroutine *s = getSubroutine(tgsi.getLabel());
   2191       mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
   2192       func->call.attach(&s->f->call, Graph::Edge::TREE);
   2193       return true;
   2194    }
   2195    case TGSI_OPCODE_RET:
   2196    {
   2197       if (bb->isTerminated())
   2198          return true;
   2199       BasicBlock *leave = BasicBlock::get(func->cfgExit);
   2200 
   2201       if (!isEndOfSubroutine(ip + 1)) {
   2202          // insert a PRERET at the entry if this is an early return
   2203          // (only needed for sharing code in the epilogue)
   2204          BasicBlock *pos = getBB();
   2205          setPosition(BasicBlock::get(func->cfg.getRoot()), false);
   2206          mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
   2207          setPosition(pos, true);
   2208       }
   2209       mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
   2210       bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
   2211    }
   2212       break;
   2213    case TGSI_OPCODE_END:
   2214    {
   2215       // attach and generate epilogue code
   2216       BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
   2217       bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
   2218       setPosition(epilogue, true);
   2219       if (prog->getType() == Program::TYPE_FRAGMENT)
   2220          exportOutputs();
   2221       if (info->io.genUserClip > 0)
   2222          handleUserClipPlanes();
   2223       mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   2224    }
   2225       break;
   2226    case TGSI_OPCODE_SWITCH:
   2227    case TGSI_OPCODE_CASE:
   2228       ERROR("switch/case opcode encountered, should have been lowered\n");
   2229       abort();
   2230       break;
   2231    default:
   2232       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
   2233       assert(0);
   2234       break;
   2235    }
   2236 
   2237    if (tgsi.dstCount()) {
   2238       for (c = 0; c < 4; ++c) {
   2239          if (!dst0[c])
   2240             continue;
   2241          if (dst0[c] != rDst0[c])
   2242             mkMov(rDst0[c], dst0[c]);
   2243          storeDst(0, c, rDst0[c]);
   2244       }
   2245    }
   2246    vtxBaseValid = 0;
   2247 
   2248    return true;
   2249 }
   2250 
   2251 void
   2252 Converter::handleUserClipPlanes()
   2253 {
   2254    Value *res[8];
   2255    int n, i, c;
   2256 
   2257    for (c = 0; c < 4; ++c) {
   2258       for (i = 0; i < info->io.genUserClip; ++i) {
   2259          Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
   2260                                 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
   2261          Value *ucp = mkLoad(TYPE_F32, sym, NULL);
   2262          if (c == 0)
   2263             res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
   2264          else
   2265             mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
   2266       }
   2267    }
   2268 
   2269    const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
   2270 
   2271    for (i = 0; i < info->io.genUserClip; ++i) {
   2272       n = i / 4 + first;
   2273       c = i % 4;
   2274       Symbol *sym =
   2275          mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
   2276       mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
   2277    }
   2278 }
   2279 
   2280 void
   2281 Converter::exportOutputs()
   2282 {
   2283    for (unsigned int i = 0; i < info->numOutputs; ++i) {
   2284       for (unsigned int c = 0; c < 4; ++c) {
   2285          if (!oData.exists(sub.cur->values, i, c))
   2286             continue;
   2287          Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
   2288                                 info->out[i].slot[c] * 4);
   2289          Value *val = oData.load(sub.cur->values, i, c, NULL);
   2290          if (val)
   2291             mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
   2292       }
   2293    }
   2294 }
   2295 
   2296 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
   2297      code(code),
   2298      tgsi(NULL),
   2299      tData(this), aData(this), pData(this), oData(this)
   2300 {
   2301    info = code->info;
   2302 
   2303    const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
   2304 
   2305    const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
   2306    const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
   2307    const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
   2308    const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
   2309 
   2310    tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0);
   2311    pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0);
   2312    aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0);
   2313    oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
   2314 
   2315    for (int vol = 0, i = 0; i < code->tempArrayCount; ++i) {
   2316       int len = code->tempArrays[i].u32 >> 2;
   2317       int dim = code->tempArrays[i].u32 & 3;
   2318 
   2319       lData.push_back(DataArray(this));
   2320       lData.back().setup(TGSI_FILE_TEMPORARY_ARRAY, i, vol, len, dim, 4,
   2321                          FILE_MEMORY_LOCAL, 0);
   2322 
   2323       vol += (len * dim * 4 + 0xf) & ~0xf;
   2324    }
   2325 
   2326    for (int vol = 0, i = 0; i < code->immdArrayCount; ++i) {
   2327       int len = code->immdArrays[i].u32 >> 2;
   2328       int dim = code->immdArrays[i].u32 & 3;
   2329 
   2330       lData.push_back(DataArray(this));
   2331       lData.back().setup(TGSI_FILE_IMMEDIATE_ARRAY, i, vol, len, dim, 4,
   2332                          FILE_MEMORY_CONST, 14);
   2333 
   2334       vol += (len * dim * 4 + 0xf) & ~0xf;
   2335    }
   2336 
   2337    zero = mkImm((uint32_t)0);
   2338 
   2339    vtxBaseValid = 0;
   2340 }
   2341 
   2342 Converter::~Converter()
   2343 {
   2344 }
   2345 
   2346 template<typename T> inline void
   2347 Converter::BindArgumentsPass::updateCallArgs(
   2348    Instruction *i, void (Instruction::*setArg)(int, Value *),
   2349    T (Function::*proto))
   2350 {
   2351    Function *g = i->asFlow()->target.fn;
   2352    Subroutine *subg = conv.getSubroutine(g);
   2353 
   2354    for (unsigned a = 0; a < (g->*proto).size(); ++a) {
   2355       Value *v = (g->*proto)[a].get();
   2356       const Converter::Location &l = subg->values.l.find(v)->second;
   2357       Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
   2358 
   2359       (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
   2360    }
   2361 }
   2362 
   2363 template<typename T> inline void
   2364 Converter::BindArgumentsPass::updatePrototype(
   2365    BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
   2366 {
   2367    (func->*updateSet)();
   2368 
   2369    for (unsigned i = 0; i < set->getSize(); ++i) {
   2370       Value *v = func->getLValue(i);
   2371 
   2372       // only include values with a matching TGSI register
   2373       if (set->test(i) && sub->values.l.find(v) != sub->values.l.end())
   2374          (func->*proto).push_back(v);
   2375    }
   2376 }
   2377 
   2378 bool
   2379 Converter::BindArgumentsPass::visit(Function *f)
   2380 {
   2381    sub = conv.getSubroutine(f);
   2382 
   2383    for (ArrayList::Iterator bi = f->allBBlocks.iterator();
   2384         !bi.end(); bi.next()) {
   2385       for (Instruction *i = BasicBlock::get(bi)->getFirst();
   2386            i; i = i->next) {
   2387          if (i->op == OP_CALL && !i->asFlow()->builtin) {
   2388             updateCallArgs(i, &Instruction::setSrc, &Function::ins);
   2389             updateCallArgs(i, &Instruction::setDef, &Function::outs);
   2390          }
   2391       }
   2392    }
   2393 
   2394    if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
   2395       return true;
   2396    updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
   2397                    &Function::buildLiveSets, &Function::ins);
   2398    updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
   2399                    &Function::buildDefSets, &Function::outs);
   2400 
   2401    return true;
   2402 }
   2403 
   2404 bool
   2405 Converter::run()
   2406 {
   2407    BasicBlock *entry = new BasicBlock(prog->main);
   2408    BasicBlock *leave = new BasicBlock(prog->main);
   2409 
   2410    prog->main->setEntry(entry);
   2411    prog->main->setExit(leave);
   2412 
   2413    setPosition(entry, true);
   2414    sub.cur = getSubroutine(prog->main);
   2415 
   2416    if (info->io.genUserClip > 0) {
   2417       for (int c = 0; c < 4; ++c)
   2418          clipVtx[c] = getScratch();
   2419    }
   2420 
   2421    if (prog->getType() == Program::TYPE_FRAGMENT) {
   2422       Symbol *sv = mkSysVal(SV_POSITION, 3);
   2423       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
   2424       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
   2425    }
   2426 
   2427    for (ip = 0; ip < code->scan.num_instructions; ++ip) {
   2428       if (!handleInstruction(&code->insns[ip]))
   2429          return false;
   2430    }
   2431 
   2432    if (!BindArgumentsPass(*this).run(prog))
   2433       return false;
   2434 
   2435    return true;
   2436 }
   2437 
   2438 } // unnamed namespace
   2439 
   2440 namespace nv50_ir {
   2441 
   2442 bool
   2443 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
   2444 {
   2445    tgsi::Source src(info);
   2446    if (!src.scanSource())
   2447       return false;
   2448    tlsSize = info->bin.tlsSpace;
   2449 
   2450    Converter builder(this, &src);
   2451    return builder.run();
   2452 }
   2453 
   2454 } // namespace nv50_ir
   2455