Home | History | Annotate | Download | only in codegen
      1 
      2 #include "nv50_ir.h"
      3 #include "nv50_ir_target.h"
      4 #include "nv50_ir_build_util.h"
      5 
      6 #include "nv50_ir_from_sm4.h"
      7 
// XXX: is pass-through implicit? Check ReadWriteMask.
      9 
     10 namespace tgsi {
     11 
     12 static nv50_ir::SVSemantic irSemantic(unsigned sn)
     13 {
     14    switch (sn) {
     15    case TGSI_SEMANTIC_POSITION:      return nv50_ir::SV_POSITION;
     16    case TGSI_SEMANTIC_FACE:          return nv50_ir::SV_FACE;
     17    case NV50_SEMANTIC_LAYER:         return nv50_ir::SV_LAYER;
     18    case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
     19    case TGSI_SEMANTIC_PSIZE:         return nv50_ir::SV_POINT_SIZE;
     20    case NV50_SEMANTIC_CLIPDISTANCE:  return nv50_ir::SV_CLIP_DISTANCE;
     21    case TGSI_SEMANTIC_VERTEXID:      return nv50_ir::SV_VERTEX_ID;
     22    case TGSI_SEMANTIC_INSTANCEID:    return nv50_ir::SV_INSTANCE_ID;
     23    case TGSI_SEMANTIC_PRIMID:        return nv50_ir::SV_PRIMITIVE_ID;
     24    case NV50_SEMANTIC_TESSFACTOR:    return nv50_ir::SV_TESS_FACTOR;
     25    case NV50_SEMANTIC_TESSCOORD:     return nv50_ir::SV_TESS_COORD;
     26    default:
     27       return nv50_ir::SV_UNDEFINED;
     28    }
     29 }
     30 
     31 } // namespace tgsi
     32 
     33 namespace {
     34 
     35 using namespace nv50_ir;
     36 
     37 #define NV50_IR_MAX_RESOURCES 64
     38 
     39 class Converter : public BuildUtil
     40 {
     41 public:
     42    Converter(Program *, struct nv50_ir_prog_info *);
     43    ~Converter();
     44 
     45 private:
     46    DataArray tData32;
     47    DataArray tData64;
     48    unsigned int nrRegVals;
     49 
     50    DataArray *lData;
     51    unsigned int nrArrays;
     52    unsigned int arrayVol;
     53 
     54    DataArray oData;
     55 
     56    uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
     57 
     58    // outputs for each phase
     59    struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
     60 
     61    int phase;
     62    int subPhaseCnt[2];
     63    int subPhase;
     64    unsigned int phaseStart;
     65    unsigned int phaseInstance;
     66    unsigned int *phaseInstCnt[2];
     67    bool unrollPhase;
     68    bool phaseInstanceUsed;
     69    int phaseEnded; // (phase + 1) if $phase ended
     70 
     71    bool finalized;
     72 
     73    Value *srcPtr[3][3]; // for indirect addressing, save pointer values
     74    Value *dstPtr[3];
     75    Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
     76 
     77    Value *domainPt[3]; // pre-fetched TessCoord
     78 
     79    unsigned int nDstOpnds;
     80 
     81    Stack condBBs;
     82    Stack joinBBs;
     83    Stack loopBBs;
     84    Stack breakBBs;
     85    Stack entryBBs;
     86    Stack leaveBBs;
     87    Stack retIPs;
     88 
     89    bool shadow[NV50_IR_MAX_RESOURCES];
     90    TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
     91 
     92    struct nv50_ir_prog_info& info;
     93 
     94    Value *fragCoord[4];
     95 
     96 public:
     97    bool run();
     98 
     99 private:
    100    bool handleInstruction(unsigned int pos);
    101    bool inspectInstruction(unsigned int pos);
    102    bool handleDeclaration(const sm4_dcl& dcl);
    103    bool inspectDeclaration(const sm4_dcl& dcl);
    104    bool parseSignature();
    105 
    106    bool haveNextPhase(unsigned int pos) const;
    107 
    108    void allocateValues();
    109    void exportOutputs();
    110 
    111    void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
    112    void handleLOAD(Value *dst0[4]);
    113    void handleSAMPLE(operation, Value *dst0[4]);
    114    void handleQUERY(Value *dst0[4], enum TexQuery query);
    115    void handleDP(Value *dst0[4], int dim);
    116 
    117    Symbol *iSym(int i, int c);
    118    Symbol *oSym(int i, int c);
    119 
    120    Value *src(int i, int c);
    121    Value *src(const sm4_op&, int c, int i);
    122    Value *dst(int i, int c);
    123    Value *dst(const sm4_op&, int c, int i);
    124    void saveDst(int i, int c, Value *value);
    125    void saveDst(const sm4_op&, int c, Value *value, int i);
    126    void saveFragDepth(operation op, Value *value);
    127 
    128    Value *interpolate(const sm4_op&, int c, int i);
    129 
    130    Value *getSrcPtr(int s, int dim, int shl);
    131    Value *getDstPtr(int d, int dim, int shl);
    132    Value *getVtxPtr(int s);
    133 
    134    bool checkDstSrcAliasing() const;
    135    void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
    136    void finalizeShader();
    137 
    138    operation cvtOpcode(enum sm4_opcode op) const;
    139    unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
    140 
    141    DataType inferSrcType(enum sm4_opcode op) const;
    142    DataType inferDstType(enum sm4_opcode op) const;
    143 
    144    unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
    145    CondCode cvtCondCode(enum sm4_opcode op) const;
    146    RoundMode cvtRoundingMode(enum sm4_opcode op) const;
    147    TexTarget cvtTexTarget(enum sm4_target,
    148                            enum sm4_opcode, operation *) const;
    149    SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
    150    uint8_t cvtInterpMode(enum sm4_interpolation) const;
    151 
    152    unsigned tgsiSemantic(SVSemantic, int index);
    153    void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
    154 
    155 private:
    156    sm4_insn *insn;
    157    DataType dTy, sTy;
    158 
    159    const struct sm4_program& sm4;
    160    Program *prog;
    161 };
    162 
    163 #define PRIM_CASE(a, b) \
    164    case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
    165 
    166 unsigned
    167 Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
    168 {
    169    switch (prim) {
    170    PRIM_CASE(UNDEFINED, POINTS);
    171    PRIM_CASE(POINTLIST, POINTS);
    172    PRIM_CASE(LINELIST, LINES);
    173    PRIM_CASE(LINESTRIP, LINE_STRIP);
    174    PRIM_CASE(TRIANGLELIST, TRIANGLES);
    175    PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
    176    PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
    177    PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
    178    PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
    179    PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY);
    180    default:
    181       if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
    182           prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
    183          return PIPE_PRIM_POINTS;
    184       if (patchSize)
    185          *patchSize =
    186             prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
    187       return NV50_PRIM_PATCHES;
    188    }
    189 }
    190 
    191 #define IPM_CASE(n, a, b) \
    192    case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
    193 
    194 uint8_t
    195 Converter::cvtInterpMode(enum sm4_interpolation mode) const
    196 {
    197    switch (mode) {
    198    IPM_CASE(CONSTANT,                      FLAT, FLAT);
    199    IPM_CASE(LINEAR,                        PERSPECTIVE, PERSPECTIVE);
    200    IPM_CASE(LINEAR_CENTROID,               PERSPECTIVE, CENTROID);
    201    IPM_CASE(LINEAR_NOPERSPECTIVE,          LINEAR, LINEAR);
    202    IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID);
    203    IPM_CASE(LINEAR_SAMPLE,                 PERSPECTIVE, OFFSET);
    204    IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE,   LINEAR, OFFSET);
    205    IPM_CASE(UNDEFINED,                     LINEAR, LINEAR);
    206    default:
    207       assert(!"invalid interpolation mode");
    208       return 0;
    209    }
    210 }
    211 
    212 static void
    213 setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
    214 {
    215    switch (mode & NV50_IR_INTERP_MODE_MASK) {
    216    case NV50_IR_INTERP_LINEAR:
    217       var->linear = 1;
    218       break;
    219    case NV50_IR_INTERP_FLAT:
    220       var->flat = 1;
    221       break;
    222    default:
    223       break;
    224    }
    225    if (mode & NV50_IR_INTERP_CENTROID)
    226       var->centroid = 1;
    227 }
    228 
    229 RoundMode
    230 Converter::cvtRoundingMode(enum sm4_opcode op) const
    231 {
    232    switch (op) {
    233    case SM4_OPCODE_ROUND_NE: return ROUND_NI;
    234    case SM4_OPCODE_ROUND_NI: return ROUND_MI;
    235    case SM4_OPCODE_ROUND_PI: return ROUND_PI;
    236    case SM4_OPCODE_ROUND_Z:  return ROUND_ZI;
    237    default:
    238       return ROUND_N;
    239    }
    240 }
    241 
    242 CondCode
    243 Converter::cvtCondCode(enum sm4_opcode op) const
    244 {
    245    switch (op) {
    246    case SM4_OPCODE_EQ:
    247    case SM4_OPCODE_DEQ:
    248    case SM4_OPCODE_IEQ: return CC_EQ;
    249    case SM4_OPCODE_GE:
    250    case SM4_OPCODE_DGE:
    251    case SM4_OPCODE_IGE:
    252    case SM4_OPCODE_UGE: return CC_GE;
    253    case SM4_OPCODE_LT:
    254    case SM4_OPCODE_DLT:
    255    case SM4_OPCODE_ILT:
    256    case SM4_OPCODE_ULT: return CC_LT;
    257    case SM4_OPCODE_NE:
    258    case SM4_OPCODE_INE:
    259    case SM4_OPCODE_DNE: return CC_NEU;
    260    default:
    261       return CC_ALWAYS;
    262    }
    263 }
    264 
    265 DataType
    266 Converter::inferSrcType(enum sm4_opcode op) const
    267 {
    268    switch (op) {
    269    case SM4_OPCODE_IADD:
    270    case SM4_OPCODE_IEQ:
    271    case SM4_OPCODE_IGE:
    272    case SM4_OPCODE_ILT:
    273    case SM4_OPCODE_IMAD:
    274    case SM4_OPCODE_IMAX:
    275    case SM4_OPCODE_IMIN:
    276    case SM4_OPCODE_IMUL:
    277    case SM4_OPCODE_INE:
    278    case SM4_OPCODE_INEG:
    279    case SM4_OPCODE_ISHL:
    280    case SM4_OPCODE_ISHR:
    281    case SM4_OPCODE_ITOF:
    282    case SM4_OPCODE_ATOMIC_IADD:
    283    case SM4_OPCODE_ATOMIC_IMAX:
    284    case SM4_OPCODE_ATOMIC_IMIN:
    285       return TYPE_S32;
    286    case SM4_OPCODE_AND:
    287    case SM4_OPCODE_NOT:
    288    case SM4_OPCODE_OR:
    289    case SM4_OPCODE_UDIV:
    290    case SM4_OPCODE_ULT:
    291    case SM4_OPCODE_UGE:
    292    case SM4_OPCODE_UMUL:
    293    case SM4_OPCODE_UMAD:
    294    case SM4_OPCODE_UMAX:
    295    case SM4_OPCODE_UMIN:
    296    case SM4_OPCODE_USHR:
    297    case SM4_OPCODE_UTOF:
    298    case SM4_OPCODE_XOR:
    299    case SM4_OPCODE_UADDC:
    300    case SM4_OPCODE_USUBB:
    301    case SM4_OPCODE_ATOMIC_AND:
    302    case SM4_OPCODE_ATOMIC_OR:
    303    case SM4_OPCODE_ATOMIC_XOR:
    304    case SM4_OPCODE_ATOMIC_UMAX:
    305    case SM4_OPCODE_ATOMIC_UMIN:
    306       return TYPE_U32;
    307    case SM4_OPCODE_DADD:
    308    case SM4_OPCODE_DMAX:
    309    case SM4_OPCODE_DMIN:
    310    case SM4_OPCODE_DMUL:
    311    case SM4_OPCODE_DEQ:
    312    case SM4_OPCODE_DGE:
    313    case SM4_OPCODE_DLT:
    314    case SM4_OPCODE_DNE:
    315    case SM4_OPCODE_DMOV:
    316    case SM4_OPCODE_DMOVC:
    317    case SM4_OPCODE_DTOF:
    318       return TYPE_F64;
    319    case SM4_OPCODE_F16TOF32:
    320       return TYPE_F16;
    321    default:
    322       return TYPE_F32;
    323    }
    324 }
    325 
    326 DataType
    327 Converter::inferDstType(enum sm4_opcode op) const
    328 {
    329    switch (op) {
    330    case SM4_OPCODE_FTOI:
    331       return TYPE_S32;
    332    case SM4_OPCODE_FTOU:
    333    case SM4_OPCODE_EQ:
    334    case SM4_OPCODE_GE:
    335    case SM4_OPCODE_LT:
    336    case SM4_OPCODE_NE:
    337       return TYPE_U32;
    338    case SM4_OPCODE_FTOD:
    339       return TYPE_F64;
    340    case SM4_OPCODE_F32TOF16:
    341       return TYPE_F16;
    342    case SM4_OPCODE_ITOF:
    343    case SM4_OPCODE_UTOF:
    344    case SM4_OPCODE_DTOF:
    345       return TYPE_F32;
    346    default:
    347       return inferSrcType(op);
    348    }
    349 }
    350 
    351 operation
    352 Converter::cvtOpcode(enum sm4_opcode op) const
    353 {
    354    switch (op) {
    355    case SM4_OPCODE_ADD:         return OP_ADD;
    356    case SM4_OPCODE_AND:         return OP_AND;
    357    case SM4_OPCODE_BREAK:       return OP_BREAK;
    358    case SM4_OPCODE_BREAKC:      return OP_BREAK;
    359    case SM4_OPCODE_CALL:        return OP_CALL;
    360    case SM4_OPCODE_CALLC:       return OP_CALL;
    361    case SM4_OPCODE_CASE:        return OP_NOP;
    362    case SM4_OPCODE_CONTINUE:    return OP_CONT;
    363    case SM4_OPCODE_CONTINUEC:   return OP_CONT;
    364    case SM4_OPCODE_CUT:         return OP_RESTART;
    365    case SM4_OPCODE_DEFAULT:     return OP_NOP;
    366    case SM4_OPCODE_DERIV_RTX:   return OP_DFDX;
    367    case SM4_OPCODE_DERIV_RTY:   return OP_DFDY;
    368    case SM4_OPCODE_DISCARD:     return OP_DISCARD;
    369    case SM4_OPCODE_DIV:         return OP_DIV;
    370    case SM4_OPCODE_DP2:         return OP_MAD;
    371    case SM4_OPCODE_DP3:         return OP_MAD;
    372    case SM4_OPCODE_DP4:         return OP_MAD;
    373    case SM4_OPCODE_ELSE:        return OP_BRA;
    374    case SM4_OPCODE_EMIT:        return OP_EMIT;
    375    case SM4_OPCODE_EMITTHENCUT: return OP_EMIT;
    376    case SM4_OPCODE_ENDIF:       return OP_BRA;
    377    case SM4_OPCODE_ENDLOOP:     return OP_PREBREAK;
    378    case SM4_OPCODE_ENDSWITCH:   return OP_NOP;
    379    case SM4_OPCODE_EQ:          return OP_SET;
    380    case SM4_OPCODE_EXP:         return OP_EX2;
    381    case SM4_OPCODE_FRC:         return OP_CVT;
    382    case SM4_OPCODE_FTOI:        return OP_CVT;
    383    case SM4_OPCODE_FTOU:        return OP_CVT;
    384    case SM4_OPCODE_GE:          return OP_SET;
    385    case SM4_OPCODE_IADD:        return OP_ADD;
    386    case SM4_OPCODE_IF:          return OP_BRA;
    387    case SM4_OPCODE_IEQ:         return OP_SET;
    388    case SM4_OPCODE_IGE:         return OP_SET;
    389    case SM4_OPCODE_ILT:         return OP_SET;
    390    case SM4_OPCODE_IMAD:        return OP_MAD;
    391    case SM4_OPCODE_IMAX:        return OP_MAX;
    392    case SM4_OPCODE_IMIN:        return OP_MIN;
    393    case SM4_OPCODE_IMUL:        return OP_MUL;
    394    case SM4_OPCODE_INE:         return OP_SET;
    395    case SM4_OPCODE_INEG:        return OP_NEG;
    396    case SM4_OPCODE_ISHL:        return OP_SHL;
    397    case SM4_OPCODE_ISHR:        return OP_SHR;
    398    case SM4_OPCODE_ITOF:        return OP_CVT;
    399    case SM4_OPCODE_LD:          return OP_TXF;
    400    case SM4_OPCODE_LD_MS:       return OP_TXF;
    401    case SM4_OPCODE_LOG:         return OP_LG2;
    402    case SM4_OPCODE_LOOP:        return OP_PRECONT;
    403    case SM4_OPCODE_LT:          return OP_SET;
    404    case SM4_OPCODE_MAD:         return OP_MAD;
    405    case SM4_OPCODE_MIN:         return OP_MIN;
    406    case SM4_OPCODE_MAX:         return OP_MAX;
    407    case SM4_OPCODE_MOV:         return OP_MOV;
    408    case SM4_OPCODE_MOVC:        return OP_MOV;
    409    case SM4_OPCODE_MUL:         return OP_MUL;
    410    case SM4_OPCODE_NE:          return OP_SET;
    411    case SM4_OPCODE_NOP:         return OP_NOP;
    412    case SM4_OPCODE_NOT:         return OP_NOT;
    413    case SM4_OPCODE_OR:          return OP_OR;
    414    case SM4_OPCODE_RESINFO:     return OP_TXQ;
    415    case SM4_OPCODE_RET:         return OP_RET;
    416    case SM4_OPCODE_RETC:        return OP_RET;
    417    case SM4_OPCODE_ROUND_NE:    return OP_CVT;
    418    case SM4_OPCODE_ROUND_NI:    return OP_FLOOR;
    419    case SM4_OPCODE_ROUND_PI:    return OP_CEIL;
    420    case SM4_OPCODE_ROUND_Z:     return OP_TRUNC;
    421    case SM4_OPCODE_RSQ:         return OP_RSQ;
    422    case SM4_OPCODE_SAMPLE:      return OP_TEX;
    423    case SM4_OPCODE_SAMPLE_C:    return OP_TEX;
    424    case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX;
    425    case SM4_OPCODE_SAMPLE_L:    return OP_TXL;
    426    case SM4_OPCODE_SAMPLE_D:    return OP_TXD;
    427    case SM4_OPCODE_SAMPLE_B:    return OP_TXB;
    428    case SM4_OPCODE_SQRT:        return OP_SQRT;
    429    case SM4_OPCODE_SWITCH:      return OP_NOP;
    430    case SM4_OPCODE_SINCOS:      return OP_PRESIN;
    431    case SM4_OPCODE_UDIV:        return OP_DIV;
    432    case SM4_OPCODE_ULT:         return OP_SET;
    433    case SM4_OPCODE_UGE:         return OP_SET;
    434    case SM4_OPCODE_UMUL:        return OP_MUL;
    435    case SM4_OPCODE_UMAD:        return OP_MAD;
    436    case SM4_OPCODE_UMAX:        return OP_MAX;
    437    case SM4_OPCODE_UMIN:        return OP_MIN;
    438    case SM4_OPCODE_USHR:        return OP_SHR;
    439    case SM4_OPCODE_UTOF:        return OP_CVT;
    440    case SM4_OPCODE_XOR:         return OP_XOR;
    441 
    442    case SM4_OPCODE_GATHER4:            return OP_TXG;
    443    case SM4_OPCODE_SAMPLE_POS:         return OP_PIXLD;
    444    case SM4_OPCODE_SAMPLE_INFO:        return OP_PIXLD;
    445    case SM4_OPCODE_EMIT_STREAM:        return OP_EMIT;
    446    case SM4_OPCODE_CUT_STREAM:         return OP_RESTART;
    447    case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT;
    448    case SM4_OPCODE_INTERFACE_CALL:     return OP_CALL;
    449    case SM4_OPCODE_BUFINFO:            return OP_TXQ;
    450    case SM4_OPCODE_DERIV_RTX_COARSE:   return OP_DFDX;
    451    case SM4_OPCODE_DERIV_RTX_FINE:     return OP_DFDX;
    452    case SM4_OPCODE_DERIV_RTY_COARSE:   return OP_DFDY;
    453    case SM4_OPCODE_DERIV_RTY_FINE:     return OP_DFDY;
    454    case SM4_OPCODE_GATHER4_C:          return OP_TXG;
    455    case SM4_OPCODE_GATHER4_PO:         return OP_TXG;
    456    case SM4_OPCODE_GATHER4_PO_C:       return OP_TXG;
    457 
    458    case SM4_OPCODE_RCP:       return OP_RCP;
    459    case SM4_OPCODE_F32TOF16:  return OP_CVT;
    460    case SM4_OPCODE_F16TOF32:  return OP_CVT;
    461    case SM4_OPCODE_UADDC:     return OP_ADD;
    462    case SM4_OPCODE_USUBB:     return OP_SUB;
    463    case SM4_OPCODE_COUNTBITS: return OP_POPCNT;
    464 
    465    case SM4_OPCODE_ATOMIC_AND:       return OP_AND;
    466    case SM4_OPCODE_ATOMIC_OR:        return OP_OR;
    467    case SM4_OPCODE_ATOMIC_XOR:       return OP_XOR;
    468    case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE;
    469    case SM4_OPCODE_ATOMIC_IADD:      return OP_ADD;
    470    case SM4_OPCODE_ATOMIC_IMAX:      return OP_MAX;
    471    case SM4_OPCODE_ATOMIC_IMIN:      return OP_MIN;
    472    case SM4_OPCODE_ATOMIC_UMAX:      return OP_MAX;
    473    case SM4_OPCODE_ATOMIC_UMIN:      return OP_MIN;
    474 
    475    case SM4_OPCODE_SYNC:  return OP_MEMBAR;
    476    case SM4_OPCODE_DADD:  return OP_ADD;
    477    case SM4_OPCODE_DMAX:  return OP_MAX;
    478    case SM4_OPCODE_DMIN:  return OP_MIN;
    479    case SM4_OPCODE_DMUL:  return OP_MUL;
    480    case SM4_OPCODE_DEQ:   return OP_SET;
    481    case SM4_OPCODE_DGE:   return OP_SET;
    482    case SM4_OPCODE_DLT:   return OP_SET;
    483    case SM4_OPCODE_DNE:   return OP_SET;
    484    case SM4_OPCODE_DMOV:  return OP_MOV;
    485    case SM4_OPCODE_DMOVC: return OP_MOV;
    486    case SM4_OPCODE_DTOF:  return OP_CVT;
    487    case SM4_OPCODE_FTOD:  return OP_CVT;
    488 
    489    default:
    490       return OP_NOP;
    491    }
    492 }
    493 
    494 unsigned int
    495 Converter::getDstOpndCount(enum sm4_opcode opcode) const
    496 {
    497    switch (opcode) {
    498    case SM4_OPCODE_SINCOS:
    499    case SM4_OPCODE_UDIV:
    500    case SM4_OPCODE_IMUL:
    501    case SM4_OPCODE_UMUL:
    502       return 2;
    503    case SM4_OPCODE_BREAK:
    504    case SM4_OPCODE_BREAKC:
    505    case SM4_OPCODE_CALL:
    506    case SM4_OPCODE_CALLC:
    507    case SM4_OPCODE_CONTINUE:
    508    case SM4_OPCODE_CONTINUEC:
    509    case SM4_OPCODE_DISCARD:
    510    case SM4_OPCODE_EMIT:
    511    case SM4_OPCODE_EMIT_STREAM:
    512    case SM4_OPCODE_CUT:
    513    case SM4_OPCODE_CUT_STREAM:
    514    case SM4_OPCODE_EMITTHENCUT:
    515    case SM4_OPCODE_EMITTHENCUT_STREAM:
    516    case SM4_OPCODE_IF:
    517    case SM4_OPCODE_ELSE:
    518    case SM4_OPCODE_ENDIF:
    519    case SM4_OPCODE_LOOP:
    520    case SM4_OPCODE_ENDLOOP:
    521    case SM4_OPCODE_RET:
    522    case SM4_OPCODE_RETC:
    523    case SM4_OPCODE_SYNC:
    524    case SM4_OPCODE_SWITCH:
    525    case SM4_OPCODE_CASE:
    526    case SM4_OPCODE_HS_DECLS:
    527    case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
    528    case SM4_OPCODE_HS_FORK_PHASE:
    529    case SM4_OPCODE_HS_JOIN_PHASE:
    530       return 0;
    531    default:
    532       return 1;
    533    }
    534 }
    535 
    536 #define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
    537 #define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
    538    return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
    539 
    540 TexTarget
    541 Converter::cvtTexTarget(enum sm4_target targ,
    542                         enum sm4_opcode op, operation *opr) const
    543 {
    544    bool dc = (op == SM4_OPCODE_SAMPLE_C ||
    545               op == SM4_OPCODE_SAMPLE_C_LZ ||
    546               op == SM4_OPCODE_GATHER4_C ||
    547               op == SM4_OPCODE_GATHER4_PO_C);
    548 
    549    if (opr) {
    550       switch (targ) {
    551       case SM4_TARGET_RAW_BUFFER:        *opr = OP_LOAD; break;
    552       case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break;
    553       default:
    554          *opr = OP_TEX;
    555          break;
    556       }
    557    }
    558 
    559    switch (targ) {
    560    TARG_CASE_1(UNKNOWN, 2D);
    561    TARG_CASE_2(TEXTURE1D,         1D);
    562    TARG_CASE_2(TEXTURE2D,         2D);
    563    TARG_CASE_1(TEXTURE2DMS,       2D_MS);
    564    TARG_CASE_1(TEXTURE3D,         3D);
    565    TARG_CASE_2(TEXTURECUBE,       CUBE);
    566    TARG_CASE_2(TEXTURE1DARRAY,    1D_ARRAY);
    567    TARG_CASE_2(TEXTURE2DARRAY,    2D_ARRAY);
    568    TARG_CASE_1(TEXTURE2DMSARRAY,  2D_MS_ARRAY);
    569    TARG_CASE_2(TEXTURECUBEARRAY,  CUBE_ARRAY);
    570    TARG_CASE_1(BUFFER,            BUFFER);
    571    TARG_CASE_1(RAW_BUFFER,        BUFFER);
    572    TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
    573    default:
    574       assert(!"invalid SM4 texture target");
    575       return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
    576    }
    577 }
    578 
    579 static inline uint32_t
    580 getSVIndex(enum sm4_sv sv)
    581 {
    582    switch (sv) {
    583    case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0;
    584    case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1;
    585    case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2;
    586    case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3;
    587 
    588    case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4;
    589    case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5;
    590 
    591    case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0;
    592    case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1;
    593    case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2;
    594 
    595    case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4;
    596 
    597    case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0;
    598 
    599    case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4;
    600 
    601    default:
    602       return 0;
    603    }
    604 }
    605 
    606 SVSemantic
    607 Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
    608 {
    609    idx = 0;
    610 
    611    switch (sv) {
    612    case SM4_SV_UNDEFINED:     return SV_UNDEFINED;
    613    case SM4_SV_POSITION:      return SV_POSITION;
    614    case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE;
    615    case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction
    616    case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER;
    617    case SM4_SV_VIEWPORT_ARRAY_INDEX:  return SV_VIEWPORT_INDEX;
    618    case SM4_SV_VERTEX_ID:     return SV_VERTEX_ID;
    619    case SM4_SV_PRIMITIVE_ID:  return SV_PRIMITIVE_ID;
    620    case SM4_SV_INSTANCE_ID:   return SV_INSTANCE_ID;
    621    case SM4_SV_IS_FRONT_FACE: return SV_FACE;
    622    case SM4_SV_SAMPLE_INDEX:  return SV_SAMPLE_INDEX;
    623 
    624    case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
    625    case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
    626    case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
    627    case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
    628    case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
    629    case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
    630    case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
    631    case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
    632    case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
    633    case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
    634    case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
    635    case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
    636       idx = getSVIndex(sv);
    637       return SV_TESS_FACTOR;
    638 
    639    default:
    640       assert(!"invalid SM4 system value");
    641       return SV_UNDEFINED;
    642    }
    643 }
    644 
    645 unsigned
    646 Converter::tgsiSemantic(SVSemantic sv, int index)
    647 {
    648    switch (sv) {
    649    case SV_POSITION:       return TGSI_SEMANTIC_POSITION;
    650    case SV_FACE:           return TGSI_SEMANTIC_FACE;
    651    case SV_LAYER:          return NV50_SEMANTIC_LAYER;
    652    case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX;
    653    case SV_POINT_SIZE:     return TGSI_SEMANTIC_PSIZE;
    654    case SV_CLIP_DISTANCE:  return NV50_SEMANTIC_CLIPDISTANCE;
    655    case SV_VERTEX_ID:      return TGSI_SEMANTIC_VERTEXID;
    656    case SV_INSTANCE_ID:    return TGSI_SEMANTIC_INSTANCEID;
    657    case SV_PRIMITIVE_ID:   return TGSI_SEMANTIC_PRIMID;
    658    case SV_TESS_FACTOR:    return NV50_SEMANTIC_TESSFACTOR;
    659    case SV_TESS_COORD:     return NV50_SEMANTIC_TESSCOORD;
    660    case SV_INVOCATION_ID:  return NV50_SEMANTIC_INVOCATIONID;
    661    default:
    662       return TGSI_SEMANTIC_GENERIC;
    663    }
    664 }
    665 
    666 void
    667 Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
    668 {
    669    unsigned int i;
    670    for (i = 0; i < info.numSysVals; ++i)
    671       if (info.sv[i].sn == sn &&
    672           info.sv[i].si == si)
    673          return;
    674    info.numSysVals = i + 1;
    675    info.sv[i].sn = sn;
    676    info.sv[i].si = si;
    677    info.sv[i].mask = mask;
    678    info.sv[i].input = input ? 1 : 0;
    679 }
    680 
    681 bool
    682 Converter::parseSignature()
    683 {
    684    struct nv50_ir_varying *patch;
    685    unsigned int i, r, n;
    686 
    687    info.numInputs = 0;
    688    info.numOutputs = 0;
    689    info.numPatchConstants = 0;
    690 
    691    for (n = 0, i = 0; i < sm4.num_params_in; ++i) {
    692       r = sm4.params_in[i].Register;
    693 
    694       info.in[r].mask |= sm4.params_in[i].ReadWriteMask;
    695       // mask might be uninitialized ...
    696       if (!sm4.params_in[i].ReadWriteMask)
    697 	  info.in[r].mask = 0xf;
    698       info.in[r].id = r;
    699       if (info.in[r].regular) // already assigned semantic name/index
    700          continue;
    701       info.in[r].regular = 1;
    702       info.in[r].patch = 0;
    703 
    704       info.numInputs = MAX2(info.numInputs, r + 1);
    705 
    706       switch (sm4.params_in[i].SystemValueType) {
    707       case D3D_NAME_UNDEFINED:
    708          info.in[r].sn = TGSI_SEMANTIC_GENERIC;
    709          info.in[r].si = n++;
    710          break;
    711       case D3D_NAME_POSITION:
    712          info.in[r].sn = TGSI_SEMANTIC_POSITION;
    713          break;
    714       case D3D_NAME_VERTEX_ID:
    715          info.in[r].sn = TGSI_SEMANTIC_VERTEXID;
    716          break;
    717       case D3D_NAME_PRIMITIVE_ID:
    718          info.in[r].sn = TGSI_SEMANTIC_PRIMID;
    719          // no corresponding output
    720          recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
    721          break;
    722       case D3D_NAME_INSTANCE_ID:
    723          info.in[r].sn = TGSI_SEMANTIC_INSTANCEID;
    724          break;
    725       case D3D_NAME_IS_FRONT_FACE:
    726          info.in[r].sn = TGSI_SEMANTIC_FACE;
    727          // no corresponding output
    728          recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
    729          break;
    730       default:
    731          assert(!"invalid/unsupported input linkage semantic");
    732          break;
    733       }
    734    }
    735 
    736    for (n = 0, i = 0; i < sm4.num_params_out; ++i) {
    737       r = sm4.params_out[i].Register;
    738 
    739       info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask;
    740       info.out[r].id = r;
    741       if (info.out[r].regular) // already assigned semantic name/index
    742          continue;
    743       info.out[r].regular = 1;
    744       info.out[r].patch = 0;
    745 
    746       info.numOutputs = MAX2(info.numOutputs, r + 1);
    747 
    748       switch (sm4.params_out[i].SystemValueType) {
    749       case D3D_NAME_UNDEFINED:
    750          if (prog->getType() == Program::TYPE_FRAGMENT) {
    751             info.out[r].sn = TGSI_SEMANTIC_COLOR;
    752             info.out[r].si = info.prop.fp.numColourResults++;
    753          } else {
    754             info.out[r].sn = TGSI_SEMANTIC_GENERIC;
    755             info.out[r].si = n++;
    756          }
    757          break;
    758       case D3D_NAME_POSITION:
    759       case D3D_NAME_DEPTH:
    760       case D3D_NAME_DEPTH_GREATER_EQUAL:
    761       case D3D_NAME_DEPTH_LESS_EQUAL:
    762          info.out[r].sn = TGSI_SEMANTIC_POSITION;
    763          info.io.fragDepth = r;
    764          break;
    765       case D3D_NAME_CULL_DISTANCE:
    766       case D3D_NAME_CLIP_DISTANCE:
    767          info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE;
    768          info.out[r].si = sm4.params_out[i].SemanticIndex;
    769          break;
    770       case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
    771          info.out[r].sn = NV50_SEMANTIC_LAYER;
    772          break;
    773       case D3D_NAME_VIEWPORT_ARRAY_INDEX:
    774          info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX;
    775          break;
    776       case D3D_NAME_PRIMITIVE_ID:
    777          info.out[r].sn = TGSI_SEMANTIC_PRIMID;
    778          break;
    779       case D3D_NAME_TARGET:
    780          info.out[r].sn = TGSI_SEMANTIC_COLOR;
    781          info.out[r].si = sm4.params_out[i].SemanticIndex;
    782          break;
    783       case D3D_NAME_COVERAGE:
    784          info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK;
    785          info.io.sampleMask = r;
    786          break;
    787       case D3D_NAME_SAMPLE_INDEX:
    788       default:
    789          assert(!"invalid/unsupported output linkage semantic");
    790          break;
    791       }
    792    }
    793 
    794    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
    795       patch = &info.in[info.numInputs];
    796    else
    797       patch = &info.out[info.numOutputs];
    798 
    799    for (n = 0, i = 0; i < sm4.num_params_patch; ++i) {
    800       r = sm4.params_patch[i].Register;
    801 
    802       patch[r].mask |= sm4.params_patch[i].Mask;
    803       patch[r].id = r;
    804       if (patch[r].regular) // already visited
    805          continue;
    806       patch[r].regular = 1;
    807       patch[r].patch = 1;
    808 
    809       info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
    810 
    811       switch (sm4.params_patch[i].SystemValueType) {
    812       case D3D_NAME_UNDEFINED:
    813          patch[r].sn = TGSI_SEMANTIC_GENERIC;
    814          patch[r].si = n++;
    815          break;
    816       case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
    817       case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
    818       case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
    819          patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
    820          patch[r].si = sm4.params_patch[i].SemanticIndex;
    821          break;
    822       case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
    823       case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
    824       case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
    825          patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
    826          patch[r].si = sm4.params_patch[i].SemanticIndex + 4;
    827          break;
    828       default:
    829          assert(!"invalid patch-constant linkage semantic");
    830          break;
    831       }
    832    }
    833    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
    834       info.numInputs += info.numPatchConstants;
    835    else
    836       info.numOutputs += info.numPatchConstants;
    837 
    838    return true;
    839 }
    840 
    841 bool
    842 Converter::inspectDeclaration(const sm4_dcl& dcl)
    843 {
    844    int idx = -1;
    845    enum sm4_interpolation ipa_mode;
    846 
    847    if (dcl.op.get() && dcl.op->is_index_simple(0))
    848       idx = dcl.op->indices[0].disp;
    849 
    850    switch (dcl.opcode) {
    851    case SM4_OPCODE_DCL_SAMPLER:
    852       assert(idx >= 0);
    853       shadow[idx] = dcl.dcl_sampler.shadow;
    854       break;
    855    case SM4_OPCODE_DCL_RESOURCE:
    856    {
    857       enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
    858 
    859       assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
    860       resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
    861       resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
    862    }
    863       break;
    864    case SM4_OPCODE_DCL_CONSTANT_BUFFER:
    865       // nothing to do
    866       break;
    867    case SM4_OPCODE_CUSTOMDATA:
    868       info.immd.bufSize = dcl.num * 4;
    869       info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
    870       memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
    871       break;
    872    case SM4_OPCODE_DCL_INDEX_RANGE:
    873       // XXX: ?
    874       break;
    875    case SM4_OPCODE_DCL_INPUT_PS_SGV:
    876    case SM4_OPCODE_DCL_INPUT_PS_SIV:
    877    case SM4_OPCODE_DCL_INPUT_PS:
    878    {
    879       assert(idx >= 0 && idx < info.numInputs);
    880       ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
    881       interpMode[idx] = cvtInterpMode(ipa_mode);
    882       setVaryingInterpMode(&info.in[idx], interpMode[idx]);
    883    }
    884       break;
    885    case SM4_OPCODE_DCL_INPUT_SGV:
    886    case SM4_OPCODE_DCL_INPUT_SIV:
    887    case SM4_OPCODE_DCL_INPUT:
    888       if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
    889          idx = info.numInputs++;
    890          info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
    891          info.in[idx].mask = dcl.op->mask;
    892       }
    893       // rest handled in parseSignature
    894       break;
    895    case SM4_OPCODE_DCL_OUTPUT_SGV:
    896    case SM4_OPCODE_DCL_OUTPUT_SIV:
    897       switch (dcl.sv) {
    898       case SM4_SV_POSITION:
    899          assert(prog->getType() != Program::TYPE_FRAGMENT);
    900          break;
    901       case SM4_SV_CULL_DISTANCE: // XXX: order ?
    902          info.io.cullDistanceMask |= 1 << info.io.clipDistanceMask;
    903       // fall through
    904       case SM4_SV_CLIP_DISTANCE:
    905          info.io.clipDistanceMask++; // abuse as count
    906          break;
    907       default:
    908          break;
    909       }
    910       switch (dcl.op->file) {
    911       case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
    912       case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
    913       case SM4_FILE_OUTPUT_DEPTH:
    914          if (info.io.fragDepth < 0xff)
    915             break;
    916          idx = info.io.fragDepth = info.numOutputs++;
    917          info.out[idx].sn = TGSI_SEMANTIC_POSITION;
    918          break;
    919       case SM4_FILE_OUTPUT_COVERAGE_MASK:
    920          if (info.io.sampleMask < 0xff)
    921             break;
    922          idx = info.io.sampleMask = info.numOutputs++;
    923          info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
    924          break;
    925       default:
    926          break;
    927       }
    928       break;
    929    case SM4_OPCODE_DCL_OUTPUT:
    930       // handled in parseSignature
    931       break;
    932    case SM4_OPCODE_DCL_TEMPS:
    933       nrRegVals += dcl.num;
    934       break;
    935    case SM4_OPCODE_DCL_INDEXABLE_TEMP:
    936       nrArrays++;
    937       break;
    938    case SM4_OPCODE_DCL_GLOBAL_FLAGS:
    939       if (prog->getType() == Program::TYPE_FRAGMENT)
    940          info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
    941       break;
    942 
    943    case SM4_OPCODE_DCL_FUNCTION_BODY:
    944       break;
    945    case SM4_OPCODE_DCL_FUNCTION_TABLE:
    946       break;
    947    case SM4_OPCODE_DCL_INTERFACE:
    948       break;
    949 
    950       // GP
    951    case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
    952       info.prop.gp.outputPrim = g3dPrim(
    953          dcl.dcl_gs_output_primitive_topology.primitive_topology);
    954       break;
    955    case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
    956       info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
    957       break;
    958    case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
    959       info.prop.gp.maxVertices = dcl.num;
    960       break;
    961    case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
    962       info.prop.gp.instanceCount = dcl.num;
    963       break;
    964    case SM4_OPCODE_DCL_STREAM:
    965       break;
    966 
    967       // TCP/TEP
    968    case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
    969       info.prop.tp.inputPatchSize =
    970          dcl.dcl_input_control_point_count.control_points;
    971       break;
    972    case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
    973       info.prop.tp.outputPatchSize =
    974          dcl.dcl_output_control_point_count.control_points;
    975       break;
    976    case SM4_OPCODE_DCL_TESS_DOMAIN:
    977       switch (dcl.dcl_tess_domain.domain) {
    978       case D3D_TESSELLATOR_DOMAIN_ISOLINE:
    979          info.prop.tp.domain = PIPE_PRIM_LINES;
    980          break;
    981       case D3D_TESSELLATOR_DOMAIN_TRI:
    982          info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
    983          break;
    984       case D3D_TESSELLATOR_DOMAIN_QUAD:
    985          info.prop.tp.domain = PIPE_PRIM_QUADS;
    986          break;
    987       case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
    988       default:
    989          info.prop.tp.domain = PIPE_PRIM_MAX;
    990          break;
    991       }
    992       break;
    993    case SM4_OPCODE_DCL_TESS_PARTITIONING:
    994       switch (dcl.dcl_tess_partitioning.partitioning) {
    995       case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
    996          info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
    997          break;
    998       case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
    999          info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
   1000          break;
   1001       case D3D_TESSELLATOR_PARTITIONING_POW2:
   1002          info.prop.tp.partitioning = NV50_TESS_PART_POW2;
   1003          break;
   1004       case D3D_TESSELLATOR_PARTITIONING_INTEGER:
   1005       case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
   1006       default:
   1007          info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
   1008          break;
   1009       }
   1010       break;
   1011    case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
   1012       switch (dcl.dcl_tess_output_primitive.primitive) {
   1013       case D3D_TESSELLATOR_OUTPUT_LINE:
   1014          info.prop.tp.outputPrim = PIPE_PRIM_LINES;
   1015          break;
   1016       case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
   1017          info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
   1018          info.prop.tp.winding = +1;
   1019          break;
   1020       case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
   1021          info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
   1022          info.prop.tp.winding = -1;
   1023          break;
   1024       case D3D_TESSELLATOR_OUTPUT_POINT:
   1025          info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
   1026          break;
   1027       case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
   1028       default:
   1029          info.prop.tp.outputPrim = PIPE_PRIM_MAX;
   1030          break;
   1031       }
   1032       break;
   1033 
   1034    case SM4_OPCODE_HS_FORK_PHASE:
   1035       ++subPhaseCnt[0];
   1036       phase = 1;
   1037       break;
   1038    case SM4_OPCODE_HS_JOIN_PHASE:
   1039       phase = 2;
   1040       ++subPhaseCnt[1];
   1041       break;
   1042    case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
   1043    case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
   1044    case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
   1045       break;
   1046 
   1047       // weird stuff
   1048    case SM4_OPCODE_DCL_THREAD_GROUP:
   1049    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
   1050    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
   1051    case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
   1052    case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
   1053    case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
   1054    case SM4_OPCODE_DCL_RESOURCE_RAW:
   1055    case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
   1056       ERROR("unhandled declaration\n");
   1057       abort();
   1058       return false;
   1059 
   1060    default:
   1061       assert(!"invalid SM4 declaration");
   1062       return false;
   1063    }
   1064    return true;
   1065 }
   1066 
   1067 void
   1068 Converter::allocateValues()
   1069 {
   1070    lData = new DataArray[nrArrays];
   1071 
   1072    for (unsigned int i = 0; i < nrArrays; ++i)
   1073       lData[i].setParent(this);
   1074 
   1075    tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
   1076    tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
   1077 
   1078    if (prog->getType() == Program::TYPE_FRAGMENT)
   1079       oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
   1080 }
   1081 
   1082 bool Converter::handleDeclaration(const sm4_dcl& dcl)
   1083 {
   1084    switch (dcl.opcode) {
   1085    case SM4_OPCODE_DCL_INDEXABLE_TEMP:
   1086       lData[nrArrays++].setup(arrayVol,
   1087                               dcl.indexable_temp.num, dcl.indexable_temp.comps,
   1088                               4, FILE_MEMORY_LOCAL);
   1089       arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
   1090       break;
   1091    case SM4_OPCODE_HS_FORK_PHASE:
   1092       if (subPhaseCnt[0])
   1093          phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
   1094       ++subPhaseCnt[0];
   1095       break;
   1096    case SM4_OPCODE_HS_JOIN_PHASE:
   1097       if (subPhaseCnt[1])
   1098          phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
   1099       ++subPhaseCnt[1];
   1100       break;
   1101    case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
   1102       phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
   1103       break;
   1104    case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
   1105       phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
   1106       break;
   1107 
   1108    default:
   1109       break; // already handled in inspection
   1110    }
   1111 
   1112    return true;
   1113 }
   1114 
   1115 Symbol *
   1116 Converter::iSym(int i, int c)
   1117 {
   1118    if (info.in[i].regular) {
   1119       return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
   1120    } else {
   1121       return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
   1122    }
   1123 }
   1124 
   1125 Symbol *
   1126 Converter::oSym(int i, int c)
   1127 {
   1128    if (info.out[i].regular) {
   1129       return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
   1130    } else {
   1131       return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
   1132    }
   1133 }
   1134 
   1135 Value *
   1136 Converter::getSrcPtr(int s, int dim, int shl)
   1137 {
   1138    if (srcPtr[s][dim])
   1139       return srcPtr[s][dim];
   1140 
   1141    sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
   1142 
   1143    if (!op)
   1144       return NULL;
   1145 
   1146    Value *index = src(*op, 0, s);
   1147 
   1148    srcPtr[s][dim] = index;
   1149    if (shl)
   1150       srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
   1151    return srcPtr[s][dim];
   1152 }
   1153 
   1154 Value *
   1155 Converter::getDstPtr(int d, int dim, int shl)
   1156 {
   1157    assert(d == 0);
   1158    if (dstPtr[dim])
   1159       return dstPtr[dim];
   1160 
   1161    sm4_op *op = insn->ops[d]->indices[dim].reg.get();
   1162    if (!op)
   1163       return NULL;
   1164 
   1165    Value *index = src(*op, 0, d);
   1166    if (shl)
   1167       index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
   1168 
   1169    return (dstPtr[dim] = index);
   1170 }
   1171 
   1172 Value *
   1173 Converter::getVtxPtr(int s)
   1174 {
   1175    assert(s < 3);
   1176    if (vtxBase[s])
   1177       return vtxBase[s];
   1178 
   1179    sm4_op *op = insn->ops[s + nDstOpnds].get();
   1180    if (!op)
   1181       return NULL;
   1182    int idx = op->indices[0].disp;
   1183 
   1184    vtxBase[s] = getSrcPtr(s, 0, 0);
   1185    vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]);
   1186    return vtxBase[s];
   1187 }
   1188 
   1189 Value *
   1190 Converter::src(int i, int c)
   1191 {
   1192    return src(*insn->ops[i + nDstOpnds], c, i);
   1193 }
   1194 
   1195 Value *
   1196 Converter::dst(int i, int c)
   1197 {
   1198    return dst(*insn->ops[i], c, i);
   1199 }
   1200 
   1201 void
   1202 Converter::saveDst(int i, int c, Value *value)
   1203 {
   1204    if (insn->insn.sat)
   1205       mkOp1(OP_SAT, dTy, value, value);
   1206    return saveDst(*insn->ops[i], c, value, i);
   1207 }
   1208 
   1209 Value *
   1210 Converter::interpolate(const sm4_op& op, int c, int i)
   1211 {
   1212    int idx = op.indices[0].disp;
   1213    int swz = op.swizzle[c];
   1214    operation opr =
   1215       (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
   1216 
   1217    Value *ptr = getSrcPtr(i, 0, 4);
   1218 
   1219    Instruction *insn = new_Instruction(func, opr, TYPE_F32);
   1220 
   1221    insn->setDef(0, getScratch());
   1222    insn->setSrc(0, iSym(idx, swz));
   1223    if (opr == OP_PINTERP)
   1224       insn->setSrc(1, fragCoord[3]);
   1225    if (ptr)
   1226       insn->setIndirect(0, 0, ptr);
   1227 
   1228    insn->setInterpolate(interpMode[idx]);
   1229 
   1230    bb->insertTail(insn);
   1231    return insn->getDef(0);
   1232 }
   1233 
   1234 Value *
   1235 Converter::src(const sm4_op& op, int c, int s)
   1236 {
   1237    const int size = typeSizeof(sTy);
   1238 
   1239    Instruction *ld;
   1240    Value *res, *ptr, *vtx;
   1241    int idx, dim, off;
   1242    const int swz = op.swizzle[c];
   1243 
   1244    switch (op.file) {
   1245    case SM4_FILE_IMMEDIATE32:
   1246       res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
   1247       break;
   1248    case SM4_FILE_IMMEDIATE64:
   1249       assert(c < 2);
   1250       res = loadImm(NULL, op.imm_values[swz].u64);
   1251       break;
   1252    case SM4_FILE_TEMP:
   1253       assert(op.is_index_simple(0));
   1254       idx = op.indices[0].disp;
   1255       if (size == 8)
   1256          res = tData64.load(idx, swz, NULL);
   1257       else
   1258          res = tData32.load(idx, swz, NULL);
   1259       break;
   1260    case SM4_FILE_INPUT:
   1261    case SM4_FILE_INPUT_CONTROL_POINT:
   1262    case SM4_FILE_INPUT_PATCH_CONSTANT:
   1263       if (prog->getType() == Program::TYPE_FRAGMENT)
   1264          return interpolate(op, c, s);
   1265 
   1266       idx = 0;
   1267       if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
   1268          idx = info.numInputs - info.numPatchConstants;
   1269 
   1270       if (op.num_indices == 2) {
   1271          vtx = getVtxPtr(s);
   1272          ptr = getSrcPtr(s, 1, 4);
   1273          idx += op.indices[1].disp;
   1274          res = getSSA();
   1275          ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
   1276          ld->setIndirect(0, 0, ptr);
   1277          ld->setIndirect(0, 1, vtx);
   1278       } else {
   1279          idx += op.indices[0].disp;
   1280          res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
   1281       }
   1282       if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
   1283          res->defs->getInsn()->perPatch = 1;
   1284       break;
   1285    case SM4_FILE_CONSTANT_BUFFER:
   1286       assert(op.num_indices == 2);
   1287       assert(op.is_index_simple(0));
   1288 
   1289       ptr = getSrcPtr(s, 1, 4);
   1290       dim = op.indices[0].disp;
   1291       off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
   1292 
   1293       res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
   1294       break;
   1295    case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
   1296       ptr = getSrcPtr(s, 0, 4);
   1297       off = (op.indices[0].disp * 4 + swz) * 4;
   1298       res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
   1299       break;
   1300    case SM4_FILE_INDEXABLE_TEMP:
   1301    {
   1302       assert(op.is_index_simple(0));
   1303       int a = op.indices[0].disp;
   1304       idx = op.indices[1].disp;
   1305       res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
   1306    }
   1307       break;
   1308    case SM4_FILE_INPUT_PRIMITIVEID:
   1309       recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
   1310       res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
   1311       break;
   1312    case SM4_FILE_INPUT_GS_INSTANCE_ID:
   1313    case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
   1314       recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
   1315       res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
   1316       break;
   1317    case SM4_FILE_CYCLE_COUNTER:
   1318       res =
   1319          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
   1320       break;
   1321    case SM4_FILE_INPUT_FORK_INSTANCE_ID:
   1322    case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
   1323    {
   1324       phaseInstanceUsed = true;
   1325       if (unrollPhase)
   1326          return loadImm(NULL, phaseInstance);
   1327       const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
   1328       res = getScratch();
   1329       res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
   1330       res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
   1331    }
   1332       break;
   1333    case SM4_FILE_INPUT_DOMAIN_POINT:
   1334       assert(swz < 3);
   1335       res = domainPt[swz];
   1336       break;
   1337    case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
   1338       off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
   1339       ptr = getSrcPtr(s, 0, 4);
   1340       res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
   1341       break;
   1342    case SM4_FILE_RESOURCE:
   1343    case SM4_FILE_SAMPLER:
   1344    case SM4_FILE_UNORDERED_ACCESS_VIEW:
   1345       return NULL;
   1346    case SM4_FILE_INPUT_THREAD_ID:
   1347       res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
   1348       break;
   1349    case SM4_FILE_INPUT_THREAD_GROUP_ID:
   1350       res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
   1351       break;
   1352    case SM4_FILE_FUNCTION_INPUT:
   1353    case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
   1354       assert(!"unhandled source file");
   1355       return NULL;
   1356    default:
   1357       assert(!"invalid source file");
   1358       return NULL;
   1359    }
   1360 
   1361    if (op.abs)
   1362       res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
   1363    if (op.neg)
   1364       res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
   1365    return res;
   1366 }
   1367 
   1368 Value *
   1369 Converter::dst(const sm4_op &op, int c, int i)
   1370 {
   1371    switch (op.file) {
   1372    case SM4_FILE_TEMP:
   1373       return tData32.acquire(op.indices[0].disp, c);
   1374    case SM4_FILE_INDEXABLE_TEMP:
   1375       return getScratch();
   1376    case SM4_FILE_OUTPUT:
   1377       if (prog->getType() == Program::TYPE_FRAGMENT)
   1378          return oData.acquire(op.indices[0].disp, c);
   1379       return getScratch();
   1380    case SM4_FILE_NULL:
   1381       return NULL;
   1382    case SM4_FILE_OUTPUT_DEPTH:
   1383    case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
   1384    case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
   1385    case SM4_FILE_OUTPUT_COVERAGE_MASK:
   1386       return getScratch();
   1387    case SM4_FILE_IMMEDIATE32:
   1388    case SM4_FILE_IMMEDIATE64:
   1389    case SM4_FILE_CONSTANT_BUFFER:
   1390    case SM4_FILE_RESOURCE:
   1391    case SM4_FILE_SAMPLER:
   1392    case SM4_FILE_UNORDERED_ACCESS_VIEW:
   1393       assert(!"invalid destination file");
   1394       return NULL;
   1395    default:
   1396       assert(!"invalid file");
   1397       return NULL;
   1398    }
   1399 }
   1400 
   1401 void
   1402 Converter::saveFragDepth(operation op, Value *value)
   1403 {
   1404    if (op == OP_MIN || op == OP_MAX) {
   1405       Value *zIn;
   1406       zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
   1407       value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn);
   1408    }
   1409    oData.store(info.io.fragDepth, 2, NULL, value);
   1410 }
   1411 
   1412 void
   1413 Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
   1414 {
   1415    Symbol *sym;
   1416    Instruction *st;
   1417    int a, idx;
   1418 
   1419    switch (op.file) {
   1420    case SM4_FILE_TEMP:
   1421       idx = op.indices[0].disp;
   1422       tData32.store(idx, c, NULL, value);
   1423       break;
   1424    case SM4_FILE_INDEXABLE_TEMP:
   1425       a = op.indices[0].disp;
   1426       idx = op.indices[1].disp;
   1427       // FIXME: shift is wrong, depends in lData
   1428       lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
   1429       break;
   1430    case SM4_FILE_OUTPUT:
   1431       assert(op.num_indices == 1);
   1432       idx = op.indices[0].disp;
   1433       if (prog->getType() == Program::TYPE_FRAGMENT) {
   1434          oData.store(idx, c, NULL, value);
   1435       } else {
   1436          if (phase)
   1437             idx += info.numOutputs - info.numPatchConstants;
   1438          const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
   1439          sym = oSym(idx, c);
   1440          if (sym->reg.file == FILE_SHADER_OUTPUT)
   1441             st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
   1442          else
   1443             st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
   1444          st->perPatch = phase ? 1 : 0;
   1445       }
   1446       break;
   1447    case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
   1448       saveFragDepth(OP_MAX, value);
   1449       break;
   1450    case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
   1451       saveFragDepth(OP_MIN, value);
   1452       break;
   1453    case SM4_FILE_OUTPUT_DEPTH:
   1454       saveFragDepth(OP_NOP, value);
   1455       break;
   1456    case SM4_FILE_OUTPUT_COVERAGE_MASK:
   1457       oData.store(info.io.sampleMask, 0, NULL, value);
   1458       break;
   1459    case SM4_FILE_IMMEDIATE32:
   1460    case SM4_FILE_IMMEDIATE64:
   1461    case SM4_FILE_INPUT:
   1462    case SM4_FILE_CONSTANT_BUFFER:
   1463    case SM4_FILE_RESOURCE:
   1464    case SM4_FILE_SAMPLER:
   1465       assert(!"invalid destination file");
   1466       return;
   1467    default:
   1468       assert(!"invalid file");
   1469       return;
   1470    }
   1471 }
   1472 
   1473 void
   1474 Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
   1475 {
   1476    Value *res[4] = { NULL, NULL, NULL, NULL };
   1477    unsigned int c, d;
   1478 
   1479    for (c = 0; c < 4; ++c)
   1480       if (dst0[c])
   1481          tex->tex.mask |= 1 << swz[c];
   1482    for (d = 0, c = 0; c < 4; ++c)
   1483       if (tex->tex.mask & (1 << c))
   1484          tex->setDef(d++, (res[c] = getScratch()));
   1485 
   1486    bb->insertTail(tex);
   1487 
   1488    if (insn->opcode == SM4_OPCODE_RESINFO) {
   1489       if (tex->tex.target.getDim() == 1) {
   1490 	 res[2] = loadImm(NULL, 0);
   1491          if (!tex->tex.target.isArray())
   1492             res[1] = res[2];
   1493       } else
   1494       if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
   1495          res[2] = loadImm(NULL, 0);
   1496       }
   1497       for (c = 0; c < 4; ++c) {
   1498          if (!dst0[c])
   1499             continue;
   1500          Value *src = res[swz[c]];
   1501          assert(src);
   1502          switch (insn->insn.resinfo_return_type) {
   1503          case 0:
   1504             mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
   1505             break;
   1506          case 1:
   1507             mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
   1508             if (swz[c] < tex->tex.target.getDim())
   1509                mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
   1510             break;
   1511          default:
   1512             mkMov(dst0[c], src);
   1513             break;
   1514          }
   1515       }
   1516    } else {
   1517       for (c = 0; c < 4; ++c)
   1518          if (dst0[c])
   1519             mkMov(dst0[c], res[swz[c]]);
   1520    }
   1521 }
   1522 
   1523 void
   1524 Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
   1525 {
   1526    TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
   1527    texi->tex.query = query;
   1528 
   1529    assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
   1530 
   1531    const int rOp = (query == TXQ_DIMS) ? 2 : 1;
   1532    const int sOp = (query == TXQ_DIMS) ? 0 : 1;
   1533 
   1534    const int tR = insn->ops[rOp]->indices[0].disp;
   1535 
   1536    texi->setTexture(resourceType[tR][0], tR, 0);
   1537 
   1538    texi->setSrc(0, src(sOp, 0)); // mip level or sample index
   1539 
   1540    emitTex(dst0, texi, insn->ops[rOp]->swizzle);
   1541 }
   1542 
   1543 void
   1544 Converter::handleLOAD(Value *dst0[4])
   1545 {
   1546    TexInstruction *texi = new_TexInstruction(func, OP_TXF);
   1547    unsigned int c;
   1548 
   1549    const int tR = insn->ops[2]->indices[0].disp;
   1550 
   1551    texi->setTexture(resourceType[tR][0], tR, 0);
   1552 
   1553    for (c = 0; c < texi->tex.target.getArgCount(); ++c)
   1554       texi->setSrc(c, src(0, c));
   1555 
   1556    if (texi->tex.target == TEX_TARGET_BUFFER) {
   1557       texi->tex.levelZero = true;
   1558    } else {
   1559       texi->setSrc(c++, src(0, 3));
   1560       for (c = 0; c < 3; ++c) {
   1561          texi->tex.offset[0][c] = insn->sample_offset[c];
   1562 	 if (texi->tex.offset[0][c])
   1563             texi->tex.useOffsets = 1;
   1564       }
   1565    }
   1566 
   1567    emitTex(dst0, texi, insn->ops[2]->swizzle);
   1568 }
   1569 
   1570 // order of nv50 ir sources: x y z/layer lod/bias dc
   1571 void
   1572 Converter::handleSAMPLE(operation opr, Value *dst0[4])
   1573 {
   1574    TexInstruction *texi = new_TexInstruction(func, opr);
   1575    unsigned int c, s;
   1576    Value *arg[4], *src0[4];
   1577    Value *val;
   1578    Value *lod = NULL, *dc = NULL;
   1579 
   1580    const int tR = insn->ops[2]->indices[0].disp;
   1581    const int tS = insn->ops[3]->indices[0].disp;
   1582 
   1583    TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
   1584 
   1585    for (c = 0; c < tgt.getArgCount(); ++c)
   1586       arg[c] = src0[c] = src(0, c);
   1587 
   1588    if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
   1589        insn->opcode == SM4_OPCODE_SAMPLE_B) {
   1590       lod = src(3, 0);
   1591    } else
   1592    if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
   1593        insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
   1594       dc = src(3, 0);
   1595       if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
   1596          texi->tex.levelZero = true;
   1597    } else
   1598    if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
   1599       for (c = 0; c < tgt.getDim(); ++c) {
   1600          texi->dPdx[c] = src(3, c);
   1601          texi->dPdy[c] = src(4, c);
   1602       }
   1603    }
   1604 
   1605    if (tgt.isCube()) {
   1606       for (c = 0; c < 3; ++c)
   1607          src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
   1608       val = getScratch();
   1609       mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
   1610       mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
   1611       mkOp1(OP_RCP, TYPE_F32, val, val);
   1612       for (c = 0; c < 3; ++c)
   1613          src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
   1614    }
   1615 
   1616    for (s = 0; s < tgt.getArgCount(); ++s)
   1617       texi->setSrc(s, src0[s]);
   1618    if (lod)
   1619       texi->setSrc(s++, lod);
   1620    if (dc)
   1621       texi->setSrc(s++, dc);
   1622 
   1623    for (c = 0; c < 3; ++c) {
   1624       texi->tex.offset[0][c] = insn->sample_offset[c];
   1625       if (texi->tex.offset[0][c])
   1626          texi->tex.useOffsets = 1;
   1627    }
   1628 
   1629    texi->setTexture(tgt, tR, tS);
   1630 
   1631    emitTex(dst0, texi, insn->ops[2]->swizzle);
   1632 }
   1633 
   1634 void
   1635 Converter::handleDP(Value *dst0[4], int dim)
   1636 {
   1637    Value *src0 = src(0, 0), *src1 = src(1, 0);
   1638    Value *dotp = getScratch();
   1639 
   1640    assert(dim > 0);
   1641 
   1642    mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
   1643    for (int c = 1; c < dim; ++c)
   1644       mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
   1645 
   1646    for (int c = 0; c < 4; ++c)
   1647       dst0[c] = dotp;
   1648 }
   1649 
   1650 void
   1651 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
   1652 {
   1653    FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
   1654    join->fixed = 1;
   1655    conv->insertHead(join);
   1656 
   1657    fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
   1658    fork->insertBefore(fork->getExit(), fork->joinAt);
   1659 }
   1660 
   1661 void
   1662 Converter::finalizeShader()
   1663 {
   1664    if (finalized)
   1665       return;
   1666    BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
   1667    entryBBs.pop();
   1668 
   1669    finalized = true;
   1670 
   1671    bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
   1672    setPosition(epilogue, true);
   1673 
   1674    if (prog->getType() == Program::TYPE_FRAGMENT)
   1675       exportOutputs();
   1676 
   1677    mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
   1678 }
   1679 
   1680 #define FOR_EACH_DST0_ENABLED_CHANNEL32(chan)         \
   1681    for ((chan) = 0; (chan) < 4; ++(chan))             \
   1682       if (insn->ops[0].get()->mask & (1 << (chan)))
   1683 
   1684 #define FOR_EACH_DST0_ENABLED_CHANNEL64(chan)         \
   1685    for ((chan) = 0; (chan) < 2; ++(chan))             \
   1686       if (insn->ops[0].get()->mask & (1 << (chan)))
   1687 
   1688 bool
   1689 Converter::checkDstSrcAliasing() const
   1690 {
   1691    for (unsigned int d = 0; d < nDstOpnds; ++d) {
   1692       for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
   1693          if (insn->ops[d]->file != insn->ops[s]->file)
   1694             continue;
   1695          int i = insn->ops[s]->num_indices - 1;
   1696          if (i != insn->ops[d]->num_indices - 1)
   1697             continue;
   1698          if (insn->ops[d]->is_index_simple(i) &&
   1699              insn->ops[s]->is_index_simple(i) &&
   1700              insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
   1701             return true;
   1702       }
   1703    }
   1704    return false;
   1705 }
   1706 
   1707 bool
   1708 Converter::handleInstruction(unsigned int pos)
   1709 {
   1710    Value *dst0[4], *rDst0[4];
   1711    Value *dst1[4], *rDst1[4];
   1712    int c, nc;
   1713 
   1714    insn = sm4.insns[pos];
   1715    enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
   1716 
   1717    operation op = cvtOpcode(opcode);
   1718 
   1719    sTy = inferSrcType(opcode);
   1720    dTy = inferDstType(opcode);
   1721 
   1722    nc = dTy == TYPE_F64 ? 2 : 4;
   1723 
   1724    nDstOpnds = getDstOpndCount(opcode);
   1725 
   1726    bool useScratchDst = checkDstSrcAliasing();
   1727 
   1728    INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
   1729 
   1730    if (nDstOpnds >= 1) {
   1731       for (c = 0; c < nc; ++c)
   1732          rDst0[c] = dst0[c] =
   1733             insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
   1734       if (useScratchDst)
   1735          for (c = 0; c < nc; ++c)
   1736             dst0[c] = rDst0[c] ? getScratch() : NULL;
   1737    }
   1738 
   1739    if (nDstOpnds >= 2) {
   1740       for (c = 0; c < nc; ++c)
   1741          rDst1[c] = dst1[c] =
   1742             insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
   1743       if (useScratchDst)
   1744          for (c = 0; c < nc; ++c)
   1745             dst1[c] = rDst1[c] ? getScratch() : NULL;
   1746    }
   1747 
   1748    switch (insn->opcode) {
   1749    case SM4_OPCODE_ADD:
   1750    case SM4_OPCODE_AND:
   1751    case SM4_OPCODE_DIV:
   1752    case SM4_OPCODE_IADD:
   1753    case SM4_OPCODE_IMAX:
   1754    case SM4_OPCODE_IMIN:
   1755    case SM4_OPCODE_MIN:
   1756    case SM4_OPCODE_MAX:
   1757    case SM4_OPCODE_MUL:
   1758    case SM4_OPCODE_OR:
   1759    case SM4_OPCODE_UMAX:
   1760    case SM4_OPCODE_UMIN:
   1761    case SM4_OPCODE_XOR:
   1762       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1763          Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
   1764          if (dTy == TYPE_F32)
   1765             insn->ftz = 1;
   1766       }
   1767       break;
   1768 
   1769    case SM4_OPCODE_ISHL:
   1770    case SM4_OPCODE_ISHR:
   1771    case SM4_OPCODE_USHR:
   1772       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1773          Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
   1774          insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
   1775       }
   1776       break;
   1777 
   1778    case SM4_OPCODE_IMAD:
   1779    case SM4_OPCODE_MAD:
   1780    case SM4_OPCODE_UMAD:
   1781       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1782          mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
   1783       }
   1784       break;
   1785 
   1786    case SM4_OPCODE_DADD:
   1787    case SM4_OPCODE_DMAX:
   1788    case SM4_OPCODE_DMIN:
   1789    case SM4_OPCODE_DMUL:
   1790       FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
   1791          mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
   1792       }
   1793       break;
   1794 
   1795    case SM4_OPCODE_UDIV:
   1796       for (c = 0; c < 4; ++c) {
   1797          Value *dvn, *dvs;
   1798          if (dst0[c] || dst1[c]) {
   1799             dvn = src(0, c);
   1800             dvs = src(1, c);
   1801          }
   1802          if (dst0[c])
   1803             mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
   1804          if (dst1[c])
   1805             mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
   1806       }
   1807       break;
   1808 
   1809    case SM4_OPCODE_IMUL:
   1810    case SM4_OPCODE_UMUL:
   1811       for (c = 0; c < 4; ++c) {
   1812          Value *a, *b;
   1813          if (dst0[c] || dst1[c]) {
   1814             a = src(0, c);
   1815             b = src(1, c);
   1816          }
   1817          if (dst0[c])
   1818             mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
   1819                NV50_IR_SUBOP_MUL_HIGH;
   1820          if (dst1[c])
   1821             mkOp2(OP_MUL, dTy, dst1[c], a, b);
   1822       }
   1823       break;
   1824 
   1825    case SM4_OPCODE_DP2:
   1826       handleDP(dst0, 2);
   1827       break;
   1828    case SM4_OPCODE_DP3:
   1829       handleDP(dst0, 3);
   1830       break;
   1831    case SM4_OPCODE_DP4:
   1832       handleDP(dst0, 4);
   1833       break;
   1834 
   1835    case SM4_OPCODE_DERIV_RTX:
   1836    case SM4_OPCODE_DERIV_RTX_COARSE:
   1837    case SM4_OPCODE_DERIV_RTX_FINE:
   1838    case SM4_OPCODE_DERIV_RTY:
   1839    case SM4_OPCODE_DERIV_RTY_COARSE:
   1840    case SM4_OPCODE_DERIV_RTY_FINE:
   1841    case SM4_OPCODE_MOV:
   1842    case SM4_OPCODE_INEG:
   1843    case SM4_OPCODE_NOT:
   1844    case SM4_OPCODE_SQRT:
   1845    case SM4_OPCODE_COUNTBITS:
   1846    case SM4_OPCODE_EXP:
   1847    case SM4_OPCODE_LOG:
   1848    case SM4_OPCODE_RCP:
   1849       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1850          mkOp1(op, dTy, dst0[c], src(0, c));
   1851       }
   1852       break;
   1853 
   1854    case SM4_OPCODE_FRC:
   1855       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1856          Value *val = getScratch();
   1857          Value *src0 = src(0, c);
   1858          mkOp1(OP_FLOOR, TYPE_F32, val, src0);
   1859          mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
   1860       }
   1861       break;
   1862 
   1863    case SM4_OPCODE_MOVC:
   1864       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
   1865          mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
   1866                src(0, c));
   1867       break;
   1868 
   1869    case SM4_OPCODE_ROUND_NE:
   1870    case SM4_OPCODE_ROUND_NI:
   1871    case SM4_OPCODE_ROUND_PI:
   1872    case SM4_OPCODE_ROUND_Z:
   1873       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1874          Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
   1875          rnd->ftz = 1;
   1876          rnd->rnd = cvtRoundingMode(opcode);
   1877       }
   1878       break;
   1879 
   1880    case SM4_OPCODE_RSQ:
   1881       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
   1882          mkOp1(op, dTy, dst0[c], src(0, c));
   1883       break;
   1884 
   1885    case SM4_OPCODE_SINCOS:
   1886       for (c = 0; c < 4; ++c) {
   1887          if (!dst0[c] && !dst1[c])
   1888             continue;
   1889          Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
   1890          if (dst0[c])
   1891             mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
   1892          if (dst1[c])
   1893             mkOp1(OP_COS, TYPE_F32, dst1[c], val);
   1894       }
   1895       break;
   1896 
   1897    case SM4_OPCODE_EQ:
   1898    case SM4_OPCODE_GE:
   1899    case SM4_OPCODE_IEQ:
   1900    case SM4_OPCODE_IGE:
   1901    case SM4_OPCODE_ILT:
   1902    case SM4_OPCODE_LT:
   1903    case SM4_OPCODE_NE:
   1904    case SM4_OPCODE_INE:
   1905    case SM4_OPCODE_ULT:
   1906    case SM4_OPCODE_UGE:
   1907    case SM4_OPCODE_DEQ:
   1908    case SM4_OPCODE_DGE:
   1909    case SM4_OPCODE_DLT:
   1910    case SM4_OPCODE_DNE:
   1911    {
   1912       CondCode cc = cvtCondCode(opcode);
   1913       FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
   1914          CmpInstruction *set;
   1915          set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
   1916          set->setType(dTy, sTy);
   1917          if (sTy == TYPE_F32)
   1918             set->ftz = 1;
   1919       }
   1920    }
   1921       break;
   1922 
   1923    case SM4_OPCODE_FTOI:
   1924    case SM4_OPCODE_FTOU:
   1925       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
   1926          mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
   1927       break;
   1928    case SM4_OPCODE_ITOF:
   1929    case SM4_OPCODE_UTOF:
   1930    case SM4_OPCODE_F32TOF16:
   1931    case SM4_OPCODE_F16TOF32:
   1932    case SM4_OPCODE_DTOF:
   1933    case SM4_OPCODE_FTOD:
   1934       FOR_EACH_DST0_ENABLED_CHANNEL32(c)
   1935          mkCvt(op, dTy, dst0[c], sTy, src(0, c));
   1936       break;
   1937 
   1938    case SM4_OPCODE_CUT:
   1939    case SM4_OPCODE_CUT_STREAM:
   1940       mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
   1941       break;
   1942    case SM4_OPCODE_EMIT:
   1943    case SM4_OPCODE_EMIT_STREAM:
   1944       mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
   1945       break;
   1946    case SM4_OPCODE_EMITTHENCUT:
   1947    case SM4_OPCODE_EMITTHENCUT_STREAM:
   1948    {
   1949       Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL,  mkImm(0));
   1950       cut->fixed = 1;
   1951       cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
   1952    }
   1953       break;
   1954 
   1955    case SM4_OPCODE_DISCARD:
   1956       info.prop.fp.usesDiscard = TRUE;
   1957       mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
   1958          insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
   1959       break;
   1960 
   1961    case SM4_OPCODE_CALL:
   1962    case SM4_OPCODE_CALLC:
   1963       assert(!"CALL/CALLC not implemented");
   1964       break;
   1965 
   1966    case SM4_OPCODE_RET:
   1967       // XXX: the following doesn't work with subroutines / early ret
   1968       if (!haveNextPhase(pos))
   1969          finalizeShader();
   1970       else
   1971          phaseEnded = phase + 1;
   1972       break;
   1973 
   1974    case SM4_OPCODE_IF:
   1975    {
   1976       BasicBlock *ifClause = new BasicBlock(func);
   1977 
   1978       bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
   1979       condBBs.push(bb);
   1980       joinBBs.push(bb);
   1981 
   1982       mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
   1983 
   1984       setPosition(ifClause, true);
   1985    }
   1986       break;
   1987    case SM4_OPCODE_ELSE:
   1988    {
   1989       BasicBlock *elseClause = new BasicBlock(func);
   1990       BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
   1991 
   1992       forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
   1993       condBBs.push(bb);
   1994 
   1995       forkPoint->getExit()->asFlow()->target.bb = elseClause;
   1996       if (!bb->isTerminated())
   1997          mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
   1998 
   1999       setPosition(elseClause, true);
   2000    }
   2001       break;
   2002    case SM4_OPCODE_ENDIF:
   2003    {
   2004       BasicBlock *convPoint = new BasicBlock(func);
   2005       BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
   2006       BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
   2007 
   2008       if (!bb->isTerminated()) {
   2009          // we only want join if none of the clauses ended with CONT/BREAK/RET
   2010          if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
   2011             insertConvergenceOps(convPoint, forkPoint);
   2012          mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
   2013          bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
   2014       }
   2015 
   2016       if (lastBB->getExit()->op == OP_BRA) {
   2017          lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
   2018          lastBB->getExit()->asFlow()->target.bb = convPoint;
   2019       }
   2020       setPosition(convPoint, true);
   2021    }
   2022       break;
   2023 
   2024    case SM4_OPCODE_SWITCH:
   2025    case SM4_OPCODE_CASE:
   2026    case SM4_OPCODE_ENDSWITCH:
   2027       assert(!"SWITCH/CASE/ENDSWITCH not implemented");
   2028       break;
   2029 
   2030    case SM4_OPCODE_LOOP:
   2031    {
   2032       BasicBlock *loopHeader = new BasicBlock(func);
   2033       BasicBlock *loopBreak = new BasicBlock(func);
   2034 
   2035       loopBBs.push(loopHeader);
   2036       breakBBs.push(loopBreak);
   2037       if (loopBBs.getSize() > func->loopNestingBound)
   2038          func->loopNestingBound++;
   2039 
   2040       mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
   2041 
   2042       bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
   2043       setPosition(loopHeader, true);
   2044       mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
   2045    }
   2046       break;
   2047    case SM4_OPCODE_ENDLOOP:
   2048    {
   2049       BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
   2050 
   2051       if (!bb->isTerminated()) {
   2052          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
   2053          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
   2054       }
   2055       setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
   2056    }
   2057       break;
   2058    case SM4_OPCODE_BREAK:
   2059    {
   2060       if (bb->isTerminated())
   2061          break;
   2062       BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
   2063       mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
   2064       bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
   2065    }
   2066       break;
   2067    case SM4_OPCODE_BREAKC:
   2068    {
   2069       BasicBlock *nextBB = new BasicBlock(func);
   2070       BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
   2071       CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
   2072       mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
   2073       bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
   2074       bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
   2075       setPosition(nextBB, true);
   2076    }
   2077       break;
   2078    case SM4_OPCODE_CONTINUE:
   2079    {
   2080       if (bb->isTerminated())
   2081          break;
   2082       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
   2083       mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
   2084       contBB->explicitCont = true;
   2085       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
   2086    }
   2087       break;
   2088    case SM4_OPCODE_CONTINUEC:
   2089    {
   2090       BasicBlock *nextBB = new BasicBlock(func);
   2091       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
   2092       mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
   2093       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
   2094       bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
   2095       setPosition(nextBB, true);
   2096    }
   2097       break;
   2098 
   2099    case SM4_OPCODE_SAMPLE:
   2100    case SM4_OPCODE_SAMPLE_C:
   2101    case SM4_OPCODE_SAMPLE_C_LZ:
   2102    case SM4_OPCODE_SAMPLE_L:
   2103    case SM4_OPCODE_SAMPLE_D:
   2104    case SM4_OPCODE_SAMPLE_B:
   2105       handleSAMPLE(op, dst0);
   2106       break;
   2107    case SM4_OPCODE_LD:
   2108    case SM4_OPCODE_LD_MS:
   2109       handleLOAD(dst0);
   2110       break;
   2111 
   2112    case SM4_OPCODE_GATHER4:
   2113       assert(!"GATHER4 not implemented\n");
   2114       break;
   2115 
   2116    case SM4_OPCODE_RESINFO:
   2117       handleQUERY(dst0, TXQ_DIMS);
   2118       break;
   2119    case SM4_OPCODE_SAMPLE_POS:
   2120       handleQUERY(dst0, TXQ_SAMPLE_POSITION);
   2121       break;
   2122 
   2123    case SM4_OPCODE_NOP:
   2124       mkOp(OP_NOP, TYPE_NONE, NULL);
   2125       break;
   2126 
   2127    case SM4_OPCODE_HS_DECLS:
   2128       // XXX: any significance ?
   2129       break;
   2130    case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
   2131       phase = 0;
   2132       break;
   2133    case SM4_OPCODE_HS_FORK_PHASE:
   2134       if (phase != 1)
   2135          subPhase = 0;
   2136       phase = 1;
   2137       phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
   2138       phaseStart = pos;
   2139       if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
   2140          unrollPhase = true;
   2141       break;
   2142    case SM4_OPCODE_HS_JOIN_PHASE:
   2143       if (phase != 2)
   2144          subPhase = 0;
   2145       phase = 2;
   2146       phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
   2147       phaseStart = pos;
   2148       if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
   2149          unrollPhase = true;
   2150       break;
   2151 
   2152    default:
   2153       ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
   2154       abort();
   2155       return false;
   2156    }
   2157 
   2158    for (c = 0; c < nc; ++c) {
   2159       if (nDstOpnds >= 1 && rDst0[c]) {
   2160          if (dst0[c] != rDst0[c])
   2161             mkMov(rDst0[c], dst0[c]);
   2162          saveDst(0, c, rDst0[c]);
   2163       }
   2164       if (nDstOpnds >= 2 && rDst1[c]) {
   2165          if (dst1[c] != rDst1[c])
   2166             mkMov(rDst1[c], dst1[c]);
   2167          saveDst(1, c, rDst1[c]);
   2168       }
   2169    }
   2170 
   2171    memset(srcPtr, 0, sizeof(srcPtr));
   2172    memset(dstPtr, 0, sizeof(dstPtr));
   2173    memset(vtxBase, 0, sizeof(vtxBase));
   2174    return true;
   2175 }
   2176 
   2177 void
   2178 Converter::exportOutputs()
   2179 {
   2180    for (int i = 0; i < info.numOutputs; ++i) {
   2181       for (int c = 0; c < 4; ++c) {
   2182          if (!oData.exists(i, c))
   2183             continue;
   2184          Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
   2185                                 info.out[i].slot[c] * 4);
   2186          Value *val = oData.load(i, c, NULL);
   2187          if (val)
   2188             mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
   2189       }
   2190    }
   2191 }
   2192 
   2193 Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
   2194    : tData32(this),
   2195      tData64(this),
   2196      oData(this),
   2197      info(*s),
   2198      sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
   2199      prog(p)
   2200 {
   2201    memset(srcPtr, 0, sizeof(srcPtr));
   2202    memset(dstPtr, 0, sizeof(dstPtr));
   2203    memset(vtxBase, 0, sizeof(vtxBase));
   2204 
   2205    memset(interpMode, 0, sizeof(interpMode));
   2206 
   2207    nrRegVals = nrArrays = arrayVol = 0;
   2208 
   2209    for (phase = 3; phase > 0; --phase)
   2210       for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
   2211          out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
   2212 
   2213    unrollPhase = false;
   2214    phaseStart = 0;
   2215    subPhaseCnt[0] = subPhaseCnt[1] = 0;
   2216 }
   2217 
   2218 Converter::~Converter()
   2219 {
   2220    if (lData)
   2221       delete[] lData;
   2222 
   2223    if (subPhaseCnt[0])
   2224       delete[] phaseInstCnt[0];
   2225    if (subPhaseCnt[1])
   2226       delete[] phaseInstCnt[1];
   2227 }
   2228 
   2229 bool
   2230 Converter::haveNextPhase(unsigned int pos) const
   2231 {
   2232    ++pos;
   2233    return (pos < sm4.insns.size()) &&
   2234       (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE ||
   2235        sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE);
   2236 }
   2237 
   2238 bool
   2239 Converter::run()
   2240 {
   2241    parseSignature();
   2242 
   2243    for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
   2244       inspectDeclaration(*sm4.dcls[pos]);
   2245 
   2246    phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
   2247    phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
   2248    for (int i = 0; i < subPhaseCnt[0]; ++i)
   2249       phaseInstCnt[0][i] = -1;
   2250    for (int i = 0; i < subPhaseCnt[1]; ++i)
   2251       phaseInstCnt[1][i] = -1;
   2252    // re-increased in handleDeclaration:
   2253    subPhaseCnt[0] = subPhaseCnt[1] = 0;
   2254 
   2255    allocateValues();
   2256    nrArrays = 0;
   2257    for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
   2258       handleDeclaration(*sm4.dcls[pos]);
   2259 
   2260    info.io.genUserClip = -1; // no UCPs permitted with SM4 shaders
   2261    info.io.clipDistanceMask = (1 << info.io.clipDistanceMask) - 1;
   2262 
   2263    info.assignSlots(&info);
   2264 
   2265    if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
   2266       return true;
   2267 
   2268    BasicBlock *entry = new BasicBlock(prog->main);
   2269    BasicBlock *leave = new BasicBlock(prog->main);
   2270 
   2271    prog->main->setEntry(entry);
   2272    prog->main->setExit(leave);
   2273 
   2274    setPosition(entry, true);
   2275 
   2276    entryBBs.push(entry);
   2277    leaveBBs.push(leave);
   2278 
   2279    if (prog->getType() == Program::TYPE_FRAGMENT) {
   2280       Symbol *sv = mkSysVal(SV_POSITION, 3);
   2281       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
   2282       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
   2283    } else
   2284    if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
   2285       const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
   2286       int c;
   2287       for (c = 0; c < n; ++c)
   2288          domainPt[c] =
   2289             mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
   2290       if (c == 2)
   2291          domainPt[2] = loadImm(NULL, 0.0f);
   2292    }
   2293 
   2294    finalized = false;
   2295    phaseEnded = 0;
   2296    phase = 0;
   2297    subPhase = 0;
   2298    for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
   2299       handleInstruction(pos);
   2300       if (likely(phase == 0) || (phaseEnded < 2))
   2301          continue;
   2302       phaseEnded = 0;
   2303       if (!unrollPhase || !phaseInstanceUsed) {
   2304          ++subPhase;
   2305          continue;
   2306       }
   2307       phaseInstanceUsed = false;
   2308       if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
   2309          pos = phaseStart - 1;
   2310       else
   2311          ++subPhase;
   2312    }
   2313    finalizeShader();
   2314 
   2315    return true;
   2316 }
   2317 
   2318 } // anonymous namespace
   2319 
   2320 namespace nv50_ir {
   2321 
   2322 bool
   2323 Program::makeFromSM4(struct nv50_ir_prog_info *info)
   2324 {
   2325    Converter bld(this, info);
   2326    return bld.run();
   2327 }
   2328 
   2329 } // namespace nv50_ir
   2330