Home | History | Annotate | Download | only in nine
      1 /*
      2  * Copyright 2011 Joakim Sindholt <opensource (at) zhasha.com>
      3  * Copyright 2013 Christoph Bumiller
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * on the rights to use, copy, modify, merge, publish, distribute, sub
      9  * license, and/or sell copies of the Software, and to permit persons to whom
     10  * the Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
     23 
     24 #include "nine_shader.h"
     25 
     26 #include "device9.h"
     27 #include "nine_debug.h"
     28 #include "nine_state.h"
     29 #include "vertexdeclaration9.h"
     30 
     31 #include "util/macros.h"
     32 #include "util/u_memory.h"
     33 #include "util/u_inlines.h"
     34 #include "pipe/p_shader_tokens.h"
     35 #include "tgsi/tgsi_ureg.h"
     36 #include "tgsi/tgsi_dump.h"
     37 
/* Debug channel used by the debug helpers in this file. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style shader dump output; forwards to _nine_debug_printf on
 * DBG_CHANNEL with a NULL second argument. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-opcode special handler (see sm1_op_info.handler). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns the printable mnemonic of a D3DSIO opcode; defined later in the
 * file, declared here because sm1_dump_instruction uses it. */
static inline const char *d3dsio_to_string(unsigned opcode);
     48 
     49 
/* Shader type tags.
 * NOTE(review): presumably the high word of the SM1 version token - confirm
 * against the version parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Maximum nesting depths; they size the cond_labels, loop_labels,
 * loop_or_rep and regs.rL arrays in struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of the `type` field of sm1_src_param / sm1_dst_param
 * (switched on by sm1_dump_immediate). */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file for immediates: one past the last D3DSPR_* value
 * used by sm1_file_char. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position and width (8 bits) of the per-opcode flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; sm1_dump_instruction prints them as the
 * "p" / "b" mnemonic suffix. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Per-component write mask bits (0 = x ... 3 = w), already shifted down. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle (.xyzw): two bits per component, x in the low bits. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* ureg source for slot `index` of the TGSI constant file. */
#define NINE_CONSTANT_SRC(index) \
   ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component `s` (X, Y, Z or W) of `src` into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant slot `index` with component `s` replicated to all channels. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
   NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier bits, shifted down by D3DSP_DSTMOD_SHIFT; tested
 * against sm1_dst_param.mod in sm1_dump_dst_param. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
    105 
/*
 * Source modifiers, shifted down by D3DSP_SRCMOD_SHIFT so that they can be
 * used directly as indices into sm1_mod_str. Availability per shader model:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
    135 
/* Printable name of each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param prints the entry as "<name>(" in front of the register,
 * so NEG renders as "-(" and ABSNEG as "-abs(". */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
    153 
    154 static void
    155 sm1_dump_writemask(BYTE mask)
    156 {
    157     if (mask & 1) DUMP("x"); else DUMP("_");
    158     if (mask & 2) DUMP("y"); else DUMP("_");
    159     if (mask & 4) DUMP("z"); else DUMP("_");
    160     if (mask & 8) DUMP("w"); else DUMP("_");
    161 }
    162 
    163 static void
    164 sm1_dump_swizzle(BYTE s)
    165 {
    166     char c[4] = { 'x', 'y', 'z', 'w' };
    167     DUMP("%c%c%c%c",
    168          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
    169 }
    170 
/* One-letter prefix used when dumping a register, indexed by D3DSPR_*
 * register file. The last entry is D3DSPR_PREDICATE, so the table does not
 * cover NINED3DSPR_IMMEDIATE. Several files (LOOP, COLOROUT, DEPTHOUT,
 * RASTOUT, CONSTINT, CONSTBOOL) get a longer spelling in sm1_dump_reg and
 * only use this table for relative-addressed operands. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
    194 
    195 static void
    196 sm1_dump_reg(BYTE file, INT index)
    197 {
    198     switch (file) {
    199     case D3DSPR_LOOP:
    200         DUMP("aL");
    201         break;
    202     case D3DSPR_COLOROUT:
    203         DUMP("oC%i", index);
    204         break;
    205     case D3DSPR_DEPTHOUT:
    206         DUMP("oDepth");
    207         break;
    208     case D3DSPR_RASTOUT:
    209         DUMP("oRast%i", index);
    210         break;
    211     case D3DSPR_CONSTINT:
    212         DUMP("iconst[%i]", index);
    213         break;
    214     case D3DSPR_CONSTBOOL:
    215         DUMP("bconst[%i]", index);
    216         break;
    217     default:
    218         DUMP("%c%i", sm1_file_char[file], index);
    219         break;
    220     }
    221 }
    222 
/* Decoded source operand of an SM1 instruction. */
struct sm1_src_param
{
    INT idx;                   /* register number within `file` */
    struct sm1_src_param *rel; /* relative-address register, NULL if none */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* 2 bits per channel, see NINED3DSP_NOSWIZZLE */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*, selects the imm member */
    /* Immediate value; the valid member is selected by `type`
     * (see sm1_dump_immediate). */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Defined later in the file. */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
    240 
/* Decoded destination operand of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx;                   /* register number within `file` */
    struct sm1_src_param *rel; /* relative-address register, NULL if none */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* modifier bits */
    /* Result scale as a power of two: dumped as "*2^shift" when positive
     * and "/2^-shift" when negative (see sm1_dump_dst_param). */
    int8_t shift; /* sint4 */
    BYTE type;
};
    251 
    252 static inline void
    253 assert_replicate_swizzle(const struct ureg_src *reg)
    254 {
    255     assert(reg->SwizzleY == reg->SwizzleX &&
    256            reg->SwizzleZ == reg->SwizzleX &&
    257            reg->SwizzleW == reg->SwizzleX);
    258 }
    259 
    260 static void
    261 sm1_dump_immediate(const struct sm1_src_param *param)
    262 {
    263     switch (param->type) {
    264     case NINED3DSPTYPE_FLOAT4:
    265         DUMP("{ %f %f %f %f }",
    266              param->imm.f[0], param->imm.f[1],
    267              param->imm.f[2], param->imm.f[3]);
    268         break;
    269     case NINED3DSPTYPE_INT4:
    270         DUMP("{ %i %i %i %i }",
    271              param->imm.i[0], param->imm.i[1],
    272              param->imm.i[2], param->imm.i[3]);
    273         break;
    274     case NINED3DSPTYPE_BOOL:
    275         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
    276         break;
    277     default:
    278         assert(0);
    279         break;
    280     }
    281 }
    282 
    283 static void
    284 sm1_dump_src_param(const struct sm1_src_param *param)
    285 {
    286     if (param->file == NINED3DSPR_IMMEDIATE) {
    287         assert(!param->mod &&
    288                !param->rel &&
    289                param->swizzle == NINED3DSP_NOSWIZZLE);
    290         sm1_dump_immediate(param);
    291         return;
    292     }
    293 
    294     if (param->mod)
    295         DUMP("%s(", sm1_mod_str[param->mod]);
    296     if (param->rel) {
    297         DUMP("%c[", sm1_file_char[param->file]);
    298         sm1_dump_src_param(param->rel);
    299         DUMP("+%i]", param->idx);
    300     } else {
    301         sm1_dump_reg(param->file, param->idx);
    302     }
    303     if (param->mod)
    304        DUMP(")");
    305     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
    306        DUMP(".");
    307        sm1_dump_swizzle(param->swizzle);
    308     }
    309 }
    310 
    311 static void
    312 sm1_dump_dst_param(const struct sm1_dst_param *param)
    313 {
    314    if (param->mod & NINED3DSPDM_SATURATE)
    315       DUMP("sat ");
    316    if (param->mod & NINED3DSPDM_PARTIALP)
    317       DUMP("pp ");
    318    if (param->mod & NINED3DSPDM_CENTROID)
    319       DUMP("centroid ");
    320    if (param->shift < 0)
    321       DUMP("/%u ", 1 << -param->shift);
    322    if (param->shift > 0)
    323       DUMP("*%u ", 1 << param->shift);
    324 
    325    if (param->rel) {
    326       DUMP("%c[", sm1_file_char[param->file]);
    327       sm1_dump_src_param(param->rel);
    328       DUMP("+%i]", param->idx);
    329    } else {
    330       sm1_dump_reg(param->file, param->idx);
    331    }
    332    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
    333       DUMP(".");
    334       sm1_dump_writemask(param->mask);
    335    }
    336 }
    337 
/* Declaration info filled in by sm1_read_semantic: the declared register
 * together with its usage/usage index and sampler type. */
struct sm1_semantic
{
   struct sm1_dst_param reg;
   BYTE sampler_type;
   D3DDECLUSAGE usage;
   BYTE usage_idx;
};
    345 
/* Static description of one SM1 opcode and how it is translated. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, the `opcode`
     * member should be ignored completely */
    unsigned sio;    /* D3DSIO_x */
    unsigned opcode; /* TGSI_OPCODE_x */

    /* supported version range per shader stage; still set even when a
     * handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
    366 
/* One decoded SM1 instruction, as printed by sm1_dump_instruction. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* */
    BOOL coissue;    /* dumped as the "_co" mnemonic suffix */
    BOOL predicated; /* when set, `pred` is dumped as the "@<pred>" prefix */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    /* NOTE(review): presumably the storage that src[i].rel points to -
     * confirm against the parser. */
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    /* NOTE(review): presumably the storage that dst[0].rel points to -
     * confirm against the parser. */
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
    383 
    384 static void
    385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
    386 {
    387     unsigned i;
    388 
    389     /* no info stored for these: */
    390     if (insn->opcode == D3DSIO_DCL)
    391         return;
    392     for (i = 0; i < indent; ++i)
    393         DUMP("  ");
    394 
    395     if (insn->predicated) {
    396         DUMP("@");
    397         sm1_dump_src_param(&insn->pred);
    398         DUMP(" ");
    399     }
    400     DUMP("%s", d3dsio_to_string(insn->opcode));
    401     if (insn->flags) {
    402         switch (insn->opcode) {
    403         case D3DSIO_TEX:
    404             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
    405             break;
    406         default:
    407             DUMP("_%x", insn->flags);
    408             break;
    409         }
    410     }
    411     if (insn->coissue)
    412         DUMP("_co");
    413     DUMP(" ");
    414 
    415     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
    416         sm1_dump_dst_param(&insn->dst[i]);
    417         DUMP(" ");
    418     }
    419 
    420     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
    421         sm1_dump_src_param(&insn->src[i]);
    422         DUMP(" ");
    423     }
    424     if (insn->opcode == D3DSIO_DEF ||
    425         insn->opcode == D3DSIO_DEFI ||
    426         insn->opcode == D3DSIO_DEFB)
    427         sm1_dump_immediate(&insn->src[0]);
    428 
    429     DUMP("\n");
    430 }
    431 
/* A constant defined inside the shader: the constant register index it
 * overrides and the TGSI immediate that holds its value (see
 * tx_set_lconstf / tx_set_lconsti / tx_set_lconstb). */
struct sm1_local_const
{
    INT idx;
    struct ureg_src reg;
    float f[4]; /* for indirect addressing of float constants */
};
    438 
/* Complete state of one SM1 byte code -> TGSI translation run. */
struct shader_translator
{
    /* NOTE(review): presumably start of the token stream, current token and
     * start of the next instruction - confirm against the parser. */
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds (exclusive) on constant register indices, enforced by
     * tx_lconst{f,i,b} and tx_set_lconst{f,i,b} */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval;
    boolean face_is_sysval_integer;
    unsigned texcoord_sn; /* semantic name used for PS texcoord inputs */

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r; /* temporaries, grown on demand by tx_temp_alloc */
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;       /* predicate register, see tx_pred_alloc */
        struct ureg_dst address; /* TGSI address register, see tx_addr_alloc */
        struct ureg_dst a0;      /* temporary backing a0, see tx_addr_alloc */
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* ARRAY_SIZE(regs.r) */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local float/int/bool constants; each array grows in steps of 8 */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    boolean indirect_const_access;
    boolean failure; /* set when translation hits an error */

    struct nine_vs_output_info output_info[16];
    int num_outputs; /* number of valid output_info entries */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
    523 
/* Shader stage tests; both expect a `struct shader_translator *tx` in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* If `cond` holds, mark the translation as failed and return from the
 * enclosing void function. Expects `tx` in scope. The expansion is a bare
 * `if` block, not do { } while (0): existing call sites invoke it without a
 * trailing semicolon, and it must not be used as the body of an unbraced
 * if/else. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Defined later in the file. */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
    531 
    532 static void
    533 sm1_instruction_check(const struct sm1_instruction *insn)
    534 {
    535     if (insn->opcode == D3DSIO_CRS)
    536     {
    537         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
    538         {
    539             DBG("CRS.mask.w\n");
    540         }
    541     }
    542 }
    543 
    544 static void
    545 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
    546                     int mask, int output_index)
    547 {
    548     tx->output_info[tx->num_outputs].output_semantic = Usage;
    549     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
    550     tx->output_info[tx->num_outputs].mask = mask;
    551     tx->output_info[tx->num_outputs].output_index = output_index;
    552     tx->num_outputs++;
    553 }
    554 
    555 static boolean
    556 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
    557 {
    558    INT i;
    559 
    560    if (index < 0 || index >= tx->num_constf_allowed) {
    561        tx->failure = TRUE;
    562        return FALSE;
    563    }
    564    for (i = 0; i < tx->num_lconstf; ++i) {
    565       if (tx->lconstf[i].idx == index) {
    566          *src = tx->lconstf[i].reg;
    567          return TRUE;
    568       }
    569    }
    570    return FALSE;
    571 }
    572 static boolean
    573 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
    574 {
    575    int i;
    576 
    577    if (index < 0 || index >= tx->num_consti_allowed) {
    578        tx->failure = TRUE;
    579        return FALSE;
    580    }
    581    for (i = 0; i < tx->num_lconsti; ++i) {
    582       if (tx->lconsti[i].idx == index) {
    583          *src = tx->lconsti[i].reg;
    584          return TRUE;
    585       }
    586    }
    587    return FALSE;
    588 }
    589 static boolean
    590 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
    591 {
    592    int i;
    593 
    594    if (index < 0 || index >= tx->num_constb_allowed) {
    595        tx->failure = TRUE;
    596        return FALSE;
    597    }
    598    for (i = 0; i < tx->num_lconstb; ++i) {
    599       if (tx->lconstb[i].idx == index) {
    600          *src = tx->lconstb[i].reg;
    601          return TRUE;
    602       }
    603    }
    604    return FALSE;
    605 }
    606 
    607 static void
    608 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
    609 {
    610     unsigned n;
    611 
    612     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
    613 
    614     for (n = 0; n < tx->num_lconstf; ++n)
    615         if (tx->lconstf[n].idx == index)
    616             break;
    617     if (n == tx->num_lconstf) {
    618        if ((n % 8) == 0) {
    619           tx->lconstf = REALLOC(tx->lconstf,
    620                                 (n + 0) * sizeof(tx->lconstf[0]),
    621                                 (n + 8) * sizeof(tx->lconstf[0]));
    622           assert(tx->lconstf);
    623        }
    624        tx->num_lconstf++;
    625     }
    626     tx->lconstf[n].idx = index;
    627     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
    628 
    629     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
    630 }
    631 static void
    632 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
    633 {
    634     unsigned n;
    635 
    636     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
    637 
    638     for (n = 0; n < tx->num_lconsti; ++n)
    639         if (tx->lconsti[n].idx == index)
    640             break;
    641     if (n == tx->num_lconsti) {
    642        if ((n % 8) == 0) {
    643           tx->lconsti = REALLOC(tx->lconsti,
    644                                 (n + 0) * sizeof(tx->lconsti[0]),
    645                                 (n + 8) * sizeof(tx->lconsti[0]));
    646           assert(tx->lconsti);
    647        }
    648        tx->num_lconsti++;
    649     }
    650 
    651     tx->lconsti[n].idx = index;
    652     tx->lconsti[n].reg = tx->native_integers ?
    653        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
    654        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
    655 }
    656 static void
    657 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
    658 {
    659     unsigned n;
    660 
    661     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
    662 
    663     for (n = 0; n < tx->num_lconstb; ++n)
    664         if (tx->lconstb[n].idx == index)
    665             break;
    666     if (n == tx->num_lconstb) {
    667        if ((n % 8) == 0) {
    668           tx->lconstb = REALLOC(tx->lconstb,
    669                                 (n + 0) * sizeof(tx->lconstb[0]),
    670                                 (n + 8) * sizeof(tx->lconstb[0]));
    671           assert(tx->lconstb);
    672        }
    673        tx->num_lconstb++;
    674     }
    675 
    676     tx->lconstb[n].idx = index;
    677     tx->lconstb[n].reg = tx->native_integers ?
    678        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
    679        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
    680 }
    681 
    682 static inline struct ureg_dst
    683 tx_scratch(struct shader_translator *tx)
    684 {
    685     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
    686         tx->failure = TRUE;
    687         return tx->regs.t[0];
    688     }
    689     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
    690         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
    691     return tx->regs.t[tx->num_scratch++];
    692 }
    693 
    694 static inline struct ureg_dst
    695 tx_scratch_scalar(struct shader_translator *tx)
    696 {
    697     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
    698 }
    699 
    700 static inline struct ureg_src
    701 tx_src_scalar(struct ureg_dst dst)
    702 {
    703     struct ureg_src src = ureg_src(dst);
    704     int c = ffs(dst.WriteMask) - 1;
    705     if (dst.WriteMask == (1 << c))
    706         src = ureg_scalar(src, c);
    707     return src;
    708 }
    709 
    710 static inline void
    711 tx_temp_alloc(struct shader_translator *tx, INT idx)
    712 {
    713     assert(idx >= 0);
    714     if (idx >= tx->num_temp) {
    715        unsigned k = tx->num_temp;
    716        unsigned n = idx + 1;
    717        tx->regs.r = REALLOC(tx->regs.r,
    718                             k * sizeof(tx->regs.r[0]),
    719                             n * sizeof(tx->regs.r[0]));
    720        for (; k < n; ++k)
    721           tx->regs.r[k] = ureg_dst_undef();
    722        tx->num_temp = n;
    723     }
    724     if (ureg_dst_is_undef(tx->regs.r[idx]))
    725         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
    726 }
    727 
    728 static inline void
    729 tx_addr_alloc(struct shader_translator *tx, INT idx)
    730 {
    731     assert(idx == 0);
    732     if (ureg_dst_is_undef(tx->regs.address))
    733         tx->regs.address = ureg_DECL_address(tx->ureg);
    734     if (ureg_dst_is_undef(tx->regs.a0))
    735         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
    736 }
    737 
    738 static inline void
    739 tx_pred_alloc(struct shader_translator *tx, INT idx)
    740 {
    741     assert(idx == 0);
    742     if (ureg_dst_is_undef(tx->regs.p))
    743         tx->regs.p = ureg_DECL_predicate(tx->ureg);
    744 }
    745 
    746 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
    747  * the projection should be applied on the texture. It doesn't
    748  * apply on texkill.
    749  * The doc is very imprecise here (it says the projection is done
    750  * before rasterization, thus in vs, which seems wrong since ps instructions
    751  * are affected differently)
    752  * For now we only apply to the ps TEX instruction and TEXBEM.
    753  * Perhaps some other instructions would need it */
    754 static inline void
    755 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
    756                       struct ureg_src src, INT idx)
    757 {
    758     struct ureg_dst tmp;
    759     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
    760 
    761     /* no projection */
    762     if (dim == 1) {
    763         ureg_MOV(tx->ureg, dst, src);
    764     } else {
    765         tmp = tx_scratch_scalar(tx);
    766         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
    767         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
    768     }
    769 }
    770 
    771 static inline void
    772 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
    773                          unsigned target, struct ureg_src src0,
    774                          struct ureg_src src1, INT idx)
    775 {
    776     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
    777     struct ureg_dst tmp;
    778 
    779     /* dim == 1: no projection
    780      * Looks like must be disabled when it makes no
    781      * sense according the texture dimensions
    782      */
    783     if (dim == 1 || dim <= target) {
    784         ureg_TEX(tx->ureg, dst, target, src0, src1);
    785     } else if (dim == 4) {
    786         ureg_TXP(tx->ureg, dst, target, src0, src1);
    787     } else {
    788         tmp = tx_scratch(tx);
    789         apply_ps1x_projection(tx, tmp, src0, idx);
    790         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
    791     }
    792 }
    793 
    794 static inline void
    795 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
    796 {
    797     assert(IS_PS);
    798     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
    799     if (ureg_src_is_undef(tx->regs.vT[idx]))
    800        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
    801                                              TGSI_INTERPOLATE_PERSPECTIVE);
    802 }
    803 
    804 static inline unsigned *
    805 tx_bgnloop(struct shader_translator *tx)
    806 {
    807     tx->loop_depth++;
    808     if (tx->loop_depth_max < tx->loop_depth)
    809         tx->loop_depth_max = tx->loop_depth;
    810     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
    811     return &tx->loop_labels[tx->loop_depth - 1];
    812 }
    813 
    814 static inline unsigned *
    815 tx_endloop(struct shader_translator *tx)
    816 {
    817     assert(tx->loop_depth);
    818     tx->loop_depth--;
    819     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
    820                      ureg_get_instruction_number(tx->ureg));
    821     return &tx->loop_labels[tx->loop_depth];
    822 }
    823 
    824 static struct ureg_dst
    825 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
    826 {
    827     const unsigned l = tx->loop_depth - 1;
    828 
    829     if (!tx->loop_depth)
    830     {
    831         DBG("loop counter requested outside of loop\n");
    832         return ureg_dst_undef();
    833     }
    834 
    835     if (ureg_dst_is_undef(tx->regs.rL[l])) {
    836         /* loop or rep ctr creation */
    837         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
    838         tx->loop_or_rep[l] = loop_or_rep;
    839     }
    840     /* loop - rep - endloop - endrep not allowed */
    841     assert(tx->loop_or_rep[l] == loop_or_rep);
    842 
    843     return tx->regs.rL[l];
    844 }
    845 
    846 static struct ureg_src
    847 tx_get_loopal(struct shader_translator *tx)
    848 {
    849     int loop_level = tx->loop_depth - 1;
    850 
    851     while (loop_level >= 0) {
    852         /* handle loop - rep - endrep - endloop case */
    853         if (tx->loop_or_rep[loop_level])
    854             /* the value is in the loop counter y component (nine implementation) */
    855             return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
    856         loop_level--;
    857     }
    858 
    859     DBG("aL counter requested outside of loop\n");
    860     return ureg_src_undef();
    861 }
    862 
    863 static inline unsigned *
    864 tx_cond(struct shader_translator *tx)
    865 {
    866    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
    867    tx->cond_depth++;
    868    return &tx->cond_labels[tx->cond_depth - 1];
    869 }
    870 
    871 static inline unsigned *
    872 tx_elsecond(struct shader_translator *tx)
    873 {
    874    assert(tx->cond_depth);
    875    return &tx->cond_labels[tx->cond_depth - 1];
    876 }
    877 
    878 static inline void
    879 tx_endcond(struct shader_translator *tx)
    880 {
    881    assert(tx->cond_depth);
    882    tx->cond_depth--;
    883    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
    884                     ureg_get_instruction_number(tx->ureg));
    885 }
    886 
    887 static inline struct ureg_dst
    888 nine_ureg_dst_register(unsigned file, int index)
    889 {
    890     return ureg_dst(ureg_src_register(file, index));
    891 }
    892 
    893 static inline struct ureg_src
    894 nine_get_position_input(struct shader_translator *tx)
    895 {
    896     struct ureg_program *ureg = tx->ureg;
    897 
    898     if (tx->wpos_is_sysval)
    899         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
    900     else
    901         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
    902                                   0, TGSI_INTERPOLATE_LINEAR);
    903 }
    904 
    905 static struct ureg_src
    906 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
    907 {
    908     struct ureg_program *ureg = tx->ureg;
    909     struct ureg_src src;
    910     struct ureg_dst tmp;
    911 
    912     switch (param->file)
    913     {
    914     case D3DSPR_TEMP:
    915         assert(!param->rel);
    916         tx_temp_alloc(tx, param->idx);
    917         src = ureg_src(tx->regs.r[param->idx]);
    918         break;
    919  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
    920     case D3DSPR_ADDR:
    921         assert(!param->rel);
    922         if (IS_VS) {
    923             assert(param->idx == 0);
    924             /* the address register (vs only) must be
    925              * assigned before use */
    926             assert(!ureg_dst_is_undef(tx->regs.a0));
    927             /* Round to lowest for vs1.1 (contrary to the doc), else
    928              * round to nearest */
    929             if (tx->version.major < 2 && tx->version.minor < 2)
    930                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
    931             else
    932                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
    933             src = ureg_src(tx->regs.address);
    934         } else {
    935             if (tx->version.major < 2 && tx->version.minor < 4) {
    936                 /* no subroutines, so should be defined */
    937                 src = ureg_src(tx->regs.tS[param->idx]);
    938             } else {
    939                 tx_texcoord_alloc(tx, param->idx);
    940                 src = tx->regs.vT[param->idx];
    941             }
    942         }
    943         break;
    944     case D3DSPR_INPUT:
    945         if (IS_VS) {
    946             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
    947         } else {
    948             if (tx->version.major < 3) {
    949                 assert(!param->rel);
    950                 src = ureg_DECL_fs_input_cyl_centroid(
    951                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
    952                     TGSI_INTERPOLATE_COLOR, 0,
    953                     tx->info->force_color_in_centroid ?
    954                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
    955                     0, 1);
    956             } else {
    957                 if(param->rel) {
    958                     /* Copy all inputs (non consecutive)
    959                      * to temp array (consecutive).
    960                      * This is not good for performance.
    961                      * A better way would be to have inputs
    962                      * consecutive (would need implement alternative
    963                      * way to match vs outputs and ps inputs).
    964                      * However even with the better way, the temp array
    965                      * copy would need to be used if some inputs
    966                      * are not GENERIC or if they have different
    967                      * interpolation flag. */
    968                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
    969                         int i;
    970                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
    971                         for (i = 0; i < 10; i++) {
    972                             if (!ureg_src_is_undef(tx->regs.v[i]))
    973                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
    974                             else
    975                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
    976                         }
    977                     }
    978                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
    979                 } else {
    980                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
    981                     src = tx->regs.v[param->idx];
    982                 }
    983             }
    984         }
    985         break;
    986     case D3DSPR_PREDICATE:
    987         assert(!param->rel);
    988         tx_pred_alloc(tx, param->idx);
    989         src = ureg_src(tx->regs.p);
    990         break;
    991     case D3DSPR_SAMPLER:
    992         assert(param->mod == NINED3DSPSM_NONE);
    993         assert(param->swizzle == NINED3DSP_NOSWIZZLE);
    994         assert(!param->rel);
    995         src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
    996         break;
    997     case D3DSPR_CONST:
    998         assert(!param->rel || IS_VS);
    999         if (param->rel)
   1000             tx->indirect_const_access = TRUE;
   1001         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
   1002             if (!param->rel)
   1003                 nine_info_mark_const_f_used(tx->info, param->idx);
   1004             /* vswp constant handling: we use two buffers
   1005              * to fit all the float constants. The special handling
   1006              * doesn't need to be elsewhere, because all the instructions
   1007              * accessing the constants directly are VS1, and swvp
   1008              * is VS >= 2 */
   1009             if (IS_VS && tx->info->swvp_on) {
   1010                 if (!param->rel) {
   1011                     if (param->idx < 4096) {
   1012                         src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
   1013                         src = ureg_src_dimension(src, 0);
   1014                     } else {
   1015                         src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
   1016                         src = ureg_src_dimension(src, 1);
   1017                     }
   1018                 } else {
   1019                     src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
   1020                     src = ureg_src_dimension(src, 0);
   1021                 }
   1022             } else
   1023                 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
   1024         }
   1025         if (!IS_VS && tx->version.major < 2) {
   1026             /* ps 1.X clamps constants */
   1027             tmp = tx_scratch(tx);
   1028             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
   1029             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
   1030             src = ureg_src(tmp);
   1031         }
   1032         break;
   1033     case D3DSPR_CONST2:
   1034     case D3DSPR_CONST3:
   1035     case D3DSPR_CONST4:
   1036         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
   1037         assert(!"CONST2/3/4");
   1038         src = ureg_imm1f(ureg, 0.0f);
   1039         break;
   1040     case D3DSPR_CONSTINT:
   1041         /* relative adressing only possible for float constants in vs */
   1042         assert(!param->rel);
   1043         if (!tx_lconsti(tx, &src, param->idx)) {
   1044             nine_info_mark_const_i_used(tx->info, param->idx);
   1045             if (IS_VS && tx->info->swvp_on) {
   1046                 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
   1047                 src = ureg_src_dimension(src, 2);
   1048             } else
   1049                 src = ureg_src_register(TGSI_FILE_CONSTANT,
   1050                                         tx->info->const_i_base + param->idx);
   1051         }
   1052         break;
   1053     case D3DSPR_CONSTBOOL:
   1054         assert(!param->rel);
   1055         if (!tx_lconstb(tx, &src, param->idx)) {
   1056            char r = param->idx / 4;
   1057            char s = param->idx & 3;
   1058            nine_info_mark_const_b_used(tx->info, param->idx);
   1059            if (IS_VS && tx->info->swvp_on) {
   1060                src = ureg_src_register(TGSI_FILE_CONSTANT, r);
   1061                src = ureg_src_dimension(src, 3);
   1062            } else
   1063                src = ureg_src_register(TGSI_FILE_CONSTANT,
   1064                                        tx->info->const_b_base + r);
   1065            src = ureg_swizzle(src, s, s, s, s);
   1066         }
   1067         break;
   1068     case D3DSPR_LOOP:
   1069         if (ureg_dst_is_undef(tx->regs.address))
   1070             tx->regs.address = ureg_DECL_address(ureg);
   1071         if (!tx->native_integers)
   1072             ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
   1073         else
   1074             ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
   1075         src = ureg_src(tx->regs.address);
   1076         break;
   1077     case D3DSPR_MISCTYPE:
   1078         switch (param->idx) {
   1079         case D3DSMO_POSITION:
   1080            if (ureg_src_is_undef(tx->regs.vPos))
   1081               tx->regs.vPos = nine_get_position_input(tx);
   1082            if (tx->shift_wpos) {
   1083                /* TODO: do this only once */
   1084                struct ureg_dst wpos = tx_scratch(tx);
   1085                ureg_ADD(ureg, wpos, tx->regs.vPos,
   1086                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
   1087                src = ureg_src(wpos);
   1088            } else {
   1089                src = tx->regs.vPos;
   1090            }
   1091            break;
   1092         case D3DSMO_FACE:
   1093            if (ureg_src_is_undef(tx->regs.vFace)) {
   1094                if (tx->face_is_sysval_integer) {
   1095                    tmp = tx_scratch(tx);
   1096                    tx->regs.vFace =
   1097                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
   1098 
   1099                    /* convert bool to float */
   1100                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
   1101                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
   1102                    tx->regs.vFace = ureg_src(tmp);
   1103                } else {
   1104                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
   1105                                                        TGSI_SEMANTIC_FACE, 0,
   1106                                                        TGSI_INTERPOLATE_CONSTANT);
   1107                }
   1108                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
   1109            }
   1110            src = tx->regs.vFace;
   1111            break;
   1112         default:
   1113             assert(!"invalid src D3DSMO");
   1114             break;
   1115         }
   1116         assert(!param->rel);
   1117         break;
   1118     case D3DSPR_TEMPFLOAT16:
   1119         break;
   1120     default:
   1121         assert(!"invalid src D3DSPR");
   1122     }
   1123     if (param->rel)
   1124         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
   1125 
   1126     switch (param->mod) {
   1127     case NINED3DSPSM_DW:
   1128         tmp = tx_scratch(tx);
   1129         /* NOTE: app is not allowed to read w with this modifier */
   1130         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
   1131         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
   1132         src = ureg_src(tmp);
   1133         break;
   1134     case NINED3DSPSM_DZ:
   1135         tmp = tx_scratch(tx);
   1136         /* NOTE: app is not allowed to read z with this modifier */
   1137         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
   1138         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
   1139         src = ureg_src(tmp);
   1140         break;
   1141     default:
   1142         break;
   1143     }
   1144 
   1145     if (param->swizzle != NINED3DSP_NOSWIZZLE)
   1146         src = ureg_swizzle(src,
   1147                            (param->swizzle >> 0) & 0x3,
   1148                            (param->swizzle >> 2) & 0x3,
   1149                            (param->swizzle >> 4) & 0x3,
   1150                            (param->swizzle >> 6) & 0x3);
   1151 
   1152     switch (param->mod) {
   1153     case NINED3DSPSM_ABS:
   1154         src = ureg_abs(src);
   1155         break;
   1156     case NINED3DSPSM_ABSNEG:
   1157         src = ureg_negate(ureg_abs(src));
   1158         break;
   1159     case NINED3DSPSM_NEG:
   1160         src = ureg_negate(src);
   1161         break;
   1162     case NINED3DSPSM_BIAS:
   1163         tmp = tx_scratch(tx);
   1164         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
   1165         src = ureg_src(tmp);
   1166         break;
   1167     case NINED3DSPSM_BIASNEG:
   1168         tmp = tx_scratch(tx);
   1169         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
   1170         src = ureg_src(tmp);
   1171         break;
   1172     case NINED3DSPSM_NOT:
   1173         if (tx->native_integers) {
   1174             tmp = tx_scratch(tx);
   1175             ureg_NOT(ureg, tmp, src);
   1176             src = ureg_src(tmp);
   1177             break;
   1178         }
   1179         /* fall through */
   1180     case NINED3DSPSM_COMP:
   1181         tmp = tx_scratch(tx);
   1182         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
   1183         src = ureg_src(tmp);
   1184         break;
   1185     case NINED3DSPSM_DZ:
   1186     case NINED3DSPSM_DW:
   1187         /* Already handled*/
   1188         break;
   1189     case NINED3DSPSM_SIGN:
   1190         tmp = tx_scratch(tx);
   1191         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
   1192         src = ureg_src(tmp);
   1193         break;
   1194     case NINED3DSPSM_SIGNNEG:
   1195         tmp = tx_scratch(tx);
   1196         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
   1197         src = ureg_src(tmp);
   1198         break;
   1199     case NINED3DSPSM_X2:
   1200         tmp = tx_scratch(tx);
   1201         ureg_ADD(ureg, tmp, src, src);
   1202         src = ureg_src(tmp);
   1203         break;
   1204     case NINED3DSPSM_X2NEG:
   1205         tmp = tx_scratch(tx);
   1206         ureg_ADD(ureg, tmp, src, src);
   1207         src = ureg_negate(ureg_src(tmp));
   1208         break;
   1209     default:
   1210         assert(param->mod == NINED3DSPSM_NONE);
   1211         break;
   1212     }
   1213 
   1214     return src;
   1215 }
   1216 
   1217 static struct ureg_dst
   1218 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
   1219 {
   1220     struct ureg_dst dst;
   1221 
   1222     switch (param->file)
   1223     {
   1224     case D3DSPR_TEMP:
   1225         assert(!param->rel);
   1226         tx_temp_alloc(tx, param->idx);
   1227         dst = tx->regs.r[param->idx];
   1228         break;
   1229  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
   1230     case D3DSPR_ADDR:
   1231         assert(!param->rel);
   1232         if (tx->version.major < 2 && !IS_VS) {
   1233             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
   1234                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
   1235             dst = tx->regs.tS[param->idx];
   1236         } else
   1237         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
   1238             tx_texcoord_alloc(tx, param->idx);
   1239             dst = ureg_dst(tx->regs.vT[param->idx]);
   1240         } else {
   1241             tx_addr_alloc(tx, param->idx);
   1242             dst = tx->regs.a0;
   1243         }
   1244         break;
   1245     case D3DSPR_RASTOUT:
   1246         assert(!param->rel);
   1247         switch (param->idx) {
   1248         case 0:
   1249             if (ureg_dst_is_undef(tx->regs.oPos))
   1250                 tx->regs.oPos =
   1251                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
   1252             dst = tx->regs.oPos;
   1253             break;
   1254         case 1:
   1255             if (ureg_dst_is_undef(tx->regs.oFog))
   1256                 tx->regs.oFog =
   1257                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
   1258             dst = tx->regs.oFog;
   1259             break;
   1260         case 2:
   1261             if (ureg_dst_is_undef(tx->regs.oPts))
   1262                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
   1263             dst = tx->regs.oPts;
   1264             break;
   1265         default:
   1266             assert(0);
   1267             break;
   1268         }
   1269         break;
   1270  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
   1271     case D3DSPR_OUTPUT:
   1272         if (tx->version.major < 3) {
   1273             assert(!param->rel);
   1274             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
   1275         } else {
   1276             assert(!param->rel); /* TODO */
   1277             assert(param->idx < ARRAY_SIZE(tx->regs.o));
   1278             dst = tx->regs.o[param->idx];
   1279         }
   1280         break;
   1281     case D3DSPR_ATTROUT: /* VS */
   1282     case D3DSPR_COLOROUT: /* PS */
   1283         assert(param->idx >= 0 && param->idx < 4);
   1284         assert(!param->rel);
   1285         tx->info->rt_mask |= 1 << param->idx;
   1286         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
   1287             /* ps < 3: oCol[0] will have fog blending afterward */
   1288             if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
   1289                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
   1290             } else {
   1291                 tx->regs.oCol[param->idx] =
   1292                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
   1293             }
   1294         }
   1295         dst = tx->regs.oCol[param->idx];
   1296         if (IS_VS && tx->version.major < 3)
   1297             dst = ureg_saturate(dst);
   1298         break;
   1299     case D3DSPR_DEPTHOUT:
   1300         assert(!param->rel);
   1301         if (ureg_dst_is_undef(tx->regs.oDepth))
   1302            tx->regs.oDepth =
   1303               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
   1304                                       TGSI_WRITEMASK_Z, 0, 1);
   1305         dst = tx->regs.oDepth; /* XXX: must write .z component */
   1306         break;
   1307     case D3DSPR_PREDICATE:
   1308         assert(!param->rel);
   1309         tx_pred_alloc(tx, param->idx);
   1310         dst = tx->regs.p;
   1311         break;
   1312     case D3DSPR_TEMPFLOAT16:
   1313         DBG("unhandled D3DSPR: %u\n", param->file);
   1314         break;
   1315     default:
   1316         assert(!"invalid dst D3DSPR");
   1317         break;
   1318     }
   1319     if (param->rel)
   1320         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
   1321 
   1322     if (param->mask != NINED3DSP_WRITEMASK_ALL)
   1323         dst = ureg_writemask(dst, param->mask);
   1324     if (param->mod & NINED3DSPDM_SATURATE)
   1325         dst = ureg_saturate(dst);
   1326 
   1327     return dst;
   1328 }
   1329 
   1330 static struct ureg_dst
   1331 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
   1332 {
   1333     if (param->shift) {
   1334         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
   1335         return tx->regs.tdst;
   1336     }
   1337     return _tx_dst_param(tx, param);
   1338 }
   1339 
   1340 static void
   1341 tx_apply_dst0_modifiers(struct shader_translator *tx)
   1342 {
   1343     struct ureg_dst rdst;
   1344     float f;
   1345 
   1346     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
   1347         return;
   1348     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
   1349 
   1350     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
   1351 
   1352     if (tx->insn.dst[0].shift < 0)
   1353         f = 1.0f / (1 << -tx->insn.dst[0].shift);
   1354     else
   1355         f = 1 << tx->insn.dst[0].shift;
   1356 
   1357     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
   1358 }
   1359 
   1360 static struct ureg_src
   1361 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
   1362 {
   1363     struct ureg_src src;
   1364 
   1365     assert(!param->shift);
   1366     assert(!(param->mod & NINED3DSPDM_SATURATE));
   1367 
   1368     switch (param->file) {
   1369     case D3DSPR_INPUT:
   1370         if (IS_VS) {
   1371             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
   1372         } else {
   1373             assert(!param->rel);
   1374             assert(param->idx < ARRAY_SIZE(tx->regs.v));
   1375             src = tx->regs.v[param->idx];
   1376         }
   1377         break;
   1378     default:
   1379         src = ureg_src(tx_dst_param(tx, param));
   1380         break;
   1381     }
   1382     if (param->rel)
   1383         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
   1384 
   1385     if (!param->mask)
   1386         WARN("mask is 0, using identity swizzle\n");
   1387 
   1388     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
   1389         char s[4];
   1390         int n;
   1391         int c;
   1392         for (n = 0, c = 0; c < 4; ++c)
   1393             if (param->mask & (1 << c))
   1394                 s[n++] = c;
   1395         assert(n);
   1396         for (c = n; c < 4; ++c)
   1397             s[c] = s[n - 1];
   1398         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
   1399     }
   1400     return src;
   1401 }
   1402 
   1403 static HRESULT
   1404 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
   1405 {
   1406     struct ureg_program *ureg = tx->ureg;
   1407     struct ureg_dst dst;
   1408     struct ureg_src src[2];
   1409     struct sm1_src_param *src_mat = &tx->insn.src[1];
   1410     unsigned i;
   1411 
   1412     dst = tx_dst_param(tx, &tx->insn.dst[0]);
   1413     src[0] = tx_src_param(tx, &tx->insn.src[0]);
   1414 
   1415     for (i = 0; i < n; i++)
   1416     {
   1417         const unsigned m = (1 << i);
   1418 
   1419         src[1] = tx_src_param(tx, src_mat);
   1420         src_mat->idx++;
   1421 
   1422         if (!(dst.WriteMask & m))
   1423             continue;
   1424 
   1425         /* XXX: src == dst case ? */
   1426 
   1427         switch (k) {
   1428         case 3:
   1429             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
   1430             break;
   1431         case 4:
   1432             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
   1433             break;
   1434         default:
   1435             DBG("invalid operation: M%ux%u\n", m, n);
   1436             break;
   1437         }
   1438     }
   1439 
   1440     return D3D_OK;
   1441 }
   1442 
/* Expands to two zero values; presumably fills a {min, max} version pair
 * for a shader type that does not support the instruction -- TODO confirm
 * against the instruction table. */
#define VNOTSUPPORTED   0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(maj, min)     (((maj) << 8) | (min))
   1445 
   1446 static inline const char *
   1447 d3dsio_to_string( unsigned opcode )
   1448 {
   1449     static const char *names[] = {
   1450         "NOP",
   1451         "MOV",
   1452         "ADD",
   1453         "SUB",
   1454         "MAD",
   1455         "MUL",
   1456         "RCP",
   1457         "RSQ",
   1458         "DP3",
   1459         "DP4",
   1460         "MIN",
   1461         "MAX",
   1462         "SLT",
   1463         "SGE",
   1464         "EXP",
   1465         "LOG",
   1466         "LIT",
   1467         "DST",
   1468         "LRP",
   1469         "FRC",
   1470         "M4x4",
   1471         "M4x3",
   1472         "M3x4",
   1473         "M3x3",
   1474         "M3x2",
   1475         "CALL",
   1476         "CALLNZ",
   1477         "LOOP",
   1478         "RET",
   1479         "ENDLOOP",
   1480         "LABEL",
   1481         "DCL",
   1482         "POW",
   1483         "CRS",
   1484         "SGN",
   1485         "ABS",
   1486         "NRM",
   1487         "SINCOS",
   1488         "REP",
   1489         "ENDREP",
   1490         "IF",
   1491         "IFC",
   1492         "ELSE",
   1493         "ENDIF",
   1494         "BREAK",
   1495         "BREAKC",
   1496         "MOVA",
   1497         "DEFB",
   1498         "DEFI",
   1499         NULL,
   1500         NULL,
   1501         NULL,
   1502         NULL,
   1503         NULL,
   1504         NULL,
   1505         NULL,
   1506         NULL,
   1507         NULL,
   1508         NULL,
   1509         NULL,
   1510         NULL,
   1511         NULL,
   1512         NULL,
   1513         NULL,
   1514         "TEXCOORD",
   1515         "TEXKILL",
   1516         "TEX",
   1517         "TEXBEM",
   1518         "TEXBEML",
   1519         "TEXREG2AR",
   1520         "TEXREG2GB",
   1521         "TEXM3x2PAD",
   1522         "TEXM3x2TEX",
   1523         "TEXM3x3PAD",
   1524         "TEXM3x3TEX",
   1525         NULL,
   1526         "TEXM3x3SPEC",
   1527         "TEXM3x3VSPEC",
   1528         "EXPP",
   1529         "LOGP",
   1530         "CND",
   1531         "DEF",
   1532         "TEXREG2RGB",
   1533         "TEXDP3TEX",
   1534         "TEXM3x2DEPTH",
   1535         "TEXDP3",
   1536         "TEXM3x3",
   1537         "TEXDEPTH",
   1538         "CMP",
   1539         "BEM",
   1540         "DP2ADD",
   1541         "DSX",
   1542         "DSY",
   1543         "TEXLDD",
   1544         "SETP",
   1545         "TEXLDL",
   1546         "BREAKP"
   1547     };
   1548 
   1549     if (opcode < ARRAY_SIZE(names)) return names[opcode];
   1550 
   1551     switch (opcode) {
   1552     case D3DSIO_PHASE: return "PHASE";
   1553     case D3DSIO_COMMENT: return "COMMENT";
   1554     case D3DSIO_END: return "END";
   1555     default:
   1556         return NULL;
   1557     }
   1558 }
   1559 
/* Initializer for an instruction-table entry with every field zero/NULL. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when at least one of the entry's four version bounds is non-zero;
 * an entry built from NULL_INSTRUCTION therefore evaluates to zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the translation handler for opcode 'name'. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the translation handler for opcode 'name': a
 * static function taking the translator and returning an HRESULT. The
 * function body follows the macro invocation. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
   1575 
   1576 DECL_SPECIAL(NOP)
   1577 {
   1578     /* Nothing to do. NOP was used to avoid hangs
   1579      * with very old d3d drivers. */
   1580     return D3D_OK;
   1581 }
   1582 
   1583 DECL_SPECIAL(SUB)
   1584 {
   1585     struct ureg_program *ureg = tx->ureg;
   1586     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   1587     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
   1588     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
   1589 
   1590     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
   1591     return D3D_OK;
   1592 }
   1593 
   1594 DECL_SPECIAL(ABS)
   1595 {
   1596     struct ureg_program *ureg = tx->ureg;
   1597     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   1598     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   1599 
   1600     ureg_MOV(ureg, dst, ureg_abs(src));
   1601     return D3D_OK;
   1602 }
   1603 
   1604 DECL_SPECIAL(M4x4)
   1605 {
   1606     return NineTranslateInstruction_Mkxn(tx, 4, 4);
   1607 }
   1608 
   1609 DECL_SPECIAL(M4x3)
   1610 {
   1611     return NineTranslateInstruction_Mkxn(tx, 4, 3);
   1612 }
   1613 
   1614 DECL_SPECIAL(M3x4)
   1615 {
   1616     return NineTranslateInstruction_Mkxn(tx, 3, 4);
   1617 }
   1618 
   1619 DECL_SPECIAL(M3x3)
   1620 {
   1621     return NineTranslateInstruction_Mkxn(tx, 3, 3);
   1622 }
   1623 
   1624 DECL_SPECIAL(M3x2)
   1625 {
   1626     return NineTranslateInstruction_Mkxn(tx, 3, 2);
   1627 }
   1628 
   1629 DECL_SPECIAL(CMP)
   1630 {
   1631     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
   1632              tx_src_param(tx, &tx->insn.src[0]),
   1633              tx_src_param(tx, &tx->insn.src[2]),
   1634              tx_src_param(tx, &tx->insn.src[1]));
   1635     return D3D_OK;
   1636 }
   1637 
   1638 DECL_SPECIAL(CND)
   1639 {
   1640     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   1641     struct ureg_dst cgt;
   1642     struct ureg_src cnd;
   1643 
   1644     /* the coissue flag was a tip for compilers to advise to
   1645      * execute two operations at the same time, in cases
   1646      * the two executions had same dst with different channels.
   1647      * It has no effect on current hw. However it seems CND
   1648      * is affected. The handling of this very specific case
   1649      * handled below mimick wine behaviour */
   1650     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
   1651         ureg_MOV(tx->ureg,
   1652                  dst, tx_src_param(tx, &tx->insn.src[1]));
   1653         return D3D_OK;
   1654     }
   1655 
   1656     cnd = tx_src_param(tx, &tx->insn.src[0]);
   1657     cgt = tx_scratch(tx);
   1658 
   1659     if (tx->version.major == 1 && tx->version.minor < 4)
   1660         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
   1661 
   1662     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
   1663 
   1664     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
   1665              tx_src_param(tx, &tx->insn.src[1]),
   1666              tx_src_param(tx, &tx->insn.src[2]));
   1667     return D3D_OK;
   1668 }
   1669 
   1670 DECL_SPECIAL(CALL)
   1671 {
   1672     assert(tx->insn.src[0].idx < tx->num_inst_labels);
   1673     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
   1674     return D3D_OK;
   1675 }
   1676 
   1677 DECL_SPECIAL(CALLNZ)
   1678 {
   1679     struct ureg_program *ureg = tx->ureg;
   1680     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
   1681 
   1682     if (!tx->native_integers)
   1683         ureg_IF(ureg, src, tx_cond(tx));
   1684     else
   1685         ureg_UIF(ureg, src, tx_cond(tx));
   1686     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
   1687     tx_endcond(tx);
   1688     ureg_ENDIF(ureg);
   1689     return D3D_OK;
   1690 }
   1691 
   1692 DECL_SPECIAL(LOOP)
   1693 {
   1694     struct ureg_program *ureg = tx->ureg;
   1695     unsigned *label;
   1696     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
   1697     struct ureg_dst ctr;
   1698     struct ureg_dst tmp;
   1699     struct ureg_src ctrx;
   1700 
   1701     label = tx_bgnloop(tx);
   1702     ctr = tx_get_loopctr(tx, TRUE);
   1703     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
   1704 
   1705     /* src: num_iterations - start_value of al - step for al - 0 */
   1706     ureg_MOV(ureg, ctr, src);
   1707     ureg_BGNLOOP(tx->ureg, label);
   1708     tmp = tx_scratch_scalar(tx);
   1709     /* Initially ctr.x contains the number of iterations.
   1710      * ctr.y will contain the updated value of al.
   1711      * We decrease ctr.x at the end of every iteration,
   1712      * and stop when it reaches 0. */
   1713 
   1714     if (!tx->native_integers) {
   1715         /* case src and ctr contain floats */
   1716         /* to avoid precision issue, we stop when ctr <= 0.5 */
   1717         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
   1718         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
   1719     } else {
   1720         /* case src and ctr contain integers */
   1721         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
   1722         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
   1723     }
   1724     ureg_BRK(ureg);
   1725     tx_endcond(tx);
   1726     ureg_ENDIF(ureg);
   1727     return D3D_OK;
   1728 }
   1729 
   1730 DECL_SPECIAL(RET)
   1731 {
   1732     ureg_RET(tx->ureg);
   1733     return D3D_OK;
   1734 }
   1735 
   1736 DECL_SPECIAL(ENDLOOP)
   1737 {
   1738     struct ureg_program *ureg = tx->ureg;
   1739     struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
   1740     struct ureg_dst dst_ctrx, dst_al;
   1741     struct ureg_src src_ctr, al_counter;
   1742 
   1743     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
   1744     dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
   1745     src_ctr = ureg_src(ctr);
   1746     al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
   1747 
   1748     /* ctr.x -= 1
   1749      * ctr.y (aL) += step */
   1750     if (!tx->native_integers) {
   1751         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
   1752         ureg_ADD(ureg, dst_al, src_ctr, al_counter);
   1753     } else {
   1754         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
   1755         ureg_UADD(ureg, dst_al, src_ctr, al_counter);
   1756     }
   1757     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
   1758     return D3D_OK;
   1759 }
   1760 
   1761 DECL_SPECIAL(LABEL)
   1762 {
   1763     unsigned k = tx->num_inst_labels;
   1764     unsigned n = tx->insn.src[0].idx;
   1765     assert(n < 2048);
   1766     if (n >= k)
   1767        tx->inst_labels = REALLOC(tx->inst_labels,
   1768                                  k * sizeof(tx->inst_labels[0]),
   1769                                  n * sizeof(tx->inst_labels[0]));
   1770 
   1771     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
   1772     return D3D_OK;
   1773 }
   1774 
   1775 DECL_SPECIAL(SINCOS)
   1776 {
   1777     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   1778     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   1779 
   1780     assert(!(dst.WriteMask & 0xc));
   1781 
   1782     dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
   1783     ureg_SCS(tx->ureg, dst, src);
   1784     return D3D_OK;
   1785 }
   1786 
   1787 DECL_SPECIAL(SGN)
   1788 {
   1789     ureg_SSG(tx->ureg,
   1790              tx_dst_param(tx, &tx->insn.dst[0]),
   1791              tx_src_param(tx, &tx->insn.src[0]));
   1792     return D3D_OK;
   1793 }
   1794 
   1795 DECL_SPECIAL(REP)
   1796 {
   1797     struct ureg_program *ureg = tx->ureg;
   1798     unsigned *label;
   1799     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
   1800     struct ureg_dst ctr;
   1801     struct ureg_dst tmp;
   1802     struct ureg_src ctrx;
   1803 
   1804     label = tx_bgnloop(tx);
   1805     ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
   1806     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
   1807 
   1808     /* NOTE: rep must be constant, so we don't have to save the count */
   1809     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
   1810 
   1811     /* rep: num_iterations - 0 - 0 - 0 */
   1812     ureg_MOV(ureg, ctr, rep);
   1813     ureg_BGNLOOP(ureg, label);
   1814     tmp = tx_scratch_scalar(tx);
   1815     /* Initially ctr.x contains the number of iterations.
   1816      * We decrease ctr.x at the end of every iteration,
   1817      * and stop when it reaches 0. */
   1818 
   1819     if (!tx->native_integers) {
   1820         /* case src and ctr contain floats */
   1821         /* to avoid precision issue, we stop when ctr <= 0.5 */
   1822         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
   1823         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
   1824     } else {
   1825         /* case src and ctr contain integers */
   1826         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
   1827         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
   1828     }
   1829     ureg_BRK(ureg);
   1830     tx_endcond(tx);
   1831     ureg_ENDIF(ureg);
   1832 
   1833     return D3D_OK;
   1834 }
   1835 
   1836 DECL_SPECIAL(ENDREP)
   1837 {
   1838     struct ureg_program *ureg = tx->ureg;
   1839     struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
   1840     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
   1841     struct ureg_src src_ctr = ureg_src(ctr);
   1842 
   1843     /* ctr.x -= 1 */
   1844     if (!tx->native_integers)
   1845         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
   1846     else
   1847         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
   1848 
   1849     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
   1850     return D3D_OK;
   1851 }
   1852 
   1853 DECL_SPECIAL(ENDIF)
   1854 {
   1855     tx_endcond(tx);
   1856     ureg_ENDIF(tx->ureg);
   1857     return D3D_OK;
   1858 }
   1859 
   1860 DECL_SPECIAL(IF)
   1861 {
   1862     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   1863 
   1864     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
   1865         ureg_UIF(tx->ureg, src, tx_cond(tx));
   1866     else
   1867         ureg_IF(tx->ureg, src, tx_cond(tx));
   1868 
   1869     return D3D_OK;
   1870 }
   1871 
   1872 static inline unsigned
   1873 sm1_insn_flags_to_tgsi_setop(BYTE flags)
   1874 {
   1875     switch (flags) {
   1876     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
   1877     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
   1878     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
   1879     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
   1880     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
   1881     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
   1882     default:
   1883         assert(!"invalid comparison flags");
   1884         return TGSI_OPCODE_SGT;
   1885     }
   1886 }
   1887 
   1888 DECL_SPECIAL(IFC)
   1889 {
   1890     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
   1891     struct ureg_src src[2];
   1892     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
   1893     src[0] = tx_src_param(tx, &tx->insn.src[0]);
   1894     src[1] = tx_src_param(tx, &tx->insn.src[1]);
   1895     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
   1896     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
   1897     return D3D_OK;
   1898 }
   1899 
   1900 DECL_SPECIAL(ELSE)
   1901 {
   1902     ureg_ELSE(tx->ureg, tx_elsecond(tx));
   1903     return D3D_OK;
   1904 }
   1905 
   1906 DECL_SPECIAL(BREAKC)
   1907 {
   1908     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
   1909     struct ureg_src src[2];
   1910     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
   1911     src[0] = tx_src_param(tx, &tx->insn.src[0]);
   1912     src[1] = tx_src_param(tx, &tx->insn.src[1]);
   1913     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
   1914     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
   1915     ureg_BRK(tx->ureg);
   1916     tx_endcond(tx);
   1917     ureg_ENDIF(tx->ureg);
   1918     return D3D_OK;
   1919 }
   1920 
   1921 static const char *sm1_declusage_names[] =
   1922 {
   1923     [D3DDECLUSAGE_POSITION] = "POSITION",
   1924     [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
   1925     [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
   1926     [D3DDECLUSAGE_NORMAL] = "NORMAL",
   1927     [D3DDECLUSAGE_PSIZE] = "PSIZE",
   1928     [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
   1929     [D3DDECLUSAGE_TANGENT] = "TANGENT",
   1930     [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
   1931     [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
   1932     [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
   1933     [D3DDECLUSAGE_COLOR] = "COLOR",
   1934     [D3DDECLUSAGE_FOG] = "FOG",
   1935     [D3DDECLUSAGE_DEPTH] = "DEPTH",
   1936     [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
   1937 };
   1938 
   1939 static inline unsigned
   1940 sm1_to_nine_declusage(struct sm1_semantic *dcl)
   1941 {
   1942     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
   1943 }
   1944 
   1945 static void
   1946 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
   1947                       boolean tc,
   1948                       struct sm1_semantic *dcl)
   1949 {
   1950     BYTE index = dcl->usage_idx;
   1951 
   1952     /* For everything that is not matching to a TGSI_SEMANTIC_****,
   1953      * we match to a TGSI_SEMANTIC_GENERIC with index.
   1954      *
   1955      * The index can be anything UINT16 and usage_idx is BYTE,
   1956      * so we can fit everything. It doesn't matter if indices
   1957      * are close together or low.
   1958      *
   1959      *
   1960      * POSITION >= 1: 10 * index + 6
   1961      * COLOR >= 2: 10 * (index-1) + 7
   1962      * TEXCOORD[0..15]: index
   1963      * BLENDWEIGHT: 10 * index + 18
   1964      * BLENDINDICES: 10 * index + 19
   1965      * NORMAL: 10 * index + 20
   1966      * TANGENT: 10 * index + 21
   1967      * BINORMAL: 10 * index + 22
   1968      * TESSFACTOR: 10 * index + 23
   1969      */
   1970 
   1971     switch (dcl->usage) {
   1972     case D3DDECLUSAGE_POSITION:
   1973     case D3DDECLUSAGE_POSITIONT:
   1974     case D3DDECLUSAGE_DEPTH:
   1975         if (index == 0) {
   1976             sem->Name = TGSI_SEMANTIC_POSITION;
   1977             sem->Index = 0;
   1978         } else {
   1979             sem->Name = TGSI_SEMANTIC_GENERIC;
   1980             sem->Index = 10 * index + 6;
   1981         }
   1982         break;
   1983     case D3DDECLUSAGE_COLOR:
   1984         if (index < 2) {
   1985             sem->Name = TGSI_SEMANTIC_COLOR;
   1986             sem->Index = index;
   1987         } else {
   1988             sem->Name = TGSI_SEMANTIC_GENERIC;
   1989             sem->Index = 10 * (index-1) + 7;
   1990         }
   1991         break;
   1992     case D3DDECLUSAGE_FOG:
   1993         assert(index == 0);
   1994         sem->Name = TGSI_SEMANTIC_FOG;
   1995         sem->Index = 0;
   1996         break;
   1997     case D3DDECLUSAGE_PSIZE:
   1998         assert(index == 0);
   1999         sem->Name = TGSI_SEMANTIC_PSIZE;
   2000         sem->Index = 0;
   2001         break;
   2002     case D3DDECLUSAGE_TEXCOORD:
   2003         assert(index < 16);
   2004         if (index < 8 && tc)
   2005             sem->Name = TGSI_SEMANTIC_TEXCOORD;
   2006         else
   2007             sem->Name = TGSI_SEMANTIC_GENERIC;
   2008         sem->Index = index;
   2009         break;
   2010     case D3DDECLUSAGE_BLENDWEIGHT:
   2011         sem->Name = TGSI_SEMANTIC_GENERIC;
   2012         sem->Index = 10 * index + 18;
   2013         break;
   2014     case D3DDECLUSAGE_BLENDINDICES:
   2015         sem->Name = TGSI_SEMANTIC_GENERIC;
   2016         sem->Index = 10 * index + 19;
   2017         break;
   2018     case D3DDECLUSAGE_NORMAL:
   2019         sem->Name = TGSI_SEMANTIC_GENERIC;
   2020         sem->Index = 10 * index + 20;
   2021         break;
   2022     case D3DDECLUSAGE_TANGENT:
   2023         sem->Name = TGSI_SEMANTIC_GENERIC;
   2024         sem->Index = 10 * index + 21;
   2025         break;
   2026     case D3DDECLUSAGE_BINORMAL:
   2027         sem->Name = TGSI_SEMANTIC_GENERIC;
   2028         sem->Index = 10 * index + 22;
   2029         break;
   2030     case D3DDECLUSAGE_TESSFACTOR:
   2031         sem->Name = TGSI_SEMANTIC_GENERIC;
   2032         sem->Index = 10 * index + 23;
   2033         break;
   2034     case D3DDECLUSAGE_SAMPLE:
   2035         sem->Name = TGSI_SEMANTIC_COUNT;
   2036         sem->Index = 0;
   2037         break;
   2038     default:
   2039         unreachable("Invalid DECLUSAGE.");
   2040         break;
   2041     }
   2042 }
   2043 
   2044 #define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
   2045 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
   2046 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
   2047 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
   2048 static inline unsigned
   2049 d3dstt_to_tgsi_tex(BYTE sampler_type)
   2050 {
   2051     switch (sampler_type) {
   2052     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
   2053     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
   2054     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
   2055     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
   2056     default:
   2057         assert(0);
   2058         return TGSI_TEXTURE_UNKNOWN;
   2059     }
   2060 }
   2061 static inline unsigned
   2062 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
   2063 {
   2064     switch (sampler_type) {
   2065     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
   2066     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
   2067     case NINED3DSTT_VOLUME:
   2068     case NINED3DSTT_CUBE:
   2069     default:
   2070         assert(0);
   2071         return TGSI_TEXTURE_UNKNOWN;
   2072     }
   2073 }
   2074 static inline unsigned
   2075 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
   2076 {
   2077     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
   2078     case 1: return TGSI_TEXTURE_1D;
   2079     case 0: return TGSI_TEXTURE_2D;
   2080     case 3: return TGSI_TEXTURE_3D;
   2081     default:
   2082         return TGSI_TEXTURE_CUBE;
   2083     }
   2084 }
   2085 
   2086 static const char *
   2087 sm1_sampler_type_name(BYTE sampler_type)
   2088 {
   2089     switch (sampler_type) {
   2090     case NINED3DSTT_1D:     return "1D";
   2091     case NINED3DSTT_2D:     return "2D";
   2092     case NINED3DSTT_VOLUME: return "VOLUME";
   2093     case NINED3DSTT_CUBE:   return "CUBE";
   2094     default:
   2095         return "(D3DSTT_?)";
   2096     }
   2097 }
   2098 
   2099 static inline unsigned
   2100 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
   2101 {
   2102     switch (sem->Name) {
   2103     case TGSI_SEMANTIC_POSITION:
   2104     case TGSI_SEMANTIC_NORMAL:
   2105         return TGSI_INTERPOLATE_LINEAR;
   2106     case TGSI_SEMANTIC_BCOLOR:
   2107     case TGSI_SEMANTIC_COLOR:
   2108         return TGSI_INTERPOLATE_COLOR;
   2109     case TGSI_SEMANTIC_FOG:
   2110     case TGSI_SEMANTIC_GENERIC:
   2111     case TGSI_SEMANTIC_TEXCOORD:
   2112     case TGSI_SEMANTIC_CLIPDIST:
   2113     case TGSI_SEMANTIC_CLIPVERTEX:
   2114         return TGSI_INTERPOLATE_PERSPECTIVE;
   2115     case TGSI_SEMANTIC_EDGEFLAG:
   2116     case TGSI_SEMANTIC_FACE:
   2117     case TGSI_SEMANTIC_INSTANCEID:
   2118     case TGSI_SEMANTIC_PCOORD:
   2119     case TGSI_SEMANTIC_PRIMID:
   2120     case TGSI_SEMANTIC_PSIZE:
   2121     case TGSI_SEMANTIC_VERTEXID:
   2122         return TGSI_INTERPOLATE_CONSTANT;
   2123     default:
   2124         assert(0);
   2125         return TGSI_INTERPOLATE_CONSTANT;
   2126     }
   2127 }
   2128 
   2129 DECL_SPECIAL(DCL)
   2130 {
   2131     struct ureg_program *ureg = tx->ureg;
   2132     boolean is_input;
   2133     boolean is_sampler;
   2134     struct tgsi_declaration_semantic tgsi;
   2135     struct sm1_semantic sem;
   2136     sm1_read_semantic(tx, &sem);
   2137 
   2138     is_input = sem.reg.file == D3DSPR_INPUT;
   2139     is_sampler =
   2140         sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
   2141 
   2142     DUMP("DCL ");
   2143     sm1_dump_dst_param(&sem.reg);
   2144     if (is_sampler)
   2145         DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
   2146     else
   2147     if (tx->version.major >= 3)
   2148         DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
   2149     else
   2150     if (sem.usage | sem.usage_idx)
   2151         DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
   2152     else
   2153         DUMP("\n");
   2154 
   2155     if (is_sampler) {
   2156         const unsigned m = 1 << sem.reg.idx;
   2157         ureg_DECL_sampler(ureg, sem.reg.idx);
   2158         tx->info->sampler_mask |= m;
   2159         tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
   2160             d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
   2161             d3dstt_to_tgsi_tex(sem.sampler_type);
   2162         return D3D_OK;
   2163     }
   2164 
   2165     sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
   2166     if (IS_VS) {
   2167         if (is_input) {
   2168             /* linkage outside of shader with vertex declaration */
   2169             ureg_DECL_vs_input(ureg, sem.reg.idx);
   2170             assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
   2171             tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
   2172             tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
   2173             /* NOTE: preserving order in case of indirect access */
   2174         } else
   2175         if (tx->version.major >= 3) {
   2176             /* SM2 output semantic determined by file */
   2177             assert(sem.reg.mask != 0);
   2178             if (sem.usage == D3DDECLUSAGE_POSITIONT)
   2179                 tx->info->position_t = TRUE;
   2180             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
   2181             assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
   2182             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
   2183                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
   2184             nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
   2185             if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
   2186                 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
   2187                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
   2188                 tx->regs.oPos = tx->regs.o[sem.reg.idx];
   2189             }
   2190 
   2191             if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
   2192                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
   2193                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
   2194             }
   2195         }
   2196     } else {
   2197         if (is_input && tx->version.major >= 3) {
   2198             unsigned interp_location = 0;
   2199             /* SM3 only, SM2 input semantic determined by file */
   2200             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
   2201             assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
   2202             /* PositionT and tessfactor forbidden */
   2203             if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
   2204                 return D3DERR_INVALIDCALL;
   2205 
   2206             if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
   2207                 /* Position0 is forbidden (likely because vPos already does that) */
   2208                 if (sem.usage == D3DDECLUSAGE_POSITION)
   2209                     return D3DERR_INVALIDCALL;
   2210                 /* Following code is for depth */
   2211                 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
   2212                 return D3D_OK;
   2213             }
   2214 
   2215             if (sem.reg.mod & NINED3DSPDM_CENTROID ||
   2216                 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
   2217                 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
   2218 
   2219             tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
   2220                 ureg, tgsi.Name, tgsi.Index,
   2221                 nine_tgsi_to_interp_mode(&tgsi),
   2222                 0, /* cylwrap */
   2223                 interp_location, 0, 1);
   2224         } else
   2225         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
   2226             /* FragColor or FragDepth */
   2227             assert(sem.reg.mask != 0);
   2228             ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
   2229                                     0, 1);
   2230         }
   2231     }
   2232     return D3D_OK;
   2233 }
   2234 
   2235 DECL_SPECIAL(DEF)
   2236 {
   2237     tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
   2238     return D3D_OK;
   2239 }
   2240 
   2241 DECL_SPECIAL(DEFB)
   2242 {
   2243     tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
   2244     return D3D_OK;
   2245 }
   2246 
   2247 DECL_SPECIAL(DEFI)
   2248 {
   2249     tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
   2250     return D3D_OK;
   2251 }
   2252 
   2253 DECL_SPECIAL(POW)
   2254 {
   2255     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2256     struct ureg_src src[2] = {
   2257         tx_src_param(tx, &tx->insn.src[0]),
   2258         tx_src_param(tx, &tx->insn.src[1])
   2259     };
   2260     ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
   2261     return D3D_OK;
   2262 }
   2263 
   2264 DECL_SPECIAL(RSQ)
   2265 {
   2266     struct ureg_program *ureg = tx->ureg;
   2267     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2268     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2269     struct ureg_dst tmp = tx_scratch(tx);
   2270     ureg_RSQ(ureg, tmp, ureg_abs(src));
   2271     ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
   2272     return D3D_OK;
   2273 }
   2274 
   2275 DECL_SPECIAL(LOG)
   2276 {
   2277     struct ureg_program *ureg = tx->ureg;
   2278     struct ureg_dst tmp = tx_scratch_scalar(tx);
   2279     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2280     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2281     ureg_LG2(ureg, tmp, ureg_abs(src));
   2282     ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
   2283     return D3D_OK;
   2284 }
   2285 
   2286 DECL_SPECIAL(LIT)
   2287 {
   2288     struct ureg_program *ureg = tx->ureg;
   2289     struct ureg_dst tmp = tx_scratch(tx);
   2290     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2291     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2292     ureg_LIT(ureg, tmp, src);
   2293     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
   2294      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
   2295      * it 0^0 if src.w=0, which value is driver dependent. */
   2296     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
   2297              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
   2298              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
   2299     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
   2300     return D3D_OK;
   2301 }
   2302 
   2303 DECL_SPECIAL(NRM)
   2304 {
   2305     struct ureg_program *ureg = tx->ureg;
   2306     struct ureg_dst tmp = tx_scratch_scalar(tx);
   2307     struct ureg_src nrm = tx_src_scalar(tmp);
   2308     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2309     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2310     ureg_DP3(ureg, tmp, src, src);
   2311     ureg_RSQ(ureg, tmp, nrm);
   2312     ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
   2313     ureg_MUL(ureg, dst, src, nrm);
   2314     return D3D_OK;
   2315 }
   2316 
   2317 DECL_SPECIAL(DP2ADD)
   2318 {
   2319     struct ureg_dst tmp = tx_scratch_scalar(tx);
   2320     struct ureg_src dp2 = tx_src_scalar(tmp);
   2321     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2322     struct ureg_src src[3];
   2323     int i;
   2324     for (i = 0; i < 3; ++i)
   2325         src[i] = tx_src_param(tx, &tx->insn.src[i]);
   2326     assert_replicate_swizzle(&src[2]);
   2327 
   2328     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
   2329     ureg_ADD(tx->ureg, dst, src[2], dp2);
   2330 
   2331     return D3D_OK;
   2332 }
   2333 
   2334 DECL_SPECIAL(TEXCOORD)
   2335 {
   2336     struct ureg_program *ureg = tx->ureg;
   2337     const unsigned s = tx->insn.dst[0].idx;
   2338     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2339 
   2340     tx_texcoord_alloc(tx, s);
   2341     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
   2342     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
   2343 
   2344     return D3D_OK;
   2345 }
   2346 
   2347 DECL_SPECIAL(TEXCOORD_ps14)
   2348 {
   2349     struct ureg_program *ureg = tx->ureg;
   2350     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2351     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2352 
   2353     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
   2354 
   2355     ureg_MOV(ureg, dst, src);
   2356 
   2357     return D3D_OK;
   2358 }
   2359 
   2360 DECL_SPECIAL(TEXKILL)
   2361 {
   2362     struct ureg_src reg;
   2363 
   2364     if (tx->version.major > 1 || tx->version.minor > 3) {
   2365         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
   2366     } else {
   2367         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
   2368         reg = tx->regs.vT[tx->insn.dst[0].idx];
   2369     }
   2370     if (tx->version.major < 2)
   2371         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
   2372     ureg_KILL_IF(tx->ureg, reg);
   2373 
   2374     return D3D_OK;
   2375 }
   2376 
   2377 DECL_SPECIAL(TEXBEM)
   2378 {
   2379     struct ureg_program *ureg = tx->ureg;
   2380     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2381     struct ureg_dst tmp, tmp2, texcoord;
   2382     struct ureg_src sample, m00, m01, m10, m11;
   2383     struct ureg_src bumpenvlscale, bumpenvloffset;
   2384     const int m = tx->insn.dst[0].idx;
   2385     const int n = tx->insn.src[0].idx;
   2386 
   2387     assert(tx->version.major == 1);
   2388 
   2389     sample = ureg_DECL_sampler(ureg, m);
   2390     tx->info->sampler_mask |= 1 << m;
   2391 
   2392     tx_texcoord_alloc(tx, m);
   2393 
   2394     tmp = tx_scratch(tx);
   2395     tmp2 = tx_scratch(tx);
   2396     texcoord = tx_scratch(tx);
   2397     /*
   2398      * Bump-env-matrix:
   2399      * 00 is X
   2400      * 01 is Y
   2401      * 10 is Z
   2402      * 11 is W
   2403      */
   2404     nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
   2405     m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
   2406     m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
   2407     m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
   2408     m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
   2409 
   2410     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
   2411     if (m % 2 == 0) {
   2412         bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
   2413         bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
   2414     } else {
   2415         bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
   2416         bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
   2417     }
   2418 
   2419     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
   2420 
   2421     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
   2422     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
   2423              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
   2424     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
   2425     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
   2426              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
   2427              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
   2428 
   2429     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
   2430     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
   2431              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
   2432     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
   2433     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
   2434              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
   2435              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
   2436 
   2437     /* Now the texture coordinates are in tmp.xy */
   2438 
   2439     if (tx->insn.opcode == D3DSIO_TEXBEM) {
   2440         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
   2441     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
   2442         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
   2443         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
   2444         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
   2445                  bumpenvlscale, bumpenvloffset);
   2446         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
   2447     }
   2448 
   2449     tx->info->bumpenvmat_needed = 1;
   2450 
   2451     return D3D_OK;
   2452 }
   2453 
   2454 DECL_SPECIAL(TEXREG2AR)
   2455 {
   2456     struct ureg_program *ureg = tx->ureg;
   2457     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2458     struct ureg_src sample;
   2459     const int m = tx->insn.dst[0].idx;
   2460     const int n = tx->insn.src[0].idx;
   2461     assert(m >= 0 && m > n);
   2462 
   2463     sample = ureg_DECL_sampler(ureg, m);
   2464     tx->info->sampler_mask |= 1 << m;
   2465     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
   2466 
   2467     return D3D_OK;
   2468 }
   2469 
   2470 DECL_SPECIAL(TEXREG2GB)
   2471 {
   2472     struct ureg_program *ureg = tx->ureg;
   2473     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2474     struct ureg_src sample;
   2475     const int m = tx->insn.dst[0].idx;
   2476     const int n = tx->insn.src[0].idx;
   2477     assert(m >= 0 && m > n);
   2478 
   2479     sample = ureg_DECL_sampler(ureg, m);
   2480     tx->info->sampler_mask |= 1 << m;
   2481     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
   2482 
   2483     return D3D_OK;
   2484 }
   2485 
   2486 DECL_SPECIAL(TEXM3x2PAD)
   2487 {
   2488     return D3D_OK; /* this is just padding */
   2489 }
   2490 
   2491 DECL_SPECIAL(TEXM3x2TEX)
   2492 {
   2493     struct ureg_program *ureg = tx->ureg;
   2494     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2495     struct ureg_src sample;
   2496     const int m = tx->insn.dst[0].idx - 1;
   2497     const int n = tx->insn.src[0].idx;
   2498     assert(m >= 0 && m > n);
   2499 
   2500     tx_texcoord_alloc(tx, m);
   2501     tx_texcoord_alloc(tx, m+1);
   2502 
   2503     /* performs the matrix multiplication */
   2504     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2505     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
   2506 
   2507     sample = ureg_DECL_sampler(ureg, m + 1);
   2508     tx->info->sampler_mask |= 1 << (m + 1);
   2509     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
   2510 
   2511     return D3D_OK;
   2512 }
   2513 
   2514 DECL_SPECIAL(TEXM3x3PAD)
   2515 {
   2516     return D3D_OK; /* this is just padding */
   2517 }
   2518 
   2519 DECL_SPECIAL(TEXM3x3SPEC)
   2520 {
   2521     struct ureg_program *ureg = tx->ureg;
   2522     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2523     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
   2524     struct ureg_src sample;
   2525     struct ureg_dst tmp;
   2526     const int m = tx->insn.dst[0].idx - 2;
   2527     const int n = tx->insn.src[0].idx;
   2528     assert(m >= 0 && m > n);
   2529 
   2530     tx_texcoord_alloc(tx, m);
   2531     tx_texcoord_alloc(tx, m+1);
   2532     tx_texcoord_alloc(tx, m+2);
   2533 
   2534     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2535     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
   2536     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
   2537 
   2538     sample = ureg_DECL_sampler(ureg, m + 2);
   2539     tx->info->sampler_mask |= 1 << (m + 2);
   2540     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
   2541 
   2542     /* At this step, dst = N = (u', w', z').
   2543      * We want dst to be the texture sampled at (u'', w'', z''), with
   2544      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
   2545     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
   2546     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
   2547     /* at this step tmp.x = 1/N.N */
   2548     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
   2549     /* at this step tmp.y = N.E */
   2550     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
   2551     /* at this step tmp.x = N.E/N.N */
   2552     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
   2553     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
   2554     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
   2555     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
   2556     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
   2557 
   2558     return D3D_OK;
   2559 }
   2560 
   2561 DECL_SPECIAL(TEXREG2RGB)
   2562 {
   2563     struct ureg_program *ureg = tx->ureg;
   2564     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2565     struct ureg_src sample;
   2566     const int m = tx->insn.dst[0].idx;
   2567     const int n = tx->insn.src[0].idx;
   2568     assert(m >= 0 && m > n);
   2569 
   2570     sample = ureg_DECL_sampler(ureg, m);
   2571     tx->info->sampler_mask |= 1 << m;
   2572     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
   2573 
   2574     return D3D_OK;
   2575 }
   2576 
   2577 DECL_SPECIAL(TEXDP3TEX)
   2578 {
   2579     struct ureg_program *ureg = tx->ureg;
   2580     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2581     struct ureg_dst tmp;
   2582     struct ureg_src sample;
   2583     const int m = tx->insn.dst[0].idx;
   2584     const int n = tx->insn.src[0].idx;
   2585     assert(m >= 0 && m > n);
   2586 
   2587     tx_texcoord_alloc(tx, m);
   2588 
   2589     tmp = tx_scratch(tx);
   2590     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2591     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
   2592 
   2593     sample = ureg_DECL_sampler(ureg, m);
   2594     tx->info->sampler_mask |= 1 << m;
   2595     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
   2596 
   2597     return D3D_OK;
   2598 }
   2599 
   2600 DECL_SPECIAL(TEXM3x2DEPTH)
   2601 {
   2602     struct ureg_program *ureg = tx->ureg;
   2603     struct ureg_dst tmp;
   2604     const int m = tx->insn.dst[0].idx - 1;
   2605     const int n = tx->insn.src[0].idx;
   2606     assert(m >= 0 && m > n);
   2607 
   2608     tx_texcoord_alloc(tx, m);
   2609     tx_texcoord_alloc(tx, m+1);
   2610 
   2611     tmp = tx_scratch(tx);
   2612 
   2613     /* performs the matrix multiplication */
   2614     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2615     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
   2616 
   2617     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
   2618     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
   2619     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
   2620     /* res = 'w' == 0 ? 1.0 : z/w */
   2621     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
   2622              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
   2623     /* replace the depth for depth testing with the result */
   2624     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
   2625                                               TGSI_WRITEMASK_Z, 0, 1);
   2626     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
   2627     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
   2628     return D3D_OK;
   2629 }
   2630 
   2631 DECL_SPECIAL(TEXDP3)
   2632 {
   2633     struct ureg_program *ureg = tx->ureg;
   2634     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2635     const int m = tx->insn.dst[0].idx;
   2636     const int n = tx->insn.src[0].idx;
   2637     assert(m >= 0 && m > n);
   2638 
   2639     tx_texcoord_alloc(tx, m);
   2640 
   2641     ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2642 
   2643     return D3D_OK;
   2644 }
   2645 
   2646 DECL_SPECIAL(TEXM3x3)
   2647 {
   2648     struct ureg_program *ureg = tx->ureg;
   2649     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2650     struct ureg_src sample;
   2651     struct ureg_dst E, tmp;
   2652     const int m = tx->insn.dst[0].idx - 2;
   2653     const int n = tx->insn.src[0].idx;
   2654     assert(m >= 0 && m > n);
   2655 
   2656     tx_texcoord_alloc(tx, m);
   2657     tx_texcoord_alloc(tx, m+1);
   2658     tx_texcoord_alloc(tx, m+2);
   2659 
   2660     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
   2661     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
   2662     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
   2663 
   2664     switch (tx->insn.opcode) {
   2665     case D3DSIO_TEXM3x3:
   2666         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
   2667         break;
   2668     case D3DSIO_TEXM3x3TEX:
   2669         sample = ureg_DECL_sampler(ureg, m + 2);
   2670         tx->info->sampler_mask |= 1 << (m + 2);
   2671         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
   2672         break;
   2673     case D3DSIO_TEXM3x3VSPEC:
   2674         sample = ureg_DECL_sampler(ureg, m + 2);
   2675         tx->info->sampler_mask |= 1 << (m + 2);
   2676         E = tx_scratch(tx);
   2677         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
   2678         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
   2679         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
   2680         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
   2681         /* At this step, dst = N = (u', w', z').
   2682          * We want dst to be the texture sampled at (u'', w'', z''), with
   2683          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
   2684         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
   2685         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
   2686         /* at this step tmp.x = 1/N.N */
   2687         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
   2688         /* at this step tmp.y = N.E */
   2689         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
   2690         /* at this step tmp.x = N.E/N.N */
   2691         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
   2692         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
   2693         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
   2694         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
   2695         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
   2696         break;
   2697     default:
   2698         return D3DERR_INVALIDCALL;
   2699     }
   2700     return D3D_OK;
   2701 }
   2702 
   2703 DECL_SPECIAL(TEXDEPTH)
   2704 {
   2705     struct ureg_program *ureg = tx->ureg;
   2706     struct ureg_dst r5;
   2707     struct ureg_src r5r, r5g;
   2708 
   2709     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
   2710 
   2711     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
   2712      * r5 won't be used afterward, thus we can use r5.ba */
   2713     r5 = tx->regs.r[5];
   2714     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
   2715     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
   2716 
   2717     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
   2718     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
   2719     /* r5.r = r/g */
   2720     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
   2721              r5r, ureg_imm1f(ureg, 1.0f));
   2722     /* replace the depth for depth testing with the result */
   2723     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
   2724                                               TGSI_WRITEMASK_Z, 0, 1);
   2725     ureg_MOV(ureg, tx->regs.oDepth, r5r);
   2726 
   2727     return D3D_OK;
   2728 }
   2729 
   2730 DECL_SPECIAL(BEM)
   2731 {
   2732     struct ureg_program *ureg = tx->ureg;
   2733     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2734     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
   2735     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
   2736     struct ureg_src m00, m01, m10, m11;
   2737     const int m = tx->insn.dst[0].idx;
   2738     struct ureg_dst tmp;
   2739     /*
   2740      * Bump-env-matrix:
   2741      * 00 is X
   2742      * 01 is Y
   2743      * 10 is Z
   2744      * 11 is W
   2745      */
   2746     nine_info_mark_const_f_used(tx->info, 8 + m);
   2747     m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
   2748     m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
   2749     m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
   2750     m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
   2751     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
   2752     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
   2753              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
   2754     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
   2755     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
   2756              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
   2757 
   2758     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
   2759     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
   2760              NINE_APPLY_SWIZZLE(src1, X), src0);
   2761     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
   2762     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
   2763              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
   2764     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
   2765 
   2766     tx->info->bumpenvmat_needed = 1;
   2767 
   2768     return D3D_OK;
   2769 }
   2770 
   2771 DECL_SPECIAL(TEXLD)
   2772 {
   2773     struct ureg_program *ureg = tx->ureg;
   2774     unsigned target;
   2775     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2776     struct ureg_src src[2] = {
   2777         tx_src_param(tx, &tx->insn.src[0]),
   2778         tx_src_param(tx, &tx->insn.src[1])
   2779     };
   2780     assert(tx->insn.src[1].idx >= 0 &&
   2781            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
   2782     target = tx->sampler_targets[tx->insn.src[1].idx];
   2783 
   2784     switch (tx->insn.flags) {
   2785     case 0:
   2786         ureg_TEX(ureg, dst, target, src[0], src[1]);
   2787         break;
   2788     case NINED3DSI_TEXLD_PROJECT:
   2789         ureg_TXP(ureg, dst, target, src[0], src[1]);
   2790         break;
   2791     case NINED3DSI_TEXLD_BIAS:
   2792         ureg_TXB(ureg, dst, target, src[0], src[1]);
   2793         break;
   2794     default:
   2795         assert(0);
   2796         return D3DERR_INVALIDCALL;
   2797     }
   2798     return D3D_OK;
   2799 }
   2800 
   2801 DECL_SPECIAL(TEXLD_14)
   2802 {
   2803     struct ureg_program *ureg = tx->ureg;
   2804     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2805     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
   2806     const unsigned s = tx->insn.dst[0].idx;
   2807     const unsigned t = ps1x_sampler_type(tx->info, s);
   2808 
   2809     tx->info->sampler_mask |= 1 << s;
   2810     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
   2811 
   2812     return D3D_OK;
   2813 }
   2814 
   2815 DECL_SPECIAL(TEX)
   2816 {
   2817     struct ureg_program *ureg = tx->ureg;
   2818     const unsigned s = tx->insn.dst[0].idx;
   2819     const unsigned t = ps1x_sampler_type(tx->info, s);
   2820     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2821     struct ureg_src src[2];
   2822 
   2823     tx_texcoord_alloc(tx, s);
   2824 
   2825     src[0] = tx->regs.vT[s];
   2826     src[1] = ureg_DECL_sampler(ureg, s);
   2827     tx->info->sampler_mask |= 1 << s;
   2828 
   2829     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
   2830 
   2831     return D3D_OK;
   2832 }
   2833 
   2834 DECL_SPECIAL(TEXLDD)
   2835 {
   2836     unsigned target;
   2837     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2838     struct ureg_src src[4] = {
   2839         tx_src_param(tx, &tx->insn.src[0]),
   2840         tx_src_param(tx, &tx->insn.src[1]),
   2841         tx_src_param(tx, &tx->insn.src[2]),
   2842         tx_src_param(tx, &tx->insn.src[3])
   2843     };
   2844     assert(tx->insn.src[1].idx >= 0 &&
   2845            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
   2846     target = tx->sampler_targets[tx->insn.src[1].idx];
   2847 
   2848     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
   2849     return D3D_OK;
   2850 }
   2851 
   2852 DECL_SPECIAL(TEXLDL)
   2853 {
   2854     unsigned target;
   2855     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
   2856     struct ureg_src src[2] = {
   2857        tx_src_param(tx, &tx->insn.src[0]),
   2858        tx_src_param(tx, &tx->insn.src[1])
   2859     };
   2860     assert(tx->insn.src[1].idx >= 0 &&
   2861            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
   2862     target = tx->sampler_targets[tx->insn.src[1].idx];
   2863 
   2864     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
   2865     return D3D_OK;
   2866 }
   2867 
   2868 DECL_SPECIAL(SETP)
   2869 {
   2870     STUB(D3DERR_INVALIDCALL);
   2871 }
   2872 
   2873 DECL_SPECIAL(BREAKP)
   2874 {
   2875     STUB(D3DERR_INVALIDCALL);
   2876 }
   2877 
   2878 DECL_SPECIAL(PHASE)
   2879 {
   2880     return D3D_OK; /* we don't care about phase */
   2881 }
   2882 
   2883 DECL_SPECIAL(COMMENT)
   2884 {
   2885     return D3D_OK; /* nothing to do */
   2886 }
   2887 
   2888 
/*
 * Build one positional initializer for struct sm1_op_info:
 *   o        D3D opcode name, pasted onto D3DSIO_
 *   t        TGSI opcode name, pasted onto TGSI_OPCODE_
 *   vv1,vv2  first version pair
 *   pv1,pv2  second version pair
 *   d, s, h  remaining three members, in declaration order
 * NOTE(review): the struct is declared outside this chunk; judging from the
 * table users, the pairs are presumably the vertex / fragment shader version
 * ranges (min, max) and d, s, h the destination count, source count and
 * special handler -- confirm against the struct definition.
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
   2891 
/*
 * Table of known SM1 instructions, one _OPI() entry per opcode variant.
 *
 * The same D3DSIO opcode may appear several times with different version
 * pairs (e.g. TEX, TEXCOORD, SINCOS, EXPP). create_op_info_map() walks the
 * table front to back and stores the index of every entry whose version
 * range contains the shader version, so when several entries for one opcode
 * match, the later one wins. Keep the relative order of such entries.
 *
 * Entries with a NULL last argument carry no special handler.
 */
struct sm1_op_info inst_table[] =
{
    _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
    _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
    _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
    _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
    _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
    _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
    _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
    _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
    _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
    _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
    _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
    _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
    _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
    _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
    _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
    _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
    _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
    _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
    _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */

    _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
    _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
    _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
    _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
    _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),

    _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
    _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
    _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
    _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
    _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
    _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),

    _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),

    _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
    _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
    _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
    _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */

    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

    /* More flow control */
    _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
    _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
    _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
    _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
    _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
    _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
    _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
    _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
    /* we don't write to the address register, but a normal register (copied
     * when needed to the address register), thus we don't use ARR */
    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),

    _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
    _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),

    _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
    _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
    _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
    _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
    _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
    _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
    _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
    _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
    _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
    _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
    _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
    _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),

    _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
    _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
    _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),

    _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),

    /* More tex stuff */
    _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
    _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
    _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
    _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
    _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
    _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),

    /* Misc */
    _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
    _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
    _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
    _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
    _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
    _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
    _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
    _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
};
   2998 
/* Op info for PHASE, kept as a stand-alone object rather than an inst_table
 * entry; no destination, no sources, handled by SPECIAL(PHASE). */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
   3001 
/* Op info for COMMENT, kept as a stand-alone object rather than an
 * inst_table entry; no destination, no sources, handled by SPECIAL(COMMENT). */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
   3004 
   3005 static void
   3006 create_op_info_map(struct shader_translator *tx)
   3007 {
   3008     const unsigned version = (tx->version.major << 8) | tx->version.minor;
   3009     unsigned i;
   3010 
   3011     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
   3012         tx->op_info_map[i] = -1;
   3013 
   3014     if (tx->processor == PIPE_SHADER_VERTEX) {
   3015         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
   3016             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
   3017             if (inst_table[i].vert_version.min <= version &&
   3018                 inst_table[i].vert_version.max >= version)
   3019                 tx->op_info_map[inst_table[i].sio] = i;
   3020         }
   3021     } else {
   3022         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
   3023             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
   3024             if (inst_table[i].frag_version.min <= version &&
   3025                 inst_table[i].frag_version.max >= version)
   3026                 tx->op_info_map[inst_table[i].sio] = i;
   3027         }
   3028     }
   3029 }
   3030 
   3031 static inline HRESULT
   3032 NineTranslateInstruction_Generic(struct shader_translator *tx)
   3033 {
   3034     struct ureg_dst dst[1];
   3035     struct ureg_src src[4];
   3036     unsigned i;
   3037 
   3038     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
   3039         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
   3040     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
   3041         src[i] = tx_src_param(tx, &tx->insn.src[i]);
   3042 
   3043     ureg_insn(tx->ureg, tx->insn.info->opcode,
   3044               dst, tx->insn.ndst,
   3045               src, tx->insn.nsrc);
   3046     return D3D_OK;
   3047 }
   3048 
   3049 static inline DWORD
   3050 TOKEN_PEEK(struct shader_translator *tx)
   3051 {
   3052     return *(tx->parse);
   3053 }
   3054 
   3055 static inline DWORD
   3056 TOKEN_NEXT(struct shader_translator *tx)
   3057 {
   3058     return *(tx->parse)++;
   3059 }
   3060 
   3061 static inline void
   3062 TOKEN_JUMP(struct shader_translator *tx)
   3063 {
   3064     if (tx->parse_next && tx->parse != tx->parse_next) {
   3065         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
   3066         tx->parse = tx->parse_next;
   3067     }
   3068 }
   3069 
   3070 static inline boolean
   3071 sm1_parse_eof(struct shader_translator *tx)
   3072 {
   3073     return TOKEN_PEEK(tx) == NINED3DSP_END;
   3074 }
   3075 
   3076 static void
   3077 sm1_read_version(struct shader_translator *tx)
   3078 {
   3079     const DWORD tok = TOKEN_NEXT(tx);
   3080 
   3081     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
   3082     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
   3083 
   3084     switch (tok >> 16) {
   3085     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
   3086     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
   3087     default:
   3088        DBG("Invalid shader type: %x\n", tok);
   3089        tx->processor = ~0;
   3090        break;
   3091     }
   3092 }
   3093 
   3094 /* This is just to check if we parsed the instruction properly. */
   3095 static void
   3096 sm1_parse_get_skip(struct shader_translator *tx)
   3097 {
   3098     const DWORD tok = TOKEN_PEEK(tx);
   3099 
   3100     if (tx->version.major >= 2) {
   3101         tx->parse_next = tx->parse + 1 /* this */ +
   3102             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
   3103     } else {
   3104         tx->parse_next = NULL; /* TODO: determine from param count */
   3105     }
   3106 }
   3107 
   3108 static void
   3109 sm1_print_comment(const char *comment, UINT size)
   3110 {
   3111     if (!size)
   3112         return;
   3113     /* TODO */
   3114 }
   3115 
   3116 static void
   3117 sm1_parse_comments(struct shader_translator *tx, BOOL print)
   3118 {
   3119     DWORD tok = TOKEN_PEEK(tx);
   3120 
   3121     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
   3122     {
   3123         const char *comment = "";
   3124         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
   3125         tx->parse += size + 1;
   3126 
   3127         if (print)
   3128             sm1_print_comment(comment, size);
   3129 
   3130         tok = TOKEN_PEEK(tx);
   3131     }
   3132 }
   3133 
   3134 static void
   3135 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
   3136 {
   3137     *reg = TOKEN_NEXT(tx);
   3138 
   3139     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
   3140     {
   3141         if (tx->version.major < 2)
   3142             *rel = (1 << 31) |
   3143                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
   3144                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
   3145                 D3DSP_NOSWIZZLE;
   3146         else
   3147             *rel = TOKEN_NEXT(tx);
   3148     }
   3149 }
   3150 
   3151 static void
   3152 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
   3153 {
   3154     int8_t shift;
   3155     dst->file =
   3156         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
   3157         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
   3158     dst->type = TGSI_RETURN_TYPE_FLOAT;
   3159     dst->idx = tok & D3DSP_REGNUM_MASK;
   3160     dst->rel = NULL;
   3161     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
   3162     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
   3163     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
   3164     dst->shift = (shift & 0x7) - (shift & 0x8);
   3165 }
   3166 
   3167 static void
   3168 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
   3169 {
   3170     src->file =
   3171         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
   3172         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
   3173     src->type = TGSI_RETURN_TYPE_FLOAT;
   3174     src->idx = tok & D3DSP_REGNUM_MASK;
   3175     src->rel = NULL;
   3176     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
   3177     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
   3178 
   3179     switch (src->file) {
   3180     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
   3181     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
   3182     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
   3183     default:
   3184         break;
   3185     }
   3186 }
   3187 
   3188 static void
   3189 sm1_parse_immediate(struct shader_translator *tx,
   3190                     struct sm1_src_param *imm)
   3191 {
   3192     imm->file = NINED3DSPR_IMMEDIATE;
   3193     imm->idx = INT_MIN;
   3194     imm->rel = NULL;
   3195     imm->swizzle = NINED3DSP_NOSWIZZLE;
   3196     imm->mod = 0;
   3197     switch (tx->insn.opcode) {
   3198     case D3DSIO_DEF:
   3199         imm->type = NINED3DSPTYPE_FLOAT4;
   3200         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
   3201         tx->parse += 4;
   3202         break;
   3203     case D3DSIO_DEFI:
   3204         imm->type = NINED3DSPTYPE_INT4;
   3205         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
   3206         tx->parse += 4;
   3207         break;
   3208     case D3DSIO_DEFB:
   3209         imm->type = NINED3DSPTYPE_BOOL;
   3210         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
   3211         tx->parse += 1;
   3212         break;
   3213     default:
   3214        assert(0);
   3215        break;
   3216     }
   3217 }
   3218 
   3219 static void
   3220 sm1_read_dst_param(struct shader_translator *tx,
   3221                    struct sm1_dst_param *dst,
   3222                    struct sm1_src_param *rel)
   3223 {
   3224     DWORD tok_dst, tok_rel = 0;
   3225 
   3226     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
   3227     sm1_parse_dst_param(dst, tok_dst);
   3228     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
   3229         sm1_parse_src_param(rel, tok_rel);
   3230         dst->rel = rel;
   3231     }
   3232 }
   3233 
   3234 static void
   3235 sm1_read_src_param(struct shader_translator *tx,
   3236                    struct sm1_src_param *src,
   3237                    struct sm1_src_param *rel)
   3238 {
   3239     DWORD tok_src, tok_rel = 0;
   3240 
   3241     sm1_parse_get_param(tx, &tok_src, &tok_rel);
   3242     sm1_parse_src_param(src, tok_src);
   3243     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
   3244         assert(rel);
   3245         sm1_parse_src_param(rel, tok_rel);
   3246         src->rel = rel;
   3247     }
   3248 }
   3249 
   3250 static void
   3251 sm1_read_semantic(struct shader_translator *tx,
   3252                   struct sm1_semantic *sem)
   3253 {
   3254     const DWORD tok_usg = TOKEN_NEXT(tx);
   3255     const DWORD tok_dst = TOKEN_NEXT(tx);
   3256 
   3257     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
   3258     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
   3259     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
   3260 
   3261     sm1_parse_dst_param(&sem->reg, tok_dst);
   3262 }
   3263 
   3264 static void
   3265 sm1_parse_instruction(struct shader_translator *tx)
   3266 {
   3267     struct sm1_instruction *insn = &tx->insn;
   3268     HRESULT hr;
   3269     DWORD tok;
   3270     struct sm1_op_info *info = NULL;
   3271     unsigned i;
   3272 
   3273     sm1_parse_comments(tx, TRUE);
   3274     sm1_parse_get_skip(tx);
   3275 
   3276     tok = TOKEN_NEXT(tx);
   3277 
   3278     insn->opcode = tok & D3DSI_OPCODE_MASK;
   3279     insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
   3280     insn->coissue = !!(tok & D3DSI_COISSUE);
   3281     insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
   3282 
   3283     if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
   3284         int k = tx->op_info_map[insn->opcode];
   3285         if (k >= 0) {
   3286             assert(k < ARRAY_SIZE(inst_table));
   3287             info = &inst_table[k];
   3288         }
   3289     } else {
   3290        if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
   3291        if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
   3292     }
   3293     if (!info) {
   3294        DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
   3295        TOKEN_JUMP(tx);
   3296        return;
   3297     }
   3298     insn->info = info;
   3299     insn->ndst = info->ndst;
   3300     insn->nsrc = info->nsrc;
   3301 
   3302     assert(!insn->predicated && "TODO: predicated instructions");
   3303 
   3304     /* check version */
   3305     {
   3306         unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
   3307         unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
   3308         unsigned ver = (tx->version.major << 8) | tx->version.minor;
   3309         if (ver < min || ver > max) {
   3310             DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
   3311                 min, ver, max);
   3312             return;
   3313         }
   3314     }
   3315 
   3316     for (i = 0; i < insn->ndst; ++i)
   3317         sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
   3318     if (insn->predicated)
   3319         sm1_read_src_param(tx, &insn->pred, NULL);
   3320     for (i = 0; i < insn->nsrc; ++i)
   3321         sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
   3322 
   3323     /* parse here so we can dump them before processing */
   3324     if (insn->opcode == D3DSIO_DEF ||
   3325         insn->opcode == D3DSIO_DEFI ||
   3326         insn->opcode == D3DSIO_DEFB)
   3327         sm1_parse_immediate(tx, &tx->insn.src[0]);
   3328 
   3329     sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
   3330     sm1_instruction_check(insn);
   3331 
   3332     if (info->handler)
   3333         hr = info->handler(tx);
   3334     else
   3335         hr = NineTranslateInstruction_Generic(tx);
   3336     tx_apply_dst0_modifiers(tx);
   3337 
   3338     if (hr != D3D_OK)
   3339         tx->failure = TRUE;
   3340     tx->num_scratch = 0; /* reset */
   3341 
   3342     TOKEN_JUMP(tx);
   3343 }
   3344 
   3345 static void
   3346 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
   3347 {
   3348     unsigned i;
   3349 
   3350     tx->info = info;
   3351 
   3352     tx->byte_code = info->byte_code;
   3353     tx->parse = info->byte_code;
   3354 
   3355     for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
   3356         info->input_map[i] = NINE_DECLUSAGE_NONE;
   3357     info->num_inputs = 0;
   3358 
   3359     info->position_t = FALSE;
   3360     info->point_size = FALSE;
   3361 
   3362     tx->info->const_float_slots = 0;
   3363     tx->info->const_int_slots = 0;
   3364     tx->info->const_bool_slots = 0;
   3365 
   3366     info->sampler_mask = 0x0;
   3367     info->rt_mask = 0x0;
   3368 
   3369     info->lconstf.data = NULL;
   3370     info->lconstf.ranges = NULL;
   3371 
   3372     info->bumpenvmat_needed = 0;
   3373 
   3374     for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
   3375         tx->regs.rL[i] = ureg_dst_undef();
   3376     }
   3377     tx->regs.address = ureg_dst_undef();
   3378     tx->regs.a0 = ureg_dst_undef();
   3379     tx->regs.p = ureg_dst_undef();
   3380     tx->regs.oDepth = ureg_dst_undef();
   3381     tx->regs.vPos = ureg_src_undef();
   3382     tx->regs.vFace = ureg_src_undef();
   3383     for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
   3384         tx->regs.o[i] = ureg_dst_undef();
   3385     for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
   3386         tx->regs.oCol[i] = ureg_dst_undef();
   3387     for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
   3388         tx->regs.vC[i] = ureg_src_undef();
   3389     for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
   3390         tx->regs.vT[i] = ureg_src_undef();
   3391 
   3392     sm1_read_version(tx);
   3393 
   3394     info->version = (tx->version.major << 4) | tx->version.minor;
   3395 
   3396     tx->num_outputs = 0;
   3397 
   3398     create_op_info_map(tx);
   3399 }
   3400 
   3401 static void
   3402 tx_dtor(struct shader_translator *tx)
   3403 {
   3404     if (tx->num_inst_labels)
   3405         FREE(tx->inst_labels);
   3406     FREE(tx->lconstf);
   3407     FREE(tx->regs.r);
   3408     FREE(tx);
   3409 }
   3410 
   3411 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
   3412  * CONST[1].xyz = x+width/2, y+height/2, zmin */
   3413 static void
   3414 shader_add_vs_viewport_transform(struct shader_translator *tx)
   3415 {
   3416     struct ureg_program *ureg = tx->ureg;
   3417     struct ureg_src c0 = NINE_CONSTANT_SRC(0);
   3418     struct ureg_src c1 = NINE_CONSTANT_SRC(1);
   3419     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
   3420 
   3421     c0 = ureg_src_dimension(c0, 4);
   3422     c1 = ureg_src_dimension(c1, 4);
   3423     /* TODO: find out when we need to apply the viewport transformation or not.
   3424      * Likely will be XYZ vs XYZRHW in vdecl_out
   3425      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
   3426      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
   3427      */
   3428     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
   3429 }
   3430 
   3431 static void
   3432 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
   3433 {
   3434     struct ureg_program *ureg = tx->ureg;
   3435     struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   3436     struct ureg_src fog_end, fog_coeff, fog_density;
   3437     struct ureg_src fog_vs, depth, fog_color;
   3438     struct ureg_dst fog_factor;
   3439 
   3440     if (!tx->info->fog_enable) {
   3441         ureg_MOV(ureg, oCol0, src_col);
   3442         return;
   3443     }
   3444 
   3445     if (tx->info->fog_mode != D3DFOG_NONE) {
   3446         depth = nine_get_position_input(tx);
   3447         depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
   3448     }
   3449 
   3450     nine_info_mark_const_f_used(tx->info, 33);
   3451     fog_color = NINE_CONSTANT_SRC(32);
   3452     fog_factor = tx_scratch_scalar(tx);
   3453 
   3454     if (tx->info->fog_mode == D3DFOG_LINEAR) {
   3455         fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
   3456         fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
   3457         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
   3458         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
   3459     } else if (tx->info->fog_mode == D3DFOG_EXP) {
   3460         fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
   3461         ureg_MUL(ureg, fog_factor, depth, fog_density);
   3462         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
   3463         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
   3464     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
   3465         fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
   3466         ureg_MUL(ureg, fog_factor, depth, fog_density);
   3467         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
   3468         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
   3469         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
   3470     } else {
   3471         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
   3472                                             TGSI_INTERPOLATE_PERSPECTIVE),
   3473                                             TGSI_SWIZZLE_X);
   3474         ureg_MOV(ureg, fog_factor, fog_vs);
   3475     }
   3476 
   3477     ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
   3478              tx_src_scalar(fog_factor), src_col, fog_color);
   3479     ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
   3480 }
   3481 
   /* Capability query shorthands. Both expand to a call through a variable
    * named `screen` that must be in scope at the point of use;
    * GET_SHADER_CAP additionally reads `info->type` to select the shader
    * stage. The argument is pasted onto the PIPE_CAP_ / PIPE_SHADER_CAP_
    * prefix. */
   #define GET_CAP(n) \
       screen->get_param(screen, PIPE_CAP_##n)
   #define GET_SHADER_CAP(n) \
       screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
   3486 
   3487 HRESULT
   3488 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
   3489 {
   3490     struct shader_translator *tx;
   3491     HRESULT hr = D3D_OK;
   3492     const unsigned processor = info->type;
   3493     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
   3494 
   3495     user_assert(processor != ~0, D3DERR_INVALIDCALL);
   3496 
   3497     tx = CALLOC_STRUCT(shader_translator);
   3498     if (!tx)
   3499         return E_OUTOFMEMORY;
   3500     tx_ctor(tx, info);
   3501 
   3502     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
   3503         hr = D3DERR_INVALIDCALL;
   3504         DBG("Unsupported shader version: %u.%u !\n",
   3505             tx->version.major, tx->version.minor);
   3506         goto out;
   3507     }
   3508     if (tx->processor != processor) {
   3509         hr = D3DERR_INVALIDCALL;
   3510         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
   3511         goto out;
   3512     }
   3513     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
   3514          tx->version.major, tx->version.minor);
   3515 
   3516     tx->ureg = ureg_create(processor);
   3517     if (!tx->ureg) {
   3518         hr = E_OUTOFMEMORY;
   3519         goto out;
   3520     }
   3521 
   3522     tx->native_integers = GET_SHADER_CAP(INTEGERS);
   3523     tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
   3524     tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
   3525     tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
   3526     tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   3527     tx->texcoord_sn = tx->want_texcoord ?
   3528         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
   3529     tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
   3530     tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
   3531 
   3532     if (IS_VS) {
   3533         tx->num_constf_allowed = NINE_MAX_CONST_F;
   3534     } else if (tx->version.major < 2) {/* IS_PS v1 */
   3535         tx->num_constf_allowed = 8;
   3536     } else if (tx->version.major == 2) {/* IS_PS v2 */
   3537         tx->num_constf_allowed = 32;
   3538     } else {/* IS_PS v3 */
   3539         tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
   3540     }
   3541 
   3542     if (tx->version.major < 2) {
   3543         tx->num_consti_allowed = 0;
   3544         tx->num_constb_allowed = 0;
   3545     } else {
   3546         tx->num_consti_allowed = NINE_MAX_CONST_I;
   3547         tx->num_constb_allowed = NINE_MAX_CONST_B;
   3548     }
   3549 
   3550     if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
   3551         tx->num_constf_allowed = 8192;
   3552         tx->num_consti_allowed = 2048;
   3553         tx->num_constb_allowed = 2048;
   3554     }
   3555 
   3556     /* VS must always write position. Declare it here to make it the 1st output.
   3557      * (Some drivers like nv50 are buggy and rely on that.)
   3558      */
   3559     if (IS_VS) {
   3560         tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
   3561     } else {
   3562         ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
   3563         if (!tx->shift_wpos)
   3564             ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   3565     }
   3566 
   3567     while (!sm1_parse_eof(tx) && !tx->failure)
   3568         sm1_parse_instruction(tx);
   3569     tx->parse++; /* for byte_size */
   3570 
   3571     if (tx->failure) {
   3572         /* For VS shaders, we print the warning later,
   3573          * we first try with swvp. */
   3574         if (IS_PS)
   3575             ERR("Encountered buggy shader\n");
   3576         ureg_destroy(tx->ureg);
   3577         hr = D3DERR_INVALIDCALL;
   3578         goto out;
   3579     }
   3580 
   3581     if (IS_PS && tx->version.major < 3) {
   3582         if (tx->version.major < 2) {
   3583             assert(tx->num_temp); /* there must be color output */
   3584             info->rt_mask |= 0x1;
   3585             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
   3586         } else {
   3587             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
   3588         }
   3589     }
   3590 
   3591     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
   3592         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
   3593         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
   3594     }
   3595 
   3596     if (info->position_t)
   3597         ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
   3598 
   3599     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
   3600         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
   3601         ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
   3602         ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
   3603         info->point_size = TRUE;
   3604     }
   3605 
   3606     if (info->process_vertices)
   3607         shader_add_vs_viewport_transform(tx);
   3608 
   3609     ureg_END(tx->ureg);
   3610 
   3611     /* record local constants */
   3612     if (tx->num_lconstf && tx->indirect_const_access) {
   3613         struct nine_range *ranges;
   3614         float *data;
   3615         int *indices;
   3616         unsigned i, k, n;
   3617 
   3618         hr = E_OUTOFMEMORY;
   3619 
   3620         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
   3621         if (!data)
   3622             goto out;
   3623         info->lconstf.data = data;
   3624 
   3625         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
   3626         if (!indices)
   3627             goto out;
   3628 
   3629         /* lazy sort, num_lconstf should be small */
   3630         for (n = 0; n < tx->num_lconstf; ++n) {
   3631             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
   3632                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
   3633                     k = i;
   3634             }
   3635             indices[n] = tx->lconstf[k].idx;
   3636             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
   3637             tx->lconstf[k].idx = INT_MAX;
   3638         }
   3639 
   3640         /* count ranges */
   3641         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
   3642             if (indices[i] != indices[i - 1] + 1)
   3643                 ++n;
   3644         ranges = MALLOC(n * sizeof(ranges[0]));
   3645         if (!ranges) {
   3646             FREE(indices);
   3647             goto out;
   3648         }
   3649         info->lconstf.ranges = ranges;
   3650 
   3651         k = 0;
   3652         ranges[k].bgn = indices[0];
   3653         for (i = 1; i < tx->num_lconstf; ++i) {
   3654             if (indices[i] != indices[i - 1] + 1) {
   3655                 ranges[k].next = &ranges[k + 1];
   3656                 ranges[k].end = indices[i - 1] + 1;
   3657                 ++k;
   3658                 ranges[k].bgn = indices[i];
   3659             }
   3660         }
   3661         ranges[k].end = indices[i - 1] + 1;
   3662         ranges[k].next = NULL;
   3663         assert(n == (k + 1));
   3664 
   3665         FREE(indices);
   3666         hr = D3D_OK;
   3667     }
   3668 
   3669     /* r500 */
   3670     if (info->const_float_slots > device->max_vs_const_f &&
   3671         (info->const_int_slots || info->const_bool_slots) &&
   3672         (!IS_VS || !info->swvp_on))
   3673         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
   3674 
   3675 
   3676     if (tx->indirect_const_access) /* vs only */
   3677         info->const_float_slots = device->max_vs_const_f;
   3678 
   3679     if (!IS_VS || !info->swvp_on) {
   3680         unsigned s, slot_max;
   3681         unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
   3682 
   3683         slot_max = info->const_bool_slots > 0 ?
   3684                        max_const_f + NINE_MAX_CONST_I
   3685                        + DIV_ROUND_UP(info->const_bool_slots, 4) :
   3686                            info->const_int_slots > 0 ?
   3687                                max_const_f + info->const_int_slots :
   3688                                    info->const_float_slots;
   3689 
   3690         info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
   3691 
   3692         for (s = 0; s < slot_max; s++)
   3693             ureg_DECL_constant(tx->ureg, s);
   3694     } else {
   3695          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
   3696          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
   3697          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
   3698          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
   3699     }
   3700 
   3701     if (info->process_vertices)
   3702         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
   3703 
   3704     if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
   3705         unsigned count;
   3706         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
   3707         tgsi_dump(toks, 0);
   3708         ureg_free_tokens(toks);
   3709     }
   3710 
   3711     if (info->process_vertices) {
   3712         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
   3713                                                     tx->output_info,
   3714                                                     tx->num_outputs,
   3715                                                     &(info->so));
   3716         info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
   3717     } else
   3718         info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
   3719     if (!info->cso) {
   3720         hr = D3DERR_DRIVERINTERNALERROR;
   3721         FREE(info->lconstf.data);
   3722         FREE(info->lconstf.ranges);
   3723         goto out;
   3724     }
   3725 
   3726     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
   3727 out:
   3728     tx_dtor(tx);
   3729     return hr;
   3730 }
   3731