Home | History | Annotate | Download | only in tgsi
      1 /*
      2  * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #include "tgsi/tgsi_transform.h"
     28 #include "tgsi/tgsi_scan.h"
     29 #include "tgsi/tgsi_dump.h"
     30 
     31 #include "util/u_debug.h"
     32 #include "util/u_math.h"
     33 
     34 #include "tgsi_lowering.h"
     35 
     36 struct tgsi_lowering_context {
     37    struct tgsi_transform_context base;
     38    const struct tgsi_lowering_config *config;
     39    struct tgsi_shader_info *info;
     40    unsigned two_side_colors;
     41    unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
     42    unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
     43    int face_idx;
     44    unsigned numtmp;
     45    struct {
     46       struct tgsi_full_src_register src;
     47       struct tgsi_full_dst_register dst;
     48    } tmp[2];
     49 #define A 0
     50 #define B 1
     51    struct tgsi_full_src_register imm;
     52    int emitted_decls;
     53    unsigned saturate;
     54 };
     55 
     56 static inline struct tgsi_lowering_context *
     57 tgsi_lowering_context(struct tgsi_transform_context *tctx)
     58 {
     59    return (struct tgsi_lowering_context *)tctx;
     60 }
     61 
     62 /*
     63  * Utility helpers:
     64  */
     65 
     66 static void
     67 reg_dst(struct tgsi_full_dst_register *dst,
     68 	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
     69 {
     70    *dst = *orig_dst;
     71    dst->Register.WriteMask &= wrmask;
     72    assert(dst->Register.WriteMask);
     73 }
     74 
     75 static inline void
     76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
     77 {
     78    swiz[0] = src->SwizzleX;
     79    swiz[1] = src->SwizzleY;
     80    swiz[2] = src->SwizzleZ;
     81    swiz[3] = src->SwizzleW;
     82 }
     83 
     84 static void
     85 reg_src(struct tgsi_full_src_register *src,
     86 	const struct tgsi_full_src_register *orig_src,
     87 	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
     88 {
     89    unsigned swiz[4];
     90    get_swiz(swiz, &orig_src->Register);
     91    *src = *orig_src;
     92    src->Register.SwizzleX = swiz[sx];
     93    src->Register.SwizzleY = swiz[sy];
     94    src->Register.SwizzleZ = swiz[sz];
     95    src->Register.SwizzleW = swiz[sw];
     96 }
     97 
     98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
     99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
    100       TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
    101 
    102 /*
    103  * if (dst.x aliases src.x) {
    104  *   MOV tmpA.x, src.x
    105  *   src = tmpA
    106  * }
    107  * COS dst.x, src.x
    108  * SIN dst.y, src.x
    109  * MOV dst.zw, imm{0.0, 1.0}
    110  */
    111 static bool
    112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
    113 	const struct tgsi_full_src_register *src, unsigned src_mask)
    114 {
    115    if ((dst->Register.File == src->Register.File) &&
    116        (dst->Register.Index == src->Register.Index)) {
    117       unsigned i, actual_mask = 0;
    118       unsigned swiz[4];
    119       get_swiz(swiz, &src->Register);
    120       for (i = 0; i < 4; i++)
    121          if (src_mask & (1 << i))
    122             actual_mask |= (1 << swiz[i]);
    123       if (actual_mask & dst_mask)
    124          return true;
    125    }
    126    return false;
    127 }
    128 
    129 static void
    130 create_mov(struct tgsi_transform_context *tctx,
    131            const struct tgsi_full_dst_register *dst,
    132            const struct tgsi_full_src_register *src,
    133            unsigned mask, unsigned saturate)
    134 {
    135    struct tgsi_full_instruction new_inst;
    136 
    137    new_inst = tgsi_default_full_instruction();
    138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    139    new_inst.Instruction.Saturate = saturate;
    140    new_inst.Instruction.NumDstRegs = 1;
    141    reg_dst(&new_inst.Dst[0], dst, mask);
    142    new_inst.Instruction.NumSrcRegs = 1;
    143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    144    tctx->emit_instruction(tctx, &new_inst);
    145 }
    146 
    147 /* to help calculate # of tgsi tokens for a lowering.. we assume
    148  * the worst case, ie. removed instructions don't have ADDR[] or
    149  * anything which increases the # of tokens per src/dst and the
    150  * inserted instructions do.
    151  *
    152  * OINST() - old instruction
    153  *    1         : instruction itself
    154  *    1         : dst
    155  *    1 * nargs : srcN
    156  *
    157  * NINST() - new instruction
    158  *    1         : instruction itself
    159  *    2         : dst
    160  *    2 * nargs : srcN
    161  */
    162 
    163 #define OINST(nargs)  (1 + 1 + 1 * (nargs))
    164 #define NINST(nargs)  (1 + 2 + 2 * (nargs))
    165 
    166 /*
    167  * Lowering Translators:
    168  */
    169 
    170 /* DST - Distance Vector
    171  *   dst.x = 1.0
    172  *   dst.y = src0.y \times src1.y
    173  *   dst.z = src0.z
    174  *   dst.w = src1.w
    175  *
    176  * ; note: could be more clever and use just a single temp
    177  * ;       if I was clever enough to re-write the swizzles.
    178  * ; needs: 2 tmp, imm{1.0}
    179  * if (dst.y aliases src0.z) {
    180  *   MOV tmpA.yz, src0.yz
    181  *   src0 = tmpA
    182  * }
    183  * if (dst.yz aliases src1.w) {
    184  *   MOV tmpB.yw, src1.yw
    185  *   src1 = tmpB
    186  * }
    187  * MUL dst.y, src0.y, src1.y
    188  * MOV dst.z, src0.z
    189  * MOV dst.w, src1.w
    190  * MOV dst.x, imm{1.0}
    191  */
    192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
    193 		NINST(1) + NINST(1) - OINST(2))
    194 #define DST_TMP  2
    195 static void
    196 transform_dst(struct tgsi_transform_context *tctx,
    197               struct tgsi_full_instruction *inst)
    198 {
    199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    201    struct tgsi_full_src_register *src0 = &inst->Src[0];
    202    struct tgsi_full_src_register *src1 = &inst->Src[1];
    203    struct tgsi_full_instruction new_inst;
    204 
    205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
    206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
    207       src0 = &ctx->tmp[A].src;
    208    }
    209 
    210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
    211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
    212       src1 = &ctx->tmp[B].src;
    213    }
    214 
    215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    216       /* MUL dst.y, src0.y, src1.y */
    217       new_inst = tgsi_default_full_instruction();
    218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    219       new_inst.Instruction.NumDstRegs = 1;
    220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    221       new_inst.Instruction.NumSrcRegs = 2;
    222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
    223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
    224       tctx->emit_instruction(tctx, &new_inst);
    225    }
    226 
    227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    228       /* MOV dst.z, src0.z */
    229       new_inst = tgsi_default_full_instruction();
    230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    231       new_inst.Instruction.NumDstRegs = 1;
    232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
    233       new_inst.Instruction.NumSrcRegs = 1;
    234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
    235       tctx->emit_instruction(tctx, &new_inst);
    236    }
    237 
    238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    239       /* MOV dst.w, src1.w */
    240       new_inst = tgsi_default_full_instruction();
    241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    242       new_inst.Instruction.NumDstRegs = 1;
    243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    244       new_inst.Instruction.NumSrcRegs = 1;
    245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
    246       tctx->emit_instruction(tctx, &new_inst);
    247    }
    248 
    249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
    250       /* MOV dst.x, imm{1.0} */
    251       new_inst = tgsi_default_full_instruction();
    252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    253       new_inst.Instruction.NumDstRegs = 1;
    254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
    255       new_inst.Instruction.NumSrcRegs = 1;
    256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
    257       tctx->emit_instruction(tctx, &new_inst);
    258    }
    259 }
    260 
    261 /* XPD - Cross Product
    262  *   dst.x = src0.y \times src1.z - src1.y \times src0.z
    263  *   dst.y = src0.z \times src1.x - src1.z \times src0.x
    264  *   dst.z = src0.x \times src1.y - src1.x \times src0.y
    265  *   dst.w = 1.0
    266  *
    267  * ; needs: 1 tmp, imm{1.0}
    268  * MUL tmpA.xyz, src1.yzx, src0.zxy
    269  * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
    270  * MOV dst.w, imm{1.0}
    271  */
    272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
    273 #define XPD_TMP  1
    274 static void
    275 transform_xpd(struct tgsi_transform_context *tctx,
    276               struct tgsi_full_instruction *inst)
    277 {
    278    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    279    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    280    struct tgsi_full_src_register *src0 = &inst->Src[0];
    281    struct tgsi_full_src_register *src1 = &inst->Src[1];
    282    struct tgsi_full_instruction new_inst;
    283 
    284    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
    285       /* MUL tmpA.xyz, src1.yzx, src0.zxy */
    286       new_inst = tgsi_default_full_instruction();
    287       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    288       new_inst.Instruction.NumDstRegs = 1;
    289       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
    290       new_inst.Instruction.NumSrcRegs = 2;
    291       reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
    292       reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
    293       tctx->emit_instruction(tctx, &new_inst);
    294 
    295       /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
    296       new_inst = tgsi_default_full_instruction();
    297       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    298       new_inst.Instruction.NumDstRegs = 1;
    299       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
    300       new_inst.Instruction.NumSrcRegs = 3;
    301       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
    302       reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
    303       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
    304       new_inst.Src[2].Register.Negate = true;
    305       tctx->emit_instruction(tctx, &new_inst);
    306    }
    307 
    308    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    309       /* MOV dst.w, imm{1.0} */
    310       new_inst = tgsi_default_full_instruction();
    311       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    312       new_inst.Instruction.NumDstRegs = 1;
    313       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    314       new_inst.Instruction.NumSrcRegs = 1;
    315       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
    316       tctx->emit_instruction(tctx, &new_inst);
    317    }
    318 }
    319 
    320 /* SCS - Sine Cosine
    321  *   dst.x = \cos{src.x}
    322  *   dst.y = \sin{src.x}
    323  *   dst.z = 0.0
    324  *   dst.w = 1.0
    325  *
    326  * ; needs: 1 tmp, imm{0.0, 1.0}
    327  * if (dst.x aliases src.x) {
    328  *   MOV tmpA.x, src.x
    329  *   src = tmpA
    330  * }
    331  * COS dst.x, src.x
    332  * SIN dst.y, src.x
    333  * MOV dst.zw, imm{0.0, 1.0}
    334  */
    335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
    336 #define SCS_TMP  1
    337 static void
    338 transform_scs(struct tgsi_transform_context *tctx,
    339               struct tgsi_full_instruction *inst)
    340 {
    341    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    342    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    343    struct tgsi_full_src_register *src = &inst->Src[0];
    344    struct tgsi_full_instruction new_inst;
    345 
    346    if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
    347       create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
    348       src = &ctx->tmp[A].src;
    349    }
    350 
    351    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
    352       /* COS dst.x, src.x */
    353       new_inst = tgsi_default_full_instruction();
    354       new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
    355       new_inst.Instruction.NumDstRegs = 1;
    356       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
    357       new_inst.Instruction.NumSrcRegs = 1;
    358       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    359       tctx->emit_instruction(tctx, &new_inst);
    360    }
    361 
    362    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    363       /* SIN dst.y, src.x */
    364       new_inst = tgsi_default_full_instruction();
    365       new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
    366       new_inst.Instruction.NumDstRegs = 1;
    367       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    368       new_inst.Instruction.NumSrcRegs = 1;
    369       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    370       tctx->emit_instruction(tctx, &new_inst);
    371    }
    372 
    373    if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
    374       /* MOV dst.zw, imm{0.0, 1.0} */
    375       new_inst = tgsi_default_full_instruction();
    376       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    377       new_inst.Instruction.NumDstRegs = 1;
    378       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
    379       new_inst.Instruction.NumSrcRegs = 1;
    380       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
    381       tctx->emit_instruction(tctx, &new_inst);
    382    }
    383 }
    384 
    385 /* LRP - Linear Interpolate
    386  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
    387  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
    388  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
    389  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
    390  *
    391  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
    392  * can then become: src0 \times src1 - (src0 \times src2 - src2)
    393  *
    394  * ; needs: 1 tmp
    395  * MAD tmpA, src0, src2, -src2
    396  * MAD dst, src0, src1, -tmpA
    397  */
    398 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
    399 #define LRP_TMP  1
    400 static void
    401 transform_lrp(struct tgsi_transform_context *tctx,
    402               struct tgsi_full_instruction *inst)
    403 {
    404    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    405    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    406    struct tgsi_full_src_register *src0 = &inst->Src[0];
    407    struct tgsi_full_src_register *src1 = &inst->Src[1];
    408    struct tgsi_full_src_register *src2 = &inst->Src[2];
    409    struct tgsi_full_instruction new_inst;
    410 
    411    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    412       /* MAD tmpA, src0, src2, -src2 */
    413       new_inst = tgsi_default_full_instruction();
    414       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    415       new_inst.Instruction.NumDstRegs = 1;
    416       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    417       new_inst.Instruction.NumSrcRegs = 3;
    418       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    419       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
    420       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
    421       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
    422       tctx->emit_instruction(tctx, &new_inst);
    423 
    424       /* MAD dst, src0, src1, -tmpA */
    425       new_inst = tgsi_default_full_instruction();
    426       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    427       new_inst.Instruction.NumDstRegs = 1;
    428       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    429       new_inst.Instruction.NumSrcRegs = 3;
    430       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    431       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
    432       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    433       new_inst.Src[2].Register.Negate = true;
    434       tctx->emit_instruction(tctx, &new_inst);
    435    }
    436 }
    437 
    438 /* FRC - Fraction
    439  *  dst.x = src.x - \lfloor src.x\rfloor
    440  *  dst.y = src.y - \lfloor src.y\rfloor
    441  *  dst.z = src.z - \lfloor src.z\rfloor
    442  *  dst.w = src.w - \lfloor src.w\rfloor
    443  *
    444  * ; needs: 1 tmp
    445  * FLR tmpA, src
    446  * SUB dst, src, tmpA
    447  */
    448 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
    449 #define FRC_TMP  1
    450 static void
    451 transform_frc(struct tgsi_transform_context *tctx,
    452               struct tgsi_full_instruction *inst)
    453 {
    454    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    455    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    456    struct tgsi_full_src_register *src = &inst->Src[0];
    457    struct tgsi_full_instruction new_inst;
    458 
    459    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    460       /* FLR tmpA, src */
    461       new_inst = tgsi_default_full_instruction();
    462       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    463       new_inst.Instruction.NumDstRegs = 1;
    464       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    465       new_inst.Instruction.NumSrcRegs = 1;
    466       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    467       tctx->emit_instruction(tctx, &new_inst);
    468 
    469       /* SUB dst, src, tmpA */
    470       new_inst = tgsi_default_full_instruction();
    471       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    472       new_inst.Instruction.NumDstRegs = 1;
    473       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    474       new_inst.Instruction.NumSrcRegs = 2;
    475       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    476       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    477       new_inst.Src[1].Register.Negate = 1;
    478       tctx->emit_instruction(tctx, &new_inst);
    479    }
    480 }
    481 
    482 /* POW - Power
    483  *  dst.x = src0.x^{src1.x}
    484  *  dst.y = src0.x^{src1.x}
    485  *  dst.z = src0.x^{src1.x}
    486  *  dst.w = src0.x^{src1.x}
    487  *
    488  * ; needs: 1 tmp
    489  * LG2 tmpA.x, src0.x
    490  * MUL tmpA.x, src1.x, tmpA.x
    491  * EX2 dst, tmpA.x
    492  */
    493 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
    494 #define POW_TMP  1
    495 static void
    496 transform_pow(struct tgsi_transform_context *tctx,
    497               struct tgsi_full_instruction *inst)
    498 {
    499    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    500    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    501    struct tgsi_full_src_register *src0 = &inst->Src[0];
    502    struct tgsi_full_src_register *src1 = &inst->Src[1];
    503    struct tgsi_full_instruction new_inst;
    504 
    505    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    506       /* LG2 tmpA.x, src0.x */
    507       new_inst = tgsi_default_full_instruction();
    508       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    509       new_inst.Instruction.NumDstRegs = 1;
    510       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    511       new_inst.Instruction.NumSrcRegs = 1;
    512       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
    513       tctx->emit_instruction(tctx, &new_inst);
    514 
    515       /* MUL tmpA.x, src1.x, tmpA.x */
    516       new_inst = tgsi_default_full_instruction();
    517       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    518       new_inst.Instruction.NumDstRegs = 1;
    519       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    520       new_inst.Instruction.NumSrcRegs = 2;
    521       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
    522       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    523       tctx->emit_instruction(tctx, &new_inst);
    524 
    525       /* EX2 dst, tmpA.x */
    526       new_inst = tgsi_default_full_instruction();
    527       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    528       new_inst.Instruction.NumDstRegs = 1;
    529       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    530       new_inst.Instruction.NumSrcRegs = 1;
    531       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    532       tctx->emit_instruction(tctx, &new_inst);
    533    }
    534 }
    535 
    536 /* LIT - Light Coefficients
    537  *  dst.x = 1.0
    538  *  dst.y = max(src.x, 0.0)
    539  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
    540  *  dst.w = 1.0
    541  *
    542  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
    543  * MAX tmpA.xy, src.xy, imm{0.0}
    544  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
    545  * LG2 tmpA.y, tmpA.y
    546  * MUL tmpA.y, tmpA.z, tmpA.y
    547  * EX2 tmpA.y, tmpA.y
    548  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
    549  * MOV dst.yz, tmpA.xy
    550  * MOV dst.xw, imm{1.0}
    551  */
    552 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
    553 		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
    554 #define LIT_TMP  1
    555 static void
    556 transform_lit(struct tgsi_transform_context *tctx,
    557               struct tgsi_full_instruction *inst)
    558 {
    559    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    560    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    561    struct tgsi_full_src_register *src = &inst->Src[0];
    562    struct tgsi_full_instruction new_inst;
    563 
    564    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
    565       /* MAX tmpA.xy, src.xy, imm{0.0} */
    566       new_inst = tgsi_default_full_instruction();
    567       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
    568       new_inst.Instruction.NumDstRegs = 1;
    569       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
    570       new_inst.Instruction.NumSrcRegs = 2;
    571       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
    572       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
    573       tctx->emit_instruction(tctx, &new_inst);
    574 
    575       /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
    576       new_inst = tgsi_default_full_instruction();
    577       new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
    578       new_inst.Instruction.NumDstRegs = 1;
    579       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    580       new_inst.Instruction.NumSrcRegs = 3;
    581       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
    582       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
    583       new_inst.Src[1].Register.Negate = true;
    584       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
    585       tctx->emit_instruction(tctx, &new_inst);
    586 
    587       /* LG2 tmpA.y, tmpA.y */
    588       new_inst = tgsi_default_full_instruction();
    589       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    590       new_inst.Instruction.NumDstRegs = 1;
    591       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    592       new_inst.Instruction.NumSrcRegs = 1;
    593       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    594       tctx->emit_instruction(tctx, &new_inst);
    595 
    596       /* MUL tmpA.y, tmpA.z, tmpA.y */
    597       new_inst = tgsi_default_full_instruction();
    598       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    599       new_inst.Instruction.NumDstRegs = 1;
    600       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    601       new_inst.Instruction.NumSrcRegs = 2;
    602       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
    603       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    604       tctx->emit_instruction(tctx, &new_inst);
    605 
    606       /* EX2 tmpA.y, tmpA.y */
    607       new_inst = tgsi_default_full_instruction();
    608       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    609       new_inst.Instruction.NumDstRegs = 1;
    610       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    611       new_inst.Instruction.NumSrcRegs = 1;
    612       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    613       tctx->emit_instruction(tctx, &new_inst);
    614 
    615       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
    616       new_inst = tgsi_default_full_instruction();
    617       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
    618       new_inst.Instruction.NumDstRegs = 1;
    619       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    620       new_inst.Instruction.NumSrcRegs = 3;
    621       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    622       new_inst.Src[0].Register.Negate = true;
    623       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    624       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
    625       tctx->emit_instruction(tctx, &new_inst);
    626 
    627       /* MOV dst.yz, tmpA.xy */
    628       new_inst = tgsi_default_full_instruction();
    629       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    630       new_inst.Instruction.NumDstRegs = 1;
    631       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
    632       new_inst.Instruction.NumSrcRegs = 1;
    633       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
    634       tctx->emit_instruction(tctx, &new_inst);
    635    }
    636 
    637    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
    638       /* MOV dst.xw, imm{1.0} */
    639       new_inst = tgsi_default_full_instruction();
    640       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    641       new_inst.Instruction.NumDstRegs = 1;
    642       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
    643       new_inst.Instruction.NumSrcRegs = 1;
    644       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
    645       tctx->emit_instruction(tctx, &new_inst);
    646    }
    647 }
    648 
    649 /* EXP - Approximate Exponential Base 2
    650  *  dst.x = 2^{\lfloor src.x\rfloor}
    651  *  dst.y = src.x - \lfloor src.x\rfloor
    652  *  dst.z = 2^{src.x}
    653  *  dst.w = 1.0
    654  *
    655  * ; needs: 1 tmp, imm{1.0}
    656  * if (lowering FLR) {
    657  *   FRC tmpA.x, src.x
    658  *   SUB tmpA.x, src.x, tmpA.x
    659  * } else {
    660  *   FLR tmpA.x, src.x
    661  * }
    662  * EX2 tmpA.y, src.x
    663  * SUB dst.y, src.x, tmpA.x
    664  * EX2 dst.x, tmpA.x
    665  * MOV dst.z, tmpA.y
    666  * MOV dst.w, imm{1.0}
    667  */
    668 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
    669 		NINST(1)+ NINST(1) - OINST(1))
    670 #define EXP_TMP  1
    671 static void
    672 transform_exp(struct tgsi_transform_context *tctx,
    673               struct tgsi_full_instruction *inst)
    674 {
    675    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    676    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    677    struct tgsi_full_src_register *src = &inst->Src[0];
    678    struct tgsi_full_instruction new_inst;
    679 
    680    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
    681       if (ctx->config->lower_FLR) {
    682          /* FRC tmpA.x, src.x */
    683          new_inst = tgsi_default_full_instruction();
    684          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    685          new_inst.Instruction.NumDstRegs = 1;
    686          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    687          new_inst.Instruction.NumSrcRegs = 1;
    688          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    689          tctx->emit_instruction(tctx, &new_inst);
    690 
    691          /* SUB tmpA.x, src.x, tmpA.x */
    692          new_inst = tgsi_default_full_instruction();
    693          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    694          new_inst.Instruction.NumDstRegs = 1;
    695          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    696          new_inst.Instruction.NumSrcRegs = 2;
    697          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    698          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    699          new_inst.Src[1].Register.Negate = 1;
    700          tctx->emit_instruction(tctx, &new_inst);
    701      } else {
    702          /* FLR tmpA.x, src.x */
    703          new_inst = tgsi_default_full_instruction();
    704          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    705          new_inst.Instruction.NumDstRegs = 1;
    706          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    707          new_inst.Instruction.NumSrcRegs = 1;
    708          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    709          tctx->emit_instruction(tctx, &new_inst);
    710       }
    711    }
    712 
    713    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    714       /* EX2 tmpA.y, src.x */
    715       new_inst = tgsi_default_full_instruction();
    716       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    717       new_inst.Instruction.NumDstRegs = 1;
    718       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    719       new_inst.Instruction.NumSrcRegs = 1;
    720       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    721       tctx->emit_instruction(tctx, &new_inst);
    722    }
    723 
    724    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    725       /* SUB dst.y, src.x, tmpA.x */
    726       new_inst = tgsi_default_full_instruction();
    727       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    728       new_inst.Instruction.NumDstRegs = 1;
    729       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    730       new_inst.Instruction.NumSrcRegs = 2;
    731       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    732       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    733       new_inst.Src[1].Register.Negate = 1;
    734       tctx->emit_instruction(tctx, &new_inst);
    735    }
    736 
    737    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
    738       /* EX2 dst.x, tmpA.x */
    739       new_inst = tgsi_default_full_instruction();
    740       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    741       new_inst.Instruction.NumDstRegs = 1;
    742       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
    743       new_inst.Instruction.NumSrcRegs = 1;
    744       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    745       tctx->emit_instruction(tctx, &new_inst);
    746    }
    747 
    748    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    749       /* MOV dst.z, tmpA.y */
    750       new_inst = tgsi_default_full_instruction();
    751       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    752       new_inst.Instruction.NumDstRegs = 1;
    753       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
    754       new_inst.Instruction.NumSrcRegs = 1;
    755       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
    756       tctx->emit_instruction(tctx, &new_inst);
    757    }
    758 
    759    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    760       /* MOV dst.w, imm{1.0} */
    761       new_inst = tgsi_default_full_instruction();
    762       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    763       new_inst.Instruction.NumDstRegs = 1;
    764       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    765       new_inst.Instruction.NumSrcRegs = 1;
    766       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
    767       tctx->emit_instruction(tctx, &new_inst);
    768    }
    769 }
    770 
    771 /* LOG - Approximate Logarithm Base 2
    772  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
    773  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
    774  *  dst.z = \log_2{|src.x|}
    775  *  dst.w = 1.0
    776  *
    777  * ; needs: 1 tmp, imm{1.0}
    778  * LG2 tmpA.x, |src.x|
    779  * if (lowering FLR) {
    780  *   FRC tmpA.y, tmpA.x
    781  *   SUB tmpA.y, tmpA.x, tmpA.y
    782  * } else {
    783  *   FLR tmpA.y, tmpA.x
    784  * }
    785  * EX2 tmpA.z, tmpA.y
    786  * RCP tmpA.z, tmpA.z
    787  * MUL dst.y, |src.x|, tmpA.z
    788  * MOV dst.xz, tmpA.yx
    789  * MOV dst.w, imm{1.0}
    790  */
    791 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
    792 		NINST(2) + NINST(1) + NINST(1) - OINST(1))
    793 #define LOG_TMP  1
    794 static void
    795 transform_log(struct tgsi_transform_context *tctx,
    796               struct tgsi_full_instruction *inst)
    797 {
    798    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    799    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    800    struct tgsi_full_src_register *src = &inst->Src[0];
    801    struct tgsi_full_instruction new_inst;
    802 
    803    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
    804       /* LG2 tmpA.x, |src.x| */
    805       new_inst = tgsi_default_full_instruction();
    806       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    807       new_inst.Instruction.NumDstRegs = 1;
    808       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    809       new_inst.Instruction.NumSrcRegs = 1;
    810       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    811       new_inst.Src[0].Register.Absolute = true;
    812       tctx->emit_instruction(tctx, &new_inst);
    813    }
    814 
    815    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
    816       if (ctx->config->lower_FLR) {
    817          /* FRC tmpA.y, tmpA.x */
    818          new_inst = tgsi_default_full_instruction();
    819          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    820          new_inst.Instruction.NumDstRegs = 1;
    821          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    822          new_inst.Instruction.NumSrcRegs = 1;
    823          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    824          tctx->emit_instruction(tctx, &new_inst);
    825 
    826          /* SUB tmpA.y, tmpA.x, tmpA.y */
    827          new_inst = tgsi_default_full_instruction();
    828          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    829          new_inst.Instruction.NumDstRegs = 1;
    830          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    831          new_inst.Instruction.NumSrcRegs = 2;
    832          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    833          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    834          new_inst.Src[1].Register.Negate = 1;
    835          tctx->emit_instruction(tctx, &new_inst);
    836       } else {
    837          /* FLR tmpA.y, tmpA.x */
    838          new_inst = tgsi_default_full_instruction();
    839          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    840          new_inst.Instruction.NumDstRegs = 1;
    841          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    842          new_inst.Instruction.NumSrcRegs = 1;
    843          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    844          tctx->emit_instruction(tctx, &new_inst);
    845       }
    846    }
    847 
    848    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    849       /* EX2 tmpA.z, tmpA.y */
    850       new_inst = tgsi_default_full_instruction();
    851       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    852       new_inst.Instruction.NumDstRegs = 1;
    853       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    854       new_inst.Instruction.NumSrcRegs = 1;
    855       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    856       tctx->emit_instruction(tctx, &new_inst);
    857 
    858       /* RCP tmpA.z, tmpA.z */
    859       new_inst = tgsi_default_full_instruction();
    860       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
    861       new_inst.Instruction.NumDstRegs = 1;
    862       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    863       new_inst.Instruction.NumSrcRegs = 1;
    864       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
    865       tctx->emit_instruction(tctx, &new_inst);
    866 
    867       /* MUL dst.y, |src.x|, tmpA.z */
    868       new_inst = tgsi_default_full_instruction();
    869       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    870       new_inst.Instruction.NumDstRegs = 1;
    871       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    872       new_inst.Instruction.NumSrcRegs = 2;
    873       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    874       new_inst.Src[0].Register.Absolute = true;
    875       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
    876       tctx->emit_instruction(tctx, &new_inst);
    877    }
    878 
    879    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
    880       /* MOV dst.xz, tmpA.yx */
    881       new_inst = tgsi_default_full_instruction();
    882       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    883       new_inst.Instruction.NumDstRegs = 1;
    884       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
    885       new_inst.Instruction.NumSrcRegs = 1;
    886       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
    887       tctx->emit_instruction(tctx, &new_inst);
    888    }
    889 
    890    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    891       /* MOV dst.w, imm{1.0} */
    892       new_inst = tgsi_default_full_instruction();
    893       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    894       new_inst.Instruction.NumDstRegs = 1;
    895       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    896       new_inst.Instruction.NumSrcRegs = 1;
    897       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
    898       tctx->emit_instruction(tctx, &new_inst);
    899    }
    900 }
    901 
    902 /* DP4 - 4-component Dot Product
    903  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
    904  *
    905  * DP3 - 3-component Dot Product
    906  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
    907  *
    908  * DPH - Homogeneous Dot Product
    909  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
    910  *
    911  * DP2 - 2-component Dot Product
    912  *   dst = src0.x \times src1.x + src0.y \times src1.y
    913  *
    914  * DP2A - 2-component Dot Product And Add
    915  *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
    916  *
    917  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
    918  * operations, which is what you'd prefer for a ISA that is natively
    919  * scalar.  Probably a native vector ISA would at least already have
    920  * DP4/DP3 instructions, but perhaps there is room for an alternative
    921  * translation for DPH/DP2/DP2A using vector instructions.
    922  *
    923  * ; needs: 1 tmp
    924  * MUL tmpA.x, src0.x, src1.x
    925  * MAD tmpA.x, src0.y, src1.y, tmpA.x
    926  * if (DPH || DP3 || DP4) {
    927  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
    928  *   if (DPH) {
    929  *     ADD tmpA.x, src1.w, tmpA.x
    930  *   } else if (DP4) {
    931  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
    932  *   }
    933  * } else if (DP2A) {
    934  *   ADD tmpA.x, src2.x, tmpA.x
    935  * }
    936  * ; fixup last instruction to replicate into dst
    937  */
    938 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
    939 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
    940 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
    941 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
    942 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
    943 #define DOTP_TMP  1
    944 static void
    945 transform_dotp(struct tgsi_transform_context *tctx,
    946                struct tgsi_full_instruction *inst)
    947 {
    948    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    949    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    950    struct tgsi_full_src_register *src0 = &inst->Src[0];
    951    struct tgsi_full_src_register *src1 = &inst->Src[1];
    952    struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
    953    struct tgsi_full_instruction new_inst;
    954    unsigned opcode = inst->Instruction.Opcode;
    955 
    956    /* NOTE: any potential last instruction must replicate src on all
    957     * components (since it could be re-written to write to final dst)
    958     */
    959 
    960    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    961       /* MUL tmpA.x, src0.x, src1.x */
    962       new_inst = tgsi_default_full_instruction();
    963       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    964       new_inst.Instruction.NumDstRegs = 1;
    965       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    966       new_inst.Instruction.NumSrcRegs = 2;
    967       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
    968       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
    969       tctx->emit_instruction(tctx, &new_inst);
    970 
    971       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
    972       new_inst = tgsi_default_full_instruction();
    973       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    974       new_inst.Instruction.NumDstRegs = 1;
    975       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    976       new_inst.Instruction.NumSrcRegs = 3;
    977       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
    978       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
    979       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
    980 
    981       if ((opcode == TGSI_OPCODE_DPH) ||
    982           (opcode == TGSI_OPCODE_DP3) ||
    983           (opcode == TGSI_OPCODE_DP4)) {
    984          tctx->emit_instruction(tctx, &new_inst);
    985 
    986          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
    987          new_inst = tgsi_default_full_instruction();
    988          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    989          new_inst.Instruction.NumDstRegs = 1;
    990          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    991          new_inst.Instruction.NumSrcRegs = 3;
    992          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
    993          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
    994          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
    995 
    996          if (opcode == TGSI_OPCODE_DPH) {
    997             tctx->emit_instruction(tctx, &new_inst);
    998 
    999             /* ADD tmpA.x, src1.w, tmpA.x */
   1000             new_inst = tgsi_default_full_instruction();
   1001             new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
   1002             new_inst.Instruction.NumDstRegs = 1;
   1003             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
   1004             new_inst.Instruction.NumSrcRegs = 2;
   1005             reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
   1006             reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
   1007          } else if (opcode == TGSI_OPCODE_DP4) {
   1008             tctx->emit_instruction(tctx, &new_inst);
   1009 
   1010             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
   1011             new_inst = tgsi_default_full_instruction();
   1012             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
   1013             new_inst.Instruction.NumDstRegs = 1;
   1014             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
   1015             new_inst.Instruction.NumSrcRegs = 3;
   1016             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
   1017             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
   1018             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
   1019          }
   1020       } else if (opcode == TGSI_OPCODE_DP2A) {
   1021          tctx->emit_instruction(tctx, &new_inst);
   1022 
   1023          /* ADD tmpA.x, src2.x, tmpA.x */
   1024          new_inst = tgsi_default_full_instruction();
   1025          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
   1026          new_inst.Instruction.NumDstRegs = 1;
   1027          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
   1028          new_inst.Instruction.NumSrcRegs = 2;
   1029          reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
   1030          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
   1031       }
   1032 
   1033       /* fixup last instruction to write to dst: */
   1034       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
   1035 
   1036       tctx->emit_instruction(tctx, &new_inst);
   1037    }
   1038 }
   1039 
   1040 /* FLR - floor, CEIL - ceil
   1041  * ; needs: 1 tmp
   1042  * if (CEIL) {
   1043  *   FRC tmpA, -src
   1044  *   ADD dst, src, tmpA
   1045  * } else {
   1046  *   FRC tmpA, src
   1047  *   SUB dst, src, tmpA
   1048  * }
   1049  */
   1050 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
   1051 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
   1052 #define FLR_TMP 1
   1053 #define CEIL_TMP 1
   1054 static void
   1055 transform_flr_ceil(struct tgsi_transform_context *tctx,
   1056                    struct tgsi_full_instruction *inst)
   1057 {
   1058    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1059    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
   1060    struct tgsi_full_src_register *src0 = &inst->Src[0];
   1061    struct tgsi_full_instruction new_inst;
   1062    unsigned opcode = inst->Instruction.Opcode;
   1063 
   1064    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
   1065       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
   1066       new_inst = tgsi_default_full_instruction();
   1067       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
   1068       new_inst.Instruction.NumDstRegs = 1;
   1069       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
   1070       new_inst.Instruction.NumSrcRegs = 1;
   1071       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1072 
   1073       if (opcode == TGSI_OPCODE_CEIL)
   1074          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
   1075       tctx->emit_instruction(tctx, &new_inst);
   1076 
   1077       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
   1078       new_inst = tgsi_default_full_instruction();
   1079       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
   1080       new_inst.Instruction.NumDstRegs = 1;
   1081       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
   1082       new_inst.Instruction.NumSrcRegs = 2;
   1083       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1084       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1085       if (opcode == TGSI_OPCODE_FLR)
   1086          new_inst.Src[1].Register.Negate = 1;
   1087       tctx->emit_instruction(tctx, &new_inst);
   1088    }
   1089 }
   1090 
   1091 /* TRUNC - truncate off fractional part
   1092  *  dst.x = trunc(src.x)
   1093  *  dst.y = trunc(src.y)
   1094  *  dst.z = trunc(src.z)
   1095  *  dst.w = trunc(src.w)
   1096  *
   1097  * ; needs: 1 tmp
   1098  * if (lower FLR) {
   1099  *   FRC tmpA, |src|
   1100  *   SUB tmpA, |src|, tmpA
   1101  * } else {
   1102  *   FLR tmpA, |src|
   1103  * }
   1104  * CMP dst, src, -tmpA, tmpA
   1105  */
   1106 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
   1107 #define TRUNC_TMP 1
   1108 static void
   1109 transform_trunc(struct tgsi_transform_context *tctx,
   1110                 struct tgsi_full_instruction *inst)
   1111 {
   1112    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1113    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
   1114    struct tgsi_full_src_register *src0 = &inst->Src[0];
   1115    struct tgsi_full_instruction new_inst;
   1116 
   1117    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
   1118       if (ctx->config->lower_FLR) {
   1119          new_inst = tgsi_default_full_instruction();
   1120          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
   1121          new_inst.Instruction.NumDstRegs = 1;
   1122          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
   1123          new_inst.Instruction.NumSrcRegs = 1;
   1124          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1125          new_inst.Src[0].Register.Absolute = true;
   1126          new_inst.Src[0].Register.Negate = false;
   1127          tctx->emit_instruction(tctx, &new_inst);
   1128 
   1129          new_inst = tgsi_default_full_instruction();
   1130          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
   1131          new_inst.Instruction.NumDstRegs = 1;
   1132          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
   1133          new_inst.Instruction.NumSrcRegs = 2;
   1134          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1135          new_inst.Src[0].Register.Absolute = true;
   1136          new_inst.Src[0].Register.Negate = false;
   1137          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1138          new_inst.Src[1].Register.Negate = 1;
   1139          tctx->emit_instruction(tctx, &new_inst);
   1140       } else {
   1141          new_inst = tgsi_default_full_instruction();
   1142          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
   1143          new_inst.Instruction.NumDstRegs = 1;
   1144          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
   1145          new_inst.Instruction.NumSrcRegs = 1;
   1146          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1147          new_inst.Src[0].Register.Absolute = true;
   1148          new_inst.Src[0].Register.Negate = false;
   1149          tctx->emit_instruction(tctx, &new_inst);
   1150       }
   1151 
   1152       new_inst = tgsi_default_full_instruction();
   1153       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
   1154       new_inst.Instruction.NumDstRegs = 1;
   1155       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
   1156       new_inst.Instruction.NumSrcRegs = 3;
   1157       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1158       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1159       new_inst.Src[1].Register.Negate = true;
   1160       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1161       tctx->emit_instruction(tctx, &new_inst);
   1162    }
   1163 }
   1164 
   1165 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
   1166  * in the case of TXP, the clamping must happen *after* projection, so
   1167  * we need to lower TXP to TEX.
   1168  *
   1169  *   MOV tmpA, src0
   1170  *   if (opc == TXP) {
   1171  *     ; do perspective division manually before clamping:
   1172  *     RCP tmpB, tmpA.w
   1173  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
   1174  *     opc = TEX;
   1175  *   }
   1176  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
   1177  *   <opc> dst, tmpA, ...
   1178  */
   1179 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
   1180 #define SAMP_TMP  2
   1181 static int
   1182 transform_samp(struct tgsi_transform_context *tctx,
   1183                struct tgsi_full_instruction *inst)
   1184 {
   1185    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1186    struct tgsi_full_src_register *coord = &inst->Src[0];
   1187    struct tgsi_full_src_register *samp;
   1188    struct tgsi_full_instruction new_inst;
   1189    /* mask is clamped coords, pmask is all coords (for projection): */
   1190    unsigned mask = 0, pmask = 0, smask;
   1191    unsigned tex = inst->Texture.Texture;
   1192    unsigned opcode = inst->Instruction.Opcode;
   1193    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
   1194 		   (ctx->config->lower_TXP & (1 << tex));
   1195 
   1196    if (opcode == TGSI_OPCODE_TXB2) {
   1197       samp = &inst->Src[2];
   1198    } else {
   1199       samp = &inst->Src[1];
   1200    }
   1201 
   1202    /* convert sampler # to bitmask to test: */
   1203    smask = 1 << samp->Register.Index;
   1204 
   1205    /* check if we actually need to lower this one: */
   1206    if (!(ctx->saturate & smask) && !lower_txp)
   1207       return -1;
   1208 
   1209    /* figure out which coordinates need saturating:
   1210     *   - RECT textures should not get saturated
   1211     *   - array index coords should not get saturated
   1212     */
   1213    switch (tex) {
   1214    case TGSI_TEXTURE_3D:
   1215    case TGSI_TEXTURE_CUBE:
   1216    case TGSI_TEXTURE_CUBE_ARRAY:
   1217    case TGSI_TEXTURE_SHADOWCUBE:
   1218    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   1219       if (ctx->config->saturate_r & smask)
   1220          mask |= TGSI_WRITEMASK_Z;
   1221       pmask |= TGSI_WRITEMASK_Z;
   1222       /* fallthrough */
   1223 
   1224    case TGSI_TEXTURE_2D:
   1225    case TGSI_TEXTURE_2D_ARRAY:
   1226    case TGSI_TEXTURE_SHADOW2D:
   1227    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1228    case TGSI_TEXTURE_2D_MSAA:
   1229    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   1230       if (ctx->config->saturate_t & smask)
   1231          mask |= TGSI_WRITEMASK_Y;
   1232       pmask |= TGSI_WRITEMASK_Y;
   1233       /* fallthrough */
   1234 
   1235    case TGSI_TEXTURE_1D:
   1236    case TGSI_TEXTURE_1D_ARRAY:
   1237    case TGSI_TEXTURE_SHADOW1D:
   1238    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1239       if (ctx->config->saturate_s & smask)
   1240          mask |= TGSI_WRITEMASK_X;
   1241       pmask |= TGSI_WRITEMASK_X;
   1242       break;
   1243 
   1244    case TGSI_TEXTURE_RECT:
   1245    case TGSI_TEXTURE_SHADOWRECT:
   1246       /* we don't saturate, but in case of lower_txp we
   1247        * still need to do the perspective divide:
   1248        */
   1249        pmask = TGSI_WRITEMASK_XY;
   1250        break;
   1251    }
   1252 
   1253    /* sanity check.. driver could be asking to saturate a non-
   1254     * existent coordinate component:
   1255     */
   1256    if (!mask && !lower_txp)
   1257       return -1;
   1258 
   1259    /* MOV tmpA, src0 */
   1260    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
   1261 
   1262    /* This is a bit sad.. we need to clamp *after* the coords
   1263     * are projected, which means lowering TXP to TEX and doing
   1264     * the projection ourself.  But since I haven't figured out
   1265     * how to make the lowering code deliver an electric shock
   1266     * to anyone using GL_CLAMP, we must do this instead:
   1267     */
   1268    if (opcode == TGSI_OPCODE_TXP) {
   1269       /* RCP tmpB.x tmpA.w */
   1270       new_inst = tgsi_default_full_instruction();
   1271       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
   1272       new_inst.Instruction.NumDstRegs = 1;
   1273       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
   1274       new_inst.Instruction.NumSrcRegs = 1;
   1275       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
   1276       tctx->emit_instruction(tctx, &new_inst);
   1277 
   1278       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
   1279       new_inst = tgsi_default_full_instruction();
   1280       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
   1281       new_inst.Instruction.NumDstRegs = 1;
   1282       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
   1283       new_inst.Instruction.NumSrcRegs = 2;
   1284       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1285       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
   1286       tctx->emit_instruction(tctx, &new_inst);
   1287 
   1288       opcode = TGSI_OPCODE_TEX;
   1289    }
   1290 
   1291    /* MOV_SAT tmpA.<mask>, tmpA */
   1292    if (mask) {
   1293       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
   1294    }
   1295 
   1296    /* modify the texture samp instruction to take fixed up coord: */
   1297    new_inst = *inst;
   1298    new_inst.Instruction.Opcode = opcode;
   1299    new_inst.Src[0] = ctx->tmp[A].src;
   1300    tctx->emit_instruction(tctx, &new_inst);
   1301 
   1302    return 0;
   1303 }
   1304 
   1305 /* Two-sided color emulation:
   1306  * For each COLOR input, create a corresponding BCOLOR input, plus
   1307  * CMP instruction to select front or back color based on FACE
   1308  */
   1309 #define TWOSIDE_GROW(n)  (                      \
   1310       2 +         /* FACE */                    \
   1311       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
   1312       ((n) * 1) + /* TEMP[] */                  \
   1313       ((n) * NINST(3))   /* CMP instr */        \
   1314       )
   1315 
   1316 static void
   1317 emit_twoside(struct tgsi_transform_context *tctx)
   1318 {
   1319    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1320    struct tgsi_shader_info *info = ctx->info;
   1321    struct tgsi_full_declaration decl;
   1322    struct tgsi_full_instruction new_inst;
   1323    unsigned inbase, tmpbase;
   1324    int i;
   1325 
   1326    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
   1327    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
   1328 
   1329    /* additional inputs for BCOLOR's */
   1330    for (i = 0; i < ctx->two_side_colors; i++) {
   1331       unsigned in_idx = ctx->two_side_idx[i];
   1332       decl = tgsi_default_full_declaration();
   1333       decl.Declaration.File = TGSI_FILE_INPUT;
   1334       decl.Declaration.Semantic = true;
   1335       decl.Range.First = decl.Range.Last = inbase + i;
   1336       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
   1337       decl.Semantic.Index = info->input_semantic_index[in_idx];
   1338       decl.Declaration.Interpolate = true;
   1339       decl.Interp.Interpolate = info->input_interpolate[in_idx];
   1340       decl.Interp.Location = info->input_interpolate_loc[in_idx];
   1341       decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
   1342       tctx->emit_declaration(tctx, &decl);
   1343    }
   1344 
   1345    /* additional input for FACE */
   1346    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
   1347       decl = tgsi_default_full_declaration();
   1348       decl.Declaration.File = TGSI_FILE_INPUT;
   1349       decl.Declaration.Semantic = true;
   1350       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
   1351       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
   1352       decl.Semantic.Index = 0;
   1353       tctx->emit_declaration(tctx, &decl);
   1354 
   1355       ctx->face_idx = decl.Range.First;
   1356    }
   1357 
   1358    /* additional temps for COLOR/BCOLOR selection: */
   1359    for (i = 0; i < ctx->two_side_colors; i++) {
   1360       decl = tgsi_default_full_declaration();
   1361       decl.Declaration.File = TGSI_FILE_TEMPORARY;
   1362       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
   1363       tctx->emit_declaration(tctx, &decl);
   1364    }
   1365 
   1366    /* and finally additional instructions to select COLOR/BCOLOR: */
   1367    for (i = 0; i < ctx->two_side_colors; i++) {
   1368       new_inst = tgsi_default_full_instruction();
   1369       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
   1370 
   1371       new_inst.Instruction.NumDstRegs = 1;
   1372       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
   1373       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
   1374       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1375 
   1376       new_inst.Instruction.NumSrcRegs = 3;
   1377       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
   1378       new_inst.Src[0].Register.Index = ctx->face_idx;
   1379       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
   1380       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
   1381       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
   1382       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
   1383       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
   1384       new_inst.Src[1].Register.Index = inbase + i;
   1385       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
   1386       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
   1387       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1388       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
   1389       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
   1390       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
   1391       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
   1392       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
   1393       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1394       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
   1395 
   1396       tctx->emit_instruction(tctx, &new_inst);
   1397    }
   1398 }
   1399 
   1400 static void
   1401 emit_decls(struct tgsi_transform_context *tctx)
   1402 {
   1403    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1404    struct tgsi_shader_info *info = ctx->info;
   1405    struct tgsi_full_declaration decl;
   1406    struct tgsi_full_immediate immed;
   1407    unsigned tmpbase;
   1408    int i;
   1409 
   1410    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
   1411 
   1412    ctx->color_base = tmpbase + ctx->numtmp;
   1413 
   1414    /* declare immediate: */
   1415    immed = tgsi_default_full_immediate();
   1416    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
   1417    immed.u[0].Float = 0.0;
   1418    immed.u[1].Float = 1.0;
   1419    immed.u[2].Float = 128.0;
   1420    immed.u[3].Float = 0.0;
   1421    tctx->emit_immediate(tctx, &immed);
   1422 
   1423    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
   1424    ctx->imm.Register.Index = info->immediate_count;
   1425    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
   1426    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1427    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1428    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
   1429 
   1430    /* declare temp regs: */
   1431    for (i = 0; i < ctx->numtmp; i++) {
   1432       decl = tgsi_default_full_declaration();
   1433       decl.Declaration.File = TGSI_FILE_TEMPORARY;
   1434       decl.Range.First = decl.Range.Last = tmpbase + i;
   1435       tctx->emit_declaration(tctx, &decl);
   1436 
   1437       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
   1438       ctx->tmp[i].src.Register.Index = tmpbase + i;
   1439       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
   1440       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1441       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1442       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
   1443 
   1444       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
   1445       ctx->tmp[i].dst.Register.Index = tmpbase + i;
   1446       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1447    }
   1448 
   1449    if (ctx->two_side_colors)
   1450       emit_twoside(tctx);
   1451 }
   1452 
   1453 static void
   1454 rename_color_inputs(struct tgsi_lowering_context *ctx,
   1455                     struct tgsi_full_instruction *inst)
   1456 {
   1457    unsigned i, j;
   1458    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
   1459       struct tgsi_src_register *src = &inst->Src[i].Register;
   1460       if (src->File == TGSI_FILE_INPUT) {
   1461          for (j = 0; j < ctx->two_side_colors; j++) {
   1462             if (src->Index == ctx->two_side_idx[j]) {
   1463                src->File = TGSI_FILE_TEMPORARY;
   1464                src->Index = ctx->color_base + j;
   1465                break;
   1466             }
   1467          }
   1468       }
   1469    }
   1470 
   1471 }
   1472 
   1473 static void
   1474 transform_instr(struct tgsi_transform_context *tctx,
   1475 		struct tgsi_full_instruction *inst)
   1476 {
   1477    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1478 
   1479    if (!ctx->emitted_decls) {
   1480       emit_decls(tctx);
   1481       ctx->emitted_decls = 1;
   1482    }
   1483 
   1484    /* if emulating two-sided-color, we need to re-write some
   1485     * src registers:
   1486     */
   1487    if (ctx->two_side_colors)
   1488       rename_color_inputs(ctx, inst);
   1489 
   1490    switch (inst->Instruction.Opcode) {
   1491    case TGSI_OPCODE_DST:
   1492       if (!ctx->config->lower_DST)
   1493          goto skip;
   1494       transform_dst(tctx, inst);
   1495       break;
   1496    case TGSI_OPCODE_XPD:
   1497       if (!ctx->config->lower_XPD)
   1498          goto skip;
   1499       transform_xpd(tctx, inst);
   1500       break;
   1501    case TGSI_OPCODE_SCS:
   1502       if (!ctx->config->lower_SCS)
   1503          goto skip;
   1504       transform_scs(tctx, inst);
   1505       break;
   1506    case TGSI_OPCODE_LRP:
   1507       if (!ctx->config->lower_LRP)
   1508          goto skip;
   1509       transform_lrp(tctx, inst);
   1510       break;
   1511    case TGSI_OPCODE_FRC:
   1512       if (!ctx->config->lower_FRC)
   1513          goto skip;
   1514       transform_frc(tctx, inst);
   1515       break;
   1516    case TGSI_OPCODE_POW:
   1517       if (!ctx->config->lower_POW)
   1518          goto skip;
   1519       transform_pow(tctx, inst);
   1520       break;
   1521    case TGSI_OPCODE_LIT:
   1522       if (!ctx->config->lower_LIT)
   1523          goto skip;
   1524       transform_lit(tctx, inst);
   1525       break;
   1526    case TGSI_OPCODE_EXP:
   1527       if (!ctx->config->lower_EXP)
   1528          goto skip;
   1529       transform_exp(tctx, inst);
   1530       break;
   1531    case TGSI_OPCODE_LOG:
   1532       if (!ctx->config->lower_LOG)
   1533          goto skip;
   1534       transform_log(tctx, inst);
   1535       break;
   1536    case TGSI_OPCODE_DP4:
   1537       if (!ctx->config->lower_DP4)
   1538          goto skip;
   1539       transform_dotp(tctx, inst);
   1540       break;
   1541    case TGSI_OPCODE_DP3:
   1542       if (!ctx->config->lower_DP3)
   1543          goto skip;
   1544       transform_dotp(tctx, inst);
   1545       break;
   1546    case TGSI_OPCODE_DPH:
   1547       if (!ctx->config->lower_DPH)
   1548          goto skip;
   1549       transform_dotp(tctx, inst);
   1550       break;
   1551    case TGSI_OPCODE_DP2:
   1552       if (!ctx->config->lower_DP2)
   1553          goto skip;
   1554       transform_dotp(tctx, inst);
   1555       break;
   1556    case TGSI_OPCODE_DP2A:
   1557       if (!ctx->config->lower_DP2A)
   1558          goto skip;
   1559       transform_dotp(tctx, inst);
   1560       break;
   1561    case TGSI_OPCODE_FLR:
   1562       if (!ctx->config->lower_FLR)
   1563          goto skip;
   1564       transform_flr_ceil(tctx, inst);
   1565       break;
   1566    case TGSI_OPCODE_CEIL:
   1567       if (!ctx->config->lower_CEIL)
   1568          goto skip;
   1569       transform_flr_ceil(tctx, inst);
   1570       break;
   1571    case TGSI_OPCODE_TRUNC:
   1572       if (!ctx->config->lower_TRUNC)
   1573          goto skip;
   1574       transform_trunc(tctx, inst);
   1575       break;
   1576    case TGSI_OPCODE_TEX:
   1577    case TGSI_OPCODE_TXP:
   1578    case TGSI_OPCODE_TXB:
   1579    case TGSI_OPCODE_TXB2:
   1580    case TGSI_OPCODE_TXL:
   1581       if (transform_samp(tctx, inst))
   1582          goto skip;
   1583       break;
   1584    default:
   1585    skip:
   1586       tctx->emit_instruction(tctx, inst);
   1587       break;
   1588    }
   1589 }
   1590 
   1591 /* returns NULL if no lowering required, else returns the new
   1592  * tokens (which caller is required to free()).  In either case
   1593  * returns the current info.
   1594  */
   1595 const struct tgsi_token *
   1596 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
   1597                         const struct tgsi_token *tokens,
   1598                         struct tgsi_shader_info *info)
   1599 {
   1600    struct tgsi_lowering_context ctx;
   1601    struct tgsi_token *newtoks;
   1602    int newlen, numtmp;
   1603 
   1604    /* sanity check in case limit is ever increased: */
   1605    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
   1606 
   1607    /* sanity check the lowering */
   1608    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
   1609    assert(!(config->lower_FRC && config->lower_TRUNC));
   1610 
   1611    memset(&ctx, 0, sizeof(ctx));
   1612    ctx.base.transform_instruction = transform_instr;
   1613    ctx.info = info;
   1614    ctx.config = config;
   1615 
   1616    tgsi_scan_shader(tokens, info);
   1617 
   1618    /* if we are adding fragment shader support to emulate two-sided
   1619     * color, then figure out the number of additional inputs we need
   1620     * to create for BCOLOR's..
   1621     */
   1622    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
   1623        config->color_two_side) {
   1624       int i;
   1625       ctx.face_idx = -1;
   1626       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
   1627          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
   1628             ctx.two_side_idx[ctx.two_side_colors++] = i;
   1629          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
   1630             ctx.face_idx = i;
   1631       }
   1632    }
   1633 
   1634    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
   1635 
   1636 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
   1637    /* if there are no instructions to lower, then we are done: */
   1638    if (!(OPCS(DST) ||
   1639          OPCS(XPD) ||
   1640          OPCS(SCS) ||
   1641          OPCS(LRP) ||
   1642          OPCS(FRC) ||
   1643          OPCS(POW) ||
   1644          OPCS(LIT) ||
   1645          OPCS(EXP) ||
   1646          OPCS(LOG) ||
   1647          OPCS(DP4) ||
   1648          OPCS(DP3) ||
   1649          OPCS(DPH) ||
   1650          OPCS(DP2) ||
   1651          OPCS(DP2A) ||
   1652          OPCS(FLR) ||
   1653          OPCS(CEIL) ||
   1654          OPCS(TRUNC) ||
   1655          OPCS(TXP) ||
   1656          ctx.two_side_colors ||
   1657          ctx.saturate))
   1658       return NULL;
   1659 
   1660 #if 0  /* debug */
   1661    _debug_printf("BEFORE:");
   1662    tgsi_dump(tokens, 0);
   1663 #endif
   1664 
   1665    numtmp = 0;
   1666    newlen = tgsi_num_tokens(tokens);
   1667    if (OPCS(DST)) {
   1668       newlen += DST_GROW * OPCS(DST);
   1669       numtmp = MAX2(numtmp, DST_TMP);
   1670    }
   1671    if (OPCS(XPD)) {
   1672       newlen += XPD_GROW * OPCS(XPD);
   1673       numtmp = MAX2(numtmp, XPD_TMP);
   1674    }
   1675    if (OPCS(SCS)) {
   1676       newlen += SCS_GROW * OPCS(SCS);
   1677       numtmp = MAX2(numtmp, SCS_TMP);
   1678    }
   1679    if (OPCS(LRP)) {
   1680       newlen += LRP_GROW * OPCS(LRP);
   1681       numtmp = MAX2(numtmp, LRP_TMP);
   1682    }
   1683    if (OPCS(FRC)) {
   1684       newlen += FRC_GROW * OPCS(FRC);
   1685       numtmp = MAX2(numtmp, FRC_TMP);
   1686    }
   1687    if (OPCS(POW)) {
   1688       newlen += POW_GROW * OPCS(POW);
   1689       numtmp = MAX2(numtmp, POW_TMP);
   1690    }
   1691    if (OPCS(LIT)) {
   1692       newlen += LIT_GROW * OPCS(LIT);
   1693       numtmp = MAX2(numtmp, LIT_TMP);
   1694    }
   1695    if (OPCS(EXP)) {
   1696       newlen += EXP_GROW * OPCS(EXP);
   1697       numtmp = MAX2(numtmp, EXP_TMP);
   1698    }
   1699    if (OPCS(LOG)) {
   1700       newlen += LOG_GROW * OPCS(LOG);
   1701       numtmp = MAX2(numtmp, LOG_TMP);
   1702    }
   1703    if (OPCS(DP4)) {
   1704       newlen += DP4_GROW * OPCS(DP4);
   1705       numtmp = MAX2(numtmp, DOTP_TMP);
   1706    }
   1707    if (OPCS(DP3)) {
   1708       newlen += DP3_GROW * OPCS(DP3);
   1709       numtmp = MAX2(numtmp, DOTP_TMP);
   1710    }
   1711    if (OPCS(DPH)) {
   1712       newlen += DPH_GROW * OPCS(DPH);
   1713       numtmp = MAX2(numtmp, DOTP_TMP);
   1714    }
   1715    if (OPCS(DP2)) {
   1716       newlen += DP2_GROW * OPCS(DP2);
   1717       numtmp = MAX2(numtmp, DOTP_TMP);
   1718    }
   1719    if (OPCS(DP2A)) {
   1720       newlen += DP2A_GROW * OPCS(DP2A);
   1721       numtmp = MAX2(numtmp, DOTP_TMP);
   1722    }
   1723    if (OPCS(FLR)) {
   1724       newlen += FLR_GROW * OPCS(FLR);
   1725       numtmp = MAX2(numtmp, FLR_TMP);
   1726    }
   1727    if (OPCS(CEIL)) {
   1728       newlen += CEIL_GROW * OPCS(CEIL);
   1729       numtmp = MAX2(numtmp, CEIL_TMP);
   1730    }
   1731    if (OPCS(TRUNC)) {
   1732       newlen += TRUNC_GROW * OPCS(TRUNC);
   1733       numtmp = MAX2(numtmp, TRUNC_TMP);
   1734    }
   1735    if (ctx.saturate || config->lower_TXP) {
   1736       int n = 0;
   1737 
   1738       if (ctx.saturate) {
   1739          n = info->opcode_count[TGSI_OPCODE_TEX] +
   1740             info->opcode_count[TGSI_OPCODE_TXP] +
   1741             info->opcode_count[TGSI_OPCODE_TXB] +
   1742             info->opcode_count[TGSI_OPCODE_TXB2] +
   1743             info->opcode_count[TGSI_OPCODE_TXL];
   1744       } else if (config->lower_TXP) {
   1745           n = info->opcode_count[TGSI_OPCODE_TXP];
   1746       }
   1747 
   1748       newlen += SAMP_GROW * n;
   1749       numtmp = MAX2(numtmp, SAMP_TMP);
   1750    }
   1751 
   1752    /* specifically don't include two_side_colors temps in the count: */
   1753    ctx.numtmp = numtmp;
   1754 
   1755    if (ctx.two_side_colors) {
   1756       newlen += TWOSIDE_GROW(ctx.two_side_colors);
   1757       /* note: we permanently consume temp regs, re-writing references
   1758        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
   1759        * instruction that selects which varying to use):
   1760        */
   1761       numtmp += ctx.two_side_colors;
   1762    }
   1763 
   1764    newlen += 2 * numtmp;
   1765    newlen += 5;        /* immediate */
   1766 
   1767    newtoks = tgsi_alloc_tokens(newlen);
   1768    if (!newtoks)
   1769       return NULL;
   1770 
   1771    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
   1772 
   1773    tgsi_scan_shader(newtoks, info);
   1774 
   1775 #if 0  /* debug */
   1776    _debug_printf("AFTER:");
   1777    tgsi_dump(newtoks, 0);
   1778 #endif
   1779 
   1780    return newtoks;
   1781 }
   1782