Home | History | Annotate | Download | only in tgsi
      1 /*
      2  * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #include "tgsi/tgsi_transform.h"
     28 #include "tgsi/tgsi_scan.h"
     29 #include "tgsi/tgsi_dump.h"
     30 
     31 #include "util/u_debug.h"
     32 #include "util/u_math.h"
     33 
     34 #include "tgsi_lowering.h"
     35 
/* Per-shader state for the lowering pass.  The generic transform context
 * is embedded as the first member so that the callbacks, which only
 * receive a tgsi_transform_context pointer, can cast back to this type.
 */
struct tgsi_lowering_context {
   /* must remain the first member: tgsi_lowering_context() casts to it */
   struct tgsi_transform_context base;
   /* which lowerings are requested (e.g. lower_FLR is consulted by EXP) */
   const struct tgsi_lowering_config *config;
   /* NOTE(review): presumably scan results for the input shader -- confirm */
   struct tgsi_shader_info *info;
   unsigned two_side_colors;
   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
   int face_idx;
   unsigned numtmp;
   /* src and dst views of the scratch registers the lowerings refer to
    * as tmpA and tmpB; index with the A / B macros below
    */
   struct {
      struct tgsi_full_src_register src;
      struct tgsi_full_dst_register dst;
   } tmp[2];
#define A 0
#define B 1
   /* immediate used by the lowerings; per the instruction comments in
    * this file .x is read as 0.0, .y as 1.0 and .z as 128.0
    */
   struct tgsi_full_src_register imm;
   int emitted_decls;
   unsigned saturate;
};
     55 
     56 static inline struct tgsi_lowering_context *
     57 tgsi_lowering_context(struct tgsi_transform_context *tctx)
     58 {
     59    return (struct tgsi_lowering_context *)tctx;
     60 }
     61 
     62 /*
     63  * Utility helpers:
     64  */
     65 
     66 static void
     67 reg_dst(struct tgsi_full_dst_register *dst,
     68 	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
     69 {
     70    *dst = *orig_dst;
     71    dst->Register.WriteMask &= wrmask;
     72    assert(dst->Register.WriteMask);
     73 }
     74 
     75 static inline void
     76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
     77 {
     78    swiz[0] = src->SwizzleX;
     79    swiz[1] = src->SwizzleY;
     80    swiz[2] = src->SwizzleZ;
     81    swiz[3] = src->SwizzleW;
     82 }
     83 
     84 static void
     85 reg_src(struct tgsi_full_src_register *src,
     86 	const struct tgsi_full_src_register *orig_src,
     87 	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
     88 {
     89    unsigned swiz[4];
     90    get_swiz(swiz, &orig_src->Register);
     91    *src = *orig_src;
     92    src->Register.SwizzleX = swiz[sx];
     93    src->Register.SwizzleY = swiz[sy];
     94    src->Register.SwizzleZ = swiz[sz];
     95    src->Register.SwizzleW = swiz[sw];
     96 }
     97 
/* SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* values,
 * ready to be passed as the sx/sy/sz/sw arguments of reg_src().  An
 * underscore may be used for a channel whose selector does not matter;
 * it pastes to TGSI_SWIZZLE__, which is defined as TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
    101 
    102 /*
    103  * if (dst.x aliases src.x) {
    104  *   MOV tmpA.x, src.x
    105  *   src = tmpA
    106  * }
    107  * COS dst.x, src.x
    108  * SIN dst.y, src.x
    109  * MOV dst.zw, imm{0.0, 1.0}
    110  */
    111 static bool
    112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
    113 	const struct tgsi_full_src_register *src, unsigned src_mask)
    114 {
    115    if ((dst->Register.File == src->Register.File) &&
    116        (dst->Register.Index == src->Register.Index)) {
    117       unsigned i, actual_mask = 0;
    118       unsigned swiz[4];
    119       get_swiz(swiz, &src->Register);
    120       for (i = 0; i < 4; i++)
    121          if (src_mask & (1 << i))
    122             actual_mask |= (1 << swiz[i]);
    123       if (actual_mask & dst_mask)
    124          return true;
    125    }
    126    return false;
    127 }
    128 
    129 static void
    130 create_mov(struct tgsi_transform_context *tctx,
    131            const struct tgsi_full_dst_register *dst,
    132            const struct tgsi_full_src_register *src,
    133            unsigned mask, unsigned saturate)
    134 {
    135    struct tgsi_full_instruction new_inst;
    136 
    137    new_inst = tgsi_default_full_instruction();
    138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    139    new_inst.Instruction.Saturate = saturate;
    140    new_inst.Instruction.NumDstRegs = 1;
    141    reg_dst(&new_inst.Dst[0], dst, mask);
    142    new_inst.Instruction.NumSrcRegs = 1;
    143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    144    tctx->emit_instruction(tctx, &new_inst);
    145 }
    146 
    147 /* to help calculate # of tgsi tokens for a lowering.. we assume
    148  * the worst case, ie. removed instructions don't have ADDR[] or
    149  * anything which increases the # of tokens per src/dst and the
    150  * inserted instructions do.
    151  *
    152  * OINST() - old instruction
    153  *    1         : instruction itself
    154  *    1         : dst
    155  *    1 * nargs : srcN
    156  *
    157  * NINST() - new instruction
    158  *    1         : instruction itself
    159  *    2         : dst
    160  *    2 * nargs : srcN
    161  */
    162 
    163 #define OINST(nargs)  (1 + 1 + 1 * (nargs))
    164 #define NINST(nargs)  (1 + 2 + 2 * (nargs))
    165 
    166 /*
    167  * Lowering Translators:
    168  */
    169 
    170 /* DST - Distance Vector
    171  *   dst.x = 1.0
    172  *   dst.y = src0.y \times src1.y
    173  *   dst.z = src0.z
    174  *   dst.w = src1.w
    175  *
    176  * ; note: could be more clever and use just a single temp
    177  * ;       if I was clever enough to re-write the swizzles.
    178  * ; needs: 2 tmp, imm{1.0}
    179  * if (dst.y aliases src0.z) {
    180  *   MOV tmpA.yz, src0.yz
    181  *   src0 = tmpA
    182  * }
    183  * if (dst.yz aliases src1.w) {
    184  *   MOV tmpB.yw, src1.yw
    185  *   src1 = tmpB
    186  * }
    187  * MUL dst.y, src0.y, src1.y
    188  * MOV dst.z, src0.z
    189  * MOV dst.w, src1.w
    190  * MOV dst.x, imm{1.0}
    191  */
    192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
    193 		NINST(1) + NINST(1) - OINST(2))
    194 #define DST_TMP  2
    195 static void
    196 transform_dst(struct tgsi_transform_context *tctx,
    197               struct tgsi_full_instruction *inst)
    198 {
    199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    201    struct tgsi_full_src_register *src0 = &inst->Src[0];
    202    struct tgsi_full_src_register *src1 = &inst->Src[1];
    203    struct tgsi_full_instruction new_inst;
    204 
    205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
    206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
    207       src0 = &ctx->tmp[A].src;
    208    }
    209 
    210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
    211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
    212       src1 = &ctx->tmp[B].src;
    213    }
    214 
    215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    216       /* MUL dst.y, src0.y, src1.y */
    217       new_inst = tgsi_default_full_instruction();
    218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    219       new_inst.Instruction.NumDstRegs = 1;
    220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    221       new_inst.Instruction.NumSrcRegs = 2;
    222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
    223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
    224       tctx->emit_instruction(tctx, &new_inst);
    225    }
    226 
    227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    228       /* MOV dst.z, src0.z */
    229       new_inst = tgsi_default_full_instruction();
    230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    231       new_inst.Instruction.NumDstRegs = 1;
    232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
    233       new_inst.Instruction.NumSrcRegs = 1;
    234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
    235       tctx->emit_instruction(tctx, &new_inst);
    236    }
    237 
    238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    239       /* MOV dst.w, src1.w */
    240       new_inst = tgsi_default_full_instruction();
    241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    242       new_inst.Instruction.NumDstRegs = 1;
    243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    244       new_inst.Instruction.NumSrcRegs = 1;
    245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
    246       tctx->emit_instruction(tctx, &new_inst);
    247    }
    248 
    249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
    250       /* MOV dst.x, imm{1.0} */
    251       new_inst = tgsi_default_full_instruction();
    252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    253       new_inst.Instruction.NumDstRegs = 1;
    254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
    255       new_inst.Instruction.NumSrcRegs = 1;
    256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
    257       tctx->emit_instruction(tctx, &new_inst);
    258    }
    259 }
    260 
    261 /* LRP - Linear Interpolate
    262  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
    263  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
    264  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
    265  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
    266  *
    267  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
    268  * can then become: src0 \times src1 - (src0 \times src2 - src2)
    269  *
    270  * ; needs: 1 tmp
    271  * MAD tmpA, src0, src2, -src2
    272  * MAD dst, src0, src1, -tmpA
    273  */
    274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
    275 #define LRP_TMP  1
    276 static void
    277 transform_lrp(struct tgsi_transform_context *tctx,
    278               struct tgsi_full_instruction *inst)
    279 {
    280    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    281    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    282    struct tgsi_full_src_register *src0 = &inst->Src[0];
    283    struct tgsi_full_src_register *src1 = &inst->Src[1];
    284    struct tgsi_full_src_register *src2 = &inst->Src[2];
    285    struct tgsi_full_instruction new_inst;
    286 
    287    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    288       /* MAD tmpA, src0, src2, -src2 */
    289       new_inst = tgsi_default_full_instruction();
    290       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    291       new_inst.Instruction.NumDstRegs = 1;
    292       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    293       new_inst.Instruction.NumSrcRegs = 3;
    294       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    295       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
    296       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
    297       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
    298       tctx->emit_instruction(tctx, &new_inst);
    299 
    300       /* MAD dst, src0, src1, -tmpA */
    301       new_inst = tgsi_default_full_instruction();
    302       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    303       new_inst.Instruction.NumDstRegs = 1;
    304       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    305       new_inst.Instruction.NumSrcRegs = 3;
    306       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    307       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
    308       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    309       new_inst.Src[2].Register.Negate = true;
    310       tctx->emit_instruction(tctx, &new_inst);
    311    }
    312 }
    313 
    314 /* FRC - Fraction
    315  *  dst.x = src.x - \lfloor src.x\rfloor
    316  *  dst.y = src.y - \lfloor src.y\rfloor
    317  *  dst.z = src.z - \lfloor src.z\rfloor
    318  *  dst.w = src.w - \lfloor src.w\rfloor
    319  *
    320  * ; needs: 1 tmp
    321  * FLR tmpA, src
    322  * SUB dst, src, tmpA
    323  */
    324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
    325 #define FRC_TMP  1
    326 static void
    327 transform_frc(struct tgsi_transform_context *tctx,
    328               struct tgsi_full_instruction *inst)
    329 {
    330    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    331    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    332    struct tgsi_full_src_register *src = &inst->Src[0];
    333    struct tgsi_full_instruction new_inst;
    334 
    335    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    336       /* FLR tmpA, src */
    337       new_inst = tgsi_default_full_instruction();
    338       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    339       new_inst.Instruction.NumDstRegs = 1;
    340       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    341       new_inst.Instruction.NumSrcRegs = 1;
    342       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    343       tctx->emit_instruction(tctx, &new_inst);
    344 
    345       /* SUB dst, src, tmpA */
    346       new_inst = tgsi_default_full_instruction();
    347       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    348       new_inst.Instruction.NumDstRegs = 1;
    349       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    350       new_inst.Instruction.NumSrcRegs = 2;
    351       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
    352       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    353       new_inst.Src[1].Register.Negate = 1;
    354       tctx->emit_instruction(tctx, &new_inst);
    355    }
    356 }
    357 
    358 /* POW - Power
    359  *  dst.x = src0.x^{src1.x}
    360  *  dst.y = src0.x^{src1.x}
    361  *  dst.z = src0.x^{src1.x}
    362  *  dst.w = src0.x^{src1.x}
    363  *
    364  * ; needs: 1 tmp
    365  * LG2 tmpA.x, src0.x
    366  * MUL tmpA.x, src1.x, tmpA.x
    367  * EX2 dst, tmpA.x
    368  */
    369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
    370 #define POW_TMP  1
    371 static void
    372 transform_pow(struct tgsi_transform_context *tctx,
    373               struct tgsi_full_instruction *inst)
    374 {
    375    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    376    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    377    struct tgsi_full_src_register *src0 = &inst->Src[0];
    378    struct tgsi_full_src_register *src1 = &inst->Src[1];
    379    struct tgsi_full_instruction new_inst;
    380 
    381    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    382       /* LG2 tmpA.x, src0.x */
    383       new_inst = tgsi_default_full_instruction();
    384       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    385       new_inst.Instruction.NumDstRegs = 1;
    386       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    387       new_inst.Instruction.NumSrcRegs = 1;
    388       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
    389       tctx->emit_instruction(tctx, &new_inst);
    390 
    391       /* MUL tmpA.x, src1.x, tmpA.x */
    392       new_inst = tgsi_default_full_instruction();
    393       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    394       new_inst.Instruction.NumDstRegs = 1;
    395       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    396       new_inst.Instruction.NumSrcRegs = 2;
    397       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
    398       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    399       tctx->emit_instruction(tctx, &new_inst);
    400 
    401       /* EX2 dst, tmpA.x */
    402       new_inst = tgsi_default_full_instruction();
    403       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    404       new_inst.Instruction.NumDstRegs = 1;
    405       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    406       new_inst.Instruction.NumSrcRegs = 1;
    407       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    408       tctx->emit_instruction(tctx, &new_inst);
    409    }
    410 }
    411 
    412 /* LIT - Light Coefficients
    413  *  dst.x = 1.0
    414  *  dst.y = max(src.x, 0.0)
    415  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
    416  *  dst.w = 1.0
    417  *
    418  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
    419  * MAX tmpA.xy, src.xy, imm{0.0}
    420  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
    421  * LG2 tmpA.y, tmpA.y
    422  * MUL tmpA.y, tmpA.z, tmpA.y
    423  * EX2 tmpA.y, tmpA.y
    424  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
    425  * MOV dst.yz, tmpA.xy
    426  * MOV dst.xw, imm{1.0}
    427  */
    428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
    429 		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
    430 #define LIT_TMP  1
    431 static void
    432 transform_lit(struct tgsi_transform_context *tctx,
    433               struct tgsi_full_instruction *inst)
    434 {
    435    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    436    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    437    struct tgsi_full_src_register *src = &inst->Src[0];
    438    struct tgsi_full_instruction new_inst;
    439 
    440    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
    441       /* MAX tmpA.xy, src.xy, imm{0.0} */
    442       new_inst = tgsi_default_full_instruction();
    443       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
    444       new_inst.Instruction.NumDstRegs = 1;
    445       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
    446       new_inst.Instruction.NumSrcRegs = 2;
    447       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
    448       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
    449       tctx->emit_instruction(tctx, &new_inst);
    450 
    451       /* MIN tmpA.z, src.w, imm{128.0} */
    452       new_inst = tgsi_default_full_instruction();
    453       new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
    454       new_inst.Instruction.NumDstRegs = 1;
    455       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    456       new_inst.Instruction.NumSrcRegs = 2;
    457       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
    458       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
    459       tctx->emit_instruction(tctx, &new_inst);
    460 
    461       /* MAX tmpA.z, tmpA.z, -imm{128.0} */
    462       new_inst = tgsi_default_full_instruction();
    463       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
    464       new_inst.Instruction.NumDstRegs = 1;
    465       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    466       new_inst.Instruction.NumSrcRegs = 2;
    467       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
    468       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
    469       new_inst.Src[1].Register.Negate = true;
    470       tctx->emit_instruction(tctx, &new_inst);
    471 
    472       /* LG2 tmpA.y, tmpA.y */
    473       new_inst = tgsi_default_full_instruction();
    474       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    475       new_inst.Instruction.NumDstRegs = 1;
    476       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    477       new_inst.Instruction.NumSrcRegs = 1;
    478       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    479       tctx->emit_instruction(tctx, &new_inst);
    480 
    481       /* MUL tmpA.y, tmpA.z, tmpA.y */
    482       new_inst = tgsi_default_full_instruction();
    483       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    484       new_inst.Instruction.NumDstRegs = 1;
    485       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    486       new_inst.Instruction.NumSrcRegs = 2;
    487       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
    488       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    489       tctx->emit_instruction(tctx, &new_inst);
    490 
    491       /* EX2 tmpA.y, tmpA.y */
    492       new_inst = tgsi_default_full_instruction();
    493       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    494       new_inst.Instruction.NumDstRegs = 1;
    495       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    496       new_inst.Instruction.NumSrcRegs = 1;
    497       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    498       tctx->emit_instruction(tctx, &new_inst);
    499 
    500       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
    501       new_inst = tgsi_default_full_instruction();
    502       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
    503       new_inst.Instruction.NumDstRegs = 1;
    504       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    505       new_inst.Instruction.NumSrcRegs = 3;
    506       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    507       new_inst.Src[0].Register.Negate = true;
    508       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    509       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
    510       tctx->emit_instruction(tctx, &new_inst);
    511 
    512       /* MOV dst.yz, tmpA.xy */
    513       new_inst = tgsi_default_full_instruction();
    514       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    515       new_inst.Instruction.NumDstRegs = 1;
    516       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
    517       new_inst.Instruction.NumSrcRegs = 1;
    518       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
    519       tctx->emit_instruction(tctx, &new_inst);
    520    }
    521 
    522    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
    523       /* MOV dst.xw, imm{1.0} */
    524       new_inst = tgsi_default_full_instruction();
    525       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    526       new_inst.Instruction.NumDstRegs = 1;
    527       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
    528       new_inst.Instruction.NumSrcRegs = 1;
    529       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
    530       tctx->emit_instruction(tctx, &new_inst);
    531    }
    532 }
    533 
    534 /* EXP - Approximate Exponential Base 2
    535  *  dst.x = 2^{\lfloor src.x\rfloor}
    536  *  dst.y = src.x - \lfloor src.x\rfloor
    537  *  dst.z = 2^{src.x}
    538  *  dst.w = 1.0
    539  *
    540  * ; needs: 1 tmp, imm{1.0}
    541  * if (lowering FLR) {
    542  *   FRC tmpA.x, src.x
    543  *   SUB tmpA.x, src.x, tmpA.x
    544  * } else {
    545  *   FLR tmpA.x, src.x
    546  * }
    547  * EX2 tmpA.y, src.x
    548  * SUB dst.y, src.x, tmpA.x
    549  * EX2 dst.x, tmpA.x
    550  * MOV dst.z, tmpA.y
    551  * MOV dst.w, imm{1.0}
    552  */
    553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
    554 		NINST(1)+ NINST(1) - OINST(1))
    555 #define EXP_TMP  1
    556 static void
    557 transform_exp(struct tgsi_transform_context *tctx,
    558               struct tgsi_full_instruction *inst)
    559 {
    560    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    561    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    562    struct tgsi_full_src_register *src = &inst->Src[0];
    563    struct tgsi_full_instruction new_inst;
    564 
    565    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
    566       if (ctx->config->lower_FLR) {
    567          /* FRC tmpA.x, src.x */
    568          new_inst = tgsi_default_full_instruction();
    569          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    570          new_inst.Instruction.NumDstRegs = 1;
    571          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    572          new_inst.Instruction.NumSrcRegs = 1;
    573          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    574          tctx->emit_instruction(tctx, &new_inst);
    575 
    576          /* SUB tmpA.x, src.x, tmpA.x */
    577          new_inst = tgsi_default_full_instruction();
    578          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    579          new_inst.Instruction.NumDstRegs = 1;
    580          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    581          new_inst.Instruction.NumSrcRegs = 2;
    582          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    583          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    584          new_inst.Src[1].Register.Negate = 1;
    585          tctx->emit_instruction(tctx, &new_inst);
    586      } else {
    587          /* FLR tmpA.x, src.x */
    588          new_inst = tgsi_default_full_instruction();
    589          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    590          new_inst.Instruction.NumDstRegs = 1;
    591          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    592          new_inst.Instruction.NumSrcRegs = 1;
    593          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    594          tctx->emit_instruction(tctx, &new_inst);
    595       }
    596    }
    597 
    598    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    599       /* EX2 tmpA.y, src.x */
    600       new_inst = tgsi_default_full_instruction();
    601       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    602       new_inst.Instruction.NumDstRegs = 1;
    603       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    604       new_inst.Instruction.NumSrcRegs = 1;
    605       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    606       tctx->emit_instruction(tctx, &new_inst);
    607    }
    608 
    609    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    610       /* SUB dst.y, src.x, tmpA.x */
    611       new_inst = tgsi_default_full_instruction();
    612       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    613       new_inst.Instruction.NumDstRegs = 1;
    614       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    615       new_inst.Instruction.NumSrcRegs = 2;
    616       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    617       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    618       new_inst.Src[1].Register.Negate = 1;
    619       tctx->emit_instruction(tctx, &new_inst);
    620    }
    621 
    622    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
    623       /* EX2 dst.x, tmpA.x */
    624       new_inst = tgsi_default_full_instruction();
    625       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    626       new_inst.Instruction.NumDstRegs = 1;
    627       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
    628       new_inst.Instruction.NumSrcRegs = 1;
    629       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
    630       tctx->emit_instruction(tctx, &new_inst);
    631    }
    632 
    633    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
    634       /* MOV dst.z, tmpA.y */
    635       new_inst = tgsi_default_full_instruction();
    636       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    637       new_inst.Instruction.NumDstRegs = 1;
    638       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
    639       new_inst.Instruction.NumSrcRegs = 1;
    640       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
    641       tctx->emit_instruction(tctx, &new_inst);
    642    }
    643 
    644    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    645       /* MOV dst.w, imm{1.0} */
    646       new_inst = tgsi_default_full_instruction();
    647       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    648       new_inst.Instruction.NumDstRegs = 1;
    649       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    650       new_inst.Instruction.NumSrcRegs = 1;
    651       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
    652       tctx->emit_instruction(tctx, &new_inst);
    653    }
    654 }
    655 
    656 /* LOG - Approximate Logarithm Base 2
    657  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
    658  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
    659  *  dst.z = \log_2{|src.x|}
    660  *  dst.w = 1.0
    661  *
    662  * ; needs: 1 tmp, imm{1.0}
    663  * LG2 tmpA.x, |src.x|
    664  * if (lowering FLR) {
    665  *   FRC tmpA.y, tmpA.x
    666  *   SUB tmpA.y, tmpA.x, tmpA.y
    667  * } else {
    668  *   FLR tmpA.y, tmpA.x
    669  * }
    670  * EX2 tmpA.z, tmpA.y
    671  * RCP tmpA.z, tmpA.z
    672  * MUL dst.y, |src.x|, tmpA.z
    673  * MOV dst.xz, tmpA.yx
    674  * MOV dst.w, imm{1.0}
    675  */
    676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
    677 		NINST(2) + NINST(1) + NINST(1) - OINST(1))
    678 #define LOG_TMP  1
    679 static void
    680 transform_log(struct tgsi_transform_context *tctx,
    681               struct tgsi_full_instruction *inst)
    682 {
    683    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    684    struct tgsi_full_dst_register *dst = &inst->Dst[0];
    685    struct tgsi_full_src_register *src = &inst->Src[0];
    686    struct tgsi_full_instruction new_inst;
    687 
    688    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
    689       /* LG2 tmpA.x, |src.x| */
    690       new_inst = tgsi_default_full_instruction();
    691       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
    692       new_inst.Instruction.NumDstRegs = 1;
    693       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    694       new_inst.Instruction.NumSrcRegs = 1;
    695       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
    696       new_inst.Src[0].Register.Absolute = true;
    697       tctx->emit_instruction(tctx, &new_inst);
    698    }
    699 
    700    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
    701       if (ctx->config->lower_FLR) {
    702          /* FRC tmpA.y, tmpA.x */
    703          new_inst = tgsi_default_full_instruction();
    704          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    705          new_inst.Instruction.NumDstRegs = 1;
    706          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    707          new_inst.Instruction.NumSrcRegs = 1;
    708          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    709          tctx->emit_instruction(tctx, &new_inst);
    710 
    711          /* SUB tmpA.y, tmpA.x, tmpA.y */
    712          new_inst = tgsi_default_full_instruction();
    713          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    714          new_inst.Instruction.NumDstRegs = 1;
    715          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    716          new_inst.Instruction.NumSrcRegs = 2;
    717          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    718          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
    719          new_inst.Src[1].Register.Negate = 1;
    720          tctx->emit_instruction(tctx, &new_inst);
    721       } else {
    722          /* FLR tmpA.y, tmpA.x */
    723          new_inst = tgsi_default_full_instruction();
    724          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    725          new_inst.Instruction.NumDstRegs = 1;
    726          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
    727          new_inst.Instruction.NumSrcRegs = 1;
    728          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
    729          tctx->emit_instruction(tctx, &new_inst);
    730       }
    731    }
    732 
    733    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
    734       /* EX2 tmpA.z, tmpA.y */
    735       new_inst = tgsi_default_full_instruction();
    736       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
    737       new_inst.Instruction.NumDstRegs = 1;
    738       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    739       new_inst.Instruction.NumSrcRegs = 1;
    740       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
    741       tctx->emit_instruction(tctx, &new_inst);
    742 
    743       /* RCP tmpA.z, tmpA.z */
    744       new_inst = tgsi_default_full_instruction();
    745       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
    746       new_inst.Instruction.NumDstRegs = 1;
    747       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
    748       new_inst.Instruction.NumSrcRegs = 1;
    749       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
    750       tctx->emit_instruction(tctx, &new_inst);
    751 
    752       /* MUL dst.y, |src.x|, tmpA.z */
    753       new_inst = tgsi_default_full_instruction();
    754       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    755       new_inst.Instruction.NumDstRegs = 1;
    756       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
    757       new_inst.Instruction.NumSrcRegs = 2;
    758       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
    759       new_inst.Src[0].Register.Absolute = true;
    760       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
    761       tctx->emit_instruction(tctx, &new_inst);
    762    }
    763 
    764    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
    765       /* MOV dst.xz, tmpA.yx */
    766       new_inst = tgsi_default_full_instruction();
    767       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    768       new_inst.Instruction.NumDstRegs = 1;
    769       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
    770       new_inst.Instruction.NumSrcRegs = 1;
    771       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
    772       tctx->emit_instruction(tctx, &new_inst);
    773    }
    774 
    775    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
    776       /* MOV dst.w, imm{1.0} */
    777       new_inst = tgsi_default_full_instruction();
    778       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
    779       new_inst.Instruction.NumDstRegs = 1;
    780       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
    781       new_inst.Instruction.NumSrcRegs = 1;
    782       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
    783       tctx->emit_instruction(tctx, &new_inst);
    784    }
    785 }
    786 
    787 /* DP4 - 4-component Dot Product
    788  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
    789  *
    790  * DP3 - 3-component Dot Product
    791  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
    792  *
    793  * DP2 - 2-component Dot Product
    794  *   dst = src0.x \times src1.x + src0.y \times src1.y
    795  *
    796  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
    797  * operations, which is what you'd prefer for an ISA that is natively
    798  * scalar.  Probably a native vector ISA would at least already have
    799  * DP4/DP3 instructions, but perhaps there is room for an alternative
    800  * translation for DP2 using vector instructions.
    801  *
    802  * ; needs: 1 tmp
    803  * MUL tmpA.x, src0.x, src1.x
    804  * MAD tmpA.x, src0.y, src1.y, tmpA.x
    805  * if (DP3 || DP4) {
    806  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
    807  *   if (DP4) {
    808  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
    809  *   }
    810  * }
    811  * ; fixup last instruction to replicate into dst
    812  */
    813 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
    814 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
    815 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
    816 #define DOTP_TMP  1
    817 static void
    818 transform_dotp(struct tgsi_transform_context *tctx,
    819                struct tgsi_full_instruction *inst)
    820 {
    821    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    822    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    823    struct tgsi_full_src_register *src0 = &inst->Src[0];
    824    struct tgsi_full_src_register *src1 = &inst->Src[1];
    825    struct tgsi_full_instruction new_inst;
    826    unsigned opcode = inst->Instruction.Opcode;
    827 
    828    /* NOTE: any potential last instruction must replicate src on all
    829     * components (since it could be re-written to write to final dst)
    830     */
    831 
    832    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    833       /* MUL tmpA.x, src0.x, src1.x */
    834       new_inst = tgsi_default_full_instruction();
    835       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
    836       new_inst.Instruction.NumDstRegs = 1;
    837       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    838       new_inst.Instruction.NumSrcRegs = 2;
    839       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
    840       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
    841       tctx->emit_instruction(tctx, &new_inst);
    842 
    843       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
    844       new_inst = tgsi_default_full_instruction();
    845       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    846       new_inst.Instruction.NumDstRegs = 1;
    847       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    848       new_inst.Instruction.NumSrcRegs = 3;
    849       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
    850       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
    851       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
    852 
    853       if ((opcode == TGSI_OPCODE_DP3) ||
    854           (opcode == TGSI_OPCODE_DP4)) {
    855          tctx->emit_instruction(tctx, &new_inst);
    856 
    857          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
    858          new_inst = tgsi_default_full_instruction();
    859          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    860          new_inst.Instruction.NumDstRegs = 1;
    861          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    862          new_inst.Instruction.NumSrcRegs = 3;
    863          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
    864          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
    865          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
    866 
    867          if (opcode == TGSI_OPCODE_DP4) {
    868             tctx->emit_instruction(tctx, &new_inst);
    869 
    870             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
    871             new_inst = tgsi_default_full_instruction();
    872             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
    873             new_inst.Instruction.NumDstRegs = 1;
    874             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
    875             new_inst.Instruction.NumSrcRegs = 3;
    876             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
    877             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
    878             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
    879          }
    880       }
    881 
    882       /* fixup last instruction to write to dst: */
    883       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    884 
    885       tctx->emit_instruction(tctx, &new_inst);
    886    }
    887 }
    888 
    889 /* FLR - floor, CEIL - ceil
    890  * ; needs: 1 tmp
    891  * if (CEIL) {
    892  *   FRC tmpA, -src
    893  *   ADD dst, src, tmpA
    894  * } else {
    895  *   FRC tmpA, src
    896  *   SUB dst, src, tmpA
    897  * }
    898  */
    899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
    900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
    901 #define FLR_TMP 1
    902 #define CEIL_TMP 1
    903 static void
    904 transform_flr_ceil(struct tgsi_transform_context *tctx,
    905                    struct tgsi_full_instruction *inst)
    906 {
    907    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    908    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    909    struct tgsi_full_src_register *src0 = &inst->Src[0];
    910    struct tgsi_full_instruction new_inst;
    911    unsigned opcode = inst->Instruction.Opcode;
    912 
    913    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    914       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
    915       new_inst = tgsi_default_full_instruction();
    916       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    917       new_inst.Instruction.NumDstRegs = 1;
    918       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    919       new_inst.Instruction.NumSrcRegs = 1;
    920       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    921 
    922       if (opcode == TGSI_OPCODE_CEIL)
    923          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
    924       tctx->emit_instruction(tctx, &new_inst);
    925 
    926       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
    927       new_inst = tgsi_default_full_instruction();
    928       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    929       new_inst.Instruction.NumDstRegs = 1;
    930       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
    931       new_inst.Instruction.NumSrcRegs = 2;
    932       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    933       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    934       if (opcode == TGSI_OPCODE_FLR)
    935          new_inst.Src[1].Register.Negate = 1;
    936       tctx->emit_instruction(tctx, &new_inst);
    937    }
    938 }
    939 
    940 /* TRUNC - truncate off fractional part
    941  *  dst.x = trunc(src.x)
    942  *  dst.y = trunc(src.y)
    943  *  dst.z = trunc(src.z)
    944  *  dst.w = trunc(src.w)
    945  *
    946  * ; needs: 1 tmp
    947  * if (lower FLR) {
    948  *   FRC tmpA, |src|
    949  *   SUB tmpA, |src|, tmpA
    950  * } else {
    951  *   FLR tmpA, |src|
    952  * }
    953  * CMP dst, src, -tmpA, tmpA
    954  */
    955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
    956 #define TRUNC_TMP 1
    957 static void
    958 transform_trunc(struct tgsi_transform_context *tctx,
    959                 struct tgsi_full_instruction *inst)
    960 {
    961    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    962    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    963    struct tgsi_full_src_register *src0 = &inst->Src[0];
    964    struct tgsi_full_instruction new_inst;
    965 
    966    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
    967       if (ctx->config->lower_FLR) {
    968          new_inst = tgsi_default_full_instruction();
    969          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
    970          new_inst.Instruction.NumDstRegs = 1;
    971          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    972          new_inst.Instruction.NumSrcRegs = 1;
    973          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    974          new_inst.Src[0].Register.Absolute = true;
    975          new_inst.Src[0].Register.Negate = false;
    976          tctx->emit_instruction(tctx, &new_inst);
    977 
    978          new_inst = tgsi_default_full_instruction();
    979          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
    980          new_inst.Instruction.NumDstRegs = 1;
    981          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    982          new_inst.Instruction.NumSrcRegs = 2;
    983          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    984          new_inst.Src[0].Register.Absolute = true;
    985          new_inst.Src[0].Register.Negate = false;
    986          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
    987          new_inst.Src[1].Register.Negate = 1;
    988          tctx->emit_instruction(tctx, &new_inst);
    989       } else {
    990          new_inst = tgsi_default_full_instruction();
    991          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
    992          new_inst.Instruction.NumDstRegs = 1;
    993          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
    994          new_inst.Instruction.NumSrcRegs = 1;
    995          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
    996          new_inst.Src[0].Register.Absolute = true;
    997          new_inst.Src[0].Register.Negate = false;
    998          tctx->emit_instruction(tctx, &new_inst);
    999       }
   1000 
   1001       new_inst = tgsi_default_full_instruction();
   1002       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
   1003       new_inst.Instruction.NumDstRegs = 1;
   1004       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
   1005       new_inst.Instruction.NumSrcRegs = 3;
   1006       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
   1007       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1008       new_inst.Src[1].Register.Negate = true;
   1009       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1010       tctx->emit_instruction(tctx, &new_inst);
   1011    }
   1012 }
   1013 
   1014 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
   1015  * in the case of TXP, the clamping must happen *after* projection, so
   1016  * we need to lower TXP to TEX.
   1017  *
   1018  *   MOV tmpA, src0
   1019  *   if (opc == TXP) {
   1020  *     ; do perspective division manually before clamping:
   1021  *     RCP tmpB, tmpA.w
   1022  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
   1023  *     opc = TEX;
   1024  *   }
   1025  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
   1026  *   <opc> dst, tmpA, ...
   1027  */
   1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
   1029 #define SAMP_TMP  2
   1030 static int
   1031 transform_samp(struct tgsi_transform_context *tctx,
   1032                struct tgsi_full_instruction *inst)
   1033 {
   1034    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1035    struct tgsi_full_src_register *coord = &inst->Src[0];
   1036    struct tgsi_full_src_register *samp;
   1037    struct tgsi_full_instruction new_inst;
   1038    /* mask is clamped coords, pmask is all coords (for projection): */
   1039    unsigned mask = 0, pmask = 0, smask;
   1040    unsigned tex = inst->Texture.Texture;
   1041    unsigned opcode = inst->Instruction.Opcode;
   1042    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
   1043 		   (ctx->config->lower_TXP & (1 << tex));
   1044 
   1045    if (opcode == TGSI_OPCODE_TXB2) {
   1046       samp = &inst->Src[2];
   1047    } else {
   1048       samp = &inst->Src[1];
   1049    }
   1050 
   1051    /* convert sampler # to bitmask to test: */
   1052    smask = 1 << samp->Register.Index;
   1053 
   1054    /* check if we actually need to lower this one: */
   1055    if (!(ctx->saturate & smask) && !lower_txp)
   1056       return -1;
   1057 
   1058    /* figure out which coordinates need saturating:
   1059     *   - RECT textures should not get saturated
   1060     *   - array index coords should not get saturated
   1061     */
   1062    switch (tex) {
   1063    case TGSI_TEXTURE_3D:
   1064    case TGSI_TEXTURE_CUBE:
   1065    case TGSI_TEXTURE_CUBE_ARRAY:
   1066    case TGSI_TEXTURE_SHADOWCUBE:
   1067    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   1068       if (ctx->config->saturate_r & smask)
   1069          mask |= TGSI_WRITEMASK_Z;
   1070       pmask |= TGSI_WRITEMASK_Z;
   1071       /* fallthrough */
   1072 
   1073    case TGSI_TEXTURE_2D:
   1074    case TGSI_TEXTURE_2D_ARRAY:
   1075    case TGSI_TEXTURE_SHADOW2D:
   1076    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1077    case TGSI_TEXTURE_2D_MSAA:
   1078    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   1079       if (ctx->config->saturate_t & smask)
   1080          mask |= TGSI_WRITEMASK_Y;
   1081       pmask |= TGSI_WRITEMASK_Y;
   1082       /* fallthrough */
   1083 
   1084    case TGSI_TEXTURE_1D:
   1085    case TGSI_TEXTURE_1D_ARRAY:
   1086    case TGSI_TEXTURE_SHADOW1D:
   1087    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1088       if (ctx->config->saturate_s & smask)
   1089          mask |= TGSI_WRITEMASK_X;
   1090       pmask |= TGSI_WRITEMASK_X;
   1091       break;
   1092 
   1093    case TGSI_TEXTURE_RECT:
   1094    case TGSI_TEXTURE_SHADOWRECT:
   1095       /* we don't saturate, but in case of lower_txp we
   1096        * still need to do the perspective divide:
   1097        */
   1098        pmask = TGSI_WRITEMASK_XY;
   1099        break;
   1100    }
   1101 
   1102    /* sanity check.. driver could be asking to saturate a non-
   1103     * existent coordinate component:
   1104     */
   1105    if (!mask && !lower_txp)
   1106       return -1;
   1107 
   1108    /* MOV tmpA, src0 */
   1109    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
   1110 
   1111    /* This is a bit sad.. we need to clamp *after* the coords
   1112     * are projected, which means lowering TXP to TEX and doing
   1113     * the projection ourself.  But since I haven't figured out
   1114     * how to make the lowering code deliver an electric shock
   1115     * to anyone using GL_CLAMP, we must do this instead:
   1116     */
   1117    if (opcode == TGSI_OPCODE_TXP) {
   1118       /* RCP tmpB.x tmpA.w */
   1119       new_inst = tgsi_default_full_instruction();
   1120       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
   1121       new_inst.Instruction.NumDstRegs = 1;
   1122       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
   1123       new_inst.Instruction.NumSrcRegs = 1;
   1124       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
   1125       tctx->emit_instruction(tctx, &new_inst);
   1126 
   1127       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
   1128       new_inst = tgsi_default_full_instruction();
   1129       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
   1130       new_inst.Instruction.NumDstRegs = 1;
   1131       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
   1132       new_inst.Instruction.NumSrcRegs = 2;
   1133       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
   1134       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
   1135       tctx->emit_instruction(tctx, &new_inst);
   1136 
   1137       opcode = TGSI_OPCODE_TEX;
   1138    }
   1139 
   1140    /* MOV_SAT tmpA.<mask>, tmpA */
   1141    if (mask) {
   1142       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
   1143    }
   1144 
   1145    /* modify the texture samp instruction to take fixed up coord: */
   1146    new_inst = *inst;
   1147    new_inst.Instruction.Opcode = opcode;
   1148    new_inst.Src[0] = ctx->tmp[A].src;
   1149    tctx->emit_instruction(tctx, &new_inst);
   1150 
   1151    return 0;
   1152 }
   1153 
   1154 /* Two-sided color emulation:
   1155  * For each COLOR input, create a corresponding BCOLOR input, plus
   1156  * CMP instruction to select front or back color based on FACE
   1157  */
   1158 #define TWOSIDE_GROW(n)  (                      \
   1159       2 +         /* FACE */                    \
   1160       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
   1161       ((n) * 1) + /* TEMP[] */                  \
   1162       ((n) * NINST(3))   /* CMP instr */        \
   1163       )
   1164 
   1165 static void
   1166 emit_twoside(struct tgsi_transform_context *tctx)
   1167 {
   1168    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1169    struct tgsi_shader_info *info = ctx->info;
   1170    struct tgsi_full_declaration decl;
   1171    struct tgsi_full_instruction new_inst;
   1172    unsigned inbase, tmpbase;
   1173    int i;
   1174 
   1175    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
   1176    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
   1177 
   1178    /* additional inputs for BCOLOR's */
   1179    for (i = 0; i < ctx->two_side_colors; i++) {
   1180       unsigned in_idx = ctx->two_side_idx[i];
   1181       decl = tgsi_default_full_declaration();
   1182       decl.Declaration.File = TGSI_FILE_INPUT;
   1183       decl.Declaration.Semantic = true;
   1184       decl.Range.First = decl.Range.Last = inbase + i;
   1185       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
   1186       decl.Semantic.Index = info->input_semantic_index[in_idx];
   1187       decl.Declaration.Interpolate = true;
   1188       decl.Interp.Interpolate = info->input_interpolate[in_idx];
   1189       decl.Interp.Location = info->input_interpolate_loc[in_idx];
   1190       decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
   1191       tctx->emit_declaration(tctx, &decl);
   1192    }
   1193 
   1194    /* additional input for FACE */
   1195    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
   1196       decl = tgsi_default_full_declaration();
   1197       decl.Declaration.File = TGSI_FILE_INPUT;
   1198       decl.Declaration.Semantic = true;
   1199       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
   1200       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
   1201       decl.Semantic.Index = 0;
   1202       tctx->emit_declaration(tctx, &decl);
   1203 
   1204       ctx->face_idx = decl.Range.First;
   1205    }
   1206 
   1207    /* additional temps for COLOR/BCOLOR selection: */
   1208    for (i = 0; i < ctx->two_side_colors; i++) {
   1209       decl = tgsi_default_full_declaration();
   1210       decl.Declaration.File = TGSI_FILE_TEMPORARY;
   1211       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
   1212       tctx->emit_declaration(tctx, &decl);
   1213    }
   1214 
   1215    /* and finally additional instructions to select COLOR/BCOLOR: */
   1216    for (i = 0; i < ctx->two_side_colors; i++) {
   1217       new_inst = tgsi_default_full_instruction();
   1218       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
   1219 
   1220       new_inst.Instruction.NumDstRegs = 1;
   1221       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
   1222       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
   1223       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1224 
   1225       new_inst.Instruction.NumSrcRegs = 3;
   1226       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
   1227       new_inst.Src[0].Register.Index = ctx->face_idx;
   1228       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
   1229       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
   1230       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
   1231       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
   1232       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
   1233       new_inst.Src[1].Register.Index = inbase + i;
   1234       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
   1235       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
   1236       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1237       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
   1238       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
   1239       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
   1240       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
   1241       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
   1242       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1243       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
   1244 
   1245       tctx->emit_instruction(tctx, &new_inst);
   1246    }
   1247 }
   1248 
   1249 static void
   1250 emit_decls(struct tgsi_transform_context *tctx)
   1251 {
   1252    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1253    struct tgsi_shader_info *info = ctx->info;
   1254    struct tgsi_full_declaration decl;
   1255    struct tgsi_full_immediate immed;
   1256    unsigned tmpbase;
   1257    int i;
   1258 
   1259    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
   1260 
   1261    ctx->color_base = tmpbase + ctx->numtmp;
   1262 
   1263    /* declare immediate: */
   1264    immed = tgsi_default_full_immediate();
   1265    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
   1266    immed.u[0].Float = 0.0;
   1267    immed.u[1].Float = 1.0;
   1268    immed.u[2].Float = 128.0;
   1269    immed.u[3].Float = 0.0;
   1270    tctx->emit_immediate(tctx, &immed);
   1271 
   1272    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
   1273    ctx->imm.Register.Index = info->immediate_count;
   1274    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
   1275    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1276    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1277    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
   1278 
   1279    /* declare temp regs: */
   1280    for (i = 0; i < ctx->numtmp; i++) {
   1281       decl = tgsi_default_full_declaration();
   1282       decl.Declaration.File = TGSI_FILE_TEMPORARY;
   1283       decl.Range.First = decl.Range.Last = tmpbase + i;
   1284       tctx->emit_declaration(tctx, &decl);
   1285 
   1286       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
   1287       ctx->tmp[i].src.Register.Index = tmpbase + i;
   1288       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
   1289       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1290       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1291       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
   1292 
   1293       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
   1294       ctx->tmp[i].dst.Register.Index = tmpbase + i;
   1295       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1296    }
   1297 
   1298    if (ctx->two_side_colors)
   1299       emit_twoside(tctx);
   1300 }
   1301 
   1302 static void
   1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
   1304                     struct tgsi_full_instruction *inst)
   1305 {
   1306    unsigned i, j;
   1307    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
   1308       struct tgsi_src_register *src = &inst->Src[i].Register;
   1309       if (src->File == TGSI_FILE_INPUT) {
   1310          for (j = 0; j < ctx->two_side_colors; j++) {
   1311             if (src->Index == ctx->two_side_idx[j]) {
   1312                src->File = TGSI_FILE_TEMPORARY;
   1313                src->Index = ctx->color_base + j;
   1314                break;
   1315             }
   1316          }
   1317       }
   1318    }
   1319 
   1320 }
   1321 
   1322 static void
   1323 transform_instr(struct tgsi_transform_context *tctx,
   1324 		struct tgsi_full_instruction *inst)
   1325 {
   1326    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
   1327 
   1328    if (!ctx->emitted_decls) {
   1329       emit_decls(tctx);
   1330       ctx->emitted_decls = 1;
   1331    }
   1332 
   1333    /* if emulating two-sided-color, we need to re-write some
   1334     * src registers:
   1335     */
   1336    if (ctx->two_side_colors)
   1337       rename_color_inputs(ctx, inst);
   1338 
   1339    switch (inst->Instruction.Opcode) {
   1340    case TGSI_OPCODE_DST:
   1341       if (!ctx->config->lower_DST)
   1342          goto skip;
   1343       transform_dst(tctx, inst);
   1344       break;
   1345    case TGSI_OPCODE_LRP:
   1346       if (!ctx->config->lower_LRP)
   1347          goto skip;
   1348       transform_lrp(tctx, inst);
   1349       break;
   1350    case TGSI_OPCODE_FRC:
   1351       if (!ctx->config->lower_FRC)
   1352          goto skip;
   1353       transform_frc(tctx, inst);
   1354       break;
   1355    case TGSI_OPCODE_POW:
   1356       if (!ctx->config->lower_POW)
   1357          goto skip;
   1358       transform_pow(tctx, inst);
   1359       break;
   1360    case TGSI_OPCODE_LIT:
   1361       if (!ctx->config->lower_LIT)
   1362          goto skip;
   1363       transform_lit(tctx, inst);
   1364       break;
   1365    case TGSI_OPCODE_EXP:
   1366       if (!ctx->config->lower_EXP)
   1367          goto skip;
   1368       transform_exp(tctx, inst);
   1369       break;
   1370    case TGSI_OPCODE_LOG:
   1371       if (!ctx->config->lower_LOG)
   1372          goto skip;
   1373       transform_log(tctx, inst);
   1374       break;
   1375    case TGSI_OPCODE_DP4:
   1376       if (!ctx->config->lower_DP4)
   1377          goto skip;
   1378       transform_dotp(tctx, inst);
   1379       break;
   1380    case TGSI_OPCODE_DP3:
   1381       if (!ctx->config->lower_DP3)
   1382          goto skip;
   1383       transform_dotp(tctx, inst);
   1384       break;
   1385    case TGSI_OPCODE_DP2:
   1386       if (!ctx->config->lower_DP2)
   1387          goto skip;
   1388       transform_dotp(tctx, inst);
   1389       break;
   1390    case TGSI_OPCODE_FLR:
   1391       if (!ctx->config->lower_FLR)
   1392          goto skip;
   1393       transform_flr_ceil(tctx, inst);
   1394       break;
   1395    case TGSI_OPCODE_CEIL:
   1396       if (!ctx->config->lower_CEIL)
   1397          goto skip;
   1398       transform_flr_ceil(tctx, inst);
   1399       break;
   1400    case TGSI_OPCODE_TRUNC:
   1401       if (!ctx->config->lower_TRUNC)
   1402          goto skip;
   1403       transform_trunc(tctx, inst);
   1404       break;
   1405    case TGSI_OPCODE_TEX:
   1406    case TGSI_OPCODE_TXP:
   1407    case TGSI_OPCODE_TXB:
   1408    case TGSI_OPCODE_TXB2:
   1409    case TGSI_OPCODE_TXL:
   1410       if (transform_samp(tctx, inst))
   1411          goto skip;
   1412       break;
   1413    default:
   1414    skip:
   1415       tctx->emit_instruction(tctx, inst);
   1416       break;
   1417    }
   1418 }
   1419 
   1420 /* returns NULL if no lowering required, else returns the new
   1421  * tokens (which caller is required to free()).  In either case
   1422  * returns the current info.
   1423  */
   1424 const struct tgsi_token *
   1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
   1426                         const struct tgsi_token *tokens,
   1427                         struct tgsi_shader_info *info)
   1428 {
   1429    struct tgsi_lowering_context ctx;
   1430    struct tgsi_token *newtoks;
   1431    int newlen, numtmp;
   1432 
   1433    /* sanity check in case limit is ever increased: */
   1434    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
   1435 
   1436    /* sanity check the lowering */
   1437    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
   1438    assert(!(config->lower_FRC && config->lower_TRUNC));
   1439 
   1440    memset(&ctx, 0, sizeof(ctx));
   1441    ctx.base.transform_instruction = transform_instr;
   1442    ctx.info = info;
   1443    ctx.config = config;
   1444 
   1445    tgsi_scan_shader(tokens, info);
   1446 
   1447    /* if we are adding fragment shader support to emulate two-sided
   1448     * color, then figure out the number of additional inputs we need
   1449     * to create for BCOLOR's..
   1450     */
   1451    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
   1452        config->color_two_side) {
   1453       int i;
   1454       ctx.face_idx = -1;
   1455       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
   1456          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
   1457             ctx.two_side_idx[ctx.two_side_colors++] = i;
   1458          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
   1459             ctx.face_idx = i;
   1460       }
   1461    }
   1462 
   1463    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
   1464 
   1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
   1466    /* if there are no instructions to lower, then we are done: */
   1467    if (!(OPCS(DST) ||
   1468          OPCS(LRP) ||
   1469          OPCS(FRC) ||
   1470          OPCS(POW) ||
   1471          OPCS(LIT) ||
   1472          OPCS(EXP) ||
   1473          OPCS(LOG) ||
   1474          OPCS(DP4) ||
   1475          OPCS(DP3) ||
   1476          OPCS(DP2) ||
   1477          OPCS(FLR) ||
   1478          OPCS(CEIL) ||
   1479          OPCS(TRUNC) ||
   1480          OPCS(TXP) ||
   1481          ctx.two_side_colors ||
   1482          ctx.saturate))
   1483       return NULL;
   1484 
   1485 #if 0  /* debug */
   1486    _debug_printf("BEFORE:");
   1487    tgsi_dump(tokens, 0);
   1488 #endif
   1489 
   1490    numtmp = 0;
   1491    newlen = tgsi_num_tokens(tokens);
   1492    if (OPCS(DST)) {
   1493       newlen += DST_GROW * OPCS(DST);
   1494       numtmp = MAX2(numtmp, DST_TMP);
   1495    }
   1496    if (OPCS(LRP)) {
   1497       newlen += LRP_GROW * OPCS(LRP);
   1498       numtmp = MAX2(numtmp, LRP_TMP);
   1499    }
   1500    if (OPCS(FRC)) {
   1501       newlen += FRC_GROW * OPCS(FRC);
   1502       numtmp = MAX2(numtmp, FRC_TMP);
   1503    }
   1504    if (OPCS(POW)) {
   1505       newlen += POW_GROW * OPCS(POW);
   1506       numtmp = MAX2(numtmp, POW_TMP);
   1507    }
   1508    if (OPCS(LIT)) {
   1509       newlen += LIT_GROW * OPCS(LIT);
   1510       numtmp = MAX2(numtmp, LIT_TMP);
   1511    }
   1512    if (OPCS(EXP)) {
   1513       newlen += EXP_GROW * OPCS(EXP);
   1514       numtmp = MAX2(numtmp, EXP_TMP);
   1515    }
   1516    if (OPCS(LOG)) {
   1517       newlen += LOG_GROW * OPCS(LOG);
   1518       numtmp = MAX2(numtmp, LOG_TMP);
   1519    }
   1520    if (OPCS(DP4)) {
   1521       newlen += DP4_GROW * OPCS(DP4);
   1522       numtmp = MAX2(numtmp, DOTP_TMP);
   1523    }
   1524    if (OPCS(DP3)) {
   1525       newlen += DP3_GROW * OPCS(DP3);
   1526       numtmp = MAX2(numtmp, DOTP_TMP);
   1527    }
   1528    if (OPCS(DP2)) {
   1529       newlen += DP2_GROW * OPCS(DP2);
   1530       numtmp = MAX2(numtmp, DOTP_TMP);
   1531    }
   1532    if (OPCS(FLR)) {
   1533       newlen += FLR_GROW * OPCS(FLR);
   1534       numtmp = MAX2(numtmp, FLR_TMP);
   1535    }
   1536    if (OPCS(CEIL)) {
   1537       newlen += CEIL_GROW * OPCS(CEIL);
   1538       numtmp = MAX2(numtmp, CEIL_TMP);
   1539    }
   1540    if (OPCS(TRUNC)) {
   1541       newlen += TRUNC_GROW * OPCS(TRUNC);
   1542       numtmp = MAX2(numtmp, TRUNC_TMP);
   1543    }
   1544    if (ctx.saturate || config->lower_TXP) {
   1545       int n = 0;
   1546 
   1547       if (ctx.saturate) {
   1548          n = info->opcode_count[TGSI_OPCODE_TEX] +
   1549             info->opcode_count[TGSI_OPCODE_TXP] +
   1550             info->opcode_count[TGSI_OPCODE_TXB] +
   1551             info->opcode_count[TGSI_OPCODE_TXB2] +
   1552             info->opcode_count[TGSI_OPCODE_TXL];
   1553       } else if (config->lower_TXP) {
   1554           n = info->opcode_count[TGSI_OPCODE_TXP];
   1555       }
   1556 
   1557       newlen += SAMP_GROW * n;
   1558       numtmp = MAX2(numtmp, SAMP_TMP);
   1559    }
   1560 
   1561    /* specifically don't include two_side_colors temps in the count: */
   1562    ctx.numtmp = numtmp;
   1563 
   1564    if (ctx.two_side_colors) {
   1565       newlen += TWOSIDE_GROW(ctx.two_side_colors);
   1566       /* note: we permanently consume temp regs, re-writing references
   1567        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
   1568        * instruction that selects which varying to use):
   1569        */
   1570       numtmp += ctx.two_side_colors;
   1571    }
   1572 
   1573    newlen += 2 * numtmp;
   1574    newlen += 5;        /* immediate */
   1575 
   1576    newtoks = tgsi_alloc_tokens(newlen);
   1577    if (!newtoks)
   1578       return NULL;
   1579 
   1580    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
   1581 
   1582    tgsi_scan_shader(newtoks, info);
   1583 
   1584 #if 0  /* debug */
   1585    _debug_printf("AFTER:");
   1586    tgsi_dump(newtoks, 0);
   1587 #endif
   1588 
   1589    return newtoks;
   1590 }
   1591