Home | History | Annotate | Download | only in svga
      1 /**********************************************************
      2  * Copyright 1998-2013 VMware, Inc.  All rights reserved.
      3  *
      4  * Permission is hereby granted, free of charge, to any person
      5  * obtaining a copy of this software and associated documentation
      6  * files (the "Software"), to deal in the Software without
      7  * restriction, including without limitation the rights to use, copy,
      8  * modify, merge, publish, distribute, sublicense, and/or sell copies
      9  * of the Software, and to permit persons to whom the Software is
     10  * furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be
     13  * included in all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22  * SOFTWARE.
     23  *
     24  **********************************************************/
     25 
     26 /**
     27  * @file svga_tgsi_vgpu10.c
     28  *
     29  * TGSI -> VGPU10 shader translation.
     30  *
     31  * \author Mingcheng Chen
     32  * \author Brian Paul
     33  */
     34 
     35 #include "pipe/p_compiler.h"
     36 #include "pipe/p_shader_tokens.h"
     37 #include "pipe/p_defines.h"
     38 #include "tgsi/tgsi_build.h"
     39 #include "tgsi/tgsi_dump.h"
     40 #include "tgsi/tgsi_info.h"
     41 #include "tgsi/tgsi_parse.h"
     42 #include "tgsi/tgsi_scan.h"
     43 #include "tgsi/tgsi_two_side.h"
     44 #include "tgsi/tgsi_aa_point.h"
     45 #include "tgsi/tgsi_util.h"
     46 #include "util/u_math.h"
     47 #include "util/u_memory.h"
     48 #include "util/u_bitmask.h"
     49 #include "util/u_debug.h"
     50 #include "util/u_pstipple.h"
     51 
     52 #include "svga_context.h"
     53 #include "svga_debug.h"
     54 #include "svga_link.h"
     55 #include "svga_shader.h"
     56 #include "svga_tgsi.h"
     57 
     58 #include "VGPU10ShaderTokens.h"
     59 
     60 
     61 #define INVALID_INDEX 99999
     62 #define MAX_INTERNAL_TEMPS 3
     63 #define MAX_SYSTEM_VALUES 4
     64 #define MAX_IMMEDIATE_COUNT \
     65         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
     66 #define MAX_TEMP_ARRAYS 64  /* Enough? */
     67 
     68 
     69 /**
     70  * Clipping is complicated.  There's four different cases which we
     71  * handle during VS/GS shader translation:
     72  */
     73 enum clipping_mode
     74 {
     75    CLIP_NONE,     /**< No clipping enabled */
     76    CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
     77                    * one or more user-defined clip planes are enabled.  We
     78                    * generate extra code to emit clip distances.
     79                    */
     80    CLIP_DISTANCE, /**< The shader already declares clip distance output
     81                    * registers and has code to write to them.
     82                    */
     83    CLIP_VERTEX    /**< The shader declares a clip vertex output register and
     84                   * has code that writes to the register.  We convert the
     85                   * clipvertex position into one or more clip distances.
     86                   */
     87 };
     88 
     89 
     90 struct svga_shader_emitter_v10
     91 {
     92    /* The token output buffer */
     93    unsigned size;
     94    char *buf;
     95    char *ptr;
     96 
     97    /* Information about the shader and state (does not change) */
     98    struct svga_compile_key key;
     99    struct tgsi_shader_info info;
    100    unsigned unit;
    101 
    102    unsigned inst_start_token;
    103    boolean discard_instruction; /**< throw away current instruction? */
    104 
    105    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
    106    unsigned num_immediates;      /**< Number of immediates emitted */
    107    unsigned common_immediate_pos[8];  /**< literals for common immediates */
    108    unsigned num_common_immediates;
    109    boolean immediates_emitted;
    110 
    111    unsigned num_outputs;      /**< include any extra outputs */
    112                               /**  The first extra output is reserved for
    113                                *   non-adjusted vertex position for
    114                                *   stream output purpose
    115                                */
    116 
    117    /* Temporary Registers */
    118    unsigned num_shader_temps; /**< num of temps used by original shader */
    119    unsigned internal_temp_count;  /**< currently allocated internal temps */
    120    struct {
    121       unsigned start, size;
    122    } temp_arrays[MAX_TEMP_ARRAYS];
    123    unsigned num_temp_arrays;
    124 
    125    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
    126    struct {
    127       unsigned arrayId, index;
    128    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
    129 
    130    /** Number of constants used by original shader for each constant buffer.
    131     * The size should probably always match with that of svga_state.constbufs.
    132     */
    133    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
    134 
    135    /* Samplers */
    136    unsigned num_samplers;
    137    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
    138    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
    139 
    140    /* Address regs (really implemented with temps) */
    141    unsigned num_address_regs;
    142    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
    143 
    144    /* Output register usage masks */
    145    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
    146 
    147    /* To map TGSI system value index to VGPU shader input indexes */
    148    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
    149 
    150    struct {
    151       /* vertex position scale/translation */
    152       unsigned out_index;  /**< the real position output reg */
    153       unsigned tmp_index;  /**< the fake/temp position output reg */
    154       unsigned so_index;   /**< the non-adjusted position output reg */
    155       unsigned prescale_scale_index, prescale_trans_index;
    156       boolean  need_prescale;
    157    } vposition;
    158 
    159    /* For vertex shaders only */
    160    struct {
    161       /* viewport constant */
    162       unsigned viewport_index;
    163 
    164       /* temp index of adjusted vertex attributes */
    165       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
    166    } vs;
    167 
    168    /* For fragment shaders only */
    169    struct {
    170       /* apha test */
    171       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
    172       unsigned color_tmp_index;  /**< fake/temp color output reg */
    173       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
    174 
    175       /* front-face */
    176       unsigned face_input_index; /**< real fragment shader face reg (bool) */
    177       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
    178 
    179       unsigned pstipple_sampler_unit;
    180 
    181       unsigned fragcoord_input_index;  /**< real fragment position input reg */
    182       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
    183    } fs;
    184 
    185    /* For geometry shaders only */
    186    struct {
    187       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
    188       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
    189       unsigned input_size;       /**< size of input arrays */
    190       unsigned prim_id_index;    /**< primitive id register index */
    191       unsigned max_out_vertices; /**< maximum number of output vertices */
    192    } gs;
    193 
    194    /* For vertex or geometry shaders */
    195    enum clipping_mode clip_mode;
    196    unsigned clip_dist_out_index; /**< clip distance output register index */
    197    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
    198    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
    199 
    200    /** Index of temporary holding the clipvertex coordinate */
    201    unsigned clip_vertex_out_index; /**< clip vertex output register index */
    202    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
    203 
    204    /* user clip plane constant slot indexes */
    205    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
    206 
    207    unsigned num_output_writes;
    208    boolean constant_color_output;
    209 
    210    boolean uses_flat_interp;
    211 
    212    /* For all shaders: const reg index for RECT coord scaling */
    213    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
    214 
    215    /* For all shaders: const reg index for texture buffer size */
    216    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
    217 
    218    /* VS/GS/FS Linkage info */
    219    struct shader_linkage linkage;
    220 
    221    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
    222 };
    223 
    224 
    225 static boolean
    226 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
    227 
    228 static boolean
    229 emit_vertex(struct svga_shader_emitter_v10 *emit,
    230             const struct tgsi_full_instruction *inst);
    231 
    232 static char err_buf[128];
    233 
    234 static boolean
    235 expand(struct svga_shader_emitter_v10 *emit)
    236 {
    237    char *new_buf;
    238    unsigned newsize = emit->size * 2;
    239 
    240    if (emit->buf != err_buf)
    241       new_buf = REALLOC(emit->buf, emit->size, newsize);
    242    else
    243       new_buf = NULL;
    244 
    245    if (!new_buf) {
    246       emit->ptr = err_buf;
    247       emit->buf = err_buf;
    248       emit->size = sizeof(err_buf);
    249       return FALSE;
    250    }
    251 
    252    emit->size = newsize;
    253    emit->ptr = new_buf + (emit->ptr - emit->buf);
    254    emit->buf = new_buf;
    255    return TRUE;
    256 }
    257 
    258 /**
    259  * Create and initialize a new svga_shader_emitter_v10 object.
    260  */
    261 static struct svga_shader_emitter_v10 *
    262 alloc_emitter(void)
    263 {
    264    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
    265 
    266    if (!emit)
    267       return NULL;
    268 
    269    /* to initialize the output buffer */
    270    emit->size = 512;
    271    if (!expand(emit)) {
    272       FREE(emit);
    273       return NULL;
    274    }
    275    return emit;
    276 }
    277 
    278 /**
    279  * Free an svga_shader_emitter_v10 object.
    280  */
    281 static void
    282 free_emitter(struct svga_shader_emitter_v10 *emit)
    283 {
    284    assert(emit);
    285    FREE(emit->buf);    /* will be NULL if translation succeeded */
    286    FREE(emit);
    287 }
    288 
    289 static inline boolean
    290 reserve(struct svga_shader_emitter_v10 *emit,
    291         unsigned nr_dwords)
    292 {
    293    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
    294       if (!expand(emit))
    295          return FALSE;
    296    }
    297 
    298    return TRUE;
    299 }
    300 
    301 static boolean
    302 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
    303 {
    304    if (!reserve(emit, 1))
    305       return FALSE;
    306 
    307    *(uint32 *)emit->ptr = dword;
    308    emit->ptr += sizeof dword;
    309    return TRUE;
    310 }
    311 
    312 static boolean
    313 emit_dwords(struct svga_shader_emitter_v10 *emit,
    314             const uint32 *dwords,
    315             unsigned nr)
    316 {
    317    if (!reserve(emit, nr))
    318       return FALSE;
    319 
    320    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
    321    emit->ptr += nr * sizeof *dwords;
    322    return TRUE;
    323 }
    324 
    325 /** Return the number of tokens in the emitter's buffer */
    326 static unsigned
    327 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
    328 {
    329    return (emit->ptr - emit->buf) / sizeof(unsigned);
    330 }
    331 
    332 
    333 /**
    334  * Check for register overflow.  If we overflow we'll set an
    335  * error flag.  This function can be called for register declarations
    336  * or use as src/dst instruction operands.
    337  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
    338                 or VGPU10_OPCODE_DCL_x
    339  * \param index  the register index
    340  */
    341 static void
    342 check_register_index(struct svga_shader_emitter_v10 *emit,
    343                      unsigned operandType, unsigned index)
    344 {
    345    bool overflow_before = emit->register_overflow;
    346 
    347    switch (operandType) {
    348    case VGPU10_OPERAND_TYPE_TEMP:
    349    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
    350    case VGPU10_OPCODE_DCL_TEMPS:
    351       if (index >= VGPU10_MAX_TEMPS) {
    352          emit->register_overflow = TRUE;
    353       }
    354       break;
    355    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
    356    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
    357       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
    358          emit->register_overflow = TRUE;
    359       }
    360       break;
    361    case VGPU10_OPERAND_TYPE_INPUT:
    362    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
    363    case VGPU10_OPCODE_DCL_INPUT:
    364    case VGPU10_OPCODE_DCL_INPUT_SGV:
    365    case VGPU10_OPCODE_DCL_INPUT_SIV:
    366    case VGPU10_OPCODE_DCL_INPUT_PS:
    367    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
    368    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
    369       if ((emit->unit == PIPE_SHADER_VERTEX &&
    370            index >= VGPU10_MAX_VS_INPUTS) ||
    371           (emit->unit == PIPE_SHADER_GEOMETRY &&
    372            index >= VGPU10_MAX_GS_INPUTS) ||
    373           (emit->unit == PIPE_SHADER_FRAGMENT &&
    374            index >= VGPU10_MAX_FS_INPUTS)) {
    375          emit->register_overflow = TRUE;
    376       }
    377       break;
    378    case VGPU10_OPERAND_TYPE_OUTPUT:
    379    case VGPU10_OPCODE_DCL_OUTPUT:
    380    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
    381    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
    382       if ((emit->unit == PIPE_SHADER_VERTEX &&
    383            index >= VGPU10_MAX_VS_OUTPUTS) ||
    384           (emit->unit == PIPE_SHADER_GEOMETRY &&
    385            index >= VGPU10_MAX_GS_OUTPUTS) ||
    386           (emit->unit == PIPE_SHADER_FRAGMENT &&
    387            index >= VGPU10_MAX_FS_OUTPUTS)) {
    388          emit->register_overflow = TRUE;
    389       }
    390       break;
    391    case VGPU10_OPERAND_TYPE_SAMPLER:
    392    case VGPU10_OPCODE_DCL_SAMPLER:
    393       if (index >= VGPU10_MAX_SAMPLERS) {
    394          emit->register_overflow = TRUE;
    395       }
    396       break;
    397    case VGPU10_OPERAND_TYPE_RESOURCE:
    398    case VGPU10_OPCODE_DCL_RESOURCE:
    399       if (index >= VGPU10_MAX_RESOURCES) {
    400          emit->register_overflow = TRUE;
    401       }
    402       break;
    403    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
    404       if (index >= MAX_IMMEDIATE_COUNT) {
    405          emit->register_overflow = TRUE;
    406       }
    407       break;
    408    default:
    409       assert(0);
    410       ; /* nothing */
    411    }
    412 
    413    if (emit->register_overflow && !overflow_before) {
    414       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
    415                    operandType, index);
    416    }
    417 }
    418 
    419 
    420 /**
    421  * Examine misc state to determine the clipping mode.
    422  */
    423 static void
    424 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
    425 {
    426    if (emit->info.num_written_clipdistance > 0) {
    427       emit->clip_mode = CLIP_DISTANCE;
    428    }
    429    else if (emit->info.writes_clipvertex) {
    430       emit->clip_mode = CLIP_VERTEX;
    431    }
    432    else if (emit->key.clip_plane_enable) {
    433       emit->clip_mode = CLIP_LEGACY;
    434    }
    435    else {
    436       emit->clip_mode = CLIP_NONE;
    437    }
    438 }
    439 
    440 
    441 /**
    442  * For clip distance register declarations and clip distance register
    443  * writes we need to mask the declaration usage or instruction writemask
    444  * (respectively) against the set of the really-enabled clipping planes.
    445  *
    446  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
    447  * has a VS that writes to all 8 clip distance registers, but the plane enable
    448  * flags are a subset of that.
    449  *
    450  * This function is used to apply the plane enable flags to the register
    451  * declaration or instruction writemask.
    452  *
    453  * \param writemask  the declaration usage mask or instruction writemask
    454  * \param clip_reg_index  which clip plane register is being declared/written.
    455  *                        The legal values are 0 and 1 (two clip planes per
    456  *                        register, for a total of 8 clip planes)
    457  */
    458 static unsigned
    459 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
    460                       unsigned writemask, unsigned clip_reg_index)
    461 {
    462    unsigned shift;
    463 
    464    assert(clip_reg_index < 2);
    465 
    466    /* four clip planes per clip register: */
    467    shift = clip_reg_index * 4;
    468    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
    469 
    470    return writemask;
    471 }
    472 
    473 
    474 /**
    475  * Translate gallium shader type into VGPU10 type.
    476  */
    477 static VGPU10_PROGRAM_TYPE
    478 translate_shader_type(unsigned type)
    479 {
    480    switch (type) {
    481    case PIPE_SHADER_VERTEX:
    482       return VGPU10_VERTEX_SHADER;
    483    case PIPE_SHADER_GEOMETRY:
    484       return VGPU10_GEOMETRY_SHADER;
    485    case PIPE_SHADER_FRAGMENT:
    486       return VGPU10_PIXEL_SHADER;
    487    default:
    488       assert(!"Unexpected shader type");
    489       return VGPU10_VERTEX_SHADER;
    490    }
    491 }
    492 
    493 
    494 /**
    495  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
    496  * Note: we only need to translate the opcodes for "simple" instructions,
    497  * as seen below.  All other opcodes are handled/translated specially.
    498  */
    499 static VGPU10_OPCODE_TYPE
    500 translate_opcode(unsigned opcode)
    501 {
    502    switch (opcode) {
    503    case TGSI_OPCODE_MOV:
    504       return VGPU10_OPCODE_MOV;
    505    case TGSI_OPCODE_MUL:
    506       return VGPU10_OPCODE_MUL;
    507    case TGSI_OPCODE_ADD:
    508       return VGPU10_OPCODE_ADD;
    509    case TGSI_OPCODE_DP3:
    510       return VGPU10_OPCODE_DP3;
    511    case TGSI_OPCODE_DP4:
    512       return VGPU10_OPCODE_DP4;
    513    case TGSI_OPCODE_MIN:
    514       return VGPU10_OPCODE_MIN;
    515    case TGSI_OPCODE_MAX:
    516       return VGPU10_OPCODE_MAX;
    517    case TGSI_OPCODE_MAD:
    518       return VGPU10_OPCODE_MAD;
    519    case TGSI_OPCODE_SQRT:
    520       return VGPU10_OPCODE_SQRT;
    521    case TGSI_OPCODE_FRC:
    522       return VGPU10_OPCODE_FRC;
    523    case TGSI_OPCODE_FLR:
    524       return VGPU10_OPCODE_ROUND_NI;
    525    case TGSI_OPCODE_FSEQ:
    526       return VGPU10_OPCODE_EQ;
    527    case TGSI_OPCODE_FSGE:
    528       return VGPU10_OPCODE_GE;
    529    case TGSI_OPCODE_FSNE:
    530       return VGPU10_OPCODE_NE;
    531    case TGSI_OPCODE_DDX:
    532       return VGPU10_OPCODE_DERIV_RTX;
    533    case TGSI_OPCODE_DDY:
    534       return VGPU10_OPCODE_DERIV_RTY;
    535    case TGSI_OPCODE_RET:
    536       return VGPU10_OPCODE_RET;
    537    case TGSI_OPCODE_DIV:
    538       return VGPU10_OPCODE_DIV;
    539    case TGSI_OPCODE_IDIV:
    540       return VGPU10_OPCODE_IDIV;
    541    case TGSI_OPCODE_DP2:
    542       return VGPU10_OPCODE_DP2;
    543    case TGSI_OPCODE_BRK:
    544       return VGPU10_OPCODE_BREAK;
    545    case TGSI_OPCODE_IF:
    546       return VGPU10_OPCODE_IF;
    547    case TGSI_OPCODE_ELSE:
    548       return VGPU10_OPCODE_ELSE;
    549    case TGSI_OPCODE_ENDIF:
    550       return VGPU10_OPCODE_ENDIF;
    551    case TGSI_OPCODE_CEIL:
    552       return VGPU10_OPCODE_ROUND_PI;
    553    case TGSI_OPCODE_I2F:
    554       return VGPU10_OPCODE_ITOF;
    555    case TGSI_OPCODE_NOT:
    556       return VGPU10_OPCODE_NOT;
    557    case TGSI_OPCODE_TRUNC:
    558       return VGPU10_OPCODE_ROUND_Z;
    559    case TGSI_OPCODE_SHL:
    560       return VGPU10_OPCODE_ISHL;
    561    case TGSI_OPCODE_AND:
    562       return VGPU10_OPCODE_AND;
    563    case TGSI_OPCODE_OR:
    564       return VGPU10_OPCODE_OR;
    565    case TGSI_OPCODE_XOR:
    566       return VGPU10_OPCODE_XOR;
    567    case TGSI_OPCODE_CONT:
    568       return VGPU10_OPCODE_CONTINUE;
    569    case TGSI_OPCODE_EMIT:
    570       return VGPU10_OPCODE_EMIT;
    571    case TGSI_OPCODE_ENDPRIM:
    572       return VGPU10_OPCODE_CUT;
    573    case TGSI_OPCODE_BGNLOOP:
    574       return VGPU10_OPCODE_LOOP;
    575    case TGSI_OPCODE_ENDLOOP:
    576       return VGPU10_OPCODE_ENDLOOP;
    577    case TGSI_OPCODE_ENDSUB:
    578       return VGPU10_OPCODE_RET;
    579    case TGSI_OPCODE_NOP:
    580       return VGPU10_OPCODE_NOP;
    581    case TGSI_OPCODE_BREAKC:
    582       return VGPU10_OPCODE_BREAKC;
    583    case TGSI_OPCODE_END:
    584       return VGPU10_OPCODE_RET;
    585    case TGSI_OPCODE_F2I:
    586       return VGPU10_OPCODE_FTOI;
    587    case TGSI_OPCODE_IMAX:
    588       return VGPU10_OPCODE_IMAX;
    589    case TGSI_OPCODE_IMIN:
    590       return VGPU10_OPCODE_IMIN;
    591    case TGSI_OPCODE_UDIV:
    592    case TGSI_OPCODE_UMOD:
    593    case TGSI_OPCODE_MOD:
    594       return VGPU10_OPCODE_UDIV;
    595    case TGSI_OPCODE_IMUL_HI:
    596       return VGPU10_OPCODE_IMUL;
    597    case TGSI_OPCODE_INEG:
    598       return VGPU10_OPCODE_INEG;
    599    case TGSI_OPCODE_ISHR:
    600       return VGPU10_OPCODE_ISHR;
    601    case TGSI_OPCODE_ISGE:
    602       return VGPU10_OPCODE_IGE;
    603    case TGSI_OPCODE_ISLT:
    604       return VGPU10_OPCODE_ILT;
    605    case TGSI_OPCODE_F2U:
    606       return VGPU10_OPCODE_FTOU;
    607    case TGSI_OPCODE_UADD:
    608       return VGPU10_OPCODE_IADD;
    609    case TGSI_OPCODE_U2F:
    610       return VGPU10_OPCODE_UTOF;
    611    case TGSI_OPCODE_UCMP:
    612       return VGPU10_OPCODE_MOVC;
    613    case TGSI_OPCODE_UMAD:
    614       return VGPU10_OPCODE_UMAD;
    615    case TGSI_OPCODE_UMAX:
    616       return VGPU10_OPCODE_UMAX;
    617    case TGSI_OPCODE_UMIN:
    618       return VGPU10_OPCODE_UMIN;
    619    case TGSI_OPCODE_UMUL:
    620    case TGSI_OPCODE_UMUL_HI:
    621       return VGPU10_OPCODE_UMUL;
    622    case TGSI_OPCODE_USEQ:
    623       return VGPU10_OPCODE_IEQ;
    624    case TGSI_OPCODE_USGE:
    625       return VGPU10_OPCODE_UGE;
    626    case TGSI_OPCODE_USHR:
    627       return VGPU10_OPCODE_USHR;
    628    case TGSI_OPCODE_USLT:
    629       return VGPU10_OPCODE_ULT;
    630    case TGSI_OPCODE_USNE:
    631       return VGPU10_OPCODE_INE;
    632    case TGSI_OPCODE_SWITCH:
    633       return VGPU10_OPCODE_SWITCH;
    634    case TGSI_OPCODE_CASE:
    635       return VGPU10_OPCODE_CASE;
    636    case TGSI_OPCODE_DEFAULT:
    637       return VGPU10_OPCODE_DEFAULT;
    638    case TGSI_OPCODE_ENDSWITCH:
    639       return VGPU10_OPCODE_ENDSWITCH;
    640    case TGSI_OPCODE_FSLT:
    641       return VGPU10_OPCODE_LT;
    642    case TGSI_OPCODE_ROUND:
    643       return VGPU10_OPCODE_ROUND_NE;
    644    default:
    645       assert(!"Unexpected TGSI opcode in translate_opcode()");
    646       return VGPU10_OPCODE_NOP;
    647    }
    648 }
    649 
    650 
    651 /**
    652  * Translate a TGSI register file type into a VGPU10 operand type.
    653  * \param array  is the TGSI_FILE_TEMPORARY register an array?
    654  */
    655 static VGPU10_OPERAND_TYPE
    656 translate_register_file(enum tgsi_file_type file, boolean array)
    657 {
    658    switch (file) {
    659    case TGSI_FILE_CONSTANT:
    660       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
    661    case TGSI_FILE_INPUT:
    662       return VGPU10_OPERAND_TYPE_INPUT;
    663    case TGSI_FILE_OUTPUT:
    664       return VGPU10_OPERAND_TYPE_OUTPUT;
    665    case TGSI_FILE_TEMPORARY:
    666       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
    667                    : VGPU10_OPERAND_TYPE_TEMP;
    668    case TGSI_FILE_IMMEDIATE:
    669       /* all immediates are 32-bit values at this time so
    670        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
    671        */
    672       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
    673    case TGSI_FILE_SAMPLER:
    674       return VGPU10_OPERAND_TYPE_SAMPLER;
    675    case TGSI_FILE_SYSTEM_VALUE:
    676       return VGPU10_OPERAND_TYPE_INPUT;
    677 
    678    /* XXX TODO more cases to finish */
    679 
    680    default:
    681       assert(!"Bad tgsi register file!");
    682       return VGPU10_OPERAND_TYPE_NULL;
    683    }
    684 }
    685 
    686 
    687 /**
    688  * Emit a null dst register
    689  */
    690 static void
    691 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
    692 {
    693    VGPU10OperandToken0 operand;
    694 
    695    operand.value = 0;
    696    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
    697    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
    698 
    699    emit_dword(emit, operand.value);
    700 }
    701 
    702 
    703 /**
    704  * If the given register is a temporary, return the array ID.
    705  * Else return zero.
    706  */
    707 static unsigned
    708 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
    709                   unsigned file, unsigned index)
    710 {
    711    if (file == TGSI_FILE_TEMPORARY) {
    712       return emit->temp_map[index].arrayId;
    713    }
    714    else {
    715       return 0;
    716    }
    717 }
    718 
    719 
    720 /**
    721  * If the given register is a temporary, convert the index from a TGSI
    722  * TEMPORARY index to a VGPU10 temp index.
    723  */
    724 static unsigned
    725 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
    726                  unsigned file, unsigned index)
    727 {
    728    if (file == TGSI_FILE_TEMPORARY) {
    729       return emit->temp_map[index].index;
    730    }
    731    else {
    732       return index;
    733    }
    734 }
    735 
    736 
    737 /**
    738  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
    739  * Note: the operandType field must already be initialized.
    740  */
    741 static VGPU10OperandToken0
    742 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
    743                         VGPU10OperandToken0 operand0,
    744                         unsigned file,
    745                         boolean indirect, boolean index2D,
    746                         unsigned tempArrayID)
    747 {
    748    unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    749 
    750    /*
    751     * Compute index dimensions
    752     */
    753    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
    754        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
    755       /* there's no swizzle for in-line immediates */
    756       indexDim = VGPU10_OPERAND_INDEX_0D;
    757       assert(operand0.selectionMode == 0);
    758    }
    759    else {
    760       if (index2D ||
    761           tempArrayID > 0 ||
    762           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
    763          indexDim = VGPU10_OPERAND_INDEX_2D;
    764       }
    765       else {
    766          indexDim = VGPU10_OPERAND_INDEX_1D;
    767       }
    768    }
    769 
    770    /*
    771     * Compute index representations (immediate, relative, etc).
    772     */
    773    if (tempArrayID > 0) {
    774       assert(file == TGSI_FILE_TEMPORARY);
    775       /* First index is the array ID, second index is the array element */
    776       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    777       if (indirect) {
    778          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    779       }
    780       else {
    781          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    782       }
    783    }
    784    else if (indirect) {
    785       if (file == TGSI_FILE_CONSTANT) {
    786          /* index[0] indicates which constant buffer while index[1] indicates
    787           * the position in the constant buffer.
    788           */
    789          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    790          index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    791       }
    792       else {
    793          /* All other register files are 1-dimensional */
    794          index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
    795       }
    796    }
    797    else {
    798       index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    799       index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    800    }
    801 
    802    operand0.indexDimension = indexDim;
    803    operand0.index0Representation = index0Rep;
    804    operand0.index1Representation = index1Rep;
    805 
    806    return operand0;
    807 }
    808 
    809 
    810 /**
    811  * Emit the operand for expressing an address register for indirect indexing.
    812  * Note that the address register is really just a temp register.
    813  * \param addr_reg_index  which address register to use
    814  */
    815 static void
    816 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
    817                        unsigned addr_reg_index)
    818 {
    819    unsigned tmp_reg_index;
    820    VGPU10OperandToken0 operand0;
    821 
    822    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
    823 
    824    tmp_reg_index = emit->address_reg_index[addr_reg_index];
    825 
    826    /* operand0 is a simple temporary register, selecting one component */
    827    operand0.value = 0;
    828    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
    829    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
    830    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
    831    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
    832    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
    833    operand0.swizzleX = 0;
    834    operand0.swizzleY = 1;
    835    operand0.swizzleZ = 2;
    836    operand0.swizzleW = 3;
    837 
    838    emit_dword(emit, operand0.value);
    839    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
    840 }
    841 
    842 
    843 /**
    844  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
    845  * \param emit  the emitter context
    846  * \param reg  the TGSI dst register to translate
    847  */
    848 static void
    849 emit_dst_register(struct svga_shader_emitter_v10 *emit,
    850                   const struct tgsi_full_dst_register *reg)
    851 {
    852    unsigned file = reg->Register.File;
    853    unsigned index = reg->Register.Index;
    854    const unsigned sem_name = emit->info.output_semantic_name[index];
    855    const unsigned sem_index = emit->info.output_semantic_index[index];
    856    unsigned writemask = reg->Register.WriteMask;
    857    const unsigned indirect = reg->Register.Indirect;
    858    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
    859    const unsigned index2d = reg->Register.Dimension;
    860    VGPU10OperandToken0 operand0;
    861 
    862    if (file == TGSI_FILE_OUTPUT) {
    863       if (emit->unit == PIPE_SHADER_VERTEX ||
    864           emit->unit == PIPE_SHADER_GEOMETRY) {
    865          if (index == emit->vposition.out_index &&
    866              emit->vposition.tmp_index != INVALID_INDEX) {
    867             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
    868              * vertex position result in a temporary so that we can modify
    869              * it in the post_helper() code.
    870              */
    871             file = TGSI_FILE_TEMPORARY;
    872             index = emit->vposition.tmp_index;
    873          }
    874          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
    875                   emit->clip_dist_tmp_index != INVALID_INDEX) {
    876             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
    877              * We store the clip distance in a temporary first, then
    878              * we'll copy it to the shadow copy and to CLIPDIST with the
    879              * enabled planes mask in emit_clip_distance_instructions().
    880              */
    881             file = TGSI_FILE_TEMPORARY;
    882             index = emit->clip_dist_tmp_index + sem_index;
    883          }
    884          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
    885                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
    886             /* replace the CLIPVERTEX output register with a temporary */
    887             assert(emit->clip_mode == CLIP_VERTEX);
    888             assert(sem_index == 0);
    889             file = TGSI_FILE_TEMPORARY;
    890             index = emit->clip_vertex_tmp_index;
    891          }
    892       }
    893       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
    894          if (sem_name == TGSI_SEMANTIC_POSITION) {
    895             /* Fragment depth output register */
    896             operand0.value = 0;
    897             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
    898             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
    899             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
    900             emit_dword(emit, operand0.value);
    901             return;
    902          }
    903          else if (index == emit->fs.color_out_index[0] &&
    904              emit->fs.color_tmp_index != INVALID_INDEX) {
    905             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
    906              * fragment color result in a temporary so that we can read it
    907              * it in the post_helper() code.
    908              */
    909             file = TGSI_FILE_TEMPORARY;
    910             index = emit->fs.color_tmp_index;
    911          }
    912          else {
    913             /* Typically, for fragment shaders, the output register index
    914              * matches the color semantic index.  But not when we write to
    915              * the fragment depth register.  In that case, OUT[0] will be
    916              * fragdepth and OUT[1] will be the 0th color output.  We need
    917              * to use the semantic index for color outputs.
    918              */
    919             assert(sem_name == TGSI_SEMANTIC_COLOR);
    920             index = emit->info.output_semantic_index[index];
    921 
    922             emit->num_output_writes++;
    923          }
    924       }
    925    }
    926 
    927    /* init operand tokens to all zero */
    928    operand0.value = 0;
    929 
    930    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
    931 
    932    /* the operand has a writemask */
    933    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
    934 
    935    /* Which of the four dest components to write to. Note that we can use a
    936     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    937     */
    938    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
    939    operand0.mask = writemask;
    940 
    941    /* translate TGSI register file type to VGPU10 operand type */
    942    operand0.operandType = translate_register_file(file, tempArrayId > 0);
    943 
    944    check_register_index(emit, operand0.operandType, index);
    945 
    946    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
    947                                       index2d, tempArrayId);
    948 
    949    /* Emit tokens */
    950    emit_dword(emit, operand0.value);
    951    if (tempArrayId > 0) {
    952       emit_dword(emit, tempArrayId);
    953    }
    954 
    955    emit_dword(emit, remap_temp_index(emit, file, index));
    956 
    957    if (indirect) {
    958       emit_indirect_register(emit, reg->Indirect.Index);
    959    }
    960 }
    961 
    962 
    963 /**
    964  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
    965  */
    966 static void
    967 emit_src_register(struct svga_shader_emitter_v10 *emit,
    968                   const struct tgsi_full_src_register *reg)
    969 {
    970    unsigned file = reg->Register.File;
    971    unsigned index = reg->Register.Index;
    972    const unsigned indirect = reg->Register.Indirect;
    973    const unsigned tempArrayId = get_temp_array_id(emit, file, index);
    974    const unsigned index2d = reg->Register.Dimension;
    975    const unsigned swizzleX = reg->Register.SwizzleX;
    976    const unsigned swizzleY = reg->Register.SwizzleY;
    977    const unsigned swizzleZ = reg->Register.SwizzleZ;
    978    const unsigned swizzleW = reg->Register.SwizzleW;
    979    const unsigned absolute = reg->Register.Absolute;
    980    const unsigned negate = reg->Register.Negate;
    981    bool is_prim_id = FALSE;
    982 
    983    VGPU10OperandToken0 operand0;
    984    VGPU10OperandToken1 operand1;
    985 
    986    if (emit->unit == PIPE_SHADER_FRAGMENT &&
    987       file == TGSI_FILE_INPUT) {
    988       if (index == emit->fs.face_input_index) {
    989          /* Replace INPUT[FACE] with TEMP[FACE] */
    990          file = TGSI_FILE_TEMPORARY;
    991          index = emit->fs.face_tmp_index;
    992       }
    993       else if (index == emit->fs.fragcoord_input_index) {
    994          /* Replace INPUT[POSITION] with TEMP[POSITION] */
    995          file = TGSI_FILE_TEMPORARY;
    996          index = emit->fs.fragcoord_tmp_index;
    997       }
    998       else {
    999          /* We remap fragment shader inputs to that FS input indexes
   1000           * match up with VS/GS output indexes.
   1001           */
   1002          index = emit->linkage.input_map[index];
   1003       }
   1004    }
   1005    else if (emit->unit == PIPE_SHADER_GEOMETRY &&
   1006             file == TGSI_FILE_INPUT) {
   1007       is_prim_id = (index == emit->gs.prim_id_index);
   1008       index = emit->linkage.input_map[index];
   1009    }
   1010    else if (emit->unit == PIPE_SHADER_VERTEX) {
   1011       if (file == TGSI_FILE_INPUT) {
   1012          /* if input is adjusted... */
   1013          if ((emit->key.vs.adjust_attrib_w_1 |
   1014               emit->key.vs.adjust_attrib_itof |
   1015               emit->key.vs.adjust_attrib_utof |
   1016               emit->key.vs.attrib_is_bgra |
   1017               emit->key.vs.attrib_puint_to_snorm |
   1018               emit->key.vs.attrib_puint_to_uscaled |
   1019               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
   1020             file = TGSI_FILE_TEMPORARY;
   1021             index = emit->vs.adjusted_input[index];
   1022          }
   1023       }
   1024       else if (file == TGSI_FILE_SYSTEM_VALUE) {
   1025          assert(index < ARRAY_SIZE(emit->system_value_indexes));
   1026          index = emit->system_value_indexes[index];
   1027       }
   1028    }
   1029 
   1030    operand0.value = operand1.value = 0;
   1031 
   1032    if (is_prim_id) {
   1033       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   1034       operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   1035    }
   1036    else {
   1037       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1038       operand0.operandType = translate_register_file(file, tempArrayId > 0);
   1039    }
   1040 
   1041    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
   1042                                       index2d, tempArrayId);
   1043 
   1044    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
   1045        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
   1046       /* there's no swizzle for in-line immediates */
   1047       if (swizzleX == swizzleY &&
   1048           swizzleX == swizzleZ &&
   1049           swizzleX == swizzleW) {
   1050          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   1051       }
   1052       else {
   1053          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   1054       }
   1055 
   1056       operand0.swizzleX = swizzleX;
   1057       operand0.swizzleY = swizzleY;
   1058       operand0.swizzleZ = swizzleZ;
   1059       operand0.swizzleW = swizzleW;
   1060 
   1061       if (absolute || negate) {
   1062          operand0.extended = 1;
   1063          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
   1064          if (absolute && !negate)
   1065             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
   1066          if (!absolute && negate)
   1067             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
   1068          if (absolute && negate)
   1069             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
   1070       }
   1071    }
   1072 
   1073    /* Emit the operand tokens */
   1074    emit_dword(emit, operand0.value);
   1075    if (operand0.extended)
   1076       emit_dword(emit, operand1.value);
   1077 
   1078    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
   1079       /* Emit the four float/int in-line immediate values */
   1080       unsigned *c;
   1081       assert(index < ARRAY_SIZE(emit->immediates));
   1082       assert(file == TGSI_FILE_IMMEDIATE);
   1083       assert(swizzleX < 4);
   1084       assert(swizzleY < 4);
   1085       assert(swizzleZ < 4);
   1086       assert(swizzleW < 4);
   1087       c = (unsigned *) emit->immediates[index];
   1088       emit_dword(emit, c[swizzleX]);
   1089       emit_dword(emit, c[swizzleY]);
   1090       emit_dword(emit, c[swizzleZ]);
   1091       emit_dword(emit, c[swizzleW]);
   1092    }
   1093    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
   1094       /* Emit the register index(es) */
   1095       if (index2d ||
   1096           operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
   1097          emit_dword(emit, reg->Dimension.Index);
   1098       }
   1099 
   1100       if (tempArrayId > 0) {
   1101          emit_dword(emit, tempArrayId);
   1102       }
   1103 
   1104       emit_dword(emit, remap_temp_index(emit, file, index));
   1105 
   1106       if (indirect) {
   1107          emit_indirect_register(emit, reg->Indirect.Index);
   1108       }
   1109    }
   1110 }
   1111 
   1112 
   1113 /**
   1114  * Emit a resource operand (for use with a SAMPLE instruction).
   1115  */
   1116 static void
   1117 emit_resource_register(struct svga_shader_emitter_v10 *emit,
   1118                        unsigned resource_number)
   1119 {
   1120    VGPU10OperandToken0 operand0;
   1121 
   1122    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
   1123 
   1124    /* init */
   1125    operand0.value = 0;
   1126 
   1127    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   1128    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1129    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1130    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   1131    operand0.swizzleX = VGPU10_COMPONENT_X;
   1132    operand0.swizzleY = VGPU10_COMPONENT_Y;
   1133    operand0.swizzleZ = VGPU10_COMPONENT_Z;
   1134    operand0.swizzleW = VGPU10_COMPONENT_W;
   1135 
   1136    emit_dword(emit, operand0.value);
   1137    emit_dword(emit, resource_number);
   1138 }
   1139 
   1140 
   1141 /**
   1142  * Emit a sampler operand (for use with a SAMPLE instruction).
   1143  */
   1144 static void
   1145 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
   1146                       unsigned sampler_number)
   1147 {
   1148    VGPU10OperandToken0 operand0;
   1149 
   1150    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
   1151 
   1152    /* init */
   1153    operand0.value = 0;
   1154 
   1155    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   1156    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1157 
   1158    emit_dword(emit, operand0.value);
   1159    emit_dword(emit, sampler_number);
   1160 }
   1161 
   1162 
   1163 /**
   1164  * Emit an operand which reads the IS_FRONT_FACING register.
   1165  */
   1166 static void
   1167 emit_face_register(struct svga_shader_emitter_v10 *emit)
   1168 {
   1169    VGPU10OperandToken0 operand0;
   1170    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
   1171 
   1172    /* init */
   1173    operand0.value = 0;
   1174 
   1175    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   1176    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   1177    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   1178    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   1179 
   1180    operand0.swizzleX = VGPU10_COMPONENT_X;
   1181    operand0.swizzleY = VGPU10_COMPONENT_X;
   1182    operand0.swizzleZ = VGPU10_COMPONENT_X;
   1183    operand0.swizzleW = VGPU10_COMPONENT_X;
   1184 
   1185    emit_dword(emit, operand0.value);
   1186    emit_dword(emit, index);
   1187 }
   1188 
   1189 
   1190 /**
   1191  * Emit the token for a VGPU10 opcode.
   1192  * \param saturate   clamp result to [0,1]?
   1193  */
   1194 static void
   1195 emit_opcode(struct svga_shader_emitter_v10 *emit,
   1196             unsigned vgpu10_opcode, boolean saturate)
   1197 {
   1198    VGPU10OpcodeToken0 token0;
   1199 
   1200    token0.value = 0;  /* init all fields to zero */
   1201    token0.opcodeType = vgpu10_opcode;
   1202    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1203    token0.saturate = saturate;
   1204 
   1205    emit_dword(emit, token0.value);
   1206 }
   1207 
   1208 
   1209 /**
   1210  * Emit the token for a VGPU10 resinfo instruction.
   1211  * \param modifier   return type modifier, _uint or _rcpFloat.
   1212  *                   TODO: We may want to remove this parameter if it will
   1213  *                   only ever be used as _uint.
   1214  */
   1215 static void
   1216 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
   1217                     VGPU10_RESINFO_RETURN_TYPE modifier)
   1218 {
   1219    VGPU10OpcodeToken0 token0;
   1220 
   1221    token0.value = 0;  /* init all fields to zero */
   1222    token0.opcodeType = VGPU10_OPCODE_RESINFO;
   1223    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1224    token0.resinfoReturnType = modifier;
   1225 
   1226    emit_dword(emit, token0.value);
   1227 }
   1228 
   1229 
   1230 /**
   1231  * Emit opcode tokens for a texture sample instruction.  Texture instructions
   1232  * can be rather complicated (texel offsets, etc) so we have this specialized
   1233  * function.
   1234  */
   1235 static void
   1236 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
   1237                    unsigned vgpu10_opcode, boolean saturate,
   1238                    const int offsets[3])
   1239 {
   1240    VGPU10OpcodeToken0 token0;
   1241    VGPU10OpcodeToken1 token1;
   1242 
   1243    token0.value = 0;  /* init all fields to zero */
   1244    token0.opcodeType = vgpu10_opcode;
   1245    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   1246    token0.saturate = saturate;
   1247 
   1248    if (offsets[0] || offsets[1] || offsets[2]) {
   1249       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1250       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1251       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
   1252       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1253       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1254       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
   1255 
   1256       token0.extended = 1;
   1257       token1.value = 0;
   1258       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
   1259       token1.offsetU = offsets[0];
   1260       token1.offsetV = offsets[1];
   1261       token1.offsetW = offsets[2];
   1262    }
   1263 
   1264    emit_dword(emit, token0.value);
   1265    if (token0.extended) {
   1266       emit_dword(emit, token1.value);
   1267    }
   1268 }
   1269 
   1270 
   1271 /**
   1272  * Emit a DISCARD opcode token.
   1273  * If nonzero is set, we'll discard the fragment if the X component is not 0.
   1274  * Otherwise, we'll discard the fragment if the X component is 0.
   1275  */
   1276 static void
   1277 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
   1278 {
   1279    VGPU10OpcodeToken0 opcode0;
   1280 
   1281    opcode0.value = 0;
   1282    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   1283    if (nonzero)
   1284       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
   1285 
   1286    emit_dword(emit, opcode0.value);
   1287 }
   1288 
   1289 
   1290 /**
   1291  * We need to call this before we begin emitting a VGPU10 instruction.
   1292  */
   1293 static void
   1294 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
   1295 {
   1296    assert(emit->inst_start_token == 0);
   1297    /* Save location of the instruction's VGPU10OpcodeToken0 token.
   1298     * Note, we can't save a pointer because it would become invalid if
   1299     * we have to realloc the output buffer.
   1300     */
   1301    emit->inst_start_token = emit_get_num_tokens(emit);
   1302 }
   1303 
   1304 
   1305 /**
   1306  * We need to call this after we emit the last token of a VGPU10 instruction.
   1307  * This function patches in the opcode token's instructionLength field.
   1308  */
   1309 static void
   1310 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
   1311 {
   1312    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   1313    unsigned inst_length;
   1314 
   1315    assert(emit->inst_start_token > 0);
   1316 
   1317    if (emit->discard_instruction) {
   1318       /* Back up the emit->ptr to where this instruction started so
   1319        * that we discard the current instruction.
   1320        */
   1321       emit->ptr = (char *) (tokens + emit->inst_start_token);
   1322    }
   1323    else {
   1324       /* Compute instruction length and patch that into the start of
   1325        * the instruction.
   1326        */
   1327       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
   1328 
   1329       assert(inst_length > 0);
   1330 
   1331       tokens[emit->inst_start_token].instructionLength = inst_length;
   1332    }
   1333 
   1334    emit->inst_start_token = 0; /* reset to zero for error checking */
   1335    emit->discard_instruction = FALSE;
   1336 }
   1337 
   1338 
   1339 /**
   1340  * Return index for a free temporary register.
   1341  */
   1342 static unsigned
   1343 get_temp_index(struct svga_shader_emitter_v10 *emit)
   1344 {
   1345    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   1346    return emit->num_shader_temps + emit->internal_temp_count++;
   1347 }
   1348 
   1349 
   1350 /**
   1351  * Release the temporaries which were generated by get_temp_index().
   1352  */
   1353 static void
   1354 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
   1355 {
   1356    emit->internal_temp_count = 0;
   1357 }
   1358 
   1359 
   1360 /**
   1361  * Create a tgsi_full_src_register.
   1362  */
   1363 static struct tgsi_full_src_register
   1364 make_src_reg(unsigned file, unsigned index)
   1365 {
   1366    struct tgsi_full_src_register reg;
   1367 
   1368    memset(&reg, 0, sizeof(reg));
   1369    reg.Register.File = file;
   1370    reg.Register.Index = index;
   1371    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   1372    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   1373    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   1374    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   1375    return reg;
   1376 }
   1377 
   1378 
   1379 /**
   1380  * Create a tgsi_full_src_register for a temporary.
   1381  */
   1382 static struct tgsi_full_src_register
   1383 make_src_temp_reg(unsigned index)
   1384 {
   1385    return make_src_reg(TGSI_FILE_TEMPORARY, index);
   1386 }
   1387 
   1388 
   1389 /**
   1390  * Create a tgsi_full_src_register for a constant.
   1391  */
   1392 static struct tgsi_full_src_register
   1393 make_src_const_reg(unsigned index)
   1394 {
   1395    return make_src_reg(TGSI_FILE_CONSTANT, index);
   1396 }
   1397 
   1398 
   1399 /**
   1400  * Create a tgsi_full_src_register for an immediate constant.
   1401  */
   1402 static struct tgsi_full_src_register
   1403 make_src_immediate_reg(unsigned index)
   1404 {
   1405    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
   1406 }
   1407 
   1408 
   1409 /**
   1410  * Create a tgsi_full_dst_register.
   1411  */
   1412 static struct tgsi_full_dst_register
   1413 make_dst_reg(unsigned file, unsigned index)
   1414 {
   1415    struct tgsi_full_dst_register reg;
   1416 
   1417    memset(&reg, 0, sizeof(reg));
   1418    reg.Register.File = file;
   1419    reg.Register.Index = index;
   1420    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   1421    return reg;
   1422 }
   1423 
   1424 
   1425 /**
   1426  * Create a tgsi_full_dst_register for a temporary.
   1427  */
   1428 static struct tgsi_full_dst_register
   1429 make_dst_temp_reg(unsigned index)
   1430 {
   1431    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
   1432 }
   1433 
   1434 
   1435 /**
   1436  * Create a tgsi_full_dst_register for an output.
   1437  */
   1438 static struct tgsi_full_dst_register
   1439 make_dst_output_reg(unsigned index)
   1440 {
   1441    return make_dst_reg(TGSI_FILE_OUTPUT, index);
   1442 }
   1443 
   1444 
   1445 /**
   1446  * Create negated tgsi_full_src_register.
   1447  */
   1448 static struct tgsi_full_src_register
   1449 negate_src(const struct tgsi_full_src_register *reg)
   1450 {
   1451    struct tgsi_full_src_register neg = *reg;
   1452    neg.Register.Negate = !reg->Register.Negate;
   1453    return neg;
   1454 }
   1455 
   1456 /**
   1457  * Create absolute value of a tgsi_full_src_register.
   1458  */
   1459 static struct tgsi_full_src_register
   1460 absolute_src(const struct tgsi_full_src_register *reg)
   1461 {
   1462    struct tgsi_full_src_register absolute = *reg;
   1463    absolute.Register.Absolute = 1;
   1464    return absolute;
   1465 }
   1466 
   1467 
   1468 /** Return the named swizzle term from the src register */
   1469 static inline unsigned
   1470 get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
   1471 {
   1472    switch (term) {
   1473    case TGSI_SWIZZLE_X:
   1474       return reg->Register.SwizzleX;
   1475    case TGSI_SWIZZLE_Y:
   1476       return reg->Register.SwizzleY;
   1477    case TGSI_SWIZZLE_Z:
   1478       return reg->Register.SwizzleZ;
   1479    case TGSI_SWIZZLE_W:
   1480       return reg->Register.SwizzleW;
   1481    default:
   1482       assert(!"Bad swizzle");
   1483       return TGSI_SWIZZLE_X;
   1484    }
   1485 }
   1486 
   1487 
   1488 /**
   1489  * Create swizzled tgsi_full_src_register.
   1490  */
   1491 static struct tgsi_full_src_register
   1492 swizzle_src(const struct tgsi_full_src_register *reg,
   1493             unsigned swizzleX, unsigned swizzleY,
   1494             unsigned swizzleZ, unsigned swizzleW)
   1495 {
   1496    struct tgsi_full_src_register swizzled = *reg;
   1497    /* Note: we swizzle the current swizzle */
   1498    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
   1499    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
   1500    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
   1501    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
   1502    return swizzled;
   1503 }
   1504 
   1505 
   1506 /**
   1507  * Create swizzled tgsi_full_src_register where all the swizzle
   1508  * terms are the same.
   1509  */
   1510 static struct tgsi_full_src_register
   1511 scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
   1512 {
   1513    struct tgsi_full_src_register swizzled = *reg;
   1514    /* Note: we swizzle the current swizzle */
   1515    swizzled.Register.SwizzleX =
   1516    swizzled.Register.SwizzleY =
   1517    swizzled.Register.SwizzleZ =
   1518    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
   1519    return swizzled;
   1520 }
   1521 
   1522 
   1523 /**
   1524  * Create new tgsi_full_dst_register with writemask.
   1525  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
   1526  */
   1527 static struct tgsi_full_dst_register
   1528 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
   1529 {
   1530    struct tgsi_full_dst_register masked = *reg;
   1531    masked.Register.WriteMask = mask;
   1532    return masked;
   1533 }
   1534 
   1535 
   1536 /**
   1537  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
   1538  */
   1539 static boolean
   1540 same_swizzle_terms(const struct tgsi_full_src_register *reg)
   1541 {
   1542    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
   1543            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
   1544            reg->Register.SwizzleZ == reg->Register.SwizzleW);
   1545 }
   1546 
   1547 
   1548 /**
   1549  * Search the vector for the value 'x' and return its position.
   1550  */
   1551 static int
   1552 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
   1553                  union tgsi_immediate_data x)
   1554 {
   1555    unsigned i;
   1556    for (i = 0; i < 4; i++) {
   1557       if (vec[i].Int == x.Int)
   1558          return i;
   1559    }
   1560    return -1;
   1561 }
   1562 
   1563 
   1564 /**
   1565  * Helper used by make_immediate_reg(), make_immediate_reg_4().
   1566  */
   1567 static int
   1568 find_immediate(struct svga_shader_emitter_v10 *emit,
   1569                union tgsi_immediate_data x, unsigned startIndex)
   1570 {
   1571    const unsigned endIndex = emit->num_immediates;
   1572    unsigned i;
   1573 
   1574    assert(emit->immediates_emitted);
   1575 
   1576    /* Search immediates for x, y, z, w */
   1577    for (i = startIndex; i < endIndex; i++) {
   1578       if (x.Int == emit->immediates[i][0].Int ||
   1579           x.Int == emit->immediates[i][1].Int ||
   1580           x.Int == emit->immediates[i][2].Int ||
   1581           x.Int == emit->immediates[i][3].Int) {
   1582          return i;
   1583       }
   1584    }
   1585    /* Should never try to use an immediate value that wasn't pre-declared */
   1586    assert(!"find_immediate() failed!");
   1587    return -1;
   1588 }
   1589 
   1590 
   1591 /**
   1592  * Return a tgsi_full_src_register for an immediate/literal
   1593  * union tgsi_immediate_data[4] value.
   1594  * Note: the values must have been previously declared/allocated in
   1595  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
   1596  * vec4 immediate.
   1597  */
   1598 static struct tgsi_full_src_register
   1599 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
   1600                      const union tgsi_immediate_data imm[4])
   1601 {
   1602    struct tgsi_full_src_register reg;
   1603    unsigned i;
   1604 
   1605    for (i = 0; i < emit->num_common_immediates; i++) {
   1606       /* search for first component value */
   1607       int immpos = find_immediate(emit, imm[0], i);
   1608       int x, y, z, w;
   1609 
   1610       assert(immpos >= 0);
   1611 
   1612       /* find remaining components within the immediate vector */
   1613       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
   1614       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
   1615       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
   1616       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
   1617 
   1618       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
   1619          /* found them all */
   1620          memset(&reg, 0, sizeof(reg));
   1621          reg.Register.File = TGSI_FILE_IMMEDIATE;
   1622          reg.Register.Index = immpos;
   1623          reg.Register.SwizzleX = x;
   1624          reg.Register.SwizzleY = y;
   1625          reg.Register.SwizzleZ = z;
   1626          reg.Register.SwizzleW = w;
   1627          return reg;
   1628       }
   1629       /* else, keep searching */
   1630    }
   1631 
   1632    assert(!"Failed to find immediate register!");
   1633 
   1634    /* Just return IMM[0].xxxx */
   1635    memset(&reg, 0, sizeof(reg));
   1636    reg.Register.File = TGSI_FILE_IMMEDIATE;
   1637    return reg;
   1638 }
   1639 
   1640 
   1641 /**
   1642  * Return a tgsi_full_src_register for an immediate/literal
   1643  * union tgsi_immediate_data value of the form {value, value, value, value}.
   1644  * \sa make_immediate_reg_4() regarding allowed values.
   1645  */
   1646 static struct tgsi_full_src_register
   1647 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
   1648                    union tgsi_immediate_data value)
   1649 {
   1650    struct tgsi_full_src_register reg;
   1651    int immpos = find_immediate(emit, value, 0);
   1652 
   1653    assert(immpos >= 0);
   1654 
   1655    memset(&reg, 0, sizeof(reg));
   1656    reg.Register.File = TGSI_FILE_IMMEDIATE;
   1657    reg.Register.Index = immpos;
   1658    reg.Register.SwizzleX =
   1659    reg.Register.SwizzleY =
   1660    reg.Register.SwizzleZ =
   1661    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
   1662 
   1663    return reg;
   1664 }
   1665 
   1666 
   1667 /**
   1668  * Pack four floats into a tgsi_immediate_data[4] and return the source
   1669  * register make_immediate_reg_4() gives for it (see there for allowed values).
   1670  */
   1671 static struct tgsi_full_src_register
   1672 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
   1673                           float x, float y, float z, float w)
   1674 {
   1675    union tgsi_immediate_data vec[4];   /* one entry per component */
   1676    vec[0].Float = x;
   1677    vec[1].Float = y;
   1678    vec[2].Float = z;
   1679    vec[3].Float = w;
   1680    return make_immediate_reg_4(emit, vec);
   1681 }
   1682 
   1683 
   1684 /**
   1685  * Wrap a single float in a tgsi_immediate_data and return the source
   1686  * register make_immediate_reg() gives for {value, value, value, value}.
   1687  * \sa make_immediate_reg_4() regarding allowed values.
   1688  */
   1689 static struct tgsi_full_src_register
   1690 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
   1691 {
   1692    union tgsi_immediate_data scalar;
   1693    scalar.Float = value;
   1694    return make_immediate_reg(emit, scalar);
   1695 }
   1696 
   1697 
   1698 /**
   1699  * Pack four ints into a tgsi_immediate_data[4] for make_immediate_reg_4().
   1700  */
   1701 static struct tgsi_full_src_register
   1702 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
   1703                         int x, int y, int z, int w)
   1704 {
   1705    union tgsi_immediate_data vec[4];   /* one entry per component */
   1706    vec[0].Int = x;
   1707    vec[1].Int = y;
   1708    vec[2].Int = z;
   1709    vec[3].Int = w;
   1710    return make_immediate_reg_4(emit, vec);
   1711 }
   1712 
   1713 
   1714 /**
   1715  * Wrap a single int in a tgsi_immediate_data and return the source
   1716  * register make_immediate_reg() gives for {value, value, value, value}.
   1717  * \sa make_immediate_reg_4() regarding allowed values.
   1718  */
   1719 static struct tgsi_full_src_register
   1720 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
   1721 {
   1722    union tgsi_immediate_data scalar;
   1723    scalar.Int = value;
   1724    return make_immediate_reg(emit, scalar);
   1725 }
   1726 
   1727 
   1728 /**
   1729  * Append a union tgsi_immediate_data[4] vector to emit->immediates.
   1730  * \return  the index/position of the immediate.
   1731  */
   1732 static unsigned
   1733 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
   1734                   const union tgsi_immediate_data imm[4])
   1735 {
   1736    const unsigned n = emit->num_immediates++;
   1737    unsigned comp;
   1738 
   1739    assert(!emit->immediates_emitted);
   1740    assert(n < ARRAY_SIZE(emit->immediates));
   1741    for (comp = 0; comp < 4; comp++)
   1742       emit->immediates[n][comp] = imm[comp];
   1743    return n;
   1744 }
   1745 
   1746 
   1747 /**
   1748  * Store the float vector {x, y, z, w} as a new immediate.
   1749  * \return  the index/position of the immediate, from alloc_immediate_4().
   1750  */
   1751 static unsigned
   1752 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
   1753                        float x, float y, float z, float w)
   1754 {
   1755    union tgsi_immediate_data vec[4];   /* one entry per component */
   1756    vec[0].Float = x;
   1757    vec[1].Float = y;
   1758    vec[2].Float = z;
   1759    vec[3].Float = w;
   1760    return alloc_immediate_4(emit, vec);
   1761 }
   1762 
   1763 
   1764 /**
   1765  * Store the int vector {x, y, z, w} as a new immediate.
   1766  * \return  the index/position of the immediate, from alloc_immediate_4().
   1767  */
   1768 static unsigned
   1769 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
   1770                      int x, int y, int z, int w)
   1771 {
   1772    union tgsi_immediate_data vec[4];   /* one entry per component */
   1773    vec[0].Int = x;
   1774    vec[1].Int = y;
   1775    vec[2].Int = z;
   1776    vec[3].Int = w;
   1777    return alloc_immediate_4(emit, vec);
   1778 }
   1779 
   1780 
   1781 /**
   1782  * Pick the shader input register used to store system value \p index.
   1783  */
   1784 static unsigned
   1785 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
   1786 {
   1787    const unsigned input_reg = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
   1788    assert(index < ARRAY_SIZE(emit->system_value_indexes));
   1789    emit->system_value_indexes[index] = input_reg;   /* remembered per index */
   1790    return input_reg;
   1791 }
   1792 
   1793 
   1794 /**
   1795  * Handle a TGSI immediate (union tgsi_immediate_data[4]).  No code is
   1796  * emitted here; the values are saved with alloc_immediate_4() and written
   1797  * out later by emit_vgpu10_immediates_block().
   1798  */
   1799 static boolean
   1800 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
   1801                       const struct tgsi_full_immediate *imm)
   1802 {
   1803    /* the returned position isn't needed here */
   1804    (void) alloc_immediate_4(emit, imm->u);
   1805    return TRUE;
   1806 }
   1807 
   1808 
   1809 /**
   1810  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block holding
   1811  * every immediate stored in emit->immediates (see alloc_immediate_4()),
   1812  * then mark the immediates as emitted.
   1813  */
   1814 static boolean
   1815 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
   1816 {
   1817    const unsigned payload_dwords = 4 * emit->num_immediates;
   1818    VGPU10OpcodeToken0 header;
   1819 
   1820    assert(!emit->immediates_emitted);
   1821 
   1822    header.value = 0;
   1823    header.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
   1824    header.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
   1825 
   1826    /* Note: no begin/end_emit_instruction() calls; length is payload plus 2 */
   1827    emit_dword(emit, header.value);
   1828    emit_dword(emit, 2 + payload_dwords);
   1829    emit_dwords(emit, (unsigned *) emit->immediates, payload_dwords);
   1830 
   1831    emit->immediates_emitted = TRUE;
   1832    return TRUE;
   1833 }
   1834 
   1835 
   1836 /**
   1837  * Map a fragment shader input's TGSI_INTERPOLATE_x mode and location to
   1838  * the matching VGPU10 interpolation mode.
   1839  * \return a VGPU10_INTERPOLATION_x value
   1840  */
   1841 static unsigned
   1842 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
   1843                         unsigned interp, unsigned interpolate_loc)
   1844 {
   1845    const boolean centroid = (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID);
   1846 
   1847    /* flat-shaded colors are constant; otherwise COLOR acts as PERSPECTIVE */
   1848    if (interp == TGSI_INTERPOLATE_COLOR && emit->key.fs.flatshade)
   1849       return VGPU10_INTERPOLATION_CONSTANT;
   1850 
   1851    switch (interp) {
   1852    case TGSI_INTERPOLATE_CONSTANT:
   1853       return VGPU10_INTERPOLATION_CONSTANT;
   1854    case TGSI_INTERPOLATE_LINEAR:
   1855       return centroid ? VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID
   1856                       : VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
   1857    case TGSI_INTERPOLATE_COLOR:
   1858    case TGSI_INTERPOLATE_PERSPECTIVE:
   1859       return centroid ? VGPU10_INTERPOLATION_LINEAR_CENTROID
   1860                       : VGPU10_INTERPOLATION_LINEAR;
   1861    default:
   1862       assert(!"Unexpected interpolation mode");
   1863       return VGPU10_INTERPOLATION_CONSTANT;
   1864    }
   1865 }
   1866 
   1867 
   1868 /**
   1869  * Record a TGSI geometry shader property in emit->gs.  Nothing is emitted
   1870  * here: the output primitive topology might be changed later, and the final
   1871  * property instructions are emitted as part of the pre-helper code.
   1872  * \return TRUE always; properties not listed in the switch are ignored.
   1873  */
   1874 static boolean
   1875 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
   1876                      const struct tgsi_full_property *prop)
   1877 {
   1878    static const VGPU10_PRIMITIVE primType[] = {   /* indexed by PIPE_PRIM_x */
   1879       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
   1880       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
   1881       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
   1882       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
   1883       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
   1884       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
   1885       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
   1886       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
   1887       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
   1888       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
   1889       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
   1890       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
   1891       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
   1892       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   1893    };
   1894 
   1895    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {   /* indexed by PIPE_PRIM_x */
   1896       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
   1897       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
   1898       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
   1899       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
   1900       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
   1901       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
   1902       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
   1903       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
   1904       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
   1905       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
   1906       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
   1907       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
   1908       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
   1909       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   1910    };
   1911 
   1912    static const unsigned inputArraySize[] = {   /* indexed by VGPU10_PRIMITIVE_x */
   1913       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
   1914       1,       /* VGPU10_PRIMITIVE_POINT */
   1915       2,       /* VGPU10_PRIMITIVE_LINE */
   1916       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
   1917       0,
   1918       0,
   1919       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
   1920       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   1921    };
   1922 
   1923    switch (prop->Property.PropertyName) {
   1924    case TGSI_PROPERTY_GS_INPUT_PRIM:
   1925       assert(prop->u[0].Data < ARRAY_SIZE(primType));
   1926       emit->gs.prim_type = primType[prop->u[0].Data];
   1927       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
   1928       emit->gs.input_size = inputArraySize[emit->gs.prim_type];   /* looked up by the type just stored */
   1929       break;
   1930 
   1931    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
   1932       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
   1933       emit->gs.prim_topology = primTopology[prop->u[0].Data];
   1934       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
   1935       break;
   1936 
   1937    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
   1938       emit->gs.max_out_vertices = prop->u[0].Data;   /* stored as given */
   1939       break;
   1940 
   1941    default:
   1942       break;   /* other properties are not recorded */
   1943    }
   1944 
   1945    return TRUE;
   1946 }
   1947 
   1948 
   1949 /** Emit one property instruction: the opcode token, plus \p data if nData != 0. */
   1950 static void
   1951 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
   1952                           VGPU10OpcodeToken0 opcode0, unsigned nData, unsigned data)
   1953 {
   1954    begin_emit_instruction(emit);
   1955    emit_dword(emit, opcode0.value);
   1956    if (nData != 0)
   1957       emit_dword(emit, data);
   1958    end_emit_instruction(emit);
   1959 }
   1960 
   1961 
   1962 /**
   1963  * Emit the geometry shader property declarations from the values in emit->gs.
   1964  */
   1965 static void
   1966 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
   1967 {
   1968    VGPU10OpcodeToken0 dcl;
   1969 
   1970    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   1971 
   1972    /* input primitive type, carried in the opcode token itself */
   1973    dcl.value = 0;
   1974    dcl.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   1975    dcl.primitive = emit->gs.prim_type;
   1976    emit_property_instruction(emit, dcl, 0, 0);
   1977 
   1978    /* output primitive topology, also carried in the opcode token */
   1979    dcl.value = 0;
   1980    dcl.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   1981    dcl.primitiveTopology = emit->gs.prim_topology;
   1982    emit_property_instruction(emit, dcl, 0, 0);
   1983 
   1984    /* max output vertex count, passed as one extra data dword */
   1985    dcl.value = 0;
   1986    dcl.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   1987    emit_property_instruction(emit, dcl, 1, emit->gs.max_out_vertices);
   1988 }
   1989 
   1990 
   1991 /**
   1992  * Emit a vgpu10 declaration "instruction": opcode token, operand token,
   1993  * index dwords chosen by operand0.indexDimension, then any non-zero name token.
   1994  * \param index  the register index
   1995  * \param size   array size of the operand: 1 in most cases, but for geometry
   1996  *               shader inputs it varies with the primitive type.
   1997  */
   1998 static void
   1999 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
   2000                       VGPU10OpcodeToken0 opcode0,
   2001                       VGPU10OperandToken0 operand0,
   2002                       VGPU10NameToken name_token,
   2003                       unsigned index, unsigned size)
   2004 {
   2005    const unsigned dim = operand0.indexDimension;
   2006 
   2007    assert(opcode0.opcodeType);
   2008    assert(operand0.mask);
   2009 
   2010    begin_emit_instruction(emit);
   2011    emit_dword(emit, opcode0.value);
   2012    emit_dword(emit, operand0.value);
   2013 
   2014    /* 1D: just the register index.  2D and above: the size first, then
   2015     * the index.  Any other dimension emits no index dwords.
   2016     */
   2017    if (dim == VGPU10_OPERAND_INDEX_1D) {
   2018       emit_dword(emit, index);
   2019    }
   2020    else if (dim >= VGPU10_OPERAND_INDEX_2D) {
   2021       emit_dword(emit, size);
   2022       emit_dword(emit, index);
   2023    }
   2024 
   2025    if (name_token.value != 0) {
   2026       emit_dword(emit, name_token.value);
   2027    }
   2028 
   2029    end_emit_instruction(emit);
   2030 }
   2031 
   2032 
   2033 /**
   2034  * Build the opcode, operand and name tokens for a shader input and emit
   2035  * the declaration through emit_decl_instruction().
   2036  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
   2037  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
   2038  * \param dim         index dimension
   2039  * \param index       the input register index
   2040  * \param size        array size of the operand: 1 in most cases, but for
   2041  *                    geometry shader inputs it varies with the primitive type
   2042  * \param name        one of VGPU10_NAME_x
   2043  * \param numComp     number of components
   2044  * \param selMode     component selection mode
   2045  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
   2046  * \param interpMode  interpolation mode
   2047  */
   2048 static void
   2049 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
   2050                        unsigned opcodeType, unsigned operandType,
   2051                        unsigned dim, unsigned index, unsigned size,
   2052                        unsigned name, unsigned numComp,
   2053                        unsigned selMode, unsigned usageMask,
   2054                        unsigned interpMode)
   2055 {
   2056    VGPU10OpcodeToken0 dcl;
   2057    VGPU10OperandToken0 operand;
   2058    VGPU10NameToken name_tok;
   2059 
   2060    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2061    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
   2062           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
   2063           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
   2064           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   2065    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
   2066           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   2067    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   2068    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   2069    assert(dim <= VGPU10_OPERAND_INDEX_3D);
   2070    assert(name == VGPU10_NAME_UNDEFINED ||
   2071           name == VGPU10_NAME_POSITION ||
   2072           name == VGPU10_NAME_INSTANCE_ID ||
   2073           name == VGPU10_NAME_VERTEX_ID ||
   2074           name == VGPU10_NAME_PRIMITIVE_ID ||
   2075           name == VGPU10_NAME_IS_FRONT_FACE);
   2076    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
   2077           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
   2078           interpMode == VGPU10_INTERPOLATION_LINEAR ||
   2079           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
   2080           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
   2081           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
   2082 
   2083    check_register_index(emit, opcodeType, index);
   2084 
   2085    dcl.value = 0;
   2086    dcl.opcodeType = opcodeType;
   2087    dcl.interpolationMode = interpMode;
   2088 
   2089    operand.value = 0;
   2090    operand.operandType = operandType;
   2091    operand.numComponents = numComp;
   2092    operand.selectionMode = selMode;
   2093    operand.mask = usageMask;
   2094    operand.indexDimension = dim;
   2095    operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2096    if (dim == VGPU10_OPERAND_INDEX_2D)
   2097       operand.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2098 
   2099    name_tok.value = 0;
   2100    name_tok.name = name;
   2101    emit_decl_instruction(emit, dcl, operand, name_tok, index, size);
   2102 }
   2103 
   2104 
   2105 /**
   2106  * Build the tokens for a shader output and emit its declaration.
   2107  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
   2108  * \param index  the output register index
   2109  * \param name  one of VGPU10_NAME_x
   2110  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
   2111  */
   2112 static void
   2113 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
   2114                         unsigned type, unsigned index,
   2115                         unsigned name, unsigned usageMask)
   2116 {
   2117    VGPU10OpcodeToken0 dcl;
   2118    VGPU10OperandToken0 operand;
   2119    VGPU10NameToken name_tok;
   2120 
   2121    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2122    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
   2123           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
   2124           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   2125    assert(name == VGPU10_NAME_UNDEFINED ||
   2126           name == VGPU10_NAME_POSITION ||
   2127           name == VGPU10_NAME_PRIMITIVE_ID ||
   2128           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
   2129           name == VGPU10_NAME_CLIP_DISTANCE);
   2130 
   2131    check_register_index(emit, type, index);
   2132 
   2133    dcl.value = 0;
   2134    dcl.opcodeType = type;
   2135    operand.value = 0;
   2136    operand.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
   2137    operand.numComponents = VGPU10_OPERAND_4_COMPONENT;
   2138    operand.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   2139    operand.mask = usageMask;
   2140    operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
   2141    operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2142    name_tok.value = 0;
   2143    name_tok.name = name;
   2144 
   2145    emit_decl_instruction(emit, dcl, operand, name_tok, index, 1);
   2146 }
   2147 
   2148 
   2149 /**
   2150  * Declare the fragment depth output (0D, one component, no name token).
   2151  */
   2152 static void
   2153 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
   2154 {
   2155    VGPU10OpcodeToken0 dcl;
   2156    VGPU10OperandToken0 depth;
   2157    VGPU10NameToken no_name;
   2158 
   2159    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   2160 
   2161    dcl.value = 0;
   2162    dcl.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
   2163    depth.value = 0;
   2164    depth.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
   2165    depth.numComponents = VGPU10_OPERAND_1_COMPONENT;
   2166    depth.indexDimension = VGPU10_OPERAND_INDEX_0D;
   2167    depth.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   2168    no_name.value = 0;   /* left zero, so no name token is emitted */
   2169    emit_decl_instruction(emit, dcl, depth, no_name, 0, 1);
   2170 }
   2171 
   2172 
   2173 /**
   2174  * Emit the input declaration for a system value.  Only
   2175  * TGSI_SEMANTIC_INSTANCEID and TGSI_SEMANTIC_VERTEXID are handled; any other
   2176  * semantic is ignored (XXX).  For those two, the register returned by
   2177  * alloc_system_value_index() is declared as a DCL_INPUT_SIV with the X
   2178  * component mask.
   2179  * \param index  system value index, passed to alloc_system_value_index()
   2180  */
   2181 static void
   2182 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
   2183                               unsigned semantic_name, unsigned index)
   2184 {
   2185    unsigned name;
   2186 
   2187    switch (semantic_name) {
   2188    case TGSI_SEMANTIC_INSTANCEID:
   2189       name = VGPU10_NAME_INSTANCE_ID;
   2190       break;
   2191    case TGSI_SEMANTIC_VERTEXID:
   2192       name = VGPU10_NAME_VERTEX_ID;
   2193       break;
   2194    default:
   2195       return; /* XXX */
   2196    }
   2197 
   2198    index = alloc_system_value_index(emit, index);
   2199    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
   2200                           VGPU10_OPERAND_TYPE_INPUT,
   2201                           VGPU10_OPERAND_INDEX_1D,
   2202                           index, 1,
   2203                           name,
   2204                           VGPU10_OPERAND_4_COMPONENT,
   2205                           VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2206                           VGPU10_OPERAND_4_COMPONENT_MASK_X,
   2207                           VGPU10_INTERPOLATION_UNDEFINED);
   2208 }
   2209 
   2210 /**
   2211  * Process one TGSI declaration: most register files are only recorded in
   2212  * \p emit for later emission, while system values are declared right away.
   2213  * \return TRUE if handled; FALSE (after asserting) for unsupported files.
   2214  */
   2215 static boolean
   2216 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
   2217                         const struct tgsi_full_declaration *decl)
   2218 {
   2219    switch (decl->Declaration.File) {
   2220    case TGSI_FILE_INPUT:
   2221       /* do nothing - see emit_input_declarations() */
   2222       return TRUE;
   2223 
   2224    case TGSI_FILE_OUTPUT:
   2225       assert(decl->Range.First == decl->Range.Last);
   2226       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
   2227       return TRUE;
   2228 
   2229    case TGSI_FILE_TEMPORARY:
   2230       /* Don't declare the temps here; just track them and declare later. */
   2231       if (decl->Declaration.Array) {
   2232          /* Indexed temporary array: save its start index and size.  The ID is
   2233           * clamped so arrayID + 1 can't exceed MAX_TEMP_ARRAYS in release builds.
   2234           */
   2235          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS - 1);
   2236          unsigned i;
   2237 
   2238          assert(decl->Array.ArrayID < ARRAY_SIZE(emit->temp_arrays));
   2239 
   2240          /* Save this array so we can emit the declaration for it later */
   2241          emit->temp_arrays[arrayID].start = decl->Range.First;
   2242          emit->temp_arrays[arrayID].size =
   2243             decl->Range.Last - decl->Range.First + 1;
   2244 
   2245          emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
   2246          assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
   2247          emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
   2248 
   2249          /* Fill in the temp_map entries for this array */
   2250          for (i = decl->Range.First; i <= decl->Range.Last; i++) {
   2251             emit->temp_map[i].arrayId = arrayID;
   2252             emit->temp_map[i].index = i - decl->Range.First;
   2253          }
   2254       }
   2255 
   2256       /* for all temps, indexed or not, keep track of highest index */
   2257       emit->num_shader_temps = MAX2(emit->num_shader_temps,
   2258                                     decl->Range.Last + 1);
   2259       return TRUE;
   2260 
   2261    case TGSI_FILE_CONSTANT:
   2262       /* Don't declare constants here.  Just keep track and emit later. */
   2263       {
   2264          unsigned constbuf = 0, num_consts;
   2265          if (decl->Declaration.Dimension) {
   2266             constbuf = decl->Dim.Index2D;
   2267          }
   2268          /* We throw an assertion here when, in fact, the shader should never
   2269           * have linked due to constbuf index out of bounds, so we shouldn't
   2270           * have reached here.
   2271           */
   2272          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
   2273 
   2274          num_consts = MAX2(emit->num_shader_consts[constbuf],
   2275                            decl->Range.Last + 1);
   2276 
   2277          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
   2278             debug_printf("Warning: constant buffer is declared to size [%u]"
   2279                          " but [%u] is the limit.\n",
   2280                          num_consts,
   2281                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
   2282          }
   2283          /* The linker doesn't enforce the max UBO size so we clamp here */
   2284          emit->num_shader_consts[constbuf] =
   2285             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
   2286       }
   2287       return TRUE;
   2288 
   2289    case TGSI_FILE_IMMEDIATE:
   2290       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
   2291       return FALSE;
   2292 
   2293    case TGSI_FILE_SYSTEM_VALUE:
   2294       emit_system_value_declaration(emit, decl->Semantic.Name,
   2295                                     decl->Range.First);
   2296       return TRUE;
   2297 
   2298    case TGSI_FILE_SAMPLER:
   2299       /* Don't declare samplers here.  Just keep track and emit later. */
   2300       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
   2301       return TRUE;
   2302 
   2303 #if 0
   2304    case TGSI_FILE_RESOURCE:
   2305       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
   2306       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
   2307       assert(!"TGSI_FILE_RESOURCE not handled yet");
   2308       return FALSE;
   2309 #endif
   2310 
   2311    case TGSI_FILE_ADDRESS:
   2312       emit->num_address_regs = MAX2(emit->num_address_regs,
   2313                                     decl->Range.Last + 1);
   2314       return TRUE;
   2315 
   2316    case TGSI_FILE_SAMPLER_VIEW:
   2317       {
   2318          unsigned unit = decl->Range.First;
   2319          assert(decl->Range.First == decl->Range.Last);
   2320          emit->sampler_target[unit] = decl->SamplerView.Resource;
   2321          /* Note: we can ignore YZW return types for now */
   2322          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
   2323       }
   2324       return TRUE;
   2325 
   2326    default:
   2327       assert(!"Unexpected type of declaration");
   2328       return FALSE;
   2329    }
   2330 }
   2331 
   2332 
   2333 
   2334 /**
   2335  * Emit all input declarations.
   2336  */
   2337 static boolean
   2338 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
   2339 {
   2340    unsigned i;
   2341 
   2342    if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2343 
   2344       for (i = 0; i < emit->linkage.num_inputs; i++) {
   2345          unsigned semantic_name = emit->info.input_semantic_name[i];
   2346          unsigned usage_mask = emit->info.input_usage_mask[i];
   2347          unsigned index = emit->linkage.input_map[i];
   2348          unsigned type, interpolationMode, name;
   2349 
   2350          if (usage_mask == 0)
   2351             continue;  /* register is not actually used */
   2352 
   2353          if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2354             /* fragment position input */
   2355             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2356             interpolationMode = VGPU10_INTERPOLATION_LINEAR;
   2357             name = VGPU10_NAME_POSITION;
   2358             if (usage_mask & TGSI_WRITEMASK_W) {
   2359                /* we need to replace use of 'w' with '1/w' */
   2360                emit->fs.fragcoord_input_index = i;
   2361             }
   2362          }
   2363          else if (semantic_name == TGSI_SEMANTIC_FACE) {
   2364             /* fragment front-facing input */
   2365             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2366             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
   2367             name = VGPU10_NAME_IS_FRONT_FACE;
   2368             emit->fs.face_input_index = i;
   2369          }
   2370          else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
   2371             /* primitive ID */
   2372             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
   2373             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
   2374             name = VGPU10_NAME_PRIMITIVE_ID;
   2375          }
   2376          else {
   2377             /* general fragment input */
   2378             type = VGPU10_OPCODE_DCL_INPUT_PS;
   2379             interpolationMode =
   2380                translate_interpolation(emit,
   2381                                        emit->info.input_interpolate[i],
   2382                                        emit->info.input_interpolate_loc[i]);
   2383 
   2384             /* keeps track if flat interpolation mode is being used */
   2385             emit->uses_flat_interp = emit->uses_flat_interp ||
   2386                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
   2387 
   2388             name = VGPU10_NAME_UNDEFINED;
   2389          }
   2390 
   2391          emit_input_declaration(emit, type,
   2392                                 VGPU10_OPERAND_TYPE_INPUT,
   2393                                 VGPU10_OPERAND_INDEX_1D, index, 1,
   2394                                 name,
   2395                                 VGPU10_OPERAND_4_COMPONENT,
   2396                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2397                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2398                                 interpolationMode);
   2399       }
   2400    }
   2401    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
   2402 
   2403       for (i = 0; i < emit->info.num_inputs; i++) {
   2404          unsigned semantic_name = emit->info.input_semantic_name[i];
   2405          unsigned usage_mask = emit->info.input_usage_mask[i];
   2406          unsigned index = emit->linkage.input_map[i];
   2407          unsigned opcodeType, operandType;
   2408          unsigned numComp, selMode;
   2409          unsigned name;
   2410          unsigned dim;
   2411 
   2412          if (usage_mask == 0)
   2413             continue;  /* register is not actually used */
   2414 
   2415          opcodeType = VGPU10_OPCODE_DCL_INPUT;
   2416          operandType = VGPU10_OPERAND_TYPE_INPUT;
   2417          numComp = VGPU10_OPERAND_4_COMPONENT;
   2418          selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   2419          name = VGPU10_NAME_UNDEFINED;
   2420 
   2421          /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
   2422          dim = VGPU10_OPERAND_INDEX_2D;
   2423 
   2424          if (semantic_name == TGSI_SEMANTIC_PRIMID) {
   2425             /* Primitive ID */
   2426             operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   2427             dim = VGPU10_OPERAND_INDEX_0D;
   2428             numComp = VGPU10_OPERAND_0_COMPONENT;
   2429             selMode = 0;
   2430 
   2431             /* also save the register index so we can check for
   2432              * primitive id when emit src register. We need to modify the
   2433              * operand type, index dimension when emit primitive id src reg.
   2434              */
   2435             emit->gs.prim_id_index = i;
   2436          }
   2437          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2438             /* vertex position input */
   2439             opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
   2440             name = VGPU10_NAME_POSITION;
   2441          }
   2442 
   2443          emit_input_declaration(emit, opcodeType, operandType,
   2444                                 dim, index,
   2445                                 emit->gs.input_size,
   2446                                 name,
   2447                                 numComp, selMode,
   2448                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2449                                 VGPU10_INTERPOLATION_UNDEFINED);
   2450       }
   2451    }
   2452    else {
   2453       assert(emit->unit == PIPE_SHADER_VERTEX);
   2454 
   2455       for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
   2456          unsigned usage_mask = emit->info.input_usage_mask[i];
   2457          unsigned index = i;
   2458 
   2459          if (usage_mask == 0)
   2460             continue;  /* register is not actually used */
   2461 
   2462          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
   2463                                 VGPU10_OPERAND_TYPE_INPUT,
   2464                                 VGPU10_OPERAND_INDEX_1D, index, 1,
   2465                                 VGPU10_NAME_UNDEFINED,
   2466                                 VGPU10_OPERAND_4_COMPONENT,
   2467                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
   2468                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
   2469                                 VGPU10_INTERPOLATION_UNDEFINED);
   2470       }
   2471    }
   2472 
   2473    return TRUE;
   2474 }
   2475 
   2476 
   2477 /**
   2478  * Emit all output declarations.
   2479  */
   2480 static boolean
   2481 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
   2482 {
   2483    unsigned i;
   2484 
   2485    for (i = 0; i < emit->info.num_outputs; i++) {
   2486       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
   2487       const unsigned semantic_name = emit->info.output_semantic_name[i];
   2488       const unsigned semantic_index = emit->info.output_semantic_index[i];
   2489       unsigned index = i;
   2490 
   2491       if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2492          if (semantic_name == TGSI_SEMANTIC_COLOR) {
   2493             assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
   2494 
   2495             emit->fs.color_out_index[semantic_index] = index;
   2496 
   2497             /* The semantic index is the shader's color output/buffer index */
   2498             emit_output_declaration(emit,
   2499                                     VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
   2500                                     VGPU10_NAME_UNDEFINED,
   2501                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2502 
   2503             if (semantic_index == 0) {
   2504                if (emit->key.fs.write_color0_to_n_cbufs > 1) {
   2505                   /* Emit declarations for the additional color outputs
   2506                    * for broadcasting.
   2507                    */
   2508                   unsigned j;
   2509                   for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
   2510                      /* Allocate a new output index */
   2511                      unsigned idx = emit->info.num_outputs + j - 1;
   2512                      emit->fs.color_out_index[j] = idx;
   2513                      emit_output_declaration(emit,
   2514                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
   2515                                         VGPU10_NAME_UNDEFINED,
   2516                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2517                      emit->info.output_semantic_index[idx] = j;
   2518                   }
   2519                }
   2520             }
   2521             else {
   2522                assert(!emit->key.fs.write_color0_to_n_cbufs);
   2523             }
   2524          }
   2525          else if (semantic_name == TGSI_SEMANTIC_POSITION) {
   2526             /* Fragment depth output */
   2527             emit_fragdepth_output_declaration(emit);
   2528          }
   2529          else {
   2530             assert(!"Bad output semantic name");
   2531          }
   2532       }
   2533       else {
   2534          /* VS or GS */
   2535          unsigned name, type;
   2536          unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   2537 
   2538          switch (semantic_name) {
   2539          case TGSI_SEMANTIC_POSITION:
   2540             assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2541             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
   2542             name = VGPU10_NAME_POSITION;
   2543             /* Save the index of the vertex position output register */
   2544             emit->vposition.out_index = index;
   2545             break;
   2546          case TGSI_SEMANTIC_CLIPDIST:
   2547             type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
   2548             name = VGPU10_NAME_CLIP_DISTANCE;
   2549             /* save the starting index of the clip distance output register */
   2550             if (semantic_index == 0)
   2551                emit->clip_dist_out_index = index;
   2552             writemask = emit->output_usage_mask[index];
   2553             writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
   2554             if (writemask == 0x0) {
   2555                continue; /* discard this do-nothing declaration */
   2556             }
   2557             break;
   2558          case TGSI_SEMANTIC_PRIMID:
   2559             assert(emit->unit == PIPE_SHADER_GEOMETRY);
   2560             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
   2561             name = VGPU10_NAME_PRIMITIVE_ID;
   2562             break;
   2563          case TGSI_SEMANTIC_LAYER:
   2564             assert(emit->unit == PIPE_SHADER_GEOMETRY);
   2565             type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
   2566             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
   2567             break;
   2568          case TGSI_SEMANTIC_CLIPVERTEX:
   2569             type = VGPU10_OPCODE_DCL_OUTPUT;
   2570             name = VGPU10_NAME_UNDEFINED;
   2571             emit->clip_vertex_out_index = index;
   2572             break;
   2573          default:
   2574             /* generic output */
   2575             type = VGPU10_OPCODE_DCL_OUTPUT;
   2576             name = VGPU10_NAME_UNDEFINED;
   2577          }
   2578 
   2579          emit_output_declaration(emit, type, index, name, writemask);
   2580       }
   2581    }
   2582 
   2583    if (emit->vposition.so_index != INVALID_INDEX &&
   2584        emit->vposition.out_index != INVALID_INDEX) {
   2585 
   2586       assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2587 
   2588       /* Emit the declaration for the non-adjusted vertex position
   2589        * for stream output purpose
   2590        */
   2591       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2592                               emit->vposition.so_index,
   2593                               VGPU10_NAME_UNDEFINED,
   2594                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   2595    }
   2596 
   2597    if (emit->clip_dist_so_index != INVALID_INDEX &&
   2598        emit->clip_dist_out_index != INVALID_INDEX) {
   2599 
   2600       assert(emit->unit != PIPE_SHADER_FRAGMENT);
   2601 
   2602       /* Emit the declaration for the clip distance shadow copy which
   2603        * will be used for stream output purpose and for clip distance
   2604        * varying variable
   2605        */
   2606       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2607                               emit->clip_dist_so_index,
   2608                               VGPU10_NAME_UNDEFINED,
   2609                               emit->output_usage_mask[emit->clip_dist_out_index]);
   2610 
   2611       if (emit->info.num_written_clipdistance > 4) {
   2612          /* for the second clip distance register, each handles 4 planes */
   2613          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
   2614                                  emit->clip_dist_so_index + 1,
   2615                                  VGPU10_NAME_UNDEFINED,
   2616                                  emit->output_usage_mask[emit->clip_dist_out_index+1]);
   2617       }
   2618    }
   2619 
   2620    return TRUE;
   2621 }
   2622 
   2623 
   2624 /**
   2625  * Emit the declaration for the temporary registers.
   2626  */
   2627 static boolean
   2628 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
   2629 {
   2630    unsigned total_temps, reg, i;
   2631 
   2632    total_temps = emit->num_shader_temps;
   2633 
   2634    /* If there is indirect access to non-indexable temps in the shader,
   2635     * convert those temps to indexable temps. This works around a bug
   2636     * in the GLSL->TGSI translator exposed in piglit test
   2637     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
   2638     * Internal temps added by the driver remain as non-indexable temps.
   2639     */
   2640    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
   2641        emit->num_temp_arrays == 0) {
   2642       unsigned arrayID;
   2643 
   2644       arrayID = 1;
   2645       emit->num_temp_arrays = arrayID + 1;
   2646       emit->temp_arrays[arrayID].start = 0;
   2647       emit->temp_arrays[arrayID].size = total_temps;
   2648 
   2649       /* Fill in the temp_map entries for this temp array */
   2650       for (i = 0; i < total_temps; i++) {
   2651          emit->temp_map[i].arrayId = arrayID;
   2652          emit->temp_map[i].index = i;
   2653       }
   2654    }
   2655 
   2656    /* Allocate extra temps for specially-implemented instructions,
   2657     * such as LIT.
   2658     */
   2659    total_temps += MAX_INTERNAL_TEMPS;
   2660 
   2661    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
   2662       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
   2663           emit->key.clip_plane_enable ||
   2664           emit->vposition.so_index != INVALID_INDEX) {
   2665          emit->vposition.tmp_index = total_temps;
   2666          total_temps += 1;
   2667       }
   2668 
   2669       if (emit->unit == PIPE_SHADER_VERTEX) {
   2670          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
   2671                                  emit->key.vs.adjust_attrib_itof |
   2672                                  emit->key.vs.adjust_attrib_utof |
   2673                                  emit->key.vs.attrib_is_bgra |
   2674                                  emit->key.vs.attrib_puint_to_snorm |
   2675                                  emit->key.vs.attrib_puint_to_uscaled |
   2676                                  emit->key.vs.attrib_puint_to_sscaled);
   2677          while (attrib_mask) {
   2678             unsigned index = u_bit_scan(&attrib_mask);
   2679             emit->vs.adjusted_input[index] = total_temps++;
   2680          }
   2681       }
   2682 
   2683       if (emit->clip_mode == CLIP_DISTANCE) {
   2684          /* We need to write the clip distance to a temporary register
   2685           * first. Then it will be copied to the shadow copy for
   2686           * the clip distance varying variable and stream output purpose.
   2687           * It will also be copied to the actual CLIPDIST register
   2688           * according to the enabled clip planes
   2689           */
   2690          emit->clip_dist_tmp_index = total_temps++;
   2691          if (emit->info.num_written_clipdistance > 4)
   2692             total_temps++; /* second clip register */
   2693       }
   2694       else if (emit->clip_mode == CLIP_VERTEX) {
   2695          /* We need to convert the TGSI CLIPVERTEX output to one or more
   2696           * clip distances.  Allocate a temp reg for the clipvertex here.
   2697           */
   2698          assert(emit->info.writes_clipvertex > 0);
   2699          emit->clip_vertex_tmp_index = total_temps;
   2700          total_temps++;
   2701       }
   2702    }
   2703    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
   2704       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
   2705           emit->key.fs.white_fragments ||
   2706           emit->key.fs.write_color0_to_n_cbufs > 1) {
   2707          /* Allocate a temp to hold the output color */
   2708          emit->fs.color_tmp_index = total_temps;
   2709          total_temps += 1;
   2710       }
   2711 
   2712       if (emit->fs.face_input_index != INVALID_INDEX) {
   2713          /* Allocate a temp for the +/-1 face register */
   2714          emit->fs.face_tmp_index = total_temps;
   2715          total_temps += 1;
   2716       }
   2717 
   2718       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
   2719          /* Allocate a temp for modified fragment position register */
   2720          emit->fs.fragcoord_tmp_index = total_temps;
   2721          total_temps += 1;
   2722       }
   2723    }
   2724 
   2725    for (i = 0; i < emit->num_address_regs; i++) {
   2726       emit->address_reg_index[i] = total_temps++;
   2727    }
   2728 
   2729    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
   2730     * temp indexes.  Basically, we compact all the non-array temp register
   2731     * indexes into a consecutive series.
   2732     *
   2733     * Before, we may have some TGSI declarations like:
   2734     *   DCL TEMP[0..1], LOCAL
   2735     *   DCL TEMP[2..4], ARRAY(1), LOCAL
   2736     *   DCL TEMP[5..7], ARRAY(2), LOCAL
   2737     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
   2738     *
   2739     * After, we'll have a map like this:
   2740     *   temp_map[0] = { array 0, index 0 }
   2741     *   temp_map[1] = { array 0, index 1 }
   2742     *   temp_map[2] = { array 1, index 0 }
   2743     *   temp_map[3] = { array 1, index 1 }
   2744     *   temp_map[4] = { array 1, index 2 }
   2745     *   temp_map[5] = { array 2, index 0 }
   2746     *   temp_map[6] = { array 2, index 1 }
   2747     *   temp_map[7] = { array 2, index 2 }
   2748     *   temp_map[8] = { array 0, index 2 }
   2749     *   temp_map[9] = { array 0, index 3 }
   2750     *
   2751     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
   2752     * temps numbered 0..3
   2753     *
   2754     * Any time we emit a temporary register index, we'll have to use the
   2755     * temp_map[] table to convert the TGSI index to the VGPU10 index.
   2756     *
   2757     * Finally, we recompute the total_temps value here.
   2758     */
   2759    reg = 0;
   2760    for (i = 0; i < total_temps; i++) {
   2761       if (emit->temp_map[i].arrayId == 0) {
   2762          emit->temp_map[i].index = reg++;
   2763       }
   2764    }
   2765 
   2766    if (0) {
   2767       debug_printf("total_temps %u\n", total_temps);
   2768       for (i = 0; i < total_temps; i++) {
   2769          debug_printf("temp %u ->  array %u  index %u\n",
   2770                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
   2771       }
   2772    }
   2773 
   2774    total_temps = reg;
   2775 
   2776    /* Emit declaration of ordinary temp registers */
   2777    if (total_temps > 0) {
   2778       VGPU10OpcodeToken0 opcode0;
   2779 
   2780       opcode0.value = 0;
   2781       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
   2782 
   2783       begin_emit_instruction(emit);
   2784       emit_dword(emit, opcode0.value);
   2785       emit_dword(emit, total_temps);
   2786       end_emit_instruction(emit);
   2787    }
   2788 
   2789    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
   2790     * it's unused.
   2791     */
   2792    for (i = 1; i < emit->num_temp_arrays; i++) {
   2793       unsigned num_temps = emit->temp_arrays[i].size;
   2794 
   2795       if (num_temps > 0) {
   2796          VGPU10OpcodeToken0 opcode0;
   2797 
   2798          opcode0.value = 0;
   2799          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
   2800 
   2801          begin_emit_instruction(emit);
   2802          emit_dword(emit, opcode0.value);
   2803          emit_dword(emit, i); /* which array */
   2804          emit_dword(emit, num_temps);
   2805          emit_dword(emit, 4); /* num components */
   2806          end_emit_instruction(emit);
   2807 
   2808          total_temps += num_temps;
   2809       }
   2810    }
   2811 
   2812    /* Check that the grand total of all regular and indexed temps is
   2813     * under the limit.
   2814     */
   2815    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
   2816 
   2817    return TRUE;
   2818 }
   2819 
   2820 
   2821 static boolean
   2822 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
   2823 {
   2824    VGPU10OpcodeToken0 opcode0;
   2825    VGPU10OperandToken0 operand0;
   2826    unsigned total_consts, i;
   2827 
   2828    opcode0.value = 0;
   2829    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   2830    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   2831    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
   2832 
   2833    operand0.value = 0;
   2834    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   2835    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   2836    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2837    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2838    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   2839    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   2840    operand0.swizzleX = 0;
   2841    operand0.swizzleY = 1;
   2842    operand0.swizzleZ = 2;
   2843    operand0.swizzleW = 3;
   2844 
   2845    /**
   2846     * Emit declaration for constant buffer [0].  We also allocate
   2847     * room for the extra constants here.
   2848     */
   2849    total_consts = emit->num_shader_consts[0];
   2850 
   2851    /* Now, allocate constant slots for the "extra" constants */
   2852 
   2853    /* Vertex position scale/translation */
   2854    if (emit->vposition.need_prescale) {
   2855       emit->vposition.prescale_scale_index = total_consts++;
   2856       emit->vposition.prescale_trans_index = total_consts++;
   2857    }
   2858 
   2859    if (emit->unit == PIPE_SHADER_VERTEX) {
   2860       if (emit->key.vs.undo_viewport) {
   2861          emit->vs.viewport_index = total_consts++;
   2862       }
   2863    }
   2864 
   2865    /* user-defined clip planes */
   2866    if (emit->key.clip_plane_enable) {
   2867       unsigned n = util_bitcount(emit->key.clip_plane_enable);
   2868       assert(emit->unit == PIPE_SHADER_VERTEX ||
   2869              emit->unit == PIPE_SHADER_GEOMETRY);
   2870       for (i = 0; i < n; i++) {
   2871          emit->clip_plane_const[i] = total_consts++;
   2872       }
   2873    }
   2874 
   2875    /* Texcoord scale factors for RECT textures */
   2876    {
   2877       for (i = 0; i < emit->num_samplers; i++) {
   2878          if (emit->key.tex[i].unnormalized) {
   2879             emit->texcoord_scale_index[i] = total_consts++;
   2880          }
   2881       }
   2882    }
   2883 
   2884    /* Texture buffer sizes */
   2885    for (i = 0; i < emit->num_samplers; i++) {
   2886       if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
   2887          emit->texture_buffer_size_index[i] = total_consts++;
   2888       }
   2889    }
   2890 
   2891    if (total_consts > 0) {
   2892       begin_emit_instruction(emit);
   2893       emit_dword(emit, opcode0.value);
   2894       emit_dword(emit, operand0.value);
   2895       emit_dword(emit, 0);  /* which const buffer slot */
   2896       emit_dword(emit, total_consts);
   2897       end_emit_instruction(emit);
   2898    }
   2899 
   2900    /* Declare remaining constant buffers (UBOs) */
   2901    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
   2902       if (emit->num_shader_consts[i] > 0) {
   2903          begin_emit_instruction(emit);
   2904          emit_dword(emit, opcode0.value);
   2905          emit_dword(emit, operand0.value);
   2906          emit_dword(emit, i);  /* which const buffer slot */
   2907          emit_dword(emit, emit->num_shader_consts[i]);
   2908          end_emit_instruction(emit);
   2909       }
   2910    }
   2911 
   2912    return TRUE;
   2913 }
   2914 
   2915 
   2916 /**
   2917  * Emit declarations for samplers.
   2918  */
   2919 static boolean
   2920 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
   2921 {
   2922    unsigned i;
   2923 
   2924    for (i = 0; i < emit->num_samplers; i++) {
   2925       VGPU10OpcodeToken0 opcode0;
   2926       VGPU10OperandToken0 operand0;
   2927 
   2928       opcode0.value = 0;
   2929       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
   2930       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
   2931 
   2932       operand0.value = 0;
   2933       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   2934       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   2935       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   2936       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   2937 
   2938       begin_emit_instruction(emit);
   2939       emit_dword(emit, opcode0.value);
   2940       emit_dword(emit, operand0.value);
   2941       emit_dword(emit, i);
   2942       end_emit_instruction(emit);
   2943    }
   2944 
   2945    return TRUE;
   2946 }
   2947 
   2948 
   2949 /**
   2950  * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
   2951  */
   2952 static unsigned
   2953 tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
   2954 {
   2955    switch (target) {
   2956    case TGSI_TEXTURE_BUFFER:
   2957       return VGPU10_RESOURCE_DIMENSION_BUFFER;
   2958    case TGSI_TEXTURE_1D:
   2959       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2960    case TGSI_TEXTURE_2D:
   2961    case TGSI_TEXTURE_RECT:
   2962       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2963    case TGSI_TEXTURE_3D:
   2964       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   2965    case TGSI_TEXTURE_CUBE:
   2966       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   2967    case TGSI_TEXTURE_SHADOW1D:
   2968       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2969    case TGSI_TEXTURE_SHADOW2D:
   2970    case TGSI_TEXTURE_SHADOWRECT:
   2971       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2972    case TGSI_TEXTURE_1D_ARRAY:
   2973    case TGSI_TEXTURE_SHADOW1D_ARRAY:
   2974       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
   2975          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   2976    case TGSI_TEXTURE_2D_ARRAY:
   2977    case TGSI_TEXTURE_SHADOW2D_ARRAY:
   2978       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
   2979          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2980    case TGSI_TEXTURE_SHADOWCUBE:
   2981       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   2982    case TGSI_TEXTURE_2D_MSAA:
   2983       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   2984    case TGSI_TEXTURE_2D_ARRAY_MSAA:
   2985       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
   2986          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   2987    case TGSI_TEXTURE_CUBE_ARRAY:
   2988       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
   2989    default:
   2990       assert(!"Unexpected resource type");
   2991       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   2992    }
   2993 }
   2994 
   2995 
   2996 /**
   2997  * Given a tgsi_return_type, return true iff it is an integer type.
   2998  */
   2999 static boolean
   3000 is_integer_type(enum tgsi_return_type type)
   3001 {
   3002    switch (type) {
   3003       case TGSI_RETURN_TYPE_SINT:
   3004       case TGSI_RETURN_TYPE_UINT:
   3005          return TRUE;
   3006       case TGSI_RETURN_TYPE_FLOAT:
   3007       case TGSI_RETURN_TYPE_UNORM:
   3008       case TGSI_RETURN_TYPE_SNORM:
   3009          return FALSE;
   3010       case TGSI_RETURN_TYPE_COUNT:
   3011       default:
   3012          assert(!"is_integer_type: Unknown tgsi_return_type");
   3013          return FALSE;
   3014    }
   3015 }
   3016 
   3017 
   3018 /**
   3019  * Emit declarations for resources.
   3020  * XXX When we're sure that all TGSI shaders will be generated with
   3021  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
   3022  * rework this code.
   3023  */
   3024 static boolean
   3025 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
   3026 {
   3027    unsigned i;
   3028 
   3029    /* Emit resource decl for each sampler */
   3030    for (i = 0; i < emit->num_samplers; i++) {
   3031       VGPU10OpcodeToken0 opcode0;
   3032       VGPU10OperandToken0 operand0;
   3033       VGPU10ResourceReturnTypeToken return_type;
   3034       VGPU10_RESOURCE_RETURN_TYPE rt;
   3035 
   3036       opcode0.value = 0;
   3037       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
   3038       opcode0.resourceDimension =
   3039          tgsi_texture_to_resource_dimension(emit->sampler_target[i],
   3040                                             emit->key.tex[i].is_array);
   3041       operand0.value = 0;
   3042       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
   3043       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   3044       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   3045       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   3046 
   3047 #if 1
   3048       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
   3049       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
   3050       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
   3051       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
   3052       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
   3053       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
   3054       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
   3055       rt = emit->sampler_return_type[i] + 1;
   3056 #else
   3057       switch (emit->sampler_return_type[i]) {
   3058          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
   3059          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
   3060          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
   3061          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
   3062          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
   3063          case TGSI_RETURN_TYPE_COUNT:
   3064          default:
   3065             rt = VGPU10_RETURN_TYPE_FLOAT;
   3066             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
   3067       }
   3068 #endif
   3069 
   3070       return_type.value = 0;
   3071       return_type.component0 = rt;
   3072       return_type.component1 = rt;
   3073       return_type.component2 = rt;
   3074       return_type.component3 = rt;
   3075 
   3076       begin_emit_instruction(emit);
   3077       emit_dword(emit, opcode0.value);
   3078       emit_dword(emit, operand0.value);
   3079       emit_dword(emit, i);
   3080       emit_dword(emit, return_type.value);
   3081       end_emit_instruction(emit);
   3082    }
   3083 
   3084    return TRUE;
   3085 }
   3086 
   3087 static void
   3088 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
   3089                      unsigned opcode,
   3090                      const struct tgsi_full_dst_register *dst,
   3091                      const struct tgsi_full_src_register *src,
   3092                      boolean saturate)
   3093 {
   3094    begin_emit_instruction(emit);
   3095    emit_opcode(emit, opcode, saturate);
   3096    emit_dst_register(emit, dst);
   3097    emit_src_register(emit, src);
   3098    end_emit_instruction(emit);
   3099 }
   3100 
   3101 static void
   3102 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
   3103                      unsigned opcode,
   3104                      const struct tgsi_full_dst_register *dst,
   3105                      const struct tgsi_full_src_register *src1,
   3106                      const struct tgsi_full_src_register *src2,
   3107                      boolean saturate)
   3108 {
   3109    begin_emit_instruction(emit);
   3110    emit_opcode(emit, opcode, saturate);
   3111    emit_dst_register(emit, dst);
   3112    emit_src_register(emit, src1);
   3113    emit_src_register(emit, src2);
   3114    end_emit_instruction(emit);
   3115 }
   3116 
   3117 static void
   3118 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
   3119                      unsigned opcode,
   3120                      const struct tgsi_full_dst_register *dst,
   3121                      const struct tgsi_full_src_register *src1,
   3122                      const struct tgsi_full_src_register *src2,
   3123                      const struct tgsi_full_src_register *src3,
   3124                      boolean saturate)
   3125 {
   3126    begin_emit_instruction(emit);
   3127    emit_opcode(emit, opcode, saturate);
   3128    emit_dst_register(emit, dst);
   3129    emit_src_register(emit, src1);
   3130    emit_src_register(emit, src2);
   3131    emit_src_register(emit, src3);
   3132    end_emit_instruction(emit);
   3133 }
   3134 
   3135 /**
   3136  * Emit the actual clip distance instructions to be used for clipping
   3137  * by copying the clip distance from the temporary registers to the
   3138  * CLIPDIST registers written with the enabled planes mask.
   3139  * Also copy the clip distance from the temporary to the clip distance
   3140  * shadow copy register which will be referenced by the input shader
   3141  */
   3142 static void
   3143 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
   3144 {
   3145    struct tgsi_full_src_register tmp_clip_dist_src;
   3146    struct tgsi_full_dst_register clip_dist_dst;
   3147 
   3148    unsigned i;
   3149    unsigned clip_plane_enable = emit->key.clip_plane_enable;
   3150    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   3151    int num_written_clipdist = emit->info.num_written_clipdistance;
   3152 
   3153    assert(emit->clip_dist_out_index != INVALID_INDEX);
   3154    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
   3155 
   3156    /**
   3157     * Temporary reset the temporary clip dist register index so
   3158     * that the copy to the real clip dist register will not
   3159     * attempt to copy to the temporary register again
   3160     */
   3161    emit->clip_dist_tmp_index = INVALID_INDEX;
   3162 
   3163    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
   3164 
   3165       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
   3166 
   3167       /**
   3168        * copy to the shadow copy for use by varying variable and
   3169        * stream output. All clip distances
   3170        * will be written regardless of the enabled clipping planes.
   3171        */
   3172       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
   3173                                    emit->clip_dist_so_index + i);
   3174 
   3175       /* MOV clip_dist_so, tmp_clip_dist */
   3176       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
   3177                            &tmp_clip_dist_src, FALSE);
   3178 
   3179       /**
   3180        * copy those clip distances to enabled clipping planes
   3181        * to CLIPDIST registers for clipping
   3182        */
   3183       if (clip_plane_enable & 0xf) {
   3184          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
   3185                                       emit->clip_dist_out_index + i);
   3186          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
   3187 
   3188          /* MOV CLIPDIST, tmp_clip_dist */
   3189          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
   3190                               &tmp_clip_dist_src, FALSE);
   3191       }
   3192       /* four clip planes per clip register */
   3193       clip_plane_enable >>= 4;
   3194    }
   3195    /**
   3196     * set the temporary clip dist register index back to the
   3197     * temporary index for the next vertex
   3198     */
   3199    emit->clip_dist_tmp_index = clip_dist_tmp_index;
   3200 }
   3201 
   3202 /* Declare clip distance output registers for user-defined clip planes
   3203  * or the TGSI_CLIPVERTEX output.
   3204  */
   3205 static void
   3206 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
   3207 {
   3208    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   3209    unsigned index = emit->num_outputs;
   3210    unsigned plane_mask;
   3211 
   3212    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3213           emit->unit == PIPE_SHADER_GEOMETRY);
   3214    assert(num_clip_planes <= 8);
   3215 
   3216    if (emit->clip_mode != CLIP_LEGACY &&
   3217        emit->clip_mode != CLIP_VERTEX) {
   3218       return;
   3219    }
   3220 
   3221    if (num_clip_planes == 0)
   3222       return;
   3223 
   3224    /* Declare one or two clip output registers.  The number of components
   3225     * in the mask reflects the number of clip planes.  For example, if 5
   3226     * clip planes are needed, we'll declare outputs similar to:
   3227     * dcl_output_siv o2.xyzw, clip_distance
   3228     * dcl_output_siv o3.x, clip_distance
   3229     */
   3230    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
   3231 
   3232    plane_mask = (1 << num_clip_planes) - 1;
   3233    if (plane_mask & 0xf) {
   3234       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   3235       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
   3236                               VGPU10_NAME_CLIP_DISTANCE, cmask);
   3237       emit->num_outputs++;
   3238    }
   3239    if (plane_mask & 0xf0) {
   3240       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   3241       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
   3242                               VGPU10_NAME_CLIP_DISTANCE, cmask);
   3243       emit->num_outputs++;
   3244    }
   3245 }
   3246 
   3247 
   3248 /**
   3249  * Emit the instructions for writing to the clip distance registers
   3250  * to handle legacy/automatic clip planes.
   3251  * For each clip plane, the distance is the dot product of the vertex
   3252  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
   3253  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
   3254  * output registers already declared.
   3255  */
   3256 static void
   3257 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
   3258                              unsigned vpos_tmp_index)
   3259 {
   3260    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   3261 
   3262    assert(emit->clip_mode == CLIP_LEGACY);
   3263    assert(num_clip_planes <= 8);
   3264 
   3265    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3266           emit->unit == PIPE_SHADER_GEOMETRY);
   3267 
   3268    for (i = 0; i < num_clip_planes; i++) {
   3269       struct tgsi_full_dst_register dst;
   3270       struct tgsi_full_src_register plane_src, vpos_src;
   3271       unsigned reg_index = emit->clip_dist_out_index + i / 4;
   3272       unsigned comp = i % 4;
   3273       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
   3274 
   3275       /* create dst, src regs */
   3276       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
   3277       dst = writemask_dst(&dst, writemask);
   3278 
   3279       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
   3280       vpos_src = make_src_temp_reg(vpos_tmp_index);
   3281 
   3282       /* DP4 clip_dist, plane, vpos */
   3283       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
   3284                            &plane_src, &vpos_src, FALSE);
   3285    }
   3286 }
   3287 
   3288 
   3289 /**
   3290  * Emit the instructions for computing the clip distance results from
   3291  * the clip vertex temporary.
   3292  * For each clip plane, the distance is the dot product of the clip vertex
   3293  * position (found in a temp reg) and the clip plane coefficients.
   3294  */
   3295 static void
   3296 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
   3297 {
   3298    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   3299    unsigned i;
   3300    struct tgsi_full_dst_register dst;
   3301    struct tgsi_full_src_register clipvert_src;
   3302    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
   3303 
   3304    assert(emit->unit == PIPE_SHADER_VERTEX ||
   3305           emit->unit == PIPE_SHADER_GEOMETRY);
   3306 
   3307    assert(emit->clip_mode == CLIP_VERTEX);
   3308 
   3309    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
   3310 
   3311    for (i = 0; i < num_clip; i++) {
   3312       struct tgsi_full_src_register plane_src;
   3313       unsigned reg_index = emit->clip_dist_out_index + i / 4;
   3314       unsigned comp = i % 4;
   3315       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
   3316 
   3317       /* create dst, src regs */
   3318       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
   3319       dst = writemask_dst(&dst, writemask);
   3320 
   3321       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
   3322 
   3323       /* DP4 clip_dist, plane, vpos */
   3324       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
   3325                            &plane_src, &clipvert_src, FALSE);
   3326    }
   3327 
   3328    /* copy temporary clip vertex register to the clip vertex register */
   3329 
   3330    assert(emit->clip_vertex_out_index != INVALID_INDEX);
   3331 
   3332    /**
   3333     * temporary reset the temporary clip vertex register index so
   3334     * that copy to the clip vertex register will not attempt
   3335     * to copy to the temporary register again
   3336     */
   3337    emit->clip_vertex_tmp_index = INVALID_INDEX;
   3338 
   3339    /* MOV clip_vertex, clip_vertex_tmp */
   3340    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   3341    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   3342                         &dst, &clipvert_src, FALSE);
   3343 
   3344    /**
   3345     * set the temporary clip vertex register index back to the
   3346     * temporary index for the next vertex
   3347     */
   3348    emit->clip_vertex_tmp_index = clip_vertex_tmp;
   3349 }
   3350 
   3351 /**
   3352  * Emit code to convert RGBA to BGRA
   3353  */
   3354 static void
   3355 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
   3356                      const struct tgsi_full_dst_register *dst,
   3357                      const struct tgsi_full_src_register *src)
   3358 {
   3359    struct tgsi_full_src_register bgra_src =
   3360       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
   3361 
   3362    begin_emit_instruction(emit);
   3363    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   3364    emit_dst_register(emit, dst);
   3365    emit_src_register(emit, &bgra_src);
   3366    end_emit_instruction(emit);
   3367 }
   3368 
   3369 
   3370 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
   3371 static void
   3372 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
   3373                     const struct tgsi_full_dst_register *dst,
   3374                     const struct tgsi_full_src_register *src)
   3375 {
   3376    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
   3377    struct tgsi_full_src_register two =
   3378       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
   3379    struct tgsi_full_src_register neg_two =
   3380       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   3381 
   3382    unsigned val_tmp = get_temp_index(emit);
   3383    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
   3384    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
   3385 
   3386    unsigned bias_tmp = get_temp_index(emit);
   3387    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
   3388    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
   3389 
   3390    /* val = src * 2.0 */
   3391    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
   3392                         src, &two, FALSE);
   3393 
   3394    /* bias = src > 0.5 */
   3395    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
   3396                         src, &half, FALSE);
   3397 
   3398    /* bias = bias & -2.0 */
   3399    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
   3400                         &bias_src, &neg_two, FALSE);
   3401 
   3402    /* dst = val + bias */
   3403    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
   3404                         &val_src, &bias_src, FALSE);
   3405 
   3406    free_temp_indexes(emit);
   3407 }
   3408 
   3409 
   3410 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
   3411 static void
   3412 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
   3413                       const struct tgsi_full_dst_register *dst,
   3414                       const struct tgsi_full_src_register *src)
   3415 {
   3416    struct tgsi_full_src_register scale =
   3417       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
   3418 
   3419    /* dst = src * scale */
   3420    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
   3421 }
   3422 
   3423 
   3424 /** Convert from R32_UINT to 10_10_10_2_sscaled */
   3425 static void
   3426 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
   3427                       const struct tgsi_full_dst_register *dst,
   3428                       const struct tgsi_full_src_register *src)
   3429 {
   3430    struct tgsi_full_src_register lshift =
   3431       make_immediate_reg_int4(emit, 22, 12, 2, 0);
   3432    struct tgsi_full_src_register rshift =
   3433       make_immediate_reg_int4(emit, 22, 22, 22, 30);
   3434 
   3435    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
   3436 
   3437    unsigned tmp = get_temp_index(emit);
   3438    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3439    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3440 
   3441    /*
   3442     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
   3443     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
   3444     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
   3445     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
   3446     * dst = i_to_f(r,g,b,a);     # convert to float
   3447     */
   3448    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
   3449                         &src_xxxx, &lshift, FALSE);
   3450    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
   3451                         &tmp_src, &rshift, FALSE);
   3452    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
   3453 
   3454    free_temp_indexes(emit);
   3455 }
   3456 
   3457 
   3458 /**
   3459  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
   3460  */
   3461 static boolean
   3462 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
   3463               const struct tgsi_full_instruction *inst)
   3464 {
   3465    unsigned index = inst->Dst[0].Register.Index;
   3466    struct tgsi_full_dst_register dst;
   3467    unsigned opcode;
   3468 
   3469    assert(index < MAX_VGPU10_ADDR_REGS);
   3470    dst = make_dst_temp_reg(emit->address_reg_index[index]);
   3471 
   3472    /* ARL dst, s0
   3473     * Translates into:
   3474     * FTOI address_tmp, s0
   3475     *
   3476     * UARL dst, s0
   3477     * Translates into:
   3478     * MOV address_tmp, s0
   3479     */
   3480    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
   3481       opcode = VGPU10_OPCODE_FTOI;
   3482    else
   3483       opcode = VGPU10_OPCODE_MOV;
   3484 
   3485    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
   3486 
   3487    return TRUE;
   3488 }
   3489 
   3490 
   3491 /**
   3492  * Emit code for TGSI_OPCODE_CAL instruction.
   3493  */
   3494 static boolean
   3495 emit_cal(struct svga_shader_emitter_v10 *emit,
   3496          const struct tgsi_full_instruction *inst)
   3497 {
   3498    unsigned label = inst->Label.Label;
   3499    VGPU10OperandToken0 operand;
   3500    operand.value = 0;
   3501    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
   3502 
   3503    begin_emit_instruction(emit);
   3504    emit_dword(emit, operand.value);
   3505    emit_dword(emit, label);
   3506    end_emit_instruction(emit);
   3507 
   3508    return TRUE;
   3509 }
   3510 
   3511 
   3512 /**
   3513  * Emit code for TGSI_OPCODE_IABS instruction.
   3514  */
   3515 static boolean
   3516 emit_iabs(struct svga_shader_emitter_v10 *emit,
   3517           const struct tgsi_full_instruction *inst)
   3518 {
   3519    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
   3520     * dst.y = (src0.y < 0) ? -src0.y : src0.y
   3521     * dst.z = (src0.z < 0) ? -src0.z : src0.z
   3522     * dst.w = (src0.w < 0) ? -src0.w : src0.w
   3523     *
   3524     * Translates into
   3525     *   IMAX dst, src, neg(src)
   3526     */
   3527    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
   3528    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
   3529                         &inst->Src[0], &neg_src, FALSE);
   3530 
   3531    return TRUE;
   3532 }
   3533 
   3534 
   3535 /**
   3536  * Emit code for TGSI_OPCODE_CMP instruction.
   3537  */
   3538 static boolean
   3539 emit_cmp(struct svga_shader_emitter_v10 *emit,
   3540          const struct tgsi_full_instruction *inst)
   3541 {
   3542    /* dst.x = (src0.x < 0) ? src1.x : src2.x
   3543     * dst.y = (src0.y < 0) ? src1.y : src2.y
   3544     * dst.z = (src0.z < 0) ? src1.z : src2.z
   3545     * dst.w = (src0.w < 0) ? src1.w : src2.w
   3546     *
   3547     * Translates into
   3548     *   LT tmp, src0, 0.0
   3549     *   MOVC dst, tmp, src1, src2
   3550     */
   3551    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3552    unsigned tmp = get_temp_index(emit);
   3553    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3554    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3555 
   3556    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
   3557                         &inst->Src[0], &zero, FALSE);
   3558    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
   3559                         &tmp_src, &inst->Src[1], &inst->Src[2],
   3560                         inst->Instruction.Saturate);
   3561 
   3562    free_temp_indexes(emit);
   3563 
   3564    return TRUE;
   3565 }
   3566 
   3567 
   3568 /**
   3569  * Emit code for TGSI_OPCODE_DP2A instruction.
   3570  */
   3571 static boolean
   3572 emit_dp2a(struct svga_shader_emitter_v10 *emit,
   3573           const struct tgsi_full_instruction *inst)
   3574 {
   3575    /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
   3576     * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
   3577     * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
   3578     * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
   3579     * Translate into
   3580     *   MAD tmp.x, s0.y, s1.y, s2.x
   3581     *   MAD tmp.x, s0.x, s1.x, tmp.x
   3582     *   MOV dst.xyzw, tmp.xxxx
   3583     */
   3584    unsigned tmp = get_temp_index(emit);
   3585    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3586    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3587 
   3588    struct tgsi_full_src_register tmp_src_xxxx =
   3589       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   3590    struct tgsi_full_dst_register tmp_dst_x =
   3591       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   3592 
   3593    struct tgsi_full_src_register src0_xxxx =
   3594       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   3595    struct tgsi_full_src_register src0_yyyy =
   3596       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   3597    struct tgsi_full_src_register src1_xxxx =
   3598       scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   3599    struct tgsi_full_src_register src1_yyyy =
   3600       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   3601    struct tgsi_full_src_register src2_xxxx =
   3602       scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
   3603 
   3604    emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
   3605                         &src1_yyyy, &src2_xxxx, FALSE);
   3606    emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
   3607                         &src1_xxxx, &tmp_src_xxxx, FALSE);
   3608    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   3609                         &tmp_src_xxxx, inst->Instruction.Saturate);
   3610 
   3611    free_temp_indexes(emit);
   3612 
   3613    return TRUE;
   3614 }
   3615 
   3616 
   3617 /**
   3618  * Emit code for TGSI_OPCODE_DPH instruction.
   3619  */
   3620 static boolean
   3621 emit_dph(struct svga_shader_emitter_v10 *emit,
   3622          const struct tgsi_full_instruction *inst)
   3623 {
   3624    /*
   3625     * DP3 tmp, s0, s1
   3626     * ADD dst, tmp, s1.wwww
   3627     */
   3628 
   3629    struct tgsi_full_src_register s1_wwww =
   3630       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
   3631                   TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
   3632 
   3633    unsigned tmp = get_temp_index(emit);
   3634    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3635    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3636 
   3637    /* DP3 tmp, s0, s1 */
   3638    emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
   3639                         &inst->Src[1], FALSE);
   3640 
   3641    /* ADD dst, tmp, s1.wwww */
   3642    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
   3643                         &s1_wwww, inst->Instruction.Saturate);
   3644 
   3645    free_temp_indexes(emit);
   3646 
   3647    return TRUE;
   3648 }
   3649 
   3650 
   3651 /**
   3652  * Emit code for TGSI_OPCODE_DST instruction.
   3653  */
   3654 static boolean
   3655 emit_dst(struct svga_shader_emitter_v10 *emit,
   3656          const struct tgsi_full_instruction *inst)
   3657 {
   3658    /*
   3659     * dst.x = 1
   3660     * dst.y = src0.y * src1.y
   3661     * dst.z = src0.z
   3662     * dst.w = src1.w
   3663     */
   3664 
   3665    struct tgsi_full_src_register s0_yyyy =
   3666       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   3667    struct tgsi_full_src_register s0_zzzz =
   3668       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   3669    struct tgsi_full_src_register s1_yyyy =
   3670       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   3671    struct tgsi_full_src_register s1_wwww =
   3672       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
   3673 
   3674    /*
   3675     * If dst and either src0 and src1 are the same we need
   3676     * to create a temporary for it and insert a extra move.
   3677     */
   3678    unsigned tmp_move = get_temp_index(emit);
   3679    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   3680    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   3681 
   3682    /* MOV dst.x, 1.0 */
   3683    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   3684       struct tgsi_full_dst_register dst_x =
   3685          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   3686       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3687 
   3688       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   3689    }
   3690 
   3691    /* MUL dst.y, s0.y, s1.y */
   3692    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   3693       struct tgsi_full_dst_register dst_y =
   3694          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   3695 
   3696       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
   3697                            &s1_yyyy, inst->Instruction.Saturate);
   3698    }
   3699 
   3700    /* MOV dst.z, s0.z */
   3701    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   3702       struct tgsi_full_dst_register dst_z =
   3703          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   3704 
   3705       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
   3706                            inst->Instruction.Saturate);
   3707   }
   3708 
   3709    /* MOV dst.w, s1.w */
   3710    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   3711       struct tgsi_full_dst_register dst_w =
   3712          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   3713 
   3714       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
   3715                            inst->Instruction.Saturate);
   3716    }
   3717 
   3718    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   3719                         FALSE);
   3720    free_temp_indexes(emit);
   3721 
   3722    return TRUE;
   3723 }
   3724 
   3725 
   3726 
   3727 /**
   3728  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
   3729  */
   3730 static boolean
   3731 emit_endprim(struct svga_shader_emitter_v10 *emit,
   3732              const struct tgsi_full_instruction *inst)
   3733 {
   3734    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   3735 
   3736    /* We can't use emit_simple() because the TGSI instruction has one
   3737     * operand (vertex stream number) which we must ignore for VGPU10.
   3738     */
   3739    begin_emit_instruction(emit);
   3740    emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   3741    end_emit_instruction(emit);
   3742    return TRUE;
   3743 }
   3744 
   3745 
   3746 /**
   3747  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
   3748  */
   3749 static boolean
   3750 emit_ex2(struct svga_shader_emitter_v10 *emit,
   3751          const struct tgsi_full_instruction *inst)
   3752 {
   3753    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
   3754     * while VGPU10 computes four values.
   3755     *
   3756     * dst = EX2(src):
   3757     *   dst.xyzw = 2.0 ^ src.x
   3758     */
   3759 
   3760    struct tgsi_full_src_register src_xxxx =
   3761       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   3762                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   3763 
   3764    /* EXP tmp, s0.xxxx */
   3765    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
   3766                         inst->Instruction.Saturate);
   3767 
   3768    return TRUE;
   3769 }
   3770 
   3771 
   3772 /**
   3773  * Emit code for TGSI_OPCODE_EXP instruction.
   3774  */
   3775 static boolean
   3776 emit_exp(struct svga_shader_emitter_v10 *emit,
   3777          const struct tgsi_full_instruction *inst)
   3778 {
   3779    /*
   3780     * dst.x = 2 ^ floor(s0.x)
   3781     * dst.y = s0.x - floor(s0.x)
   3782     * dst.z = 2 ^ s0.x
   3783     * dst.w = 1.0
   3784     */
   3785 
   3786    struct tgsi_full_src_register src_xxxx =
   3787       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   3788    unsigned tmp = get_temp_index(emit);
   3789    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3790    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3791 
   3792    /*
   3793     * If dst and src are the same we need to create
   3794     * a temporary for it and insert a extra move.
   3795     */
   3796    unsigned tmp_move = get_temp_index(emit);
   3797    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   3798    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   3799 
   3800    /* only use X component of temp reg */
   3801    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   3802    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   3803 
   3804    /* ROUND_NI tmp.x, s0.x */
   3805    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
   3806                         &src_xxxx, FALSE); /* round to -infinity */
   3807 
   3808    /* EXP dst.x, tmp.x */
   3809    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   3810       struct tgsi_full_dst_register dst_x =
   3811          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   3812 
   3813       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
   3814                            inst->Instruction.Saturate);
   3815    }
   3816 
   3817    /* ADD dst.y, s0.x, -tmp */
   3818    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   3819       struct tgsi_full_dst_register dst_y =
   3820          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   3821       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
   3822 
   3823       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
   3824                            &neg_tmp_src, inst->Instruction.Saturate);
   3825    }
   3826 
   3827    /* EXP dst.z, s0.x */
   3828    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   3829       struct tgsi_full_dst_register dst_z =
   3830          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   3831 
   3832       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
   3833                            inst->Instruction.Saturate);
   3834    }
   3835 
   3836    /* MOV dst.w, 1.0 */
   3837    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   3838       struct tgsi_full_dst_register dst_w =
   3839          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   3840       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3841 
   3842       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
   3843                            FALSE);
   3844    }
   3845 
   3846    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   3847                         FALSE);
   3848 
   3849    free_temp_indexes(emit);
   3850 
   3851    return TRUE;
   3852 }
   3853 
   3854 
   3855 /**
   3856  * Emit code for TGSI_OPCODE_IF instruction.
   3857  */
   3858 static boolean
   3859 emit_if(struct svga_shader_emitter_v10 *emit,
   3860         const struct tgsi_full_instruction *inst)
   3861 {
   3862    VGPU10OpcodeToken0 opcode0;
   3863 
   3864    /* The src register should be a scalar */
   3865    assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
   3866           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
   3867           inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
   3868 
   3869    /* The only special thing here is that we need to set the
   3870     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
   3871     * src.x is non-zero.
   3872     */
   3873    opcode0.value = 0;
   3874    opcode0.opcodeType = VGPU10_OPCODE_IF;
   3875    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
   3876 
   3877    begin_emit_instruction(emit);
   3878    emit_dword(emit, opcode0.value);
   3879    emit_src_register(emit, &inst->Src[0]);
   3880    end_emit_instruction(emit);
   3881 
   3882    return TRUE;
   3883 }
   3884 
   3885 
   3886 /**
   3887  * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
   3888  * the register components are negative).
   3889  */
   3890 static boolean
   3891 emit_kill_if(struct svga_shader_emitter_v10 *emit,
   3892              const struct tgsi_full_instruction *inst)
   3893 {
   3894    unsigned tmp = get_temp_index(emit);
   3895    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   3896    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   3897 
   3898    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3899 
   3900    struct tgsi_full_dst_register tmp_dst_x =
   3901       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   3902    struct tgsi_full_src_register tmp_src_xxxx =
   3903       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   3904 
   3905    /* tmp = src[0] < 0.0 */
   3906    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
   3907                         &zero, FALSE);
   3908 
   3909    if (!same_swizzle_terms(&inst->Src[0])) {
   3910       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
   3911        * logically OR the swizzle terms.  Most uses of KILL_IF only
   3912        * test one channel so it's good to avoid these extra steps.
   3913        */
   3914       struct tgsi_full_src_register tmp_src_yyyy =
   3915          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
   3916       struct tgsi_full_src_register tmp_src_zzzz =
   3917          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
   3918       struct tgsi_full_src_register tmp_src_wwww =
   3919          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
   3920 
   3921       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3922                            &tmp_src_yyyy, FALSE);
   3923       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3924                            &tmp_src_zzzz, FALSE);
   3925       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
   3926                            &tmp_src_wwww, FALSE);
   3927    }
   3928 
   3929    begin_emit_instruction(emit);
   3930    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
   3931    emit_src_register(emit, &tmp_src_xxxx);
   3932    end_emit_instruction(emit);
   3933 
   3934    free_temp_indexes(emit);
   3935 
   3936    return TRUE;
   3937 }
   3938 
   3939 
   3940 /**
   3941  * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
   3942  */
   3943 static boolean
   3944 emit_kill(struct svga_shader_emitter_v10 *emit,
   3945           const struct tgsi_full_instruction *inst)
   3946 {
   3947    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   3948 
   3949    /* DISCARD if 0.0 is zero */
   3950    begin_emit_instruction(emit);
   3951    emit_discard_opcode(emit, FALSE);
   3952    emit_src_register(emit, &zero);
   3953    end_emit_instruction(emit);
   3954 
   3955    return TRUE;
   3956 }
   3957 
   3958 
   3959 /**
   3960  * Emit code for TGSI_OPCODE_LG2 instruction.
   3961  */
   3962 static boolean
   3963 emit_lg2(struct svga_shader_emitter_v10 *emit,
   3964          const struct tgsi_full_instruction *inst)
   3965 {
   3966    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
   3967     * while VGPU10 computes four values.
   3968     *
   3969     * dst = LG2(src):
   3970     *   dst.xyzw = log2(src.x)
   3971     */
   3972 
   3973    struct tgsi_full_src_register src_xxxx =
   3974       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   3975                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   3976 
   3977    /* LOG tmp, s0.xxxx */
   3978    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
   3979                         inst->Instruction.Saturate);
   3980 
   3981    return TRUE;
   3982 }
   3983 
   3984 
   3985 /**
   3986  * Emit code for TGSI_OPCODE_LIT instruction.
   3987  */
   3988 static boolean
   3989 emit_lit(struct svga_shader_emitter_v10 *emit,
   3990          const struct tgsi_full_instruction *inst)
   3991 {
   3992    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   3993 
   3994    /*
   3995     * If dst and src are the same we need to create
   3996     * a temporary for it and insert a extra move.
   3997     */
   3998    unsigned tmp_move = get_temp_index(emit);
   3999    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   4000    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
   4001 
   4002    /*
   4003     * dst.x = 1
   4004     * dst.y = max(src.x, 0)
   4005     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
   4006     * dst.w = 1
   4007     */
   4008 
   4009    /* MOV dst.x, 1.0 */
   4010    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   4011       struct tgsi_full_dst_register dst_x =
   4012          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
   4013       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   4014    }
   4015 
   4016    /* MOV dst.w, 1.0 */
   4017    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   4018       struct tgsi_full_dst_register dst_w =
   4019          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
   4020       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   4021    }
   4022 
   4023    /* MAX dst.y, src.x, 0.0 */
   4024    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   4025       struct tgsi_full_dst_register dst_y =
   4026          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
   4027       struct tgsi_full_src_register zero =
   4028          make_immediate_reg_float(emit, 0.0f);
   4029       struct tgsi_full_src_register src_xxxx =
   4030          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   4031                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   4032 
   4033       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
   4034                            &zero, inst->Instruction.Saturate);
   4035    }
   4036 
   4037    /*
   4038     * tmp1 = clamp(src.w, -128, 128);
   4039     *   MAX tmp1, src.w, -128
   4040     *   MIN tmp1, tmp1, 128
   4041     *
   4042     * tmp2 = max(tmp2, 0);
   4043     *   MAX tmp2, src.y, 0
   4044     *
   4045     * tmp1 = pow(tmp2, tmp1);
   4046     *   LOG tmp2, tmp2
   4047     *   MUL tmp1, tmp2, tmp1
   4048     *   EXP tmp1, tmp1
   4049     *
   4050     * tmp1 = (src.w == 0) ? 1 : tmp1;
   4051     *   EQ tmp2, 0, src.w
   4052     *   MOVC tmp1, tmp2, 1.0, tmp1
   4053     *
   4054     * dst.z = (0 < src.x) ? tmp1 : 0;
   4055     *   LT tmp2, 0, src.x
   4056     *   MOVC dst.z, tmp2, tmp1, 0.0
   4057     */
   4058    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   4059       struct tgsi_full_dst_register dst_z =
   4060          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
   4061 
   4062       unsigned tmp1 = get_temp_index(emit);
   4063       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4064       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4065       unsigned tmp2 = get_temp_index(emit);
   4066       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4067       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4068 
   4069       struct tgsi_full_src_register src_xxxx =
   4070          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   4071       struct tgsi_full_src_register src_yyyy =
   4072          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   4073       struct tgsi_full_src_register src_wwww =
   4074          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   4075 
   4076       struct tgsi_full_src_register zero =
   4077          make_immediate_reg_float(emit, 0.0f);
   4078       struct tgsi_full_src_register lowerbound =
   4079          make_immediate_reg_float(emit, -128.0f);
   4080       struct tgsi_full_src_register upperbound =
   4081          make_immediate_reg_float(emit, 128.0f);
   4082 
   4083       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
   4084                            &lowerbound, FALSE);
   4085       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
   4086                            &upperbound, FALSE);
   4087       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
   4088                            &zero, FALSE);
   4089 
   4090       /* POW tmp1, tmp2, tmp1 */
   4091       /* LOG tmp2, tmp2 */
   4092       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
   4093                            FALSE);
   4094 
   4095       /* MUL tmp1, tmp2, tmp1 */
   4096       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
   4097                            &tmp1_src, FALSE);
   4098 
   4099       /* EXP tmp1, tmp1 */
   4100       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
   4101                            FALSE);
   4102 
   4103       /* EQ tmp2, 0, src.w */
   4104       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
   4105                            &src_wwww, FALSE);
   4106       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
   4107       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
   4108                            &tmp2_src, &one, &tmp1_src, FALSE);
   4109 
   4110       /* LT tmp2, 0, src.x */
   4111       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
   4112                            &src_xxxx, FALSE);
   4113       /* MOVC dst.z, tmp2, tmp1, 0.0 */
   4114       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
   4115                            &tmp2_src, &tmp1_src, &zero, FALSE);
   4116    }
   4117 
   4118    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
   4119                         FALSE);
   4120    free_temp_indexes(emit);
   4121 
   4122    return TRUE;
   4123 }
   4124 
   4125 
   4126 /**
   4127  * Emit code for TGSI_OPCODE_LOG instruction.
   4128  */
   4129 static boolean
   4130 emit_log(struct svga_shader_emitter_v10 *emit,
   4131          const struct tgsi_full_instruction *inst)
   4132 {
   4133    /*
   4134     * dst.x = floor(lg2(abs(s0.x)))
   4135     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
   4136     * dst.z = lg2(abs(s0.x))
   4137     * dst.w = 1.0
   4138     */
   4139 
   4140    struct tgsi_full_src_register src_xxxx =
   4141       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   4142    unsigned tmp = get_temp_index(emit);
   4143    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4144    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4145    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
   4146 
   4147    /* only use X component of temp reg */
   4148    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4149    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4150 
   4151    /* LOG tmp.x, abs(s0.x) */
   4152    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
   4153       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
   4154                           &abs_src_xxxx, FALSE);
   4155    }
   4156 
   4157    /* MOV dst.z, tmp.x */
   4158    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   4159       struct tgsi_full_dst_register dst_z =
   4160          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
   4161 
   4162       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
   4163                            &tmp_src, inst->Instruction.Saturate);
   4164    }
   4165 
   4166    /* FLR tmp.x, tmp.x */
   4167    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
   4168       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
   4169                            &tmp_src, FALSE);
   4170    }
   4171 
   4172    /* MOV dst.x, tmp.x */
   4173    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   4174       struct tgsi_full_dst_register dst_x =
   4175          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
   4176 
   4177       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
   4178                            inst->Instruction.Saturate);
   4179    }
   4180 
   4181    /* EXP tmp.x, tmp.x */
   4182    /* DIV dst.y, abs(s0.x), tmp.x */
   4183    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   4184       struct tgsi_full_dst_register dst_y =
   4185          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
   4186 
   4187       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
   4188                            FALSE);
   4189       emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
   4190                            &tmp_src, inst->Instruction.Saturate);
   4191    }
   4192 
   4193    /* MOV dst.w, 1.0 */
   4194    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   4195       struct tgsi_full_dst_register dst_w =
   4196          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
   4197       struct tgsi_full_src_register one =
   4198          make_immediate_reg_float(emit, 1.0f);
   4199 
   4200       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   4201    }
   4202 
   4203    free_temp_indexes(emit);
   4204 
   4205    return TRUE;
   4206 }
   4207 
   4208 
   4209 /**
   4210  * Emit code for TGSI_OPCODE_LRP instruction.
   4211  */
   4212 static boolean
   4213 emit_lrp(struct svga_shader_emitter_v10 *emit,
   4214          const struct tgsi_full_instruction *inst)
   4215 {
   4216    /* dst = LRP(s0, s1, s2):
   4217     *   dst = s0 * (s1 - s2) + s2
   4218     * Translates into:
   4219     *   SUB tmp, s1, s2;        tmp = s1 - s2
   4220     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
   4221     */
   4222    unsigned tmp = get_temp_index(emit);
   4223    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
   4224    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
   4225    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
   4226 
   4227    /* ADD tmp, s1, -s2 */
   4228    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
   4229                         &inst->Src[1], &neg_src2, FALSE);
   4230 
   4231    /* MAD dst, s1, tmp, s3 */
   4232    emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
   4233                         &inst->Src[0], &src_tmp, &inst->Src[2],
   4234                         inst->Instruction.Saturate);
   4235 
   4236    free_temp_indexes(emit);
   4237 
   4238    return TRUE;
   4239 }
   4240 
   4241 
   4242 /**
   4243  * Emit code for TGSI_OPCODE_POW instruction.
   4244  */
   4245 static boolean
   4246 emit_pow(struct svga_shader_emitter_v10 *emit,
   4247          const struct tgsi_full_instruction *inst)
   4248 {
   4249    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
   4250     * src1.x while VGPU10 computes four values.
   4251     *
   4252     * dst = POW(src0, src1):
   4253     *   dst.xyzw = src0.x ^ src1.x
   4254     */
   4255    unsigned tmp = get_temp_index(emit);
   4256    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4257    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4258    struct tgsi_full_src_register src0_xxxx =
   4259       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   4260                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   4261    struct tgsi_full_src_register src1_xxxx =
   4262       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
   4263                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   4264 
   4265    /* LOG tmp, s0.xxxx */
   4266    emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
   4267                         FALSE);
   4268 
   4269    /* MUL tmp, tmp, s1.xxxx */
   4270    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
   4271                         &src1_xxxx, FALSE);
   4272 
   4273    /* EXP tmp, s0.xxxx */
   4274    emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
   4275                         &tmp_src, inst->Instruction.Saturate);
   4276 
   4277    /* free tmp */
   4278    free_temp_indexes(emit);
   4279 
   4280    return TRUE;
   4281 }
   4282 
   4283 
   4284 /**
   4285  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
   4286  */
   4287 static boolean
   4288 emit_rcp(struct svga_shader_emitter_v10 *emit,
   4289          const struct tgsi_full_instruction *inst)
   4290 {
   4291    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4292 
   4293    unsigned tmp = get_temp_index(emit);
   4294    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4295    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4296 
   4297    struct tgsi_full_dst_register tmp_dst_x =
   4298       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4299    struct tgsi_full_src_register tmp_src_xxxx =
   4300       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4301 
   4302    /* DIV tmp.x, 1.0, s0 */
   4303    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
   4304                         &inst->Src[0], FALSE);
   4305 
   4306    /* MOV dst, tmp.xxxx */
   4307    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4308                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4309 
   4310    free_temp_indexes(emit);
   4311 
   4312    return TRUE;
   4313 }
   4314 
   4315 
   4316 /**
   4317  * Emit code for TGSI_OPCODE_RSQ instruction.
   4318  */
   4319 static boolean
   4320 emit_rsq(struct svga_shader_emitter_v10 *emit,
   4321          const struct tgsi_full_instruction *inst)
   4322 {
   4323    /* dst = RSQ(src):
   4324     *   dst.xyzw = 1 / sqrt(src.x)
   4325     * Translates into:
   4326     *   RSQ tmp, src.x
   4327     *   MOV dst, tmp.xxxx
   4328     */
   4329 
   4330    unsigned tmp = get_temp_index(emit);
   4331    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4332    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4333 
   4334    struct tgsi_full_dst_register tmp_dst_x =
   4335       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4336    struct tgsi_full_src_register tmp_src_xxxx =
   4337       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4338 
   4339    /* RSQ tmp, src.x */
   4340    emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
   4341                         &inst->Src[0], FALSE);
   4342 
   4343    /* MOV dst, tmp.xxxx */
   4344    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4345                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4346 
   4347    /* free tmp */
   4348    free_temp_indexes(emit);
   4349 
   4350    return TRUE;
   4351 }
   4352 
   4353 
   4354 /**
   4355  * Emit code for TGSI_OPCODE_SCS instruction.
   4356  */
   4357 static boolean
   4358 emit_scs(struct svga_shader_emitter_v10 *emit,
   4359          const struct tgsi_full_instruction *inst)
   4360 {
   4361    /* dst.x = cos(src.x)
   4362     * dst.y = sin(src.x)
   4363     * dst.z = 0.0
   4364     * dst.w = 1.0
   4365     */
   4366    struct tgsi_full_dst_register dst_x =
   4367       writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
   4368    struct tgsi_full_dst_register dst_y =
   4369       writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
   4370    struct tgsi_full_dst_register dst_zw =
   4371       writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
   4372 
   4373    struct tgsi_full_src_register zero_one =
   4374       make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
   4375 
   4376    begin_emit_instruction(emit);
   4377    emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
   4378    emit_dst_register(emit, &dst_y);
   4379    emit_dst_register(emit, &dst_x);
   4380    emit_src_register(emit, &inst->Src[0]);
   4381    end_emit_instruction(emit);
   4382 
   4383    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   4384                         &dst_zw, &zero_one, inst->Instruction.Saturate);
   4385 
   4386    return TRUE;
   4387 }
   4388 
   4389 
   4390 /**
   4391  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
   4392  */
   4393 static boolean
   4394 emit_seq(struct svga_shader_emitter_v10 *emit,
   4395          const struct tgsi_full_instruction *inst)
   4396 {
   4397    /* dst = SEQ(s0, s1):
   4398     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
   4399     * Translates into:
   4400     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
   4401     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4402     */
   4403    unsigned tmp = get_temp_index(emit);
   4404    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4405    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4406    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4407    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4408 
   4409    /* EQ tmp, s0, s1 */
   4410    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
   4411                         &inst->Src[1], FALSE);
   4412 
   4413    /* MOVC dst, tmp, one, zero */
   4414    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4415                         &one, &zero, FALSE);
   4416 
   4417    free_temp_indexes(emit);
   4418 
   4419    return TRUE;
   4420 }
   4421 
   4422 
   4423 /**
   4424  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
   4425  */
   4426 static boolean
   4427 emit_sge(struct svga_shader_emitter_v10 *emit,
   4428          const struct tgsi_full_instruction *inst)
   4429 {
   4430    /* dst = SGE(s0, s1):
   4431     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
   4432     * Translates into:
   4433     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
   4434     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4435     */
   4436    unsigned tmp = get_temp_index(emit);
   4437    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4438    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4439    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4440    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4441 
   4442    /* GE tmp, s0, s1 */
   4443    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
   4444                         &inst->Src[1], FALSE);
   4445 
   4446    /* MOVC dst, tmp, one, zero */
   4447    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4448                         &one, &zero, FALSE);
   4449 
   4450    free_temp_indexes(emit);
   4451 
   4452    return TRUE;
   4453 }
   4454 
   4455 
   4456 /**
   4457  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
   4458  */
   4459 static boolean
   4460 emit_sgt(struct svga_shader_emitter_v10 *emit,
   4461          const struct tgsi_full_instruction *inst)
   4462 {
   4463    /* dst = SGT(s0, s1):
   4464     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
   4465     * Translates into:
   4466     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
   4467     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4468     */
   4469    unsigned tmp = get_temp_index(emit);
   4470    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4471    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4472    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4473    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4474 
   4475    /* LT tmp, s1, s0 */
   4476    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
   4477                         &inst->Src[0], FALSE);
   4478 
   4479    /* MOVC dst, tmp, one, zero */
   4480    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4481                         &one, &zero, FALSE);
   4482 
   4483    free_temp_indexes(emit);
   4484 
   4485    return TRUE;
   4486 }
   4487 
   4488 
   4489 /**
   4490  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
   4491  */
   4492 static boolean
   4493 emit_sincos(struct svga_shader_emitter_v10 *emit,
   4494          const struct tgsi_full_instruction *inst)
   4495 {
   4496    unsigned tmp = get_temp_index(emit);
   4497    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4498    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4499 
   4500    struct tgsi_full_src_register tmp_src_xxxx =
   4501       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   4502    struct tgsi_full_dst_register tmp_dst_x =
   4503       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   4504 
   4505    begin_emit_instruction(emit);
   4506    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
   4507 
   4508    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
   4509    {
   4510       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
   4511       emit_null_dst_register(emit);  /* second destination register */
   4512    }
   4513    else {
   4514       emit_null_dst_register(emit);
   4515       emit_dst_register(emit, &tmp_dst_x);
   4516    }
   4517 
   4518    emit_src_register(emit, &inst->Src[0]);
   4519    end_emit_instruction(emit);
   4520 
   4521    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
   4522                         &tmp_src_xxxx, inst->Instruction.Saturate);
   4523 
   4524    free_temp_indexes(emit);
   4525 
   4526    return TRUE;
   4527 }
   4528 
   4529 
   4530 /**
   4531  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
   4532  */
   4533 static boolean
   4534 emit_sle(struct svga_shader_emitter_v10 *emit,
   4535          const struct tgsi_full_instruction *inst)
   4536 {
   4537    /* dst = SLE(s0, s1):
   4538     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
   4539     * Translates into:
   4540     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
   4541     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4542     */
   4543    unsigned tmp = get_temp_index(emit);
   4544    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4545    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4546    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4547    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4548 
   4549    /* GE tmp, s1, s0 */
   4550    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
   4551                         &inst->Src[0], FALSE);
   4552 
   4553    /* MOVC dst, tmp, one, zero */
   4554    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4555                         &one, &zero, FALSE);
   4556 
   4557    free_temp_indexes(emit);
   4558 
   4559    return TRUE;
   4560 }
   4561 
   4562 
   4563 /**
   4564  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
   4565  */
   4566 static boolean
   4567 emit_slt(struct svga_shader_emitter_v10 *emit,
   4568          const struct tgsi_full_instruction *inst)
   4569 {
   4570    /* dst = SLT(s0, s1):
   4571     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
   4572     * Translates into:
   4573     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
   4574     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4575     */
   4576    unsigned tmp = get_temp_index(emit);
   4577    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4578    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4579    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4580    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4581 
   4582    /* LT tmp, s0, s1 */
   4583    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
   4584                         &inst->Src[1], FALSE);
   4585 
   4586    /* MOVC dst, tmp, one, zero */
   4587    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4588                         &one, &zero, FALSE);
   4589 
   4590    free_temp_indexes(emit);
   4591 
   4592    return TRUE;
   4593 }
   4594 
   4595 
   4596 /**
   4597  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
   4598  */
   4599 static boolean
   4600 emit_sne(struct svga_shader_emitter_v10 *emit,
   4601          const struct tgsi_full_instruction *inst)
   4602 {
   4603    /* dst = SNE(s0, s1):
   4604     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
   4605     * Translates into:
   4606     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
   4607     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
   4608     */
   4609    unsigned tmp = get_temp_index(emit);
   4610    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4611    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4612    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4613    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   4614 
   4615    /* NE tmp, s0, s1 */
   4616    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
   4617                         &inst->Src[1], FALSE);
   4618 
   4619    /* MOVC dst, tmp, one, zero */
   4620    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
   4621                         &one, &zero, FALSE);
   4622 
   4623    free_temp_indexes(emit);
   4624 
   4625    return TRUE;
   4626 }
   4627 
   4628 
   4629 /**
   4630  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
   4631  */
   4632 static boolean
   4633 emit_ssg(struct svga_shader_emitter_v10 *emit,
   4634          const struct tgsi_full_instruction *inst)
   4635 {
   4636    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
   4637     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
   4638     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
   4639     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
   4640     * Translates into:
   4641     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
   4642     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
   4643     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
   4644     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
   4645     */
   4646    struct tgsi_full_src_register zero =
   4647       make_immediate_reg_float(emit, 0.0f);
   4648    struct tgsi_full_src_register one =
   4649       make_immediate_reg_float(emit, 1.0f);
   4650    struct tgsi_full_src_register neg_one =
   4651       make_immediate_reg_float(emit, -1.0f);
   4652 
   4653    unsigned tmp1 = get_temp_index(emit);
   4654    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4655    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4656 
   4657    unsigned tmp2 = get_temp_index(emit);
   4658    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4659    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4660 
   4661    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
   4662                         &zero, FALSE);
   4663    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
   4664                         &neg_one, &zero, FALSE);
   4665    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
   4666                         &inst->Src[0], FALSE);
   4667    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
   4668                         &one, &tmp2_src, FALSE);
   4669 
   4670    free_temp_indexes(emit);
   4671 
   4672    return TRUE;
   4673 }
   4674 
   4675 
   4676 /**
   4677  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
   4678  */
   4679 static boolean
   4680 emit_issg(struct svga_shader_emitter_v10 *emit,
   4681           const struct tgsi_full_instruction *inst)
   4682 {
   4683    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
   4684     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
   4685     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
   4686     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
   4687     * Translates into:
   4688     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
   4689     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
   4690     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
   4691     */
   4692    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   4693 
   4694    unsigned tmp1 = get_temp_index(emit);
   4695    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   4696    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   4697 
   4698    unsigned tmp2 = get_temp_index(emit);
   4699    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   4700    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   4701 
   4702    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
   4703 
   4704    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
   4705                         &inst->Src[0], &zero, FALSE);
   4706    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
   4707                         &zero, &inst->Src[0], FALSE);
   4708    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
   4709                         &tmp1_src, &neg_tmp2, FALSE);
   4710 
   4711    free_temp_indexes(emit);
   4712 
   4713    return TRUE;
   4714 }
   4715 
   4716 
   4717 /**
   4718  * Emit a comparison instruction.  The dest register will get
   4719  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
   4720  */
   4721 static void
   4722 emit_comparison(struct svga_shader_emitter_v10 *emit,
   4723                 SVGA3dCmpFunc func,
   4724                 const struct tgsi_full_dst_register *dst,
   4725                 const struct tgsi_full_src_register *src0,
   4726                 const struct tgsi_full_src_register *src1)
   4727 {
   4728    struct tgsi_full_src_register immediate;
   4729    VGPU10OpcodeToken0 opcode0;
   4730    boolean swapSrc = FALSE;
   4731 
   4732    /* Sanity checks for svga vs. gallium enums */
   4733    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
   4734    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
   4735 
   4736    opcode0.value = 0;
   4737 
   4738    switch (func) {
   4739    case SVGA3D_CMP_NEVER:
   4740       immediate = make_immediate_reg_int(emit, 0);
   4741       /* MOV dst, {0} */
   4742       begin_emit_instruction(emit);
   4743       emit_dword(emit, VGPU10_OPCODE_MOV);
   4744       emit_dst_register(emit, dst);
   4745       emit_src_register(emit, &immediate);
   4746       end_emit_instruction(emit);
   4747       return;
   4748    case SVGA3D_CMP_ALWAYS:
   4749       immediate = make_immediate_reg_int(emit, -1);
   4750       /* MOV dst, {-1} */
   4751       begin_emit_instruction(emit);
   4752       emit_dword(emit, VGPU10_OPCODE_MOV);
   4753       emit_dst_register(emit, dst);
   4754       emit_src_register(emit, &immediate);
   4755       end_emit_instruction(emit);
   4756       return;
   4757    case SVGA3D_CMP_LESS:
   4758       opcode0.opcodeType = VGPU10_OPCODE_LT;
   4759       break;
   4760    case SVGA3D_CMP_EQUAL:
   4761       opcode0.opcodeType = VGPU10_OPCODE_EQ;
   4762       break;
   4763    case SVGA3D_CMP_LESSEQUAL:
   4764       opcode0.opcodeType = VGPU10_OPCODE_GE;
   4765       swapSrc = TRUE;
   4766       break;
   4767    case SVGA3D_CMP_GREATER:
   4768       opcode0.opcodeType = VGPU10_OPCODE_LT;
   4769       swapSrc = TRUE;
   4770       break;
   4771    case SVGA3D_CMP_NOTEQUAL:
   4772       opcode0.opcodeType = VGPU10_OPCODE_NE;
   4773       break;
   4774    case SVGA3D_CMP_GREATEREQUAL:
   4775       opcode0.opcodeType = VGPU10_OPCODE_GE;
   4776       break;
   4777    default:
   4778       assert(!"Unexpected comparison mode");
   4779       opcode0.opcodeType = VGPU10_OPCODE_EQ;
   4780    }
   4781 
   4782    begin_emit_instruction(emit);
   4783    emit_dword(emit, opcode0.value);
   4784    emit_dst_register(emit, dst);
   4785    if (swapSrc) {
   4786       emit_src_register(emit, src1);
   4787       emit_src_register(emit, src0);
   4788    }
   4789    else {
   4790       emit_src_register(emit, src0);
   4791       emit_src_register(emit, src1);
   4792    }
   4793    end_emit_instruction(emit);
   4794 }
   4795 
   4796 
   4797 /**
   4798  * Get texel/address offsets for a texture instruction.
   4799  */
   4800 static void
   4801 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
   4802                   const struct tgsi_full_instruction *inst, int offsets[3])
   4803 {
   4804    if (inst->Texture.NumOffsets == 1) {
   4805       /* According to OpenGL Shader Language spec the offsets are only
   4806        * fetched from a previously-declared immediate/literal.
   4807        */
   4808       const struct tgsi_texture_offset *off = inst->TexOffsets;
   4809       const unsigned index = off[0].Index;
   4810       const unsigned swizzleX = off[0].SwizzleX;
   4811       const unsigned swizzleY = off[0].SwizzleY;
   4812       const unsigned swizzleZ = off[0].SwizzleZ;
   4813       const union tgsi_immediate_data *imm = emit->immediates[index];
   4814 
   4815       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
   4816 
   4817       offsets[0] = imm[swizzleX].Int;
   4818       offsets[1] = imm[swizzleY].Int;
   4819       offsets[2] = imm[swizzleZ].Int;
   4820    }
   4821    else {
   4822       offsets[0] = offsets[1] = offsets[2] = 0;
   4823    }
   4824 }
   4825 
   4826 
   4827 /**
   4828  * Set up the coordinate register for texture sampling.
   4829  * When we're sampling from a RECT texture we have to scale the
   4830  * unnormalized coordinate to a normalized coordinate.
   4831  * We do that by multiplying the coordinate by an "extra" constant.
   4832  * An alternative would be to use the RESINFO instruction to query the
   4833  * texture's size.
   4834  */
   4835 static struct tgsi_full_src_register
   4836 setup_texcoord(struct svga_shader_emitter_v10 *emit,
   4837                unsigned unit,
   4838                const struct tgsi_full_src_register *coord)
   4839 {
   4840    if (emit->key.tex[unit].unnormalized) {
   4841       unsigned scale_index = emit->texcoord_scale_index[unit];
   4842       unsigned tmp = get_temp_index(emit);
   4843       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   4844       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   4845       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
   4846 
   4847       /* MUL tmp, coord, const[] */
   4848       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
   4849                            coord, &scale_src, FALSE);
   4850       return tmp_src;
   4851    }
   4852    else {
   4853       /* use texcoord as-is */
   4854       return *coord;
   4855    }
   4856 }
   4857 
   4858 
   4859 /**
   4860  * For SAMPLE_C instructions, emit the extra src register which indicates
   4861  * the reference/comparision value.
   4862  */
   4863 static void
   4864 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
   4865                           unsigned target,
   4866                           const struct tgsi_full_src_register *coord)
   4867 {
   4868    struct tgsi_full_src_register coord_src_ref;
   4869    unsigned component;
   4870 
   4871    assert(tgsi_is_shadow_target(target));
   4872 
   4873    assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
   4874    if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
   4875        target == TGSI_TEXTURE_SHADOWCUBE)
   4876       component = TGSI_SWIZZLE_W;
   4877    else
   4878       component = TGSI_SWIZZLE_Z;
   4879 
   4880    coord_src_ref = scalar_src(coord, component);
   4881 
   4882    emit_src_register(emit, &coord_src_ref);
   4883 }
   4884 
   4885 
   4886 /**
   4887  * Info for implementing texture swizzles.
   4888  * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
   4889  * functions use this to encapsulate the extra steps needed to perform
   4890  * a texture swizzle, or shadow/depth comparisons.
   4891  * The shadow/depth comparison is only done here if for the cases where
   4892  * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
   4893  */
   4894 struct tex_swizzle_info
   4895 {
   4896    boolean swizzled;
   4897    boolean shadow_compare;
   4898    unsigned unit;
   4899    unsigned texture_target;  /**< TGSI_TEXTURE_x */
   4900    struct tgsi_full_src_register tmp_src;
   4901    struct tgsi_full_dst_register tmp_dst;
   4902    const struct tgsi_full_dst_register *inst_dst;
   4903    const struct tgsi_full_src_register *coord_src;
   4904 };
   4905 
   4906 
   4907 /**
   4908  * Do setup for handling texture swizzles or shadow compares.
   4909  * \param unit  the texture unit
   4910  * \param inst  the TGSI texture instruction
   4911  * \param shadow_compare  do shadow/depth comparison?
   4912  * \param swz  returns the swizzle info
   4913  */
   4914 static void
   4915 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
   4916                   unsigned unit,
   4917                   const struct tgsi_full_instruction *inst,
   4918                   boolean shadow_compare,
   4919                   struct tex_swizzle_info *swz)
   4920 {
   4921    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
   4922                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
   4923                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
   4924                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
   4925 
   4926    swz->shadow_compare = shadow_compare;
   4927    swz->texture_target = inst->Texture.Texture;
   4928 
   4929    if (swz->swizzled || shadow_compare) {
   4930       /* Allocate temp register for the result of the SAMPLE instruction
   4931        * and the source of the MOV/compare/swizzle instructions.
   4932        */
   4933       unsigned tmp = get_temp_index(emit);
   4934       swz->tmp_src = make_src_temp_reg(tmp);
   4935       swz->tmp_dst = make_dst_temp_reg(tmp);
   4936 
   4937       swz->unit = unit;
   4938    }
   4939    swz->inst_dst = &inst->Dst[0];
   4940    swz->coord_src = &inst->Src[0];
   4941 }
   4942 
   4943 
   4944 /**
   4945  * Returns the register to put the SAMPLE instruction results into.
   4946  * This will either be the original instruction dst reg (if no swizzle
   4947  * and no shadow comparison) or a temporary reg if there is a swizzle.
   4948  */
   4949 static const struct tgsi_full_dst_register *
   4950 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
   4951 {
   4952    return (swz->swizzled || swz->shadow_compare)
   4953       ? &swz->tmp_dst : swz->inst_dst;
   4954 }
   4955 
   4956 
   4957 /**
   4958  * This emits the MOV instruction that actually implements a texture swizzle
   4959  * and/or shadow comparison.
   4960  */
   4961 static void
   4962 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
   4963                 const struct tex_swizzle_info *swz)
   4964 {
   4965    if (swz->shadow_compare) {
   4966       /* Emit extra instructions to compare the fetched texel value against
   4967        * a texture coordinate component.  The result of the comparison
   4968        * is 0.0 or 1.0.
   4969        */
   4970       struct tgsi_full_src_register coord_src;
   4971       struct tgsi_full_src_register texel_src =
   4972          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
   4973       struct tgsi_full_src_register one =
   4974          make_immediate_reg_float(emit, 1.0f);
   4975       /* convert gallium comparison func to SVGA comparison func */
   4976       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
   4977 
   4978       assert(emit->unit == PIPE_SHADER_FRAGMENT);
   4979 
   4980       switch (swz->texture_target) {
   4981       case TGSI_TEXTURE_SHADOW2D:
   4982       case TGSI_TEXTURE_SHADOWRECT:
   4983       case TGSI_TEXTURE_SHADOW1D_ARRAY:
   4984          coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
   4985          break;
   4986       case TGSI_TEXTURE_SHADOW1D:
   4987          coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
   4988          break;
   4989       case TGSI_TEXTURE_SHADOWCUBE:
   4990       case TGSI_TEXTURE_SHADOW2D_ARRAY:
   4991          coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
   4992          break;
   4993       default:
   4994          assert(!"Unexpected texture target in end_tex_swizzle()");
   4995          coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
   4996       }
   4997 
   4998       /* COMPARE tmp, coord, texel */
   4999       /* XXX it would seem that the texel and coord arguments should
   5000        * be transposed here, but piglit tests indicate otherwise.
   5001        */
   5002       emit_comparison(emit, compare_func,
   5003                       &swz->tmp_dst, &texel_src, &coord_src);
   5004 
   5005       /* AND dest, tmp, {1.0} */
   5006       begin_emit_instruction(emit);
   5007       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
   5008       if (swz->swizzled) {
   5009          emit_dst_register(emit, &swz->tmp_dst);
   5010       }
   5011       else {
   5012          emit_dst_register(emit, swz->inst_dst);
   5013       }
   5014       emit_src_register(emit, &swz->tmp_src);
   5015       emit_src_register(emit, &one);
   5016       end_emit_instruction(emit);
   5017    }
   5018 
   5019    if (swz->swizzled) {
   5020       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
   5021       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
   5022       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
   5023       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
   5024       unsigned writemask_0 = 0, writemask_1 = 0;
   5025       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
   5026 
   5027       /* Swizzle w/out zero/one terms */
   5028       struct tgsi_full_src_register src_swizzled =
   5029          swizzle_src(&swz->tmp_src,
   5030                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
   5031                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
   5032                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
   5033                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
   5034 
   5035       /* MOV dst, color(tmp).<swizzle> */
   5036       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   5037                            swz->inst_dst, &src_swizzled, FALSE);
   5038 
   5039       /* handle swizzle zero terms */
   5040       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
   5041                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
   5042                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
   5043                      ((swz_a == PIPE_SWIZZLE_0) << 3));
   5044 
   5045       if (writemask_0) {
   5046          struct tgsi_full_src_register zero = int_tex ?
   5047             make_immediate_reg_int(emit, 0) :
   5048             make_immediate_reg_float(emit, 0.0f);
   5049          struct tgsi_full_dst_register dst =
   5050             writemask_dst(swz->inst_dst, writemask_0);
   5051 
   5052          /* MOV dst.writemask_0, {0,0,0,0} */
   5053          emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   5054                               &dst, &zero, FALSE);
   5055       }
   5056 
   5057       /* handle swizzle one terms */
   5058       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
   5059                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
   5060                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
   5061                      ((swz_a == PIPE_SWIZZLE_1) << 3));
   5062 
   5063       if (writemask_1) {
   5064          struct tgsi_full_src_register one = int_tex ?
   5065             make_immediate_reg_int(emit, 1) :
   5066             make_immediate_reg_float(emit, 1.0f);
   5067          struct tgsi_full_dst_register dst =
   5068             writemask_dst(swz->inst_dst, writemask_1);
   5069 
   5070          /* MOV dst.writemask_1, {1,1,1,1} */
   5071          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
   5072       }
   5073    }
   5074 }
   5075 
   5076 
   5077 /**
   5078  * Emit code for TGSI_OPCODE_SAMPLE instruction.
   5079  */
   5080 static boolean
   5081 emit_sample(struct svga_shader_emitter_v10 *emit,
   5082             const struct tgsi_full_instruction *inst)
   5083 {
   5084    const unsigned resource_unit = inst->Src[1].Register.Index;
   5085    const unsigned sampler_unit = inst->Src[2].Register.Index;
   5086    struct tgsi_full_src_register coord;
   5087    int offsets[3];
   5088    struct tex_swizzle_info swz_info;
   5089 
   5090    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
   5091 
   5092    get_texel_offsets(emit, inst, offsets);
   5093 
   5094    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
   5095 
   5096    /* SAMPLE dst, coord(s0), resource, sampler */
   5097    begin_emit_instruction(emit);
   5098 
   5099    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
   5100                       inst->Instruction.Saturate, offsets);
   5101    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5102    emit_src_register(emit, &coord);
   5103    emit_resource_register(emit, resource_unit);
   5104    emit_sampler_register(emit, sampler_unit);
   5105    end_emit_instruction(emit);
   5106 
   5107    end_tex_swizzle(emit, &swz_info);
   5108 
   5109    free_temp_indexes(emit);
   5110 
   5111    return TRUE;
   5112 }
   5113 
   5114 
   5115 /**
   5116  * Check if a texture instruction is valid.
   5117  * An example of an invalid texture instruction is doing shadow comparison
   5118  * with an integer-valued texture.
   5119  * If we detect an invalid texture instruction, we replace it with:
   5120  *   MOV dst, {1,1,1,1};
   5121  * \return TRUE if valid, FALSE if invalid.
   5122  */
   5123 static boolean
   5124 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
   5125                          const struct tgsi_full_instruction *inst)
   5126 {
   5127    const unsigned unit = inst->Src[1].Register.Index;
   5128    const unsigned target = inst->Texture.Texture;
   5129    boolean valid = TRUE;
   5130 
   5131    if (tgsi_is_shadow_target(target) &&
   5132        is_integer_type(emit->sampler_return_type[unit])) {
   5133       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
   5134       valid = FALSE;
   5135    }
   5136    /* XXX might check for other conditions in the future here */
   5137 
   5138    if (!valid) {
   5139       /* emit a MOV dst, {1,1,1,1} instruction. */
   5140       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   5141       begin_emit_instruction(emit);
   5142       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   5143       emit_dst_register(emit, &inst->Dst[0]);
   5144       emit_src_register(emit, &one);
   5145       end_emit_instruction(emit);
   5146    }
   5147 
   5148    return valid;
   5149 }
   5150 
   5151 
   5152 /**
   5153  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
   5154  */
   5155 static boolean
   5156 emit_tex(struct svga_shader_emitter_v10 *emit,
   5157          const struct tgsi_full_instruction *inst)
   5158 {
   5159    const uint unit = inst->Src[1].Register.Index;
   5160    unsigned target = inst->Texture.Texture;
   5161    unsigned opcode;
   5162    struct tgsi_full_src_register coord;
   5163    int offsets[3];
   5164    struct tex_swizzle_info swz_info;
   5165 
   5166    /* check that the sampler returns a float */
   5167    if (!is_valid_tex_instruction(emit, inst))
   5168       return TRUE;
   5169 
   5170    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5171 
   5172    get_texel_offsets(emit, inst, offsets);
   5173 
   5174    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5175 
   5176    /* SAMPLE dst, coord(s0), resource, sampler */
   5177    begin_emit_instruction(emit);
   5178 
   5179    if (tgsi_is_shadow_target(target))
   5180       opcode = VGPU10_OPCODE_SAMPLE_C;
   5181    else
   5182       opcode = VGPU10_OPCODE_SAMPLE;
   5183 
   5184    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5185    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5186    emit_src_register(emit, &coord);
   5187    emit_resource_register(emit, unit);
   5188    emit_sampler_register(emit, unit);
   5189    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
   5190       emit_tex_compare_refcoord(emit, target, &coord);
   5191    }
   5192    end_emit_instruction(emit);
   5193 
   5194    end_tex_swizzle(emit, &swz_info);
   5195 
   5196    free_temp_indexes(emit);
   5197 
   5198    return TRUE;
   5199 }
   5200 
   5201 
   5202 /**
   5203  * Emit code for TGSI_OPCODE_TXP (projective texture)
   5204  */
   5205 static boolean
   5206 emit_txp(struct svga_shader_emitter_v10 *emit,
   5207          const struct tgsi_full_instruction *inst)
   5208 {
   5209    const uint unit = inst->Src[1].Register.Index;
   5210    unsigned target = inst->Texture.Texture;
   5211    unsigned opcode;
   5212    int offsets[3];
   5213    unsigned tmp = get_temp_index(emit);
   5214    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   5215    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   5216    struct tgsi_full_src_register src0_wwww =
   5217       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5218    struct tgsi_full_src_register coord;
   5219    struct tex_swizzle_info swz_info;
   5220 
   5221    /* check that the sampler returns a float */
   5222    if (!is_valid_tex_instruction(emit, inst))
   5223       return TRUE;
   5224 
   5225    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5226 
   5227    get_texel_offsets(emit, inst, offsets);
   5228 
   5229    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5230 
   5231    /* DIV tmp, coord, coord.wwww */
   5232    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
   5233                         &coord, &src0_wwww, FALSE);
   5234 
   5235    /* SAMPLE dst, coord(tmp), resource, sampler */
   5236    begin_emit_instruction(emit);
   5237 
   5238    if (tgsi_is_shadow_target(target))
   5239       opcode = VGPU10_OPCODE_SAMPLE_C;
   5240    else
   5241       opcode = VGPU10_OPCODE_SAMPLE;
   5242 
   5243    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5244    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5245    emit_src_register(emit, &tmp_src);  /* projected coord */
   5246    emit_resource_register(emit, unit);
   5247    emit_sampler_register(emit, unit);
   5248    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
   5249       emit_tex_compare_refcoord(emit, target, &tmp_src);
   5250    }
   5251    end_emit_instruction(emit);
   5252 
   5253    end_tex_swizzle(emit, &swz_info);
   5254 
   5255    free_temp_indexes(emit);
   5256 
   5257    return TRUE;
   5258 }
   5259 
   5260 
   5261 /*
   5262  * Emit code for TGSI_OPCODE_XPD instruction.
   5263  */
   5264 static boolean
   5265 emit_xpd(struct svga_shader_emitter_v10 *emit,
   5266          const struct tgsi_full_instruction *inst)
   5267 {
   5268    /* dst.x = src0.y * src1.z - src1.y * src0.z
   5269     * dst.y = src0.z * src1.x - src1.z * src0.x
   5270     * dst.z = src0.x * src1.y - src1.x * src0.y
   5271     * dst.w = 1
   5272     */
   5273    struct tgsi_full_src_register s0_xxxx =
   5274       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   5275    struct tgsi_full_src_register s0_yyyy =
   5276       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   5277    struct tgsi_full_src_register s0_zzzz =
   5278       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   5279 
   5280    struct tgsi_full_src_register s1_xxxx =
   5281       scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   5282    struct tgsi_full_src_register s1_yyyy =
   5283       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   5284    struct tgsi_full_src_register s1_zzzz =
   5285       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
   5286 
   5287    unsigned tmp1 = get_temp_index(emit);
   5288    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   5289    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
   5290 
   5291    unsigned tmp2 = get_temp_index(emit);
   5292    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   5293    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   5294    struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
   5295 
   5296    unsigned tmp3 = get_temp_index(emit);
   5297    struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
   5298    struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
   5299    struct tgsi_full_dst_register tmp3_dst_x =
   5300       writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
   5301    struct tgsi_full_dst_register tmp3_dst_y =
   5302       writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
   5303    struct tgsi_full_dst_register tmp3_dst_z =
   5304       writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
   5305    struct tgsi_full_dst_register tmp3_dst_w =
   5306       writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
   5307 
   5308    /* Note: we put all the intermediate computations into tmp3 in case
   5309     * the XPD dest register is that same as one of the src regs (in which
   5310     * case we could clobber a src reg before we're done with it) .
   5311     *
   5312     * Note: we could get by with just one temp register instead of three
   5313     * since we're doing scalar operations and there's enough room in one
   5314     * temp for everything.
   5315     */
   5316 
   5317    /* MUL tmp1, src0.y, src1.z */
   5318    /* MUL tmp2, src1.y, src0.z */
   5319    /* ADD tmp3.x, tmp1, -tmp2 */
   5320    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
   5321       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
   5322                            &s0_yyyy, &s1_zzzz, FALSE);
   5323       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
   5324                            &s1_yyyy, &s0_zzzz, FALSE);
   5325       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
   5326                            &tmp1_src, &neg_tmp2_src, FALSE);
   5327    }
   5328 
   5329    /* MUL tmp1, src0.z, src1.x */
   5330    /* MUL tmp2, src1.z, src0.x */
   5331    /* ADD tmp3.y, tmp1, -tmp2 */
   5332    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
   5333       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
   5334                            &s1_xxxx, FALSE);
   5335       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
   5336                            &s0_xxxx, FALSE);
   5337       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
   5338                            &tmp1_src, &neg_tmp2_src, FALSE);
   5339    }
   5340 
   5341    /* MUL tmp1, src0.x, src1.y */
   5342    /* MUL tmp2, src1.x, src0.y */
   5343    /* ADD tmp3.z, tmp1, -tmp2 */
   5344    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
   5345       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
   5346                            &s1_yyyy, FALSE);
   5347       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
   5348                            &s0_yyyy, FALSE);
   5349       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
   5350                            &tmp1_src, &neg_tmp2_src, FALSE);
   5351    }
   5352 
   5353    /* MOV tmp3.w, 1.0 */
   5354    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
   5355       struct tgsi_full_src_register one =
   5356          make_immediate_reg_float(emit, 1.0f);
   5357 
   5358       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
   5359    }
   5360 
   5361    /* MOV dst, tmp3 */
   5362    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
   5363                         inst->Instruction.Saturate);
   5364 
   5365 
   5366    free_temp_indexes(emit);
   5367 
   5368    return TRUE;
   5369 }
   5370 
   5371 
   5372 /**
   5373  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
   5374  */
   5375 static boolean
   5376 emit_txd(struct svga_shader_emitter_v10 *emit,
   5377          const struct tgsi_full_instruction *inst)
   5378 {
   5379    const uint unit = inst->Src[3].Register.Index;
   5380    unsigned target = inst->Texture.Texture;
   5381    int offsets[3];
   5382    struct tgsi_full_src_register coord;
   5383    struct tex_swizzle_info swz_info;
   5384 
   5385    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
   5386                      &swz_info);
   5387 
   5388    get_texel_offsets(emit, inst, offsets);
   5389 
   5390    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5391 
   5392    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   5393    begin_emit_instruction(emit);
   5394    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
   5395                       inst->Instruction.Saturate, offsets);
   5396    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5397    emit_src_register(emit, &coord);
   5398    emit_resource_register(emit, unit);
   5399    emit_sampler_register(emit, unit);
   5400    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   5401    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   5402    end_emit_instruction(emit);
   5403 
   5404    end_tex_swizzle(emit, &swz_info);
   5405 
   5406    free_temp_indexes(emit);
   5407 
   5408    return TRUE;
   5409 }
   5410 
   5411 
   5412 /**
   5413  * Emit code for TGSI_OPCODE_TXF (texel fetch)
   5414  */
   5415 static boolean
   5416 emit_txf(struct svga_shader_emitter_v10 *emit,
   5417          const struct tgsi_full_instruction *inst)
   5418 {
   5419    const uint unit = inst->Src[1].Register.Index;
   5420    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
   5421    int offsets[3];
   5422    struct tex_swizzle_info swz_info;
   5423 
   5424    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
   5425 
   5426    get_texel_offsets(emit, inst, offsets);
   5427 
   5428    if (msaa) {
   5429       /* Fetch one sample from an MSAA texture */
   5430       struct tgsi_full_src_register sampleIndex =
   5431          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5432       /* LD_MS dst, coord(s0), resource, sampleIndex */
   5433       begin_emit_instruction(emit);
   5434       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
   5435                          inst->Instruction.Saturate, offsets);
   5436       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5437       emit_src_register(emit, &inst->Src[0]);
   5438       emit_resource_register(emit, unit);
   5439       emit_src_register(emit, &sampleIndex);
   5440       end_emit_instruction(emit);
   5441    }
   5442    else {
   5443       /* Fetch one texel specified by integer coordinate */
   5444       /* LD dst, coord(s0), resource */
   5445       begin_emit_instruction(emit);
   5446       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
   5447                          inst->Instruction.Saturate, offsets);
   5448       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5449       emit_src_register(emit, &inst->Src[0]);
   5450       emit_resource_register(emit, unit);
   5451       end_emit_instruction(emit);
   5452    }
   5453 
   5454    end_tex_swizzle(emit, &swz_info);
   5455 
   5456    free_temp_indexes(emit);
   5457 
   5458    return TRUE;
   5459 }
   5460 
   5461 
   5462 /**
   5463  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
   5464  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
   5465  */
   5466 static boolean
   5467 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
   5468              const struct tgsi_full_instruction *inst)
   5469 {
   5470    unsigned target = inst->Texture.Texture;
   5471    unsigned opcode, unit;
   5472    int offsets[3];
   5473    struct tgsi_full_src_register coord, lod_bias;
   5474    struct tex_swizzle_info swz_info;
   5475 
   5476    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
   5477           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
   5478           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
   5479 
   5480    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
   5481       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   5482       unit = inst->Src[2].Register.Index;
   5483    }
   5484    else {
   5485       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   5486       unit = inst->Src[1].Register.Index;
   5487    }
   5488 
   5489    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
   5490                      &swz_info);
   5491 
   5492    get_texel_offsets(emit, inst, offsets);
   5493 
   5494    coord = setup_texcoord(emit, unit, &inst->Src[0]);
   5495 
   5496    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   5497    begin_emit_instruction(emit);
   5498    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
   5499       opcode = VGPU10_OPCODE_SAMPLE_L;
   5500    }
   5501    else {
   5502       opcode = VGPU10_OPCODE_SAMPLE_B;
   5503    }
   5504    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   5505    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   5506    emit_src_register(emit, &coord);
   5507    emit_resource_register(emit, unit);
   5508    emit_sampler_register(emit, unit);
   5509    emit_src_register(emit, &lod_bias);
   5510    end_emit_instruction(emit);
   5511 
   5512    end_tex_swizzle(emit, &swz_info);
   5513 
   5514    free_temp_indexes(emit);
   5515 
   5516    return TRUE;
   5517 }
   5518 
   5519 
   5520 /**
   5521  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
   5522  */
   5523 static boolean
   5524 emit_txq(struct svga_shader_emitter_v10 *emit,
   5525          const struct tgsi_full_instruction *inst)
   5526 {
   5527    const uint unit = inst->Src[1].Register.Index;
   5528 
   5529    if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
   5530       /* RESINFO does not support querying texture buffers, so we instead
   5531        * store texture buffer sizes in shader constants, then copy them to
   5532        * implement TXQ instead of emitting RESINFO.
   5533        * MOV dst, const[texture_buffer_size_index[unit]]
   5534        */
   5535       struct tgsi_full_src_register size_src =
   5536          make_src_const_reg(emit->texture_buffer_size_index[unit]);
   5537       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
   5538                            FALSE);
   5539    } else {
   5540       /* RESINFO dst, srcMipLevel, resource */
   5541       begin_emit_instruction(emit);
   5542       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
   5543       emit_dst_register(emit, &inst->Dst[0]);
   5544       emit_src_register(emit, &inst->Src[0]);
   5545       emit_resource_register(emit, unit);
   5546       end_emit_instruction(emit);
   5547    }
   5548 
   5549    free_temp_indexes(emit);
   5550 
   5551    return TRUE;
   5552 }
   5553 
   5554 
   5555 /**
   5556  * Emit a simple instruction (like ADD, MUL, MIN, etc).
   5557  */
   5558 static boolean
   5559 emit_simple(struct svga_shader_emitter_v10 *emit,
   5560             const struct tgsi_full_instruction *inst)
   5561 {
   5562    const unsigned opcode = inst->Instruction.Opcode;
   5563    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   5564    unsigned i;
   5565 
   5566    begin_emit_instruction(emit);
   5567    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
   5568                inst->Instruction.Saturate);
   5569    for (i = 0; i < op->num_dst; i++) {
   5570       emit_dst_register(emit, &inst->Dst[i]);
   5571    }
   5572    for (i = 0; i < op->num_src; i++) {
   5573       emit_src_register(emit, &inst->Src[i]);
   5574    }
   5575    end_emit_instruction(emit);
   5576 
   5577    return TRUE;
   5578 }
   5579 
   5580 
   5581 /**
   5582  * We only special case the MOV instruction to try to detect constant
   5583  * color writes in the fragment shader.
   5584  */
   5585 static boolean
   5586 emit_mov(struct svga_shader_emitter_v10 *emit,
   5587          const struct tgsi_full_instruction *inst)
   5588 {
   5589    const struct tgsi_full_src_register *src = &inst->Src[0];
   5590    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
   5591 
   5592    if (emit->unit == PIPE_SHADER_FRAGMENT &&
   5593        dst->Register.File == TGSI_FILE_OUTPUT &&
   5594        dst->Register.Index == 0 &&
   5595        src->Register.File == TGSI_FILE_CONSTANT &&
   5596        !src->Register.Indirect) {
   5597       emit->constant_color_output = TRUE;
   5598    }
   5599 
   5600    return emit_simple(emit, inst);
   5601 }
   5602 
   5603 
   5604 /**
   5605  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
   5606  * where TGSI only uses one dest register.
   5607  */
   5608 static boolean
   5609 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
   5610                  const struct tgsi_full_instruction *inst,
   5611                  unsigned dst_count,
   5612                  unsigned dst_index)
   5613 {
   5614    const unsigned opcode = inst->Instruction.Opcode;
   5615    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   5616    unsigned i;
   5617 
   5618    begin_emit_instruction(emit);
   5619    emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
   5620                inst->Instruction.Saturate);
   5621 
   5622    for (i = 0; i < dst_count; i++) {
   5623       if (i == dst_index) {
   5624          emit_dst_register(emit, &inst->Dst[0]);
   5625       } else {
   5626          emit_null_dst_register(emit);
   5627       }
   5628    }
   5629 
   5630    for (i = 0; i < op->num_src; i++) {
   5631       emit_src_register(emit, &inst->Src[i]);
   5632    }
   5633    end_emit_instruction(emit);
   5634 
   5635    return TRUE;
   5636 }
   5637 
   5638 
   5639 /**
   5640  * Translate a single TGSI instruction to VGPU10.
   5641  */
   5642 static boolean
   5643 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
   5644                         unsigned inst_number,
   5645                         const struct tgsi_full_instruction *inst)
   5646 {
   5647    const unsigned opcode = inst->Instruction.Opcode;
   5648 
   5649    switch (opcode) {
   5650    case TGSI_OPCODE_ADD:
   5651    case TGSI_OPCODE_AND:
   5652    case TGSI_OPCODE_BGNLOOP:
   5653    case TGSI_OPCODE_BRK:
   5654    case TGSI_OPCODE_CEIL:
   5655    case TGSI_OPCODE_CONT:
   5656    case TGSI_OPCODE_DDX:
   5657    case TGSI_OPCODE_DDY:
   5658    case TGSI_OPCODE_DIV:
   5659    case TGSI_OPCODE_DP2:
   5660    case TGSI_OPCODE_DP3:
   5661    case TGSI_OPCODE_DP4:
   5662    case TGSI_OPCODE_ELSE:
   5663    case TGSI_OPCODE_ENDIF:
   5664    case TGSI_OPCODE_ENDLOOP:
   5665    case TGSI_OPCODE_ENDSUB:
   5666    case TGSI_OPCODE_F2I:
   5667    case TGSI_OPCODE_F2U:
   5668    case TGSI_OPCODE_FLR:
   5669    case TGSI_OPCODE_FRC:
   5670    case TGSI_OPCODE_FSEQ:
   5671    case TGSI_OPCODE_FSGE:
   5672    case TGSI_OPCODE_FSLT:
   5673    case TGSI_OPCODE_FSNE:
   5674    case TGSI_OPCODE_I2F:
   5675    case TGSI_OPCODE_IMAX:
   5676    case TGSI_OPCODE_IMIN:
   5677    case TGSI_OPCODE_INEG:
   5678    case TGSI_OPCODE_ISGE:
   5679    case TGSI_OPCODE_ISHR:
   5680    case TGSI_OPCODE_ISLT:
   5681    case TGSI_OPCODE_MAD:
   5682    case TGSI_OPCODE_MAX:
   5683    case TGSI_OPCODE_MIN:
   5684    case TGSI_OPCODE_MUL:
   5685    case TGSI_OPCODE_NOP:
   5686    case TGSI_OPCODE_NOT:
   5687    case TGSI_OPCODE_OR:
   5688    case TGSI_OPCODE_RET:
   5689    case TGSI_OPCODE_UADD:
   5690    case TGSI_OPCODE_USEQ:
   5691    case TGSI_OPCODE_USGE:
   5692    case TGSI_OPCODE_USLT:
   5693    case TGSI_OPCODE_UMIN:
   5694    case TGSI_OPCODE_UMAD:
   5695    case TGSI_OPCODE_UMAX:
   5696    case TGSI_OPCODE_ROUND:
   5697    case TGSI_OPCODE_SQRT:
   5698    case TGSI_OPCODE_SHL:
   5699    case TGSI_OPCODE_TRUNC:
   5700    case TGSI_OPCODE_U2F:
   5701    case TGSI_OPCODE_UCMP:
   5702    case TGSI_OPCODE_USHR:
   5703    case TGSI_OPCODE_USNE:
   5704    case TGSI_OPCODE_XOR:
   5705       /* simple instructions */
   5706       return emit_simple(emit, inst);
   5707 
   5708    case TGSI_OPCODE_MOV:
   5709       return emit_mov(emit, inst);
   5710    case TGSI_OPCODE_EMIT:
   5711       return emit_vertex(emit, inst);
   5712    case TGSI_OPCODE_ENDPRIM:
   5713       return emit_endprim(emit, inst);
   5714    case TGSI_OPCODE_IABS:
   5715       return emit_iabs(emit, inst);
   5716    case TGSI_OPCODE_ARL:
   5717       /* fall-through */
   5718    case TGSI_OPCODE_UARL:
   5719       return emit_arl_uarl(emit, inst);
   5720    case TGSI_OPCODE_BGNSUB:
   5721       /* no-op */
   5722       return TRUE;
   5723    case TGSI_OPCODE_CAL:
   5724       return emit_cal(emit, inst);
   5725    case TGSI_OPCODE_CMP:
   5726       return emit_cmp(emit, inst);
   5727    case TGSI_OPCODE_COS:
   5728       return emit_sincos(emit, inst);
   5729    case TGSI_OPCODE_DP2A:
   5730       return emit_dp2a(emit, inst);
   5731    case TGSI_OPCODE_DPH:
   5732       return emit_dph(emit, inst);
   5733    case TGSI_OPCODE_DST:
   5734       return emit_dst(emit, inst);
   5735    case TGSI_OPCODE_EX2:
   5736       return emit_ex2(emit, inst);
   5737    case TGSI_OPCODE_EXP:
   5738       return emit_exp(emit, inst);
   5739    case TGSI_OPCODE_IF:
   5740       return emit_if(emit, inst);
   5741    case TGSI_OPCODE_KILL:
   5742       return emit_kill(emit, inst);
   5743    case TGSI_OPCODE_KILL_IF:
   5744       return emit_kill_if(emit, inst);
   5745    case TGSI_OPCODE_LG2:
   5746       return emit_lg2(emit, inst);
   5747    case TGSI_OPCODE_LIT:
   5748       return emit_lit(emit, inst);
   5749    case TGSI_OPCODE_LOG:
   5750       return emit_log(emit, inst);
   5751    case TGSI_OPCODE_LRP:
   5752       return emit_lrp(emit, inst);
   5753    case TGSI_OPCODE_POW:
   5754       return emit_pow(emit, inst);
   5755    case TGSI_OPCODE_RCP:
   5756       return emit_rcp(emit, inst);
   5757    case TGSI_OPCODE_RSQ:
   5758       return emit_rsq(emit, inst);
   5759    case TGSI_OPCODE_SAMPLE:
   5760       return emit_sample(emit, inst);
   5761    case TGSI_OPCODE_SCS:
   5762       return emit_scs(emit, inst);
   5763    case TGSI_OPCODE_SEQ:
   5764       return emit_seq(emit, inst);
   5765    case TGSI_OPCODE_SGE:
   5766       return emit_sge(emit, inst);
   5767    case TGSI_OPCODE_SGT:
   5768       return emit_sgt(emit, inst);
   5769    case TGSI_OPCODE_SIN:
   5770       return emit_sincos(emit, inst);
   5771    case TGSI_OPCODE_SLE:
   5772       return emit_sle(emit, inst);
   5773    case TGSI_OPCODE_SLT:
   5774       return emit_slt(emit, inst);
   5775    case TGSI_OPCODE_SNE:
   5776       return emit_sne(emit, inst);
   5777    case TGSI_OPCODE_SSG:
   5778       return emit_ssg(emit, inst);
   5779    case TGSI_OPCODE_ISSG:
   5780       return emit_issg(emit, inst);
   5781    case TGSI_OPCODE_TEX:
   5782       return emit_tex(emit, inst);
   5783    case TGSI_OPCODE_TXP:
   5784       return emit_txp(emit, inst);
   5785    case TGSI_OPCODE_TXB:
   5786    case TGSI_OPCODE_TXB2:
   5787    case TGSI_OPCODE_TXL:
   5788       return emit_txl_txb(emit, inst);
   5789    case TGSI_OPCODE_TXD:
   5790       return emit_txd(emit, inst);
   5791    case TGSI_OPCODE_TXF:
   5792       return emit_txf(emit, inst);
   5793    case TGSI_OPCODE_TXQ:
   5794       return emit_txq(emit, inst);
   5795    case TGSI_OPCODE_UIF:
   5796       return emit_if(emit, inst);
   5797    case TGSI_OPCODE_XPD:
   5798       return emit_xpd(emit, inst);
   5799    case TGSI_OPCODE_UMUL_HI:
   5800    case TGSI_OPCODE_IMUL_HI:
   5801    case TGSI_OPCODE_UDIV:
   5802    case TGSI_OPCODE_IDIV:
   5803       /* These cases use only the FIRST of two destination registers */
   5804       return emit_simple_1dst(emit, inst, 2, 0);
   5805    case TGSI_OPCODE_UMUL:
   5806    case TGSI_OPCODE_UMOD:
   5807    case TGSI_OPCODE_MOD:
   5808       /* These cases use only the SECOND of two destination registers */
   5809       return emit_simple_1dst(emit, inst, 2, 1);
   5810    case TGSI_OPCODE_END:
   5811       if (!emit_post_helpers(emit))
   5812          return FALSE;
   5813       return emit_simple(emit, inst);
   5814 
   5815    default:
   5816       debug_printf("Unimplemented tgsi instruction %s\n",
   5817                    tgsi_get_opcode_name(opcode));
   5818       return FALSE;
   5819    }
   5820 
   5821    return TRUE;
   5822 }
   5823 
   5824 
   5825 /**
   5826  * Emit the extra instructions to adjust the vertex position.
   5827  * There are two possible adjustments:
   5828  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
   5829  *    "prescale" and "pretranslate" values.
   5830  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
   5831  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
   5832  */
   5833 static void
   5834 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
   5835                        unsigned vs_pos_tmp_index)
   5836 {
   5837    struct tgsi_full_src_register tmp_pos_src;
   5838    struct tgsi_full_dst_register pos_dst;
   5839 
   5840    /* Don't bother to emit any extra vertex instructions if vertex position is
   5841     * not written out
   5842     */
   5843    if (emit->vposition.out_index == INVALID_INDEX)
   5844       return;
   5845 
   5846    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
   5847    pos_dst = make_dst_output_reg(emit->vposition.out_index);
   5848 
   5849    /* If non-adjusted vertex position register index
   5850     * is valid, copy the vertex position from the temporary
   5851     * vertex position register before it is modified by the
   5852     * prescale computation.
   5853     */
   5854    if (emit->vposition.so_index != INVALID_INDEX) {
   5855       struct tgsi_full_dst_register pos_so_dst =
   5856          make_dst_output_reg(emit->vposition.so_index);
   5857 
   5858       /* MOV pos_so, tmp_pos */
   5859       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
   5860                            &tmp_pos_src, FALSE);
   5861    }
   5862 
   5863    if (emit->vposition.need_prescale) {
   5864       /* This code adjusts the vertex position to match the VGPU10 convention.
   5865        * If p is the position computed by the shader (usually by applying the
   5866        * modelview and projection matrices), the new position q is computed by:
   5867        *
   5868        * q.x = p.w * trans.x + p.x * scale.x
   5869        * q.y = p.w * trans.y + p.y * scale.y
   5870        * q.z = p.w * trans.z + p.z * scale.z;
   5871        * q.w = p.w * trans.w + p.w;
   5872        */
   5873       struct tgsi_full_src_register tmp_pos_src_w =
   5874          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
   5875       struct tgsi_full_dst_register tmp_pos_dst =
   5876          make_dst_temp_reg(vs_pos_tmp_index);
   5877       struct tgsi_full_dst_register tmp_pos_dst_xyz =
   5878          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
   5879 
   5880       struct tgsi_full_src_register prescale_scale =
   5881          make_src_const_reg(emit->vposition.prescale_scale_index);
   5882       struct tgsi_full_src_register prescale_trans =
   5883          make_src_const_reg(emit->vposition.prescale_trans_index);
   5884 
   5885       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
   5886       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
   5887                            &tmp_pos_src, &prescale_scale, FALSE);
   5888 
   5889       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
   5890       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
   5891                            &prescale_trans, &tmp_pos_src, FALSE);
   5892    }
   5893    else if (emit->key.vs.undo_viewport) {
   5894       /* This code computes the final vertex position from the temporary
   5895        * vertex position by undoing the viewport transformation and the
   5896        * divide-by-W operation (we convert window coords back to clip coords).
   5897        * This is needed when we use the 'draw' module for fallbacks.
   5898        * If p is the temp pos in window coords, then the NDC coord q is:
   5899        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
   5900        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
   5901        *   q.z = p.z * p.w
   5902        *   q.w = p.w
   5903        * CONST[vs_viewport_index] contains:
   5904        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
   5905        */
   5906       struct tgsi_full_dst_register tmp_pos_dst =
   5907          make_dst_temp_reg(vs_pos_tmp_index);
   5908       struct tgsi_full_dst_register tmp_pos_dst_xy =
   5909          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
   5910       struct tgsi_full_src_register tmp_pos_src_wwww =
   5911          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
   5912 
   5913       struct tgsi_full_dst_register pos_dst_xyz =
   5914          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
   5915       struct tgsi_full_dst_register pos_dst_w =
   5916          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
   5917 
   5918       struct tgsi_full_src_register vp_xyzw =
   5919          make_src_const_reg(emit->vs.viewport_index);
   5920       struct tgsi_full_src_register vp_zwww =
   5921          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
   5922                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
   5923 
   5924       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
   5925       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
   5926                            &tmp_pos_src, &vp_zwww, FALSE);
   5927 
   5928       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
   5929       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
   5930                            &tmp_pos_src, &vp_xyzw, FALSE);
   5931 
   5932       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
   5933       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
   5934                            &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
   5935 
   5936       /* MOV pos.w, tmp_pos.w */
   5937       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
   5938                            &tmp_pos_src, FALSE);
   5939    }
   5940    else if (vs_pos_tmp_index != INVALID_INDEX) {
   5941       /* This code is to handle the case where the temporary vertex
   5942        * position register is created when the vertex shader has stream
   5943        * output and prescale is disabled because rasterization is to be
   5944        * discarded.
   5945        */
   5946       struct tgsi_full_dst_register pos_dst =
   5947          make_dst_output_reg(emit->vposition.out_index);
   5948 
   5949       /* MOV pos, tmp_pos */
   5950       begin_emit_instruction(emit);
   5951       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   5952       emit_dst_register(emit, &pos_dst);
   5953       emit_src_register(emit, &tmp_pos_src);
   5954       end_emit_instruction(emit);
   5955    }
   5956 }
   5957 
   5958 static void
   5959 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
   5960 {
   5961    if (emit->clip_mode == CLIP_DISTANCE) {
   5962       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
   5963       emit_clip_distance_instructions(emit);
   5964 
   5965    } else if (emit->clip_mode == CLIP_VERTEX) {
   5966       /* Convert TGSI CLIPVERTEX to CLIPDIST */
   5967       emit_clip_vertex_instructions(emit);
   5968    }
   5969 
   5970    /**
   5971     * Emit vertex position and take care of legacy user planes only if
   5972     * there is a valid vertex position register index.
   5973     * This is to take care of the case
   5974     * where the shader doesn't output vertex position. Then in
   5975     * this case, don't bother to emit more vertex instructions.
   5976     */
   5977    if (emit->vposition.out_index == INVALID_INDEX)
   5978       return;
   5979 
   5980    /**
   5981     * Emit per-vertex clipping instructions for legacy user defined clip planes.
   5982     * NOTE: we must emit the clip distance instructions before the
   5983     * emit_vpos_instructions() call since the later function will change
   5984     * the TEMP[vs_pos_tmp_index] value.
   5985     */
   5986    if (emit->clip_mode == CLIP_LEGACY) {
   5987       /* Emit CLIPDIST for legacy user defined clip planes */
   5988       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
   5989    }
   5990 }
   5991 
   5992 
   5993 /**
   5994  * Emit extra per-vertex instructions.  This includes clip-coordinate
   5995  * space conversion and computing clip distances.  This is called for
   5996  * each GS emit-vertex instruction and at the end of VS translation.
   5997  */
   5998 static void
   5999 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
   6000 {
   6001    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
   6002 
   6003    /* Emit clipping instructions based on clipping mode */
   6004    emit_clipping_instructions(emit);
   6005 
   6006    /**
   6007     * Reset the temporary vertex position register index
   6008     * so that emit_dst_register() will use the real vertex position output
   6009     */
   6010    emit->vposition.tmp_index = INVALID_INDEX;
   6011 
   6012    /* Emit vertex position instructions */
   6013    emit_vpos_instructions(emit, vs_pos_tmp_index);
   6014 
   6015    /* Restore original vposition.tmp_index value for the next GS vertex.
   6016     * It doesn't matter for VS.
   6017     */
   6018    emit->vposition.tmp_index = vs_pos_tmp_index;
   6019 }
   6020 
   6021 /**
   6022  * Translate the TGSI_OPCODE_EMIT GS instruction.
   6023  */
   6024 static boolean
   6025 emit_vertex(struct svga_shader_emitter_v10 *emit,
   6026             const struct tgsi_full_instruction *inst)
   6027 {
   6028    unsigned ret = TRUE;
   6029 
   6030    assert(emit->unit == PIPE_SHADER_GEOMETRY);
   6031 
   6032    emit_vertex_instructions(emit);
   6033 
   6034    /* We can't use emit_simple() because the TGSI instruction has one
   6035     * operand (vertex stream number) which we must ignore for VGPU10.
   6036     */
   6037    begin_emit_instruction(emit);
   6038    emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
   6039    end_emit_instruction(emit);
   6040 
   6041    return ret;
   6042 }
   6043 
   6044 
   6045 /**
   6046  * Emit the extra code to convert from VGPU10's boolean front-face
   6047  * register to TGSI's signed front-face register.
   6048  *
   6049  * TODO: Make temporary front-face register a scalar.
   6050  */
   6051 static void
   6052 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
   6053 {
   6054    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6055 
   6056    if (emit->fs.face_input_index != INVALID_INDEX) {
   6057       /* convert vgpu10 boolean face register to gallium +/-1 value */
   6058       struct tgsi_full_dst_register tmp_dst =
   6059          make_dst_temp_reg(emit->fs.face_tmp_index);
   6060       struct tgsi_full_src_register one =
   6061          make_immediate_reg_float(emit, 1.0f);
   6062       struct tgsi_full_src_register neg_one =
   6063          make_immediate_reg_float(emit, -1.0f);
   6064 
   6065       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
   6066       begin_emit_instruction(emit);
   6067       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
   6068       emit_dst_register(emit, &tmp_dst);
   6069       emit_face_register(emit);
   6070       emit_src_register(emit, &one);
   6071       emit_src_register(emit, &neg_one);
   6072       end_emit_instruction(emit);
   6073    }
   6074 }
   6075 
   6076 
   6077 /**
   6078  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
   6079  */
   6080 static void
   6081 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
   6082 {
   6083    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6084 
   6085    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
   6086       struct tgsi_full_dst_register tmp_dst =
   6087          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
   6088       struct tgsi_full_dst_register tmp_dst_xyz =
   6089          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
   6090       struct tgsi_full_dst_register tmp_dst_w =
   6091          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
   6092       struct tgsi_full_src_register one =
   6093          make_immediate_reg_float(emit, 1.0f);
   6094       struct tgsi_full_src_register fragcoord =
   6095          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
   6096 
   6097       /* save the input index */
   6098       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
   6099       /* set to invalid to prevent substitution in emit_src_register() */
   6100       emit->fs.fragcoord_input_index = INVALID_INDEX;
   6101 
   6102       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
   6103       begin_emit_instruction(emit);
   6104       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   6105       emit_dst_register(emit, &tmp_dst_xyz);
   6106       emit_src_register(emit, &fragcoord);
   6107       end_emit_instruction(emit);
   6108 
   6109       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
   6110       begin_emit_instruction(emit);
   6111       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
   6112       emit_dst_register(emit, &tmp_dst_w);
   6113       emit_src_register(emit, &one);
   6114       emit_src_register(emit, &fragcoord);
   6115       end_emit_instruction(emit);
   6116 
   6117       /* restore saved value */
   6118       emit->fs.fragcoord_input_index = fragcoord_input_index;
   6119    }
   6120 }
   6121 
   6122 
   6123 /**
   6124  * Emit extra instructions to adjust VS inputs/attributes.  This can
   6125  * mean casting a vertex attribute from int to float or setting the
   6126  * W component to 1, or both.
   6127  */
   6128 static void
   6129 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
   6130 {
   6131    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   6132    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   6133    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   6134    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   6135    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   6136    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   6137    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
   6138 
   6139    unsigned adjust_mask = (save_w_1_mask |
   6140                            save_itof_mask |
   6141                            save_utof_mask |
   6142                            save_is_bgra_mask |
   6143                            save_puint_to_snorm_mask |
   6144                            save_puint_to_uscaled_mask |
   6145                            save_puint_to_sscaled_mask);
   6146 
   6147    assert(emit->unit == PIPE_SHADER_VERTEX);
   6148 
   6149    if (adjust_mask) {
   6150       struct tgsi_full_src_register one =
   6151          make_immediate_reg_float(emit, 1.0f);
   6152 
   6153       struct tgsi_full_src_register one_int =
   6154          make_immediate_reg_int(emit, 1);
   6155 
   6156       /* We need to turn off these bitmasks while emitting the
   6157        * instructions below, then restore them afterward.
   6158        */
   6159       emit->key.vs.adjust_attrib_w_1 = 0;
   6160       emit->key.vs.adjust_attrib_itof = 0;
   6161       emit->key.vs.adjust_attrib_utof = 0;
   6162       emit->key.vs.attrib_is_bgra = 0;
   6163       emit->key.vs.attrib_puint_to_snorm = 0;
   6164       emit->key.vs.attrib_puint_to_uscaled = 0;
   6165       emit->key.vs.attrib_puint_to_sscaled = 0;
   6166 
   6167       while (adjust_mask) {
   6168          unsigned index = u_bit_scan(&adjust_mask);
   6169 
   6170          /* skip the instruction if this vertex attribute is not being used */
   6171          if (emit->info.input_usage_mask[index] == 0)
   6172             continue;
   6173 
   6174          unsigned tmp = emit->vs.adjusted_input[index];
   6175          struct tgsi_full_src_register input_src =
   6176             make_src_reg(TGSI_FILE_INPUT, index);
   6177 
   6178          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   6179          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   6180          struct tgsi_full_dst_register tmp_dst_w =
   6181             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
   6182 
   6183          /* ITOF/UTOF/MOV tmp, input[index] */
   6184          if (save_itof_mask & (1 << index)) {
   6185             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
   6186                                  &tmp_dst, &input_src, FALSE);
   6187          }
   6188          else if (save_utof_mask & (1 << index)) {
   6189             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
   6190                                  &tmp_dst, &input_src, FALSE);
   6191          }
   6192          else if (save_puint_to_snorm_mask & (1 << index)) {
   6193             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
   6194          }
   6195          else if (save_puint_to_uscaled_mask & (1 << index)) {
   6196             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
   6197          }
   6198          else if (save_puint_to_sscaled_mask & (1 << index)) {
   6199             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
   6200          }
   6201          else {
   6202             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
   6203             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   6204                                  &tmp_dst, &input_src, FALSE);
   6205          }
   6206 
   6207          if (save_is_bgra_mask & (1 << index)) {
   6208             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
   6209          }
   6210 
   6211          if (save_w_1_mask & (1 << index)) {
   6212             /* MOV tmp.w, 1.0 */
   6213             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
   6214                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   6215                                     &tmp_dst_w, &one_int, FALSE);
   6216             }
   6217             else {
   6218                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
   6219                                     &tmp_dst_w, &one, FALSE);
   6220             }
   6221          }
   6222       }
   6223 
   6224       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
   6225       emit->key.vs.adjust_attrib_itof = save_itof_mask;
   6226       emit->key.vs.adjust_attrib_utof = save_utof_mask;
   6227       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
   6228       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
   6229       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
   6230       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   6231    }
   6232 }
   6233 
   6234 
   6235 /**
   6236  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
   6237  * to implement some instructions.  We pre-allocate those values here
   6238  * in the immediate constant buffer.
   6239  */
   6240 static void
   6241 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
   6242 {
   6243    unsigned n = 0;
   6244 
   6245    emit->common_immediate_pos[n++] =
   6246       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
   6247 
   6248    emit->common_immediate_pos[n++] =
   6249       alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
   6250 
   6251    emit->common_immediate_pos[n++] =
   6252       alloc_immediate_int4(emit, 0, 1, 0, -1);
   6253 
   6254    if (emit->key.vs.attrib_puint_to_snorm) {
   6255       emit->common_immediate_pos[n++] =
   6256          alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   6257    }
   6258 
   6259    if (emit->key.vs.attrib_puint_to_uscaled) {
   6260       emit->common_immediate_pos[n++] =
   6261          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   6262    }
   6263 
   6264    if (emit->key.vs.attrib_puint_to_sscaled) {
   6265       emit->common_immediate_pos[n++] =
   6266          alloc_immediate_int4(emit, 22, 12, 2, 0);
   6267 
   6268       emit->common_immediate_pos[n++] =
   6269          alloc_immediate_int4(emit, 22, 30, 0, 0);
   6270    }
   6271 
   6272    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
   6273    emit->num_common_immediates = n;
   6274 }
   6275 
   6276 
   6277 /**
   6278  * Emit any extra/helper declarations/code that we might need between
   6279  * the declaration section and code section.
   6280  */
   6281 static boolean
   6282 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
   6283 {
   6284    /* Properties */
   6285    if (emit->unit == PIPE_SHADER_GEOMETRY)
   6286       emit_property_instructions(emit);
   6287 
   6288    /* Declare inputs */
   6289    if (!emit_input_declarations(emit))
   6290       return FALSE;
   6291 
   6292    /* Declare outputs */
   6293    if (!emit_output_declarations(emit))
   6294       return FALSE;
   6295 
   6296    /* Declare temporary registers */
   6297    emit_temporaries_declaration(emit);
   6298 
   6299    /* Declare constant registers */
   6300    emit_constant_declaration(emit);
   6301 
   6302    /* Declare samplers and resources */
   6303    emit_sampler_declarations(emit);
   6304    emit_resource_declarations(emit);
   6305 
   6306    /* Declare clip distance output registers */
   6307    if (emit->unit == PIPE_SHADER_VERTEX ||
   6308        emit->unit == PIPE_SHADER_GEOMETRY) {
   6309       emit_clip_distance_declarations(emit);
   6310    }
   6311 
   6312    alloc_common_immediates(emit);
   6313 
   6314    if (emit->unit == PIPE_SHADER_FRAGMENT &&
   6315        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
   6316       float alpha = emit->key.fs.alpha_ref;
   6317       emit->fs.alpha_ref_index =
   6318          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
   6319    }
   6320 
   6321    /* Now, emit the constant block containing all the immediates
   6322     * declared by shader, as well as the extra ones seen above.
   6323     */
   6324    emit_vgpu10_immediates_block(emit);
   6325 
   6326    if (emit->unit == PIPE_SHADER_FRAGMENT) {
   6327       emit_frontface_instructions(emit);
   6328       emit_fragcoord_instructions(emit);
   6329    }
   6330    else if (emit->unit == PIPE_SHADER_VERTEX) {
   6331       emit_vertex_attrib_instructions(emit);
   6332    }
   6333 
   6334    return TRUE;
   6335 }
   6336 
   6337 
   6338 /**
   6339  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
   6340  * against the alpha reference value and discards the fragment if the
   6341  * comparison fails.
   6342  */
   6343 static void
   6344 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
   6345                              unsigned fs_color_tmp_index)
   6346 {
   6347    /* compare output color's alpha to alpha ref and kill */
   6348    unsigned tmp = get_temp_index(emit);
   6349    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   6350    struct tgsi_full_src_register tmp_src_x =
   6351       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   6352    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   6353    struct tgsi_full_src_register color_src =
   6354       make_src_temp_reg(fs_color_tmp_index);
   6355    struct tgsi_full_src_register color_src_w =
   6356       scalar_src(&color_src, TGSI_SWIZZLE_W);
   6357    struct tgsi_full_src_register ref_src =
   6358       make_src_immediate_reg(emit->fs.alpha_ref_index);
   6359    struct tgsi_full_dst_register color_dst =
   6360       make_dst_output_reg(emit->fs.color_out_index[0]);
   6361 
   6362    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6363 
   6364    /* dst = src0 'alpha_func' src1 */
   6365    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
   6366                    &color_src_w, &ref_src);
   6367 
   6368    /* DISCARD if dst.x == 0 */
   6369    begin_emit_instruction(emit);
   6370    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
   6371    emit_src_register(emit, &tmp_src_x);
   6372    end_emit_instruction(emit);
   6373 
   6374    /* If we don't need to broadcast the color below or set fragments to
   6375     * white, emit final color here.
   6376     */
   6377    if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
   6378        !emit->key.fs.white_fragments) {
   6379       /* MOV output.color, tempcolor */
   6380       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
   6381                            &color_src, FALSE);     /* XXX saturate? */
   6382    }
   6383 
   6384    free_temp_indexes(emit);
   6385 }
   6386 
   6387 
   6388 /**
   6389  * When we need to emit white for all fragments (for emulating XOR logicop
   6390  * mode), this function copies white into the temporary color output register.
   6391  */
   6392 static void
   6393 emit_set_color_white(struct svga_shader_emitter_v10 *emit,
   6394                      unsigned fs_color_tmp_index)
   6395 {
   6396    struct tgsi_full_dst_register color_dst =
   6397       make_dst_temp_reg(fs_color_tmp_index);
   6398    struct tgsi_full_src_register white =
   6399       make_immediate_reg_float(emit, 1.0f);
   6400 
   6401    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
   6402 }
   6403 
   6404 
   6405 /**
   6406  * Emit instructions for writing a single color output to multiple
   6407  * color buffers.
   6408  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
   6409  * when key.fs.white_fragments is true).
   6410  * property is set and the number of render targets is greater than one.
   6411  * \param fs_color_tmp_index  index of the temp register that holds the
   6412  *                            color to broadcast.
   6413  */
   6414 static void
   6415 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
   6416                                  unsigned fs_color_tmp_index)
   6417 {
   6418    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   6419    unsigned i;
   6420    struct tgsi_full_src_register color_src =
   6421       make_src_temp_reg(fs_color_tmp_index);
   6422 
   6423    assert(emit->unit == PIPE_SHADER_FRAGMENT);
   6424 
   6425    for (i = 0; i < n; i++) {
   6426       unsigned output_reg = emit->fs.color_out_index[i];
   6427       struct tgsi_full_dst_register color_dst =
   6428          make_dst_output_reg(output_reg);
   6429 
   6430       /* Fill in this semantic here since we'll use it later in
   6431        * emit_dst_register().
   6432        */
   6433       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
   6434 
   6435       /* MOV output.color[i], tempcolor */
   6436       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
   6437                            &color_src, FALSE);     /* XXX saturate? */
   6438    }
   6439 }
   6440 
   6441 
   6442 /**
   6443  * Emit extra helper code after the original shader code, but before the
   6444  * last END/RET instruction.
   6445  * For vertex shaders this means emitting the extra code to apply the
   6446  * prescale scale/translation.
   6447  */
   6448 static boolean
   6449 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
   6450 {
   6451    if (emit->unit == PIPE_SHADER_VERTEX) {
   6452       emit_vertex_instructions(emit);
   6453    }
   6454    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
   6455       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
   6456 
   6457       /* We no longer want emit_dst_register() to substitute the
   6458        * temporary fragment color register for the real color output.
   6459        */
   6460       emit->fs.color_tmp_index = INVALID_INDEX;
   6461 
   6462       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
   6463          emit_alpha_test_instructions(emit, fs_color_tmp_index);
   6464       }
   6465       if (emit->key.fs.white_fragments) {
   6466          emit_set_color_white(emit, fs_color_tmp_index);
   6467       }
   6468       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
   6469           emit->key.fs.white_fragments) {
   6470          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
   6471       }
   6472    }
   6473 
   6474    return TRUE;
   6475 }
   6476 
   6477 
   6478 /**
   6479  * Translate the TGSI tokens into VGPU10 tokens.
   6480  */
   6481 static boolean
   6482 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
   6483                          const struct tgsi_token *tokens)
   6484 {
   6485    struct tgsi_parse_context parse;
   6486    boolean ret = TRUE;
   6487    boolean pre_helpers_emitted = FALSE;
   6488    unsigned inst_number = 0;
   6489 
   6490    tgsi_parse_init(&parse, tokens);
   6491 
   6492    while (!tgsi_parse_end_of_tokens(&parse)) {
   6493       tgsi_parse_token(&parse);
   6494 
   6495       switch (parse.FullToken.Token.Type) {
   6496       case TGSI_TOKEN_TYPE_IMMEDIATE:
   6497          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
   6498          if (!ret)
   6499             goto done;
   6500          break;
   6501 
   6502       case TGSI_TOKEN_TYPE_DECLARATION:
   6503          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
   6504          if (!ret)
   6505             goto done;
   6506          break;
   6507 
   6508       case TGSI_TOKEN_TYPE_INSTRUCTION:
   6509          if (!pre_helpers_emitted) {
   6510             ret = emit_pre_helpers(emit);
   6511             if (!ret)
   6512                goto done;
   6513             pre_helpers_emitted = TRUE;
   6514          }
   6515          ret = emit_vgpu10_instruction(emit, inst_number++,
   6516                                        &parse.FullToken.FullInstruction);
   6517          if (!ret)
   6518             goto done;
   6519          break;
   6520 
   6521       case TGSI_TOKEN_TYPE_PROPERTY:
   6522          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
   6523          if (!ret)
   6524             goto done;
   6525          break;
   6526 
   6527       default:
   6528          break;
   6529       }
   6530    }
   6531 
   6532 done:
   6533    tgsi_parse_free(&parse);
   6534    return ret;
   6535 }
   6536 
   6537 
   6538 /**
   6539  * Emit the first VGPU10 shader tokens.
   6540  */
   6541 static boolean
   6542 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
   6543 {
   6544    VGPU10ProgramToken ptoken;
   6545 
   6546    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
   6547    ptoken.majorVersion = 4;
   6548    ptoken.minorVersion = 0;
   6549    ptoken.programType = translate_shader_type(emit->unit);
   6550    if (!emit_dword(emit, ptoken.value))
   6551       return FALSE;
   6552 
   6553    /* Second token: total length of shader, in tokens.  We can't fill this
   6554     * in until we're all done.  Emit zero for now.
   6555     */
   6556    return emit_dword(emit, 0);
   6557 }
   6558 
   6559 
   6560 static boolean
   6561 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
   6562 {
   6563    VGPU10ProgramToken *tokens;
   6564 
   6565    /* Replace the second token with total shader length */
   6566    tokens = (VGPU10ProgramToken *) emit->buf;
   6567    tokens[1].value = emit_get_num_tokens(emit);
   6568 
   6569    return TRUE;
   6570 }
   6571 
   6572 
   6573 /**
   6574  * Modify the FS to read the BCOLORs and use the FACE register
   6575  * to choose between the front/back colors.
   6576  */
   6577 static const struct tgsi_token *
   6578 transform_fs_twoside(const struct tgsi_token *tokens)
   6579 {
   6580    if (0) {
   6581       debug_printf("Before tgsi_add_two_side ------------------\n");
   6582       tgsi_dump(tokens,0);
   6583    }
   6584    tokens = tgsi_add_two_side(tokens);
   6585    if (0) {
   6586       debug_printf("After tgsi_add_two_side ------------------\n");
   6587       tgsi_dump(tokens, 0);
   6588    }
   6589    return tokens;
   6590 }
   6591 
   6592 
   6593 /**
   6594  * Modify the FS to do polygon stipple.
   6595  */
   6596 static const struct tgsi_token *
   6597 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
   6598                       const struct tgsi_token *tokens)
   6599 {
   6600    const struct tgsi_token *new_tokens;
   6601    unsigned unit;
   6602 
   6603    if (0) {
   6604       debug_printf("Before pstipple ------------------\n");
   6605       tgsi_dump(tokens,0);
   6606    }
   6607 
   6608    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
   6609                                                      TGSI_FILE_INPUT);
   6610 
   6611    emit->fs.pstipple_sampler_unit = unit;
   6612 
   6613    /* Setup texture state for stipple */
   6614    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   6615    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   6616    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   6617    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   6618    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
   6619 
   6620    if (0) {
   6621       debug_printf("After pstipple ------------------\n");
   6622       tgsi_dump(new_tokens, 0);
   6623    }
   6624 
   6625    return new_tokens;
   6626 }
   6627 
   6628 /**
   6629  * Modify the FS to support anti-aliasing point.
   6630  */
   6631 static const struct tgsi_token *
   6632 transform_fs_aapoint(const struct tgsi_token *tokens,
   6633                      int aa_coord_index)
   6634 {
   6635    if (0) {
   6636       debug_printf("Before tgsi_add_aa_point ------------------\n");
   6637       tgsi_dump(tokens,0);
   6638    }
   6639    tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   6640    if (0) {
   6641       debug_printf("After tgsi_add_aa_point ------------------\n");
   6642       tgsi_dump(tokens, 0);
   6643    }
   6644    return tokens;
   6645 }
   6646 
   6647 /**
   6648  * This is the main entrypoint for the TGSI -> VPGU10 translator.
   6649  */
   6650 struct svga_shader_variant *
   6651 svga_tgsi_vgpu10_translate(struct svga_context *svga,
   6652                            const struct svga_shader *shader,
   6653                            const struct svga_compile_key *key,
   6654                            unsigned unit)
   6655 {
   6656    struct svga_shader_variant *variant = NULL;
   6657    struct svga_shader_emitter_v10 *emit;
   6658    const struct tgsi_token *tokens = shader->tokens;
   6659    struct svga_vertex_shader *vs = svga->curr.vs;
   6660    struct svga_geometry_shader *gs = svga->curr.gs;
   6661 
   6662    assert(unit == PIPE_SHADER_VERTEX ||
   6663           unit == PIPE_SHADER_GEOMETRY ||
   6664           unit == PIPE_SHADER_FRAGMENT);
   6665 
   6666    /* These two flags cannot be used together */
   6667    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
   6668 
   6669    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
   6670    /*
   6671     * Setup the code emitter
   6672     */
   6673    emit = alloc_emitter();
   6674    if (!emit)
   6675       goto done;
   6676 
   6677    emit->unit = unit;
   6678    emit->key = *key;
   6679 
   6680    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
   6681                                    emit->key.gs.need_prescale);
   6682    emit->vposition.tmp_index = INVALID_INDEX;
   6683    emit->vposition.so_index = INVALID_INDEX;
   6684    emit->vposition.out_index = INVALID_INDEX;
   6685 
   6686    emit->fs.color_tmp_index = INVALID_INDEX;
   6687    emit->fs.face_input_index = INVALID_INDEX;
   6688    emit->fs.fragcoord_input_index = INVALID_INDEX;
   6689 
   6690    emit->gs.prim_id_index = INVALID_INDEX;
   6691 
   6692    emit->clip_dist_out_index = INVALID_INDEX;
   6693    emit->clip_dist_tmp_index = INVALID_INDEX;
   6694    emit->clip_dist_so_index = INVALID_INDEX;
   6695    emit->clip_vertex_out_index = INVALID_INDEX;
   6696 
   6697    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
   6698       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
   6699    }
   6700 
   6701    if (unit == PIPE_SHADER_FRAGMENT) {
   6702       if (key->fs.light_twoside) {
   6703          tokens = transform_fs_twoside(tokens);
   6704       }
   6705       if (key->fs.pstipple) {
   6706          const struct tgsi_token *new_tokens =
   6707             transform_fs_pstipple(emit, tokens);
   6708          if (tokens != shader->tokens) {
   6709             /* free the two-sided shader tokens */
   6710             tgsi_free_tokens(tokens);
   6711          }
   6712          tokens = new_tokens;
   6713       }
   6714       if (key->fs.aa_point) {
   6715          tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
   6716       }
   6717    }
   6718 
   6719    if (SVGA_DEBUG & DEBUG_TGSI) {
   6720       debug_printf("#####################################\n");
   6721       debug_printf("### TGSI Shader %u\n", shader->id);
   6722       tgsi_dump(tokens, 0);
   6723    }
   6724 
   6725    /**
   6726     * Rescan the header if the token string is different from the one
   6727     * included in the shader; otherwise, the header info is already up-to-date
   6728     */
   6729    if (tokens != shader->tokens) {
   6730       tgsi_scan_shader(tokens, &emit->info);
   6731    } else {
   6732       emit->info = shader->info;
   6733    }
   6734 
   6735    emit->num_outputs = emit->info.num_outputs;
   6736 
   6737    if (unit == PIPE_SHADER_FRAGMENT) {
   6738       /* Compute FS input remapping to match the output from VS/GS */
   6739       if (gs) {
   6740          svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
   6741       } else {
   6742          assert(vs);
   6743          svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
   6744       }
   6745    } else if (unit == PIPE_SHADER_GEOMETRY) {
   6746       assert(vs);
   6747       svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
   6748    }
   6749 
   6750    determine_clipping_mode(emit);
   6751 
   6752    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
   6753       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
   6754          /* if there is stream output declarations associated
   6755           * with this shader or the shader writes to ClipDistance
   6756           * then reserve extra registers for the non-adjusted vertex position
   6757           * and the ClipDistance shadow copy
   6758           */
   6759          emit->vposition.so_index = emit->num_outputs++;
   6760 
   6761          if (emit->clip_mode == CLIP_DISTANCE) {
   6762             emit->clip_dist_so_index = emit->num_outputs++;
   6763             if (emit->info.num_written_clipdistance > 4)
   6764                emit->num_outputs++;
   6765          }
   6766       }
   6767    }
   6768 
   6769    /*
   6770     * Do actual shader translation.
   6771     */
   6772    if (!emit_vgpu10_header(emit)) {
   6773       debug_printf("svga: emit VGPU10 header failed\n");
   6774       goto cleanup;
   6775    }
   6776 
   6777    if (!emit_vgpu10_instructions(emit, tokens)) {
   6778       debug_printf("svga: emit VGPU10 instructions failed\n");
   6779       goto cleanup;
   6780    }
   6781 
   6782    if (!emit_vgpu10_tail(emit)) {
   6783       debug_printf("svga: emit VGPU10 tail failed\n");
   6784       goto cleanup;
   6785    }
   6786 
   6787    if (emit->register_overflow) {
   6788       goto cleanup;
   6789    }
   6790 
   6791    /*
   6792     * Create, initialize the 'variant' object.
   6793     */
   6794    variant = svga_new_shader_variant(svga);
   6795    if (!variant)
   6796       goto cleanup;
   6797 
   6798    variant->shader = shader;
   6799    variant->nr_tokens = emit_get_num_tokens(emit);
   6800    variant->tokens = (const unsigned *)emit->buf;
   6801    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
   6802    memcpy(&variant->key, key, sizeof(*key));
   6803    variant->id = UTIL_BITMASK_INVALID_INDEX;
   6804 
   6805    /* The extra constant starting offset starts with the number of
   6806     * shader constants declared in the shader.
   6807     */
   6808    variant->extra_const_start = emit->num_shader_consts[0];
   6809    if (key->gs.wide_point) {
   6810       /**
   6811        * The extra constant added in the transformed shader
   6812        * for inverse viewport scale is to be supplied by the driver.
   6813        * So the extra constant starting offset needs to be reduced by 1.
   6814        */
   6815       assert(variant->extra_const_start > 0);
   6816       variant->extra_const_start--;
   6817    }
   6818 
   6819    variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
   6820 
   6821    /* If there was exactly one write to a fragment shader output register
   6822     * and it came from a constant buffer, we know all fragments will have
   6823     * the same color (except for blending).
   6824     */
   6825    variant->constant_color_output =
   6826       emit->constant_color_output && emit->num_output_writes == 1;
   6827 
   6828    /** keep track in the variant if flat interpolation is used
   6829     *  for any of the varyings.
   6830     */
   6831    variant->uses_flat_interp = emit->uses_flat_interp;
   6832 
   6833    if (tokens != shader->tokens) {
   6834       tgsi_free_tokens(tokens);
   6835    }
   6836 
   6837 cleanup:
   6838    free_emitter(emit);
   6839 
   6840 done:
   6841    SVGA_STATS_TIME_POP(svga_sws(svga));
   6842    return variant;
   6843 }
   6844